Skip to content

Domain types

dve.metadata_parser.domain_types

Domain specific type definitions for use in validators.

ConFormattedDate

Bases: date

A date, provided as a date or a string in a specific format.

Source code in src/dve/metadata_parser/domain_types.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
class ConFormattedDate(dt.date):
    """A date, provided as a date or a string in a specific format.

    Behaviour is configured through class attributes (typically overridden on a
    subclass): `DATE_FORMAT` and `strict` control string parsing, while `ge`,
    `le`, `gt` and `lt` bound the accepted range. `__get_validators__` yields
    `validate` followed by `validate_range`.
    """

    DATE_FORMAT: ClassVar[Optional[str]] = None
    """The specific format of the date as a Python 'strptime' string."""
    strict: ClassVar[Optional[bool]] = False
    """Add additional check to ensure that date supplied meets the date format exactly."""
    ge: ClassVar[Optional[dt.date]] = None
    """The earliest date allowed (inclusive)."""
    le: ClassVar[Optional[dt.date]] = None
    """The latest date allowed (inclusive)."""
    gt: ClassVar[Optional[dt.date]] = None
    """Exclusive lower bound: the date must be strictly after this."""
    lt: ClassVar[Optional[dt.date]] = None
    """Exclusive upper bound: the date must be strictly before this."""

    @classmethod
    def validate(cls, value: Optional[Union[dt.date, str]]) -> Optional[dt.date]:
        """Validate a passed date, datetime or string.

        Returns `None` for `None`, the date part of a `datetime`, a `date`
        unchanged, and otherwise the string parsed with `DATE_FORMAT`.

        Raises:
            ValueError: if a string is supplied but `DATE_FORMAT` is unset, if
                the string cannot be parsed with `DATE_FORMAT`, or if `strict`
                is set and re-formatting the parsed date does not reproduce
                the input exactly.
        """
        if value is None:
            return value

        if isinstance(value, dt.datetime):
            # `datetime` is a subclass of `date`, so it must be tested first.
            # Reducing it to a plain date keeps the return type honest and lets
            # `validate_range` compare it with the `date` bounds (comparing a
            # datetime with a date raises TypeError).
            return value.date()
        if isinstance(value, dt.date):
            return value
        if cls.DATE_FORMAT is None:
            raise ValueError("No date format provided")

        try:
            date = dt.datetime.strptime(value, cls.DATE_FORMAT).date()
            # In strict mode the input must round-trip through the format.
            if cls.strict and date.strftime(cls.DATE_FORMAT) != value:
                raise ValueError
        except ValueError as err:
            raise ValueError(
                f"Unable to parse provided datetime in format {cls.DATE_FORMAT}"
            ) from err  # pylint: disable=line-too-long

        return date

    @classmethod
    def validate_range(cls, value) -> Optional[dt.date]:
        """Validate that the date falls within any constraints provided.

        `None` is passed through unchanged, since `validate` returns `None`
        for a missing value.

        Raises:
            ValueError: if the value violates `ge`, `le`, `gt` or `lt`.
        """
        if value is None:
            return value

        if cls.ge is not None and value < cls.ge:
            raise ValueError(f"Date must be greater than or equal to {cls.ge}")

        if cls.le is not None and value > cls.le:
            raise ValueError(f"Date must be less than or equal to {cls.le}")

        if cls.gt is not None and value <= cls.gt:
            raise ValueError(f"Date must be greater than {cls.gt}")

        if cls.lt is not None and value >= cls.lt:
            raise ValueError(f"Date must be less than {cls.lt}")

        return value

    @classmethod
    def __get_validators__(cls) -> Iterator[classmethod]:
        """Yield the validators in the order they should run."""
        yield cls.validate  # type: ignore
        yield cls.validate_range  # type: ignore

DATE_FORMAT = None class-attribute

The specific format of the date as a Python 'strptime' string.

ge = None class-attribute

The earliest date allowed.

gt = None class-attribute

Exclusive lower bound: the date must be strictly after this.

le = None class-attribute

The latest date allowed.

lt = None class-attribute

Exclusive upper bound: the date must be strictly before this.

strict = False class-attribute

Add additional check to ensure that date supplied meets the date format exactly.

__get_validators__() classmethod

Gets all validators

Source code in src/dve/metadata_parser/domain_types.py
313
314
315
316
317
@classmethod
def __get_validators__(cls) -> Iterator[classmethod]:
    """Yield this type's validators: `validate`, then `validate_range`."""
    for validator_name in ("validate", "validate_range"):
        # Each attribute is looked up only when its turn comes.
        yield getattr(cls, validator_name)  # type: ignore

validate(value) classmethod

Validate a passed datetime or string.

Source code in src/dve/metadata_parser/domain_types.py
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
@classmethod
def validate(cls, value: Optional[Union[dt.date, str]]) -> Optional[dt.date]:
    """Validate a passed date, datetime or string.

    Returns `None` for `None`, the date part of a `datetime`, a `date`
    unchanged, and otherwise the string parsed with `DATE_FORMAT`.

    Raises:
        ValueError: if a string is supplied but `DATE_FORMAT` is unset, if the
            string cannot be parsed with `DATE_FORMAT`, or if `strict` is set
            and re-formatting the parsed date does not reproduce the input.
    """
    if value is None:
        return value

    if isinstance(value, dt.datetime):
        # `datetime` is a subclass of `date`, so it must be tested first.
        # Reducing it to a plain date matches the declared return type and
        # avoids a TypeError when it is later compared with `date` bounds.
        return value.date()
    if isinstance(value, dt.date):
        return value
    if cls.DATE_FORMAT is None:
        raise ValueError("No date format provided")

    try:
        date = dt.datetime.strptime(value, cls.DATE_FORMAT).date()
        # In strict mode the input must round-trip through the format.
        if cls.strict and date.strftime(cls.DATE_FORMAT) != value:
            raise ValueError
    except ValueError as err:
        raise ValueError(
            f"Unable to parse provided datetime in format {cls.DATE_FORMAT}"
        ) from err  # pylint: disable=line-too-long

    return date

validate_range(value) classmethod

Validates that the date falls within any constraints provided

Source code in src/dve/metadata_parser/domain_types.py
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
@classmethod
def validate_range(cls, value) -> Optional[dt.date]:
    """Validate that the date falls within any constraints provided.

    `None` is passed through unchanged rather than being compared with the
    bounds, which would raise a TypeError.

    Raises:
        ValueError: if the value violates `ge`, `le`, `gt` or `lt`.
    """
    if value is None:
        return value

    if cls.ge is not None and value < cls.ge:
        raise ValueError(f"Date must be greater than or equal to {cls.ge}")

    if cls.le is not None and value > cls.le:
        raise ValueError(f"Date must be less than or equal to {cls.le}")

    if cls.gt is not None and value <= cls.gt:
        raise ValueError(f"Date must be greater than {cls.gt}")

    if cls.lt is not None and value >= cls.lt:
        raise ValueError(f"Date must be less than {cls.lt}")

    return value

FormattedDatetime

Bases: datetime

A datetime, provided as a datetime or a string in a specific format.

Source code in src/dve/metadata_parser/domain_types.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
class FormattedDatetime(dt.datetime):
    """A datetime, provided as a datetime or a string in a specific format."""

    DATE_FORMAT: ClassVar[Optional[str]] = None
    """The specific format of the datetime as a Python 'strptime' string."""
    TIMEZONE_TREATMENT: ClassVar[Literal["forbid", "permit", "require"]] = "permit"
    """How to treat the presence of timezone-related information."""
    DEFAULT_PATTERNS: Sequence[str] = [
        "".join(parts)
        for parts in itertools.product(
            ("%Y-%m-%d", "%Y%m%d"),
            ("T", " ", ""),
            ("%H:%M:%S", "%H%M%S"),
            ("", ".%f"),
            ("%z", ""),
        )
    ]
    """A sequence of datetime format patterns to try if `DATE_FORMAT` is unset."""

    @staticmethod
    def reformat_nhs_string_format(string: str) -> str:
        """Rewrite a compact digit string as `YYYY-MM-DDTHH:MM:SS+XX:00`.

        Any "T" is removed first; the result is then assembled from fixed
        character positions, with characters 14-15 placed after the "+".
        """
        digits = string.replace("T", "")
        date_part = f"{digits[:4]}-{digits[4:6]}-{digits[6:8]}"
        time_part = f"{digits[8:10]}:{digits[10:12]}:{digits[12:14]}"
        return f"{date_part}T{time_part}+{digits[14:16]}:00"

    @classmethod
    def parse_datetime(cls, string: str) -> dt.datetime:
        """Parse a datetime string by trying each of `DEFAULT_PATTERNS` in turn.

        Raises:
            ValueError: if none of the patterns matches.
        """
        candidate = string.strip()
        # A trailing 'Z' (zulu time) is rewritten as an explicit UTC offset.
        if candidate.endswith("Z"):
            candidate = candidate[:-1] + "+00:00"

        # 16 digits, or 8 digits + "T" + 8 digits, gets rewritten before parsing.
        if re.match(r"^([0-9]{16}|([0-9]{8}T[0-9]{8}))$", candidate):
            candidate = cls.reformat_nhs_string_format(candidate)

        for pattern in cls.DEFAULT_PATTERNS:
            try:
                return dt.datetime.strptime(candidate, pattern)
            except ValueError:
                pass  # This pattern does not fit; try the next one.
        raise ValueError("Unable to parse provided datetime")

    @classmethod
    def validate(cls, value: Optional[Union[dt.datetime, str]]) -> Optional[dt.datetime]:
        """Validate a passed datetime or string.

        `None` and `datetime` instances are accepted as-is; strings are parsed
        with `DATE_FORMAT` when set, otherwise with `parse_datetime`. The
        result is then checked against `TIMEZONE_TREATMENT`.

        Raises:
            ValueError: if parsing fails or the timezone rule is violated.
        """
        if value is None:
            return value

        if isinstance(value, dt.datetime):
            parsed = value
        elif cls.DATE_FORMAT is None:
            parsed = cls.parse_datetime(value)
        else:
            try:
                parsed = dt.datetime.strptime(value, cls.DATE_FORMAT)
            except ValueError as err:
                raise ValueError(
                    f"Unable to parse provided datetime in format {cls.DATE_FORMAT}"
                ) from err  # pylint: disable=line-too-long

        treatment = cls.TIMEZONE_TREATMENT
        if treatment == "forbid" and parsed.tzinfo:
            raise ValueError("Provided datetime has timezone, but this is forbidden for this field")
        if treatment == "require" and not parsed.tzinfo:
            raise ValueError(
                "Provided datetime missing timezone, but this is required for this field"
            )  # pylint: disable=line-too-long
        return parsed

    @classmethod
    def __get_validators__(cls) -> Iterator[classmethod]:
        """Yield the single validator for this type."""
        yield cls.validate  # type: ignore

DATE_FORMAT = None class-attribute

The specific format of the datetime as a Python 'strptime' string.

DEFAULT_PATTERNS = list(map(''.join, itertools.product(('%Y-%m-%d', '%Y%m%d'), ('T', ' ', ''), ('%H:%M:%S', '%H%M%S'), ('', '.%f'), ('%z', '')))) class-attribute instance-attribute

A sequence of datetime format patterns to try if DATE_FORMAT is unset.

TIMEZONE_TREATMENT = 'permit' class-attribute

How to treat the presence of timezone-related information.

__get_validators__() classmethod

Gets all validators

Source code in src/dve/metadata_parser/domain_types.py
429
430
431
432
@classmethod
def __get_validators__(cls) -> Iterator[classmethod]:
    """Yield the single validator for this type, `validate`."""
    validator = cls.validate
    yield validator  # type: ignore

parse_datetime(string) classmethod

Attempt to parse a datetime using various formats in sequence.

Source code in src/dve/metadata_parser/domain_types.py
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
@classmethod
def parse_datetime(cls, string: str) -> dt.datetime:
    """Parse a datetime string by trying each of `DEFAULT_PATTERNS` in turn.

    Raises:
        ValueError: if none of the patterns matches.
    """
    candidate = string.strip()
    # A trailing 'Z' (zulu time) is rewritten as an explicit UTC offset.
    if candidate.endswith("Z"):
        candidate = candidate[:-1] + "+00:00"

    # 16 digits, or 8 digits + "T" + 8 digits, gets rewritten before parsing.
    if re.match(r"^([0-9]{16}|([0-9]{8}T[0-9]{8}))$", candidate):
        candidate = cls.reformat_nhs_string_format(candidate)

    for pattern in cls.DEFAULT_PATTERNS:
        try:
            return dt.datetime.strptime(candidate, pattern)
        except ValueError:
            pass  # This pattern does not fit; try the next one.
    raise ValueError("Unable to parse provided datetime")

reformat_nhs_string_format(string) staticmethod

Reformat the NHS's preferred string format to something sensible.

Source code in src/dve/metadata_parser/domain_types.py
369
370
371
372
373
374
375
376
377
378
379
380
381
382
@staticmethod
def reformat_nhs_string_format(string: str) -> str:
    """Rewrite a compact digit string as `YYYY-MM-DDTHH:MM:SS+XX:00`.

    Any "T" is removed first; the result is then assembled from fixed
    character positions, with characters 14-15 placed after the "+".
    """
    digits = string.replace("T", "")
    date_part = f"{digits[:4]}-{digits[4:6]}-{digits[6:8]}"
    time_part = f"{digits[8:10]}:{digits[10:12]}:{digits[12:14]}"
    return f"{date_part}T{time_part}+{digits[14:16]}:00"

validate(value) classmethod

Validate a passed datetime or string.

Source code in src/dve/metadata_parser/domain_types.py
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
@classmethod
def validate(cls, value: Optional[Union[dt.datetime, str]]) -> Optional[dt.datetime]:
    """Validate a passed datetime or string.

    `None` and `datetime` instances are accepted as-is; strings are parsed
    with `DATE_FORMAT` when set, otherwise with `parse_datetime`. The result
    is then checked against `TIMEZONE_TREATMENT`.

    Raises:
        ValueError: if parsing fails or the timezone rule is violated.
    """
    if value is None:
        return value

    if isinstance(value, dt.datetime):
        parsed = value
    elif cls.DATE_FORMAT is None:
        parsed = cls.parse_datetime(value)
    else:
        try:
            parsed = dt.datetime.strptime(value, cls.DATE_FORMAT)
        except ValueError as err:
            raise ValueError(
                f"Unable to parse provided datetime in format {cls.DATE_FORMAT}"
            ) from err  # pylint: disable=line-too-long

    treatment = cls.TIMEZONE_TREATMENT
    if treatment == "forbid" and parsed.tzinfo:
        raise ValueError("Provided datetime has timezone, but this is forbidden for this field")
    if treatment == "require" and not parsed.tzinfo:
        raise ValueError(
            "Provided datetime missing timezone, but this is required for this field"
        )  # pylint: disable=line-too-long
    return parsed

FormattedTime

Bases: time

A time, provided as a datetime or a string in a specific format.

Source code in src/dve/metadata_parser/domain_types.py
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
class FormattedTime(dt.time):
    """A time, provided as a time, a datetime or a string in a specific format."""

    TIME_FORMAT: ClassVar[Optional[str]] = None
    """The specific format of the time."""
    TIMEZONE_TREATMENT: ClassVar[Literal["forbid", "permit", "require"]] = "permit"
    """How to treat the presence of timezone-related information."""
    DEFAULT_PATTERNS: Sequence[str] = (
        # 12-hour clock with an explicit AM/PM marker. These are tried first
        # because `strptime` only applies `%p` to the hour when the hour was
        # parsed with `%I`; a `%H...%p` pattern would read "01:00:00PM" as
        # 01:00 instead of 13:00.
        list(
            map(
                "".join,
                itertools.product(
                    ("%I:%M:%S", "%I%M%S"),
                    ("", ".%f"),
                    ("%p",),
                    ("%z",),
                ),
            )
        )
        + list(
            map(
                "".join,
                itertools.product(
                    ("%I:%M:%S", "%I%M%S"),
                    ("", ".%f"),
                    ("%z", ""),
                    (" %p", "%p"),
                ),
            )
        )
        # 24-hour clock. A trailing AM/PM marker is still tolerated here for
        # hours that are not valid on a 12-hour clock (e.g. "13:00:00PM").
        # `%P` variants are omitted: Python's `strptime` rejects that directive,
        # so such patterns could never match.
        + list(
            map(
                "".join,
                itertools.product(
                    ("%H:%M:%S", "%H%M%S"),
                    ("", ".%f"),
                    ("%p", ""),
                    ("%z", ""),
                ),
            )
        )
        # 12-hour clock without a marker.
        + list(
            map(
                "".join,
                itertools.product(
                    ("%I:%M:%S", "%I%M%S"),
                    ("", ".%f"),
                    ("%z", ""),
                ),
            )
        )
    )
    """A sequence of time format patterns to try if `TIME_FORMAT` is unset.

    Order matters: the first pattern that parses the input wins.
    """

    @classmethod
    def convert_to_time(cls, value: dt.datetime) -> dt.time:
        """
        Convert `datetime.datetime` to `datetime.time`. If datetime contains timezone info, that
        will be retained.
        """
        if value.tzinfo:
            return value.timetz()

        return value.time()

    @classmethod
    def parse_time(cls, string: str) -> dt.time:
        """Attempt to parse a time using each of `DEFAULT_PATTERNS` in sequence.

        Raises:
            ValueError: if none of the patterns matches.
        """
        string = string.strip()
        if string.endswith("Z"):  # Convert 'zulu' time to UTC.
            string = string[:-1] + "+00:00"

        for pattern in cls.DEFAULT_PATTERNS:
            try:
                parsed = dt.datetime.strptime(string, pattern)
            except ValueError:
                continue

            return cls.convert_to_time(parsed)  # pragma: no cover
        raise ValueError("Unable to parse provided time")

    @classmethod
    def validate(cls, value: Union[dt.time, dt.datetime, str]) -> Optional[dt.time]:
        """Validate a passed time, datetime or string.

        `None` and `time` instances are accepted as-is, a `datetime` is reduced
        to its time, and strings are parsed with `TIME_FORMAT` when set,
        otherwise with `parse_time`. The result is then checked against
        `TIMEZONE_TREATMENT`.

        Raises:
            ValueError: if parsing fails or the timezone rule is violated.
        """
        if value is None:
            return value

        if isinstance(value, dt.time):
            new_time = value
        elif isinstance(value, dt.datetime):
            new_time = cls.convert_to_time(value)
        elif cls.TIME_FORMAT is not None:
            try:
                parsed = dt.datetime.strptime(value, cls.TIME_FORMAT)
                new_time = cls.convert_to_time(parsed)
            except ValueError as err:
                raise ValueError(
                    f"Unable to parse provided time in format {cls.TIME_FORMAT}"
                ) from err
        else:
            new_time = cls.parse_time(value)

        if cls.TIMEZONE_TREATMENT == "forbid" and new_time.tzinfo:
            raise ValueError("Provided time has timezone, but this is forbidden for this field")
        if cls.TIMEZONE_TREATMENT == "require" and not new_time.tzinfo:
            raise ValueError("Provided time missing timezone, but this is required for this field")

        return new_time

    @classmethod
    def __get_validators__(cls) -> Iterator[classmethod]:
        """Yield the single validator for this type."""
        yield cls.validate  # type: ignore

DEFAULT_PATTERNS = list(map(''.join, itertools.product(('%H:%M:%S', '%H%M%S'), ('', '.%f'), ('%p', '%P', ''), ('%z', '')))) + list(map(''.join, itertools.product(('%I:%M:%S', '%I%M%S'), ('', '.%f'), ('%z', ''), (' %p', '%p', '%P', ' %P', '')))) class-attribute instance-attribute

A sequence of time format patterns to try if TIME_FORMAT is unset.

TIMEZONE_TREATMENT = 'permit' class-attribute

How to treat the presence of timezone-related information.

TIME_FORMAT = None class-attribute

The specific format of the time.

__get_validators__() classmethod

Gets all validators

Source code in src/dve/metadata_parser/domain_types.py
525
526
527
528
@classmethod
def __get_validators__(cls) -> Iterator[classmethod]:
    """Yield the single validator for this type, `validate`."""
    validator = cls.validate
    yield validator  # type: ignore

convert_to_time(value) classmethod

Convert datetime.datetime to datetime.time. If datetime contains timezone info, that will be retained.

Source code in src/dve/metadata_parser/domain_types.py
467
468
469
470
471
472
473
474
475
476
@classmethod
def convert_to_time(cls, value: dt.datetime) -> dt.time:
    """
    Reduce a `datetime.datetime` to a `datetime.time`, keeping the timezone
    info when the datetime has any.
    """
    return value.timetz() if value.tzinfo else value.time()

parse_time(string) classmethod

Attempt to parse a time using various formats in sequence.

Source code in src/dve/metadata_parser/domain_types.py
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
@classmethod
def parse_time(cls, string: str) -> dt.time:
    """Attempt to parse a time using each of `DEFAULT_PATTERNS` in sequence.

    Raises:
        ValueError: if none of the patterns matches.
    """
    cleaned = string.strip()
    # A trailing 'Z' (zulu time) is rewritten as an explicit UTC offset.
    if cleaned.endswith("Z"):
        cleaned = cleaned[:-1] + "+00:00"

    for pattern in cls.DEFAULT_PATTERNS:
        try:
            parsed = dt.datetime.strptime(cleaned, pattern)
        except ValueError:
            continue
        else:
            # Conversion happens outside the `try` so its errors are not swallowed.
            return cls.convert_to_time(parsed)
    raise ValueError("Unable to parse provided time")

validate(value) classmethod

Validate a passed time, datetime or string.

Source code in src/dve/metadata_parser/domain_types.py
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
@classmethod
def validate(cls, value: Union[dt.time, dt.datetime, str]) -> dt.time | None:
    """Validate a passed time, datetime or string."""
    if value is None:
        return value

    if isinstance(value, dt.time):
        new_time = value
    elif isinstance(value, dt.datetime):
        new_time = cls.convert_to_time(value)
    else:
        if cls.TIME_FORMAT is not None:
            try:
                new_time = dt.datetime.strptime(value, cls.TIME_FORMAT)  # type: ignore
                new_time = cls.convert_to_time(new_time)  # type: ignore
            except ValueError as err:
                raise ValueError(
                    f"Unable to parse provided time in format {cls.TIME_FORMAT}"
                ) from err
        else:
            new_time = cls.parse_time(value)

    if cls.TIMEZONE_TREATMENT == "forbid" and new_time.tzinfo:
        raise ValueError("Provided time has timezone, but this is forbidden for this field")
    if cls.TIMEZONE_TREATMENT == "require" and not new_time.tzinfo:
        raise ValueError("Provided time missing timezone, but this is required for this field")

    return new_time

NHSNumber

Bases: ConstrainedStr

A constrained string which validates an NHS number.

Validation criteria

The following criteria are used for validation (after separators are removed). An NHS number:

- Is 10 characters long
- Is a number
- Has the correct check digit.

Check digit calculation

The check digit (the last digit of the NHS number) should match the 'checksum' calculated from the number. This checksum is calculated as follows:

1. The first 9 digits of the number are multiplied by factors from 10 to 2 (i.e. the first is multiplied by 10, the second is multiplied by 9, ...).
2. The sum of the products from stage 1 is taken.
3. The checksum is calculated by applying the following logic to the mod-11 (remainder after dividing by 11) of the sum from stage 2. This checksum must match the last digit of the NHS number.

    * If 0: the checksum is 0
    * If 1: the NHS number is invalid
    * If between 2 and 10: the checksum is 11 minus the mod-11
These rules are taken from the data dictionary definition

https://datadictionary.nhs.uk/attributes/nhs_number.html

Warning Emission

Warnings will be emitted in the following circumstances:

  • The NHS number is a sentinel value, commonly used for a specific purpose.
  • The NHS number starts with '9'; this indicates a test number.
  • The NHS number is a palindrome; this indicates a test number.
Source code in src/dve/metadata_parser/domain_types.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
class NHSNumber(types.ConstrainedStr):
    """A constrained string which validates an NHS number.

    ### Validation criteria

    The following criteria are used for validation (after separators are removed).
    An NHS number:
     - Is 10 characters long
     - Is a number (ASCII digits 0-9 only)
     - Has the correct check digit.

    ### Check digit calculation

    The check digit (the last digit of the NHS number) should match the
    'checksum' calculated from the number. This checksum is calculated as follows:
     1. The first 9 digits of the number are multiplied by factors from 10 to 2
        (i.e. the first is multiplied by 10, the second is multiplied by 9, ...)
     2. The sum of the products from stage 1 is taken
     3. The checksum is calculated by applying the following logic to the mod-11
        (remainder after dividing by 11) of the sum from stage 2. This checksum
        must match the last digit of the NHS number.

            * If 0: the checksum is 0
            * If 1: the NHS number is invalid
            * If between 2 and 10: the checksum is 11 minus the mod-11

    These rules are taken from the data dictionary definition:
        https://datadictionary.nhs.uk/attributes/nhs_number.html

    ### Warning Emission

    Warnings will be emitted in the following circumstances:

     - The NHS number is a sentinel value, commonly used for a specific purpose.
     - The NHS number starts with '9'; this indicates a test number.
     - The NHS number is a palindrome; this indicates a test number.

    """

    SENTINEL_VALUES: ClassVar[dict[str, str]] = {
        "0000000000": "returned by MPS to indicate no match",
        "1111111111": "common example value given for patient-facing forms",
        "9999999999": "returned by MPS to indicate multiple matches",
        "0123456789": "common example value given for patient-facing forms",
    }
    """
    Sentinel NHS number values, which are normally used for specific purposes
    and annoyingly happen to pass checksum validation.

    """

    _FACTORS: ClassVar[tuple[int, ...]] = (10, 9, 8, 7, 6, 5, 4, 3, 2)
    """Weights for the NHS number digits in the checksum."""

    # When False, none of the 'possibly invalid' warnings are emitted.
    warn_on_test_numbers = True

    @classmethod
    def _warn_for_possible_invalid_number(cls, nhs_number: str, loc: str) -> None:
        """Emit a warning for a checksum-valid but suspicious NHS number.

        Only the first matching reason is reported: sentinel value, then a
        leading '9', then a palindrome.
        """
        if not cls.warn_on_test_numbers:
            return

        reason = cls.SENTINEL_VALUES.get(nhs_number)
        if not reason:
            if nhs_number.startswith("9"):
                reason = "NHS number starts with '9': this indicates a test number"
            elif nhs_number == nhs_number[::-1]:
                reason = "NHS number is a palindrome: this indicates a test number"

        if reason:
            warnings.warn(exc.LocWarning(f"NHS number possibly invalid ({reason})", loc))

    @staticmethod
    def ensure_format(nhs_number: Optional[str]) -> str:
        """Coerce an NHS number string to the correct format, raising an error if
        coercion fails.

        Spaces and hyphens are removed; the result must be exactly ten ASCII
        digits.

        Raises:
            ValueError: if the value is `None` or is not a 10 digit number
                after separator removal.
        """
        if nhs_number is None:
            raise ValueError("NHS number not provided")

        nhs_number = str(nhs_number).replace(" ", "").replace("-", "")
        # `str.isnumeric()` also accepts characters such as circled or
        # Arabic-Indic digits, which either break `int()` in the checksum or
        # yield a non-ASCII 'number'; require plain 0-9 instead.
        if len(nhs_number) != 10 or not (nhs_number.isascii() and nhs_number.isdigit()):
            raise ValueError("NHS number invalid (not a 10 digit number after separator removal)")
        return nhs_number

    @staticmethod
    def confirm_checksum_validates(nhs_number: str) -> bool:
        """Does the mod 11 check on the NHSnumber. This requires a 10 digit numeric string."""
        *digits, check_digit = iter(nhs_number)
        # weighted multiplication, using the shared class-level weights
        total = sum(int(digit) * factor for digit, factor in zip(digits, NHSNumber._FACTORS))
        remainder = total % 11
        # remainder 0 -> check 0; remainder 1 -> check 10, which no single
        # digit can equal, so the number is rejected.
        check = 11 - (remainder or 11)
        return check == int(check_digit)

    @classmethod
    def check_validates(cls, value: Optional[str]) -> bool:
        """Check whether an NHS number is valid, returning `True` for valid numbers
        and `False` for invalid numbers.

        """
        try:
            nhs_number = cls.ensure_format(value)
            is_valid = cls.confirm_checksum_validates(nhs_number)
        except ValueError:
            return False
        return is_valid

    @classmethod
    def validate(cls, value: Optional[str], field: fields.ModelField) -> str:  # type: ignore
        """Validates the given NHS number, returning it in the format produced
        by `ensure_format`.

        Raises:
            ValueError: if the format or the check digit is invalid.
        """
        nhs_number = cls.ensure_format(value)

        if cls.confirm_checksum_validates(nhs_number):
            # TODO: Get a better way to get 'loc' here.
            cls._warn_for_possible_invalid_number(nhs_number, field.name)
            return nhs_number
        raise ValueError("NHS number invalid (incorrect check digit: cannot be a real NHS number)")

SENTINEL_VALUES = {'0000000000': 'returned by MPS to indicate no match', '1111111111': 'common example value given for patient-facing forms', '9999999999': 'returned by MPS to indicate multiple matches', '0123456789': 'common example value given for patient-facing forms'} class-attribute

Sentinel NHS number values, which are normally used for specific purposes and annoyingly happen to pass checksum validation.

check_validates(value) classmethod

Check whether an NHS number is valid, returning True for valid numbers and False for invalid numbers.

Source code in src/dve/metadata_parser/domain_types.py
141
142
143
144
145
146
147
148
149
150
151
152
@classmethod
def check_validates(cls, value: Optional[str]) -> bool:
    """Report whether an NHS number is valid.

    Returns the checksum result for a well-formed number, and `False` when
    formatting or the checksum raises `ValueError`.
    """
    try:
        return cls.confirm_checksum_validates(cls.ensure_format(value))
    except ValueError:
        return False

confirm_checksum_validates(nhs_number) staticmethod

Does the mod 11 check on the NHSnumber. This requires a 10 digit numeric string.

Source code in src/dve/metadata_parser/domain_types.py
130
131
132
133
134
135
136
137
138
139
@staticmethod
def confirm_checksum_validates(nhs_number: str) -> bool:
    """Run the mod 11 check on an NHS number; expects a 10 digit numeric string."""
    *leading, last = nhs_number
    # Weights run from 10 down to 2, one per leading digit.
    weighted_sum = 0
    for digit, weight in zip(leading, range(10, 1, -1)):
        weighted_sum += int(digit) * weight
    remainder = weighted_sum % 11
    # Remainder 0 maps to 0; remainder 1 maps to 10, which no digit can equal.
    expected = 0 if remainder == 0 else 11 - remainder
    return expected == int(last)

ensure_format(nhs_number) staticmethod

Coerce an NHS number string to the correct format, raising an error if coercion fails.

Source code in src/dve/metadata_parser/domain_types.py
116
117
118
119
120
121
122
123
124
125
126
127
128
@staticmethod
def ensure_format(nhs_number: Optional[str]) -> str:
    """Coerce an NHS number string to the correct format, raising an error if
    coercion fails.

    Spaces and hyphens are removed; the result must be exactly ten ASCII digits.

    Raises:
        ValueError: if the value is `None` or is not a 10 digit number after
            separator removal.
    """
    if nhs_number is None:
        raise ValueError("NHS number not provided")

    nhs_number = str(nhs_number).replace(" ", "").replace("-", "")
    # `str.isnumeric()` also accepts characters such as circled or
    # Arabic-Indic digits, which either break `int()` in the checksum or
    # yield a non-ASCII 'number'; require plain 0-9 instead.
    if len(nhs_number) != 10 or not (nhs_number.isascii() and nhs_number.isdigit()):
        raise ValueError("NHS number invalid (not a 10 digit number after separator removal)")
    return nhs_number

validate(value, field) classmethod

Validates the given NHS number

Source code in src/dve/metadata_parser/domain_types.py
154
155
156
157
158
159
160
161
162
163
@classmethod
def validate(cls, value: Optional[str], field: fields.ModelField) -> str:  # type: ignore
    """Validate an NHS number, returning it as formatted by `ensure_format`.

    A warning may be emitted for checksum-valid but suspicious numbers, using
    the field name as the location.

    Raises:
        ValueError: if the format or the check digit is invalid.
    """
    nhs_number = cls.ensure_format(value)

    if not cls.confirm_checksum_validates(nhs_number):
        raise ValueError("NHS number invalid (incorrect check digit: cannot be a real NHS number)")

    # TODO: Get a better way to get 'loc' here.
    cls._warn_for_possible_invalid_number(nhs_number, field.name)
    return nhs_number

OrgID

Bases: _SimpleRegexValidator

A validator for OrgID. This does not validate that the OrgID is for a 'real' organisation.

That should be done using a database reference table.

Source code in src/dve/metadata_parser/domain_types.py
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
class OrgID(_SimpleRegexValidator):
    """Validator for an OrgID.

    Only the shape of the value is checked here; whether the OrgID belongs to
    a 'real' organisation should be checked against a database reference
    table.

    """

    regex = re.compile(r"^[A-Z0-9]{3,5}$")
    strip_whitespace = False

    @classmethod
    def validate(cls, value: str) -> str:
        """Validate the given OrgID, rejecting missing or empty values first."""
        if value:
            return super().validate(value)
        raise ValueError("org_id not provided")

validate(value) classmethod

Validates the given OrgID

Source code in src/dve/metadata_parser/domain_types.py
250
251
252
253
254
255
@classmethod
def validate(cls, value: str) -> str:
    """Validate the given OrgID, rejecting missing or empty values first."""
    if value:
        return super().validate(value)
    raise ValueError("org_id not provided")

Postcode

Bases: ConstrainedStr

Postcode constrained string

Source code in src/dve/metadata_parser/domain_types.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
class Postcode(types.ConstrainedStr):
    """Constrained string type for postcodes."""

    regex: re.Pattern = POSTCODE_REGEX
    strip_whitespace = True
    apply_normalize = True

    @staticmethod
    def normalize(_postcode: str) -> Optional[str]:
        """Return the postcode in a canonical layout, or None for null values.

        Every space character (``" "``) is removed. If nothing is left, or
        the lower-cased remainder is listed in ``NULL_POSTCODES``, the result
        is None. Otherwise a single space is re-inserted before the final
        three characters and the whole string is upper-cased.

        """
        compact = _postcode.replace(" ", "")
        if not compact or compact.lower() in NULL_POSTCODES:
            return None
        # `compact` already has no spaces, so it can be split directly.
        return f"{compact[:-3]} {compact[-3:]}".upper()

    @classmethod
    def validate(cls, value: str) -> Optional[str]:  # type: ignore
        """Validate the given postcode.

        Returns None for empty values (before or after normalisation) and the
        (optionally normalised) postcode when it matches ``cls.regex``.

        Raises:
            ValueError: If the postcode does not match ``cls.regex``.

        """
        candidate = value
        if cls.apply_normalize and candidate:
            candidate = cls.normalize(candidate)  # type: ignore

        if not candidate:
            return None

        if cls.regex.match(candidate):
            return candidate
        raise ValueError("Invalid Postcode submitted")

normalize(_postcode) staticmethod

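Removes all spaces, re-inserts a single space before the final three characters and upper-cases the result. Returns None for empty or null postcodes.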

Source code in src/dve/metadata_parser/domain_types.py
183
184
185
186
187
188
189
190
@staticmethod
def normalize(_postcode: str) -> Optional[str]:
    """Return the postcode in a canonical layout, or None for null values.

    Every space character (``" "``) is removed. If nothing is left, or the
    lower-cased remainder is listed in ``NULL_POSTCODES``, the result is None.
    Otherwise a single space is re-inserted before the final three characters
    and the whole string is upper-cased.

    """
    compact = _postcode.replace(" ", "")
    if not compact or compact.lower() in NULL_POSTCODES:
        return None
    # `compact` already has no spaces, so it can be split directly.
    return f"{compact[:-3]} {compact[-3:]}".upper()

validate(value) classmethod

Validates the given postcode

Source code in src/dve/metadata_parser/domain_types.py
192
193
194
195
196
197
198
199
200
201
202
203
204
@classmethod
def validate(cls, value: str) -> Optional[str]:  # type: ignore
    """Validate the given postcode.

    Returns None for empty values (before or after normalisation) and the
    (optionally normalised) postcode when it matches ``cls.regex``.

    Raises:
        ValueError: If the postcode does not match ``cls.regex``.

    """
    candidate = value
    if cls.apply_normalize and candidate:
        candidate = cls.normalize(candidate)  # type: ignore

    if not candidate:
        return None

    if cls.regex.match(candidate):
        return candidate
    raise ValueError("Invalid Postcode submitted")

ReportingPeriod

Bases: date

A reporting period field, with the type of reporting period supplied

Source code in src/dve/metadata_parser/domain_types.py
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
class ReportingPeriod(dt.date):
    """A reporting period date that must fall on a month boundary.

    ``REPORTING_PERIOD_TYPE`` selects which boundary is required: ``"start"``
    demands the first day of a month, any other value the last day.
    ``DATE_FORMAT`` is the 'strptime' format used to parse string input.

    """

    REPORTING_PERIOD_TYPE: ClassVar[Literal["start", "end"]]
    DATE_FORMAT: ClassVar[str] = "%Y-%m-%d"

    @classmethod
    def parse_datetime(cls, value: str) -> dt.date:
        """Parse ``value`` using ``cls.DATE_FORMAT`` and return the date part.

        Raises:
            ValueError: If ``value`` does not match ``cls.DATE_FORMAT``.

        """
        try:
            parsed = dt.datetime.strptime(value, cls.DATE_FORMAT)
        except ValueError as err:
            raise ValueError(f"Unable to parse provided date in format {cls.DATE_FORMAT}") from err
        return parsed.date()

    @staticmethod
    def start_of_month(value: dt.date) -> bool:
        """Return True when ``value`` is the first day of its month."""
        is_first_day = value.day == 1
        return is_first_day

    @staticmethod
    def end_of_month(value: dt.date) -> bool:
        """Return True when ``value`` is the last day of its month."""
        if value.month == 12:
            # December always has 31 days; no next month exists in the same year.
            month_end = value.replace(day=31)
        else:
            # Step to the first of the following month, then back one day.
            first_of_next = value.replace(month=value.month + 1, day=1)
            month_end = first_of_next - dt.timedelta(days=1)
        return value == month_end

    @classmethod
    def validate(cls, value: Union[dt.date, str]) -> Optional[dt.date]:
        """Check that ``value`` is a valid reporting period date and return it.

        Strings are parsed with ``parse_datetime`` first; the returned value
        is the parsed date in that case.

        Raises:
            ValueError: If a string cannot be parsed, or the date is not on
                the month boundary required by ``REPORTING_PERIOD_TYPE``.

        """
        period = cls.parse_datetime(value) if isinstance(value, str) else value
        if cls.REPORTING_PERIOD_TYPE == "start":
            if not cls.start_of_month(period):
                raise ValueError("Reporting date supplied is not at the start of the month")
        elif not cls.end_of_month(period):
            raise ValueError("Reporting date supplied is not at end of the month")
        return period

    @classmethod
    def __get_validators__(cls) -> Iterator[classmethod]:
        """Yield the validators for this type (only ``validate``)."""
        yield from (cls.validate,)  # type: ignore

__get_validators__() classmethod

Gets all validators

Source code in src/dve/metadata_parser/domain_types.py
613
614
615
616
@classmethod
def __get_validators__(cls) -> Iterator[classmethod]:
    """Yield the validators for this type (only ``validate``)."""
    yield from (cls.validate,)  # type: ignore

end_of_month(value) staticmethod

Check if the date supplied is the end of the month

Source code in src/dve/metadata_parser/domain_types.py
588
589
590
591
592
593
594
595
596
597
598
@staticmethod
def end_of_month(value: dt.date) -> bool:
    """Return True when ``value`` is the last day of its month."""
    if value.month == 12:
        # December always has 31 days; no next month exists in the same year.
        month_end = value.replace(day=31)
    else:
        # Step to the first of the following month, then back one day.
        first_of_next = value.replace(month=value.month + 1, day=1)
        month_end = first_of_next - dt.timedelta(days=1)
    return value == month_end

parse_datetime(value) classmethod

Attempt to parse string to date

Source code in src/dve/metadata_parser/domain_types.py
574
575
576
577
578
579
580
581
@classmethod
def parse_datetime(cls, value: str) -> dt.date:
    """Parse ``value`` using ``cls.DATE_FORMAT`` and return the date part.

    Raises:
        ValueError: If ``value`` does not match ``cls.DATE_FORMAT``.

    """
    try:
        parsed = dt.datetime.strptime(value, cls.DATE_FORMAT)
    except ValueError as err:
        raise ValueError(f"Unable to parse provided date in format {cls.DATE_FORMAT}") from err
    return parsed.date()

start_of_month(value) staticmethod

Check if the date supplied is the start of the month

Source code in src/dve/metadata_parser/domain_types.py
583
584
585
586
@staticmethod
def start_of_month(value: dt.date) -> bool:
    """Return True when ``value`` is the first day of its month."""
    is_first_day = value.day == 1
    return is_first_day

validate(value) classmethod

Validate if the value is a valid reporting period

Source code in src/dve/metadata_parser/domain_types.py
600
601
602
603
604
605
606
607
608
609
610
611
@classmethod
def validate(cls, value: Union[dt.date, str]) -> Optional[dt.date]:
    """Check that ``value`` is a valid reporting period date and return it.

    Strings are parsed with ``cls.parse_datetime`` first; the returned value
    is the parsed date in that case. When ``cls.REPORTING_PERIOD_TYPE`` is
    ``"start"`` the date must be the first day of a month, otherwise it must
    be the last day.

    Raises:
        ValueError: If a string cannot be parsed, or the date is not on the
            required month boundary.

    """
    period = cls.parse_datetime(value) if isinstance(value, str) else value
    if cls.REPORTING_PERIOD_TYPE == "start":
        if not cls.start_of_month(period):
            raise ValueError("Reporting date supplied is not at the start of the month")
    elif not cls.end_of_month(period):
        raise ValueError("Reporting date supplied is not at end of the month")
    return period

alphanumeric(min_digits=1, max_digits=1) cached

Return a regex-validated class which will ensure that passed strings are alphanumeric.

Source code in src/dve/metadata_parser/domain_types.py
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
@lru_cache()
@validate_arguments
def alphanumeric(
    min_digits: types.NonNegativeInt = 1,
    max_digits: types.PositiveInt = 1,
) -> type[_SimpleRegexValidator]:
    """Return a regex-validated class that only accepts alphanumeric strings.

    The generated pattern allows between ``min_digits`` and ``max_digits``
    characters from ``[A-Za-z0-9]``. When both bounds are equal the class is
    named ``AN<n>``, otherwise ``AN<min>_<max>``.

    """
    char_class = r"[A-Za-z0-9]"
    if min_digits == max_digits:
        suffix = f"{max_digits}"
        repeat = f"{{{max_digits}}}"
    else:
        suffix = f"{min_digits}_{max_digits}"
        repeat = f"{{{min_digits},{max_digits}}}"

    # Start from the base validator's namespace and override only the pattern.
    namespace = {
        **_SimpleRegexValidator.__dict__,
        "regex": re.compile(f"^{char_class}{repeat}$"),
    }
    bases = (_SimpleRegexValidator,) + _SimpleRegexValidator.__bases__
    return type(f"AN{suffix}", bases, namespace)

conformatteddate(date_format=None, strict=False, ge=None, le=None, gt=None, lt=None) cached

Return a formatted date class with a set date format, strictness and optional date bounds (ge, le, gt, lt).

Source code in src/dve/metadata_parser/domain_types.py
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
@lru_cache()
@validate_arguments
def conformatteddate(
    date_format: Optional[str] = None,
    strict: Optional[bool] = False,
    ge: Optional[dt.date] = None,  # pylint: disable=invalid-name
    le: Optional[dt.date] = None,  # pylint: disable=invalid-name
    gt: Optional[dt.date] = None,  # pylint: disable=invalid-name
    lt: Optional[dt.date] = None,  # pylint: disable=invalid-name
) -> type[ConFormattedDate]:
    """Return a formatted date class with a set date format and constraints.

    Args:
        date_format: 'strptime' format stored as ``DATE_FORMAT``.
        strict: Stored as ``strict`` on the returned class.
        ge: Stored as ``ge`` on the returned class.
        le: Stored as ``le`` on the returned class.
        gt: Stored as ``gt`` on the returned class.
        lt: Stored as ``lt`` on the returned class.

    Returns:
        ``ConFormattedDate`` itself when every argument is left at its
        default, otherwise a new subclass carrying the supplied settings.

    """
    # Only reuse the base class when nothing was customised. Previously any
    # call without a date format returned the base class, silently dropping
    # `strict` and the ge/le/gt/lt bounds. This mirrors `formatteddatetime`
    # and `formattedtime`, which check all of their arguments.
    no_bounds = all(bound is None for bound in (ge, le, gt, lt))
    if date_format is None and not strict and no_bounds:
        return ConFormattedDate

    dict_ = ConFormattedDate.__dict__.copy()
    dict_["DATE_FORMAT"] = date_format
    dict_["strict"] = strict
    dict_["ge"] = ge
    dict_["le"] = le
    dict_["gt"] = gt
    dict_["lt"] = lt

    # NOTE(review): the generated class is named "FormattedDatetime", which
    # looks like a copy-paste from `formatteddatetime`. Kept unchanged here in
    # case callers rely on the name; confirm before renaming.
    return type("FormattedDatetime", (ConFormattedDate, *ConFormattedDate.__bases__), dict_)

formatteddatetime(date_format=None, timezone_treatment='permit') cached

Return a formatted datetime class with a set date format and timezone treatment.

Source code in src/dve/metadata_parser/domain_types.py
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
@lru_cache()
@validate_arguments
def formatteddatetime(
    date_format: Optional[str] = None,
    timezone_treatment: Literal["forbid", "permit", "require"] = "permit",
) -> type[FormattedDatetime]:
    """Return a formatted datetime class with a set date format
    and timezone treatment.

    ``FormattedDatetime`` itself is returned when both arguments are left at
    their defaults; otherwise a new subclass is built with ``DATE_FORMAT``
    and ``TIMEZONE_TREATMENT`` overridden.

    """
    uses_defaults = date_format is None and timezone_treatment == "permit"
    if uses_defaults:
        return FormattedDatetime

    # Start from the base class namespace and override the two settings.
    namespace = {
        **FormattedDatetime.__dict__,
        "DATE_FORMAT": date_format,
        "TIMEZONE_TREATMENT": timezone_treatment,
    }
    bases = (FormattedDatetime,) + FormattedDatetime.__bases__
    return type("FormattedDatetime", bases, namespace)

formattedtime(time_format=None, timezone_treatment='permit') cached

Return a formatted time class with a set time format and timezone treatment.

Source code in src/dve/metadata_parser/domain_types.py
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
@lru_cache()
@validate_arguments
def formattedtime(
    time_format: Optional[str] = None,
    timezone_treatment: Literal["forbid", "permit", "require"] = "permit",
) -> type[FormattedTime]:
    """Return a formatted time class with a set time format and timezone treatment.

    ``FormattedTime`` itself is returned when both arguments are left at
    their defaults; otherwise a new subclass is built with ``TIME_FORMAT``
    and ``TIMEZONE_TREATMENT`` overridden.

    """
    uses_defaults = time_format is None and timezone_treatment == "permit"
    if uses_defaults:
        return FormattedTime

    # Start from the base class namespace and override the two settings.
    namespace = {
        **FormattedTime.__dict__,
        "TIME_FORMAT": time_format,
        "TIMEZONE_TREATMENT": timezone_treatment,
    }
    bases = (FormattedTime,) + FormattedTime.__bases__
    return type("FormattedTime", bases, namespace)

identifier(min_digits=1, max_digits=1) cached

Return a regex-validated class which will ensure that passed strings are alphanumeric or in a fixed set of special characters for identifiers.

Source code in src/dve/metadata_parser/domain_types.py
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
@lru_cache()
@validate_arguments
def identifier(
    min_digits: types.NonNegativeInt = 1,
    max_digits: types.PositiveInt = 1,
) -> type[_SimpleRegexValidator]:
    """
    Return a regex-validated class which only accepts strings made of
    alphanumeric characters or a fixed set of special characters allowed
    in identifiers.

    The generated pattern allows between ``min_digits`` and ``max_digits``
    such characters. When both bounds are equal the class is named
    ``AN<n>``, otherwise ``AN<min>_<max>``.
    """
    allowed_chars = r"[A-Za-z0-9_\-=\/\\#:; ().`*!,|+'\^\[\]]"
    if min_digits == max_digits:
        suffix = f"{max_digits}"
        repeat = f"{{{max_digits}}}"
    else:
        suffix = f"{min_digits}_{max_digits}"
        repeat = f"{{{min_digits},{max_digits}}}"

    # Start from the base validator's namespace and override only the pattern.
    namespace = {
        **_SimpleRegexValidator.__dict__,
        "regex": re.compile(f"^{allowed_chars}{repeat}$"),
    }
    bases = (_SimpleRegexValidator,) + _SimpleRegexValidator.__bases__
    # NOTE(review): the "AN" prefix is the same one `alphanumeric` uses, so
    # both factories can produce classes with identical names -- confirm
    # whether a distinct prefix was intended.
    return type(f"AN{suffix}", bases, namespace)

permissive_nhs_number(warn_on_test_numbers=False) cached

Defaults to not checking for test numbers

Source code in src/dve/metadata_parser/domain_types.py
166
167
168
169
170
171
172
173
@lru_cache()
@validate_arguments
def permissive_nhs_number(warn_on_test_numbers: bool = False):
    """Return an ``NHSNumber`` subclass with ``warn_on_test_numbers`` set.

    Defaults to not checking for test numbers.
    """
    # Start from the base class namespace and override only the warning flag.
    namespace = {**NHSNumber.__dict__, "warn_on_test_numbers": warn_on_test_numbers}
    bases = (NHSNumber,) + NHSNumber.__bases__
    return type("NHSNumber", bases, namespace)

postcode(strip_whitespace=True, to_upper=False, to_lower=False, strict=False, min_length=None, max_length=None, curtail_length=None, regex=POSTCODE_REGEX, apply_normalize=True) cached

Return a Postcode class configured with the given string constraints, regex and normalisation setting.

Source code in src/dve/metadata_parser/domain_types.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
@lru_cache()
@validate_arguments
def postcode(
    # pylint: disable=R0913
    strip_whitespace: Optional[bool] = True,
    to_upper: Optional[bool] = False,
    to_lower: Optional[bool] = False,
    strict: Optional[bool] = False,
    min_length: Optional[int] = None,
    max_length: Optional[int] = None,
    curtail_length: Optional[int] = None,
    regex: Optional[str] = POSTCODE_REGEX,  # type: ignore
    apply_normalize: Optional[bool] = True,
) -> type[Postcode]:
    """Return a ``Postcode`` subclass configured with the given constraints.

    Each argument is stored on the returned class under the attribute of the
    same name.

    Args:
        strip_whitespace: Stored as ``strip_whitespace``.
        to_upper: Stored as ``to_upper``.
        to_lower: Stored as ``to_lower``.
        strict: Stored as ``strict``.
        min_length: Stored as ``min_length``.
        max_length: Stored as ``max_length``.
        curtail_length: Stored as ``curtail_length``.
        regex: Pattern a postcode must match. A string is compiled before it
            is stored; the default ``POSTCODE_REGEX`` is stored as is.
        apply_normalize: Whether ``Postcode.validate`` normalises the value
            before matching it.

    """
    dict_ = Postcode.__dict__.copy()
    dict_["strip_whitespace"] = strip_whitespace
    dict_["to_upper"] = to_upper
    dict_["to_lower"] = to_lower
    dict_["strict"] = strict
    dict_["min_length"] = min_length
    dict_["max_length"] = max_length
    dict_["curtail_length"] = curtail_length
    # `Postcode.validate` calls `cls.regex.match(...)`, which needs a compiled
    # pattern. The argument is typed as `str`, so compile it here; without
    # this any custom regex failed with AttributeError during validation.
    # NOTE(review): `regex=None` is stored unchanged and would still fail in
    # `Postcode.validate` -- confirm whether None should disable the check.
    dict_["regex"] = re.compile(regex) if isinstance(regex, str) else regex
    dict_["apply_normalize"] = apply_normalize

    return type("Postcode", (Postcode, *Postcode.__bases__), dict_)

reportingperiod(reporting_period_type, date_format='%Y-%m-%d')

Return a check on whether a reporting period date is a valid date, and is the start/ end of the month supplied depending on reporting period type

Source code in src/dve/metadata_parser/domain_types.py
620
621
622
623
624
625
626
627
628
629
630
631
@validate_arguments
def reportingperiod(
    reporting_period_type: Literal["start", "end"], date_format: Optional[str] = "%Y-%m-%d"
) -> type[ReportingPeriod]:
    """Return a ``ReportingPeriod`` subclass for the given period type.

    The returned class checks that a reporting period date is a valid date
    and is the start/end of its month, depending on
    ``reporting_period_type``. ``date_format`` is stored as ``DATE_FORMAT``.
    """
    # Start from the base class namespace and override the two settings.
    namespace = {
        **ReportingPeriod.__dict__,
        "REPORTING_PERIOD_TYPE": reporting_period_type,
        "DATE_FORMAT": date_format,
    }
    bases = (ReportingPeriod,) + ReportingPeriod.__bases__
    return type("ReportingPeriod", bases, namespace)