Skip to content

Automatic type resolver extraction from dataclasses #55

@Kamforka

Description

@Kamforka

I was wondering if the project could benefit from an automatic type resolver extraction feature.

I have an example implementation that I created for my own use-case, but I found it quite generic, and I believe it might make sense to add it to the core library.

The type resolver implementation looks like this:

import dataclasses
import datetime as dt
import types
import typing
from decimal import Decimal

import rule_engine

# Lookup table translating plain Python types to rule_engine data types.
# NOTE: int, float, and Decimal all map to FLOAT because rule_engine models
# every number as FLOAT; date and datetime both map to DATETIME.
# NOTE(review): types.NoneType only exists on Python 3.10+ — confirm the
# minimum supported version, or use type(None) for older interpreters.
# The `None` key (not NoneType) is used as the sentinel for "no/unknown
# member type" by the compound parsers below and maps to UNDEFINED.
PYTYPE_TO_ENGINETYPE = {
    list: rule_engine.DataType.ARRAY,
    tuple: rule_engine.DataType.ARRAY,
    dt.datetime: rule_engine.DataType.DATETIME,
    dt.date: rule_engine.DataType.DATETIME,
    int: rule_engine.DataType.FLOAT,
    float: rule_engine.DataType.FLOAT,
    Decimal: rule_engine.DataType.FLOAT,
    types.NoneType: rule_engine.DataType.NULL,
    set: rule_engine.DataType.SET,
    str: rule_engine.DataType.STRING,
    dict: rule_engine.DataType.MAPPING,
    None: rule_engine.DataType.UNDEFINED,
}


def parse_compound_array_enginetype(type_alias):
    """Resolve a parameterized sequence alias (list[T], set[T], tuple[...])
    to the corresponding rule_engine ARRAY/SET type.

    tuple member types are deliberately ignored (a tuple may be
    heterogeneous, so there is no single member type); they resolve to an
    UNDEFINED member type via the ``None`` sentinel.
    """
    container = type_alias.__origin__
    member = None
    if container is not tuple and hasattr(type_alias, "__args__"):
        member = type_alias.__args__[0]

    # Simple member types come straight from the lookup table; nested
    # generics (e.g. list[list[int]]) recurse through the dispatcher.
    member_enginetype = (
        PYTYPE_TO_ENGINETYPE[member]
        if member in PYTYPE_TO_ENGINETYPE
        else parse_compound_enginetype(member)
    )
    return PYTYPE_TO_ENGINETYPE[container](member_enginetype)


def parse_compound_mapping_enginetype(type_alias):
    """Resolve a parameterized mapping alias (dict[K, V]) to the
    corresponding rule_engine MAPPING type.

    An unparameterized alias (no ``__args__``) yields UNDEFINED for both
    the key and value types via the ``None`` sentinel.
    """

    def _resolve(pytype):
        # Simple types come straight from the table; nested generics recurse.
        if pytype in PYTYPE_TO_ENGINETYPE:
            return PYTYPE_TO_ENGINETYPE[pytype]
        return parse_compound_enginetype(pytype)

    args = getattr(type_alias, "__args__", None)
    if args is not None:
        key_pytype, value_pytype = args[0], args[1]
    else:
        key_pytype = value_pytype = None

    mapping_enginetype = PYTYPE_TO_ENGINETYPE[type_alias.__origin__]
    return mapping_enginetype(_resolve(key_pytype), _resolve(value_pytype))


def parse_compound_enginetype(type_alias):
    """Dispatch a (possibly nested) generic type alias to the matching
    rule_engine data type.

    Sequence aliases go to :func:`parse_compound_array_enginetype`, mapping
    aliases to :func:`parse_compound_mapping_enginetype`, and ``Literal[...]``
    aliases resolve to the type of their first member value. Anything
    unrecognized (e.g. unions like ``str | int``) falls back to UNDEFINED.
    """
    origin = getattr(type_alias, "__origin__", None)
    if origin in (list, tuple, set):
        return parse_compound_array_enginetype(type_alias)
    if origin is dict:
        return parse_compound_mapping_enginetype(type_alias)
    # Use the public typing.get_origin() instead of isinstance() against the
    # private typing._LiteralGenericAlias class, which is an undocumented
    # implementation detail and may change between CPython releases.
    if typing.get_origin(type_alias) is typing.Literal:
        # Literal members are assumed homogeneous: only the first member's
        # type is inspected, matching the original behavior.
        return PYTYPE_TO_ENGINETYPE[type(type_alias.__args__[0])]

    return rule_engine.DataType.UNDEFINED


def type_resolver_from_dataclass(cls: type) -> dict:
    """Build a rule_engine type resolver from a dataclass's field annotations.

    :param cls: the dataclass type to inspect.
    :return: a dict mapping each field name to a rule_engine data type.
    :raises TypeError: if *cls* is not a dataclass.
    """
    if not dataclasses.is_dataclass(cls):
        # A bare `raise Exception` gives callers nothing to go on; TypeError
        # with a message is still caught by any existing `except Exception`.
        raise TypeError(f"{cls!r} is not a dataclass")

    # NOTE(review): cls.__annotations__ only lists fields declared directly
    # on *cls*; fields inherited from dataclass bases are skipped — confirm
    # that is acceptable, otherwise iterate dataclasses.fields(cls).
    type_resolver = {}
    for fieldname, fieldtype in cls.__annotations__.items():
        if fieldtype in PYTYPE_TO_ENGINETYPE:
            type_resolver[fieldname] = PYTYPE_TO_ENGINETYPE[fieldtype]
        else:
            type_resolver[fieldname] = parse_compound_enginetype(fieldtype)
    return type_resolver

And to test it one can do:

import dataclasses
import datetime as dt
import typing
from decimal import Decimal  # required: Model.customers is typing.List[Decimal]

# Literal aliases used by Model below; the resolver maps a Literal to the
# engine type of its first member value.
ChoiceText = typing.Literal["one", "two", "three"]
Order = typing.Literal[1, 2, 3]

@dataclasses.dataclass
class Model:
    """Example dataclass covering each annotation shape the resolver handles:
    scalars, parameterized containers, bare generics, unions, and Literals.
    """

    id: int                          # scalar -> FLOAT
    title: str                       # scalar -> STRING
    tags: list[str]                  # builtin generic -> ARRAY of STRING
    shares: typing.Dict[str, typing.List]  # nested generic -> MAPPING of STRING to ARRAY
    index: typing.Dict               # bare generic -> MAPPING with UNDEFINED key/value
    uniques: set[float]              # -> SET of FLOAT
    created: dt.datetime             # -> DATETIME
    undefined: str | int | float     # union -> UNDEFINED (requires Python 3.10+)
    customers: typing.List[Decimal]  # -> ARRAY of FLOAT
    singles: tuple[int]              # tuple args ignored -> ARRAY of UNDEFINED
    pairs: tuple[str, int]           # tuple args ignored -> ARRAY of UNDEFINED
    choice: ChoiceText               # Literal of str -> STRING
    orders: list[Order]              # ARRAY of Literal of int -> ARRAY of FLOAT

# Build the field-name -> engine-type mapping shown in the output below.
type_resolver = type_resolver_from_dataclass(Model)

It should produce a type resolver like the below:

{
 'choice': <_DataTypeDef name=STRING python_type=str >,
 'created': <_DataTypeDef name=DATETIME python_type=datetime >,
 'customers': <_ArrayDataTypeDef name=ARRAY python_type=tuple value_type=FLOAT >,
 'id': <_DataTypeDef name=FLOAT python_type=Decimal >,
 'index': <_MappingDataTypeDef name=MAPPING python_type=dict key_type=UNDEFINED value_type=UNDEFINED >,
 'orders': <_ArrayDataTypeDef name=ARRAY python_type=tuple value_type=FLOAT >,
 'pairs': <_ArrayDataTypeDef name=ARRAY python_type=tuple value_type=UNDEFINED >,
 'shares': <_MappingDataTypeDef name=MAPPING python_type=dict key_type=STRING value_type=ARRAY >,
 'singles': <_ArrayDataTypeDef name=ARRAY python_type=tuple value_type=UNDEFINED >,
 'tags': <_ArrayDataTypeDef name=ARRAY python_type=tuple value_type=STRING >,
 'title': <_DataTypeDef name=STRING python_type=str >,
 'undefined': <_DataTypeDef name=UNDEFINED python_type=UNDEFINED >,
 'uniques': <_SetDataTypeDef name=SET python_type=set value_type=FLOAT >
}

Please tell me if you think it makes sense to add it to the lib and I can work it out more.

Metadata

Metadata

Assignees

Labels

feature request — Requests for new functionality

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions