Python dataclass from a nested dict

mbatchkarov :

The standard library in 3.7 can recursively convert a dataclass into a dict (example from the docs):

from dataclasses import dataclass, asdict
from typing import List

@dataclass
class Point:
     x: int
     y: int

@dataclass
class C:
     mylist: List[Point]

p = Point(10, 20)
assert asdict(p) == {'x': 10, 'y': 20}

c = C([Point(0, 0), Point(10, 4)])
tmp = {'mylist': [{'x': 0, 'y': 0}, {'x': 10, 'y': 4}]}
assert asdict(c) == tmp

I am looking for a way to turn a dict back into a dataclass when there is nesting. Something like C(**tmp) only works if the fields of the data class are simple types and not themselves dataclasses. I am familiar with jsonpickle, which however comes with a prominent security warning.

meowgoesthedog :

Below is the CPython implementation of asdict – or specifically, the internal recursive helper function _asdict_inner that it uses:

# Source: https://github.com/python/cpython/blob/master/Lib/dataclasses.py

def _asdict_inner(obj, dict_factory):
    if _is_dataclass_instance(obj):
        result = []
        for f in fields(obj):
            value = _asdict_inner(getattr(obj, f.name), dict_factory)
            result.append((f.name, value))
        return dict_factory(result)
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # [large block of author comments]
        return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # [ditto]
        return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
    elif isinstance(obj, dict):
        return type(obj)((_asdict_inner(k, dict_factory),
                          _asdict_inner(v, dict_factory))
                         for k, v in obj.items())
    else:
        return copy.deepcopy(obj)

asdict simply calls the above with some assertions, and dict_factory=dict by default.

How can this be adapted to create an output dictionary with the required type-tagging, as mentioned in the comments?


1. Adding type information

My attempt involved creating a custom return wrapper inheriting from dict:

class TypeDict(dict):
    def __init__(self, t, *args, **kwargs):
        super(TypeDict, self).__init__(*args, **kwargs)

        if not isinstance(t, type):
            raise TypeError("t must be a type")

        self._type = t

    @property
    def type(self):
        return self._type

Looking at the original code, only the first clause needs to be modified to use this wrapper, as the other clauses only handle containers of dataclass-es:

# only use dict for now; easy to add back later
def _todict_inner(obj):
    if is_dataclass_instance(obj):
        result = []
        for f in fields(obj):
            value = _todict_inner(getattr(obj, f.name))
            result.append((f.name, value))
        return TypeDict(type(obj), result)

    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        return type(obj)(*[_todict_inner(v) for v in obj])
    elif isinstance(obj, (list, tuple)):
        return type(obj)(_todict_inner(v) for v in obj)
    elif isinstance(obj, dict):
        return type(obj)((_todict_inner(k), _todict_inner(v))
                         for k, v in obj.items())
    else:
        return copy.deepcopy(obj)

Imports:

from dataclasses import dataclass, fields, is_dataclass

# thanks to Patrick Haugh
from typing import *

# deepcopy 
import copy

Functions used:

# copy of the internal function _is_dataclass_instance
def is_dataclass_instance(obj):
    return is_dataclass(obj) and not is_dataclass(obj.type)

# the adapted version of asdict
def todict(obj):
    if not is_dataclass_instance(obj):
         raise TypeError("todict() should be called on dataclass instances")
    return _todict_inner(obj)

Tests with the example dataclasses:

c = C([Point(0, 0), Point(10, 4)])

print(c)
cd = todict(c)

print(cd)
# {'mylist': [{'x': 0, 'y': 0}, {'x': 10, 'y': 4}]}

print(cd.type)
# <class '__main__.C'>

Results are as expected.


2. Converting back to a dataclass

The recursive routine used by asdict can be re-used for the reverse process, with some relatively minor changes:

def _fromdict_inner(obj):
    # reconstruct the dataclass using the type tag
    if is_dataclass_dict(obj):
        result = {}
        for name, data in obj.items():
            result[name] = _fromdict_inner(data)
        return obj.type(**result)

    # exactly the same as before (without the tuple clause)
    elif isinstance(obj, (list, tuple)):
        return type(obj)(_fromdict_inner(v) for v in obj)
    elif isinstance(obj, dict):
        return type(obj)((_fromdict_inner(k), _fromdict_inner(v))
                         for k, v in obj.items())
    else:
        return copy.deepcopy(obj)

Functions used:

def is_dataclass_dict(obj):
    return isinstance(obj, TypeDict)

def fromdict(obj):
    if not is_dataclass_dict(obj):
        raise TypeError("fromdict() should be called on TypeDict instances")
    return _fromdict_inner(obj)

Test:

c = C([Point(0, 0), Point(10, 4)])
cd = todict(c)
cf = fromdict(cd)

print(c)
# C(mylist=[Point(x=0, y=0), Point(x=10, y=4)])

print(cf)
# C(mylist=[Point(x=0, y=0), Point(x=10, y=4)])

Again as expected.

Collected from the Internet

Please contact [email protected] to delete if infringement.

edited at
0

Comments

0 comments
Login to comment

Related

Update dataclass fields from a dict in python

Creating nested dataclass objects in Python

dict from dataclass returns empty dictionary

create nested dict from nested list python

Initialize Python dataclass from dictionary

How to unpack nested JSON into Python Dataclass

Recursively changing values of a nested dataclass in python

Change nested json estructure from python dict

Python: cannot get a value from nested dict

Remove keys from a nested dict (Python keys)

Python Create dataframe from nested dict with lists

Python Creating new dict from specific keys in other dict (nested)

What is the proper way in Python to define a dataclass that has both an auto generated __init__ and an additional init2 from a dict of values

Python dataclass call parent field from constructor

Is it possible to prevent reading from a frozen python dataclass?

Dict from nested dataclasses

Parse nested Python dict

nested dict python keys

Threading in python with nested dict

Python : Generate nested dict

Python: Dataclass that inherits from base Dataclass, how do I upgrade a value from base to the new class?

How to check if data exists from multiple nested dict using Python

Create file-tree from nested dict | Python 2.7

Distinguishing Nested Dict Assignment from List Value Access in a Python Dictionary

How to print more than one value from nested dict in Python

Nested DICT in Python3 from 2 other DICTs

Remove specified vertices from a graph nested dict python

Error: unhashable type: 'dict' with @dataclass

Pandas dataframe to dict using dataclass