Module liquer.state_types
State types represent the additional properties of data types that can be used as a state: - state type must be representable as a (short) string identifier - state must be serializable (and deserializable) as bytes - mime type must be known for a serialized form State types are registered in a state type registry, where they can be looked up either by a qualified type name or a state type identifier. (Therefore state type identifiers should be carefully chosen not to clash with qualified names. It is encouraged to only use lower case characters as state type identifiers.)
Intended use: - state can be served via a web service, - state can be serialized into a file, database or key/value store.
Deserialization requires (besides the byte representation of the data) the identifier of the state type as well, which identifies a registered state type and thus the deserialization method. Though a state type provides a preferred (default) format, multiple formats may be used for serialization (and deserialization) if needed. The serialization format is selected by the file extension passed to the serialization (as_bytes) or deserialization (from_bytes) method. Since the format determines the mime type, as_bytes returns, besides the serialized data, the actual mime type relevant for the serialization.
Note that for the successful serialization/deserialization strategy (e.g. for caching), the following approaches can be used: - use the default extension/format/mimetype - i.e. do not specify the extension or set extension=None in as_bytes/from_bytes - specify fixed extension
Serialization/deserialization may also be left unspecified. In that case, the capability of a state to be cached or served is limited.
Expand source code
"""State types represent the additional properties of data types that can be used as a state:
- state type must be representable as a (short) string identifier
- state must be serializable (and deserializable) as bytes
- mime type must be known for a serialized form
State types are registered in a state type registry, where they can be looked up either by
a qualified type name or a state type identifier. (Therefore state type identifiers should be carefully chosen
not to clash with qualified names. It is encouraged to only use lower case characters as state type identifiers.)
Intended use:
- state can be served via a web service,
- state can be serialized into a file, database or key/value store.
Deserialization requires (besides the byte representation of the data) as well identifier of the state type,
which identifies a registered state type and thus the deserialization method.
Though a state type provides a preferred (default) format,
multiple formats may be used for serialization (and deserialization) if needed.
The serialization format is selected by file extension passed to serialization (as_bytes) or deserialization (from_bytes) method.
Since the format determines the mime type, as_bytes returns, besides the serialized data, the actual mime type relevant for the serialization.
Note that for the successful serialization/deserialization strategy (e.g. for caching), the following approaches
can be used:
- use the default extension/format/mimetype - i.e. do not specify the extension or set extension=None in as_bytes/from_bytes
- specify fixed extension
Serialization/deserialization may also be left unspecified. In that case, the capability
of a state to be cached or served is limited.
"""
from io import BytesIO, StringIO
import json
from copy import deepcopy
import base64
import pickle
from liquer.constants import mimetype_from_extension, MIMETYPES
def get_type_qualname(cls):
    """Return a string uniquely identifying the supplied class.

    A string argument is returned unchanged. For a class defined in the
    ``__main__`` module only the qualified name is used; for any other class
    the module name is prepended, separated by a dot.
    """
    if isinstance(cls, str):
        return cls
    module_name = cls.__module__
    qualified_name = cls.__qualname__
    return qualified_name if module_name == "__main__" else f"{module_name}.{qualified_name}"
class StateTypesRegistry(object):
    """Registry that takes care of registering and looking up state types.

    It is typically accessed as a singleton via the state_types_registry() function.
    The default_state_type attribute is returned by get() when a type is not recognized.
    """

    def __init__(self):
        self.state_types_dictionary = {}
        # Built-in registrations; int, float and NoneType each get their own JsonStateType instance.
        builtin_state_types = (
            (bytes, BytesStateType()),
            (str, TextStateType()),
            (dict, DictStateType()),
            (type(None), JsonStateType()),
            (int, JsonStateType()),
            (float, JsonStateType()),
        )
        for cls, state_type in builtin_state_types:
            self.register(cls, state_type)
        self.default_state_type = PickleStateType()

    def register(self, type_qualname, state_type):
        """Register a state type for a qualified type name (or a class).

        The state type is stored under the qualified name and also under its own identifier.
        Returns the registry itself, so calls can be chained.
        """
        key = get_type_qualname(type_qualname)
        self.state_types_dictionary[key] = state_type
        self.state_types_dictionary[state_type.identifier()] = state_type
        return self

    def get(self, type_qualname):
        """Get the state type object for a qualified type name (or a class).

        If the name is None or is not found in the registry, default_state_type is returned.
        """
        if type_qualname is None:
            return self.default_state_type
        key = get_type_qualname(type_qualname)
        return self.state_types_dictionary.get(key, self.default_state_type)

    def from_type_identifier(self, type_identifier):
        """Get the first registered state type whose identifier() equals type_identifier.

        Returns None when no registered state type matches.
        """
        return next(
            (
                state_type
                for state_type in self.state_types_dictionary.values()
                if state_type.identifier() == type_identifier
            ),
            None,
        )
_state_types_registry = None


def state_types_registry():
    """Return the global state types registry (singleton), creating it on first use."""
    global _state_types_registry
    registry = _state_types_registry
    if registry is None:
        registry = StateTypesRegistry()
        _state_types_registry = registry
    return registry
def data_characteristics(data):
    """Convenience function to return data characteristics for supplied data.

    Data characteristics are a dictionary containing at least a "description"
    element with a string description of the data and a "type_identifier".
    The type identifier duplicates the one found in metadata, but makes
    the data characteristics self-contained.

    Missing "description" and "type_identifier" entries are filled in with an empty
    string and the identifier of the state type respectively; values supplied
    by the state type are kept.

    Raises Exception if the state type does not return a dictionary.
    """
    st = state_types_registry().get(get_type_qualname(type(data)))
    ch = st.data_characteristics(data)
    if not isinstance(ch, dict):
        raise Exception(
            f"Data characteristics for {st.identifier()} must be a dictionary"
        )
    ch.setdefault("description", "")
    # The original looked up the misspelled key "type_ident", which made any
    # "type_identifier" provided by the state type get overwritten.
    ch.setdefault("type_identifier", st.identifier())
    return ch
def type_identifier_of(data):
    """Convenience function returning the state type identifier for the supplied data."""
    registry = state_types_registry()
    state_type = registry.get(get_type_qualname(type(data)))
    return state_type.identifier()
def state_type_from_type_identifier(type_identifier):
    """Convenience function returning the state type for the supplied type identifier.

    Delegates to from_type_identifier of the global state types registry.
    """
    registry = state_types_registry()
    return registry.from_type_identifier(type_identifier)
def register_state_type(type_qualname, state_type):
    """Register a new state type for a qualified type name in the global registry.

    type_qualname can be a string (module.ClassName) or a class/type object (not a data instance).
    """
    qualified_name = get_type_qualname(type_qualname)
    registry = state_types_registry()
    registry.register(qualified_name, state_type)
def encode_state_data(data, extension=None):
    """Helper function to encode state data.

    The state type is looked up in the global registry by the type of data.
    Extension decides which data format is used for encoding; it is passed
    unchanged (including None) to as_bytes of the state type.
    Returns a tuple with the binary representation of the data, the mime type
    and the state type identifier.
    """
    state_type = state_types_registry().get(get_type_qualname(type(data)))
    serialized, mimetype = state_type.as_bytes(data, extension=extension)
    return serialized, mimetype, state_type.identifier()
def decode_state_data(b, type_identifier, extension=None):
    """Helper function to decode state data.

    Requires the binary representation of the state data and the state type identifier.
    Extension decides which data format is used for decoding; it is passed
    unchanged (including None) to from_bytes of the state type.
    Returns the value produced by from_bytes of the looked-up state type.
    """
    state_type = state_types_registry().get(type_identifier)
    decoded = state_type.from_bytes(b, extension=extension)
    return decoded
def copy_state_data(data):
    """Helper function returning a copy of state data made by its state type's copy method."""
    state_type = state_types_registry().get(get_type_qualname(type(data)))
    return state_type.copy(data)
class StateType(object):
    """Abstract state type basis.

    Subclasses are expected to override identifier, default_extension,
    as_bytes and from_bytes; these raise NotImplementedError here.
    """

    def identifier(self):
        """String identifier of the state type"""
        raise NotImplementedError(
            "State type class must define a state type identifier"
        )

    def default_extension(self):
        """Default file extension; determines the default data format

        Must be consistent with the default_mimetype.
        """
        raise NotImplementedError("State type class must define the default extension")

    def default_filename(self):
        """Default file name ("data." followed by the default extension)"""
        return "data." + self.default_extension()

    def default_mimetype(self):
        """Default mime type - must be consistent with the default_extension

        Looked up in MIMETYPES by the default extension; "text/plain" if not found.
        """
        return MIMETYPES.get(self.default_extension(), "text/plain")

    def is_type_of(self, data):
        """Returns true if data is of this state type (always False in the base class)"""
        return False

    def as_bytes(self, data, extension=None):
        """Serialize data as bytes.

        Data must be of this state type. Extension determines the serialization format.
        If none, default extension is used.
        Implementations in this module return a tuple (bytes, mime type).
        """
        raise NotImplementedError(
            "State type class must define serialization to bytes (as_bytes)"
        )

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize data from bytes.

        Data must be a binary representation of this state type.
        Extension determines the serialization format. If none, default extension is used.
        """
        raise NotImplementedError(
            "State type class must define deserialization from bytes (from_bytes)"
        )

    def copy(self, data):
        """Create a deep copy of data by serializing and deserializing it.

        Data must be of this state type.
        """
        # as_bytes returns a (bytes, mime type) tuple; only the bytes belong in from_bytes.
        # The original sliced the tuple with [:] and handed the whole tuple over.
        serialized = self.as_bytes(data)[0]
        return self.from_bytes(serialized)

    def data_characteristics(self, data):
        """Create state-type-dependent data characteristics for supplied data.

        Returned data characteristics must be a dictionary containing at least a "description"
        element with a string description of the data and a "type_identifier".
        Type identifier is duplicate of the type identifier found in metadata, but makes
        the data characteristics self-contained.
        This method should not be called directly, but via the data_characteristics function,
        which might fix and validate some issues.
        """
        return dict(description="")
class DictStateType(StateType):
    """State type for dictionaries.

    Supports two formats: "json" (plain JSON, the default) and "djson", a JSON
    object in which non-primitive values are stored as
    [type identifier, extension, base64 data] lists.
    """

    def identifier(self):
        return "dictionary"

    def default_extension(self):
        return "json"

    def is_type_of(self, data):
        return isinstance(data, dict)

    def encode_element(self, data_element):
        """Encode a single dictionary value as a JSON text fragment for the djson format.

        int, float, str and None are dumped as plain JSON. Any other value is serialized
        by its registered state type (default extension) and stored base64-encoded.
        """
        if data_element is None or isinstance(data_element, (int, float, str)):
            return json.dumps(data_element)
        reg = state_types_registry()
        t = reg.get(get_type_qualname(type(data_element)))
        extension = t.default_extension()
        b, _mime = t.as_bytes(data_element, extension=extension)
        txt = base64.b64encode(b).decode("utf-8")
        return '[%-10s, %-4s, "%s"]' % (
            f'"{t.identifier()}"',
            f'"{extension}"',
            txt,
        )

    def decode_element(self, data_element_encoded):
        """Decode a single value read from the djson format.

        A list is interpreted as [type identifier, extension, base64 data] and decoded
        with decode_state_data; any other value is returned unchanged.
        """
        if isinstance(data_element_encoded, list):
            type_identifier, extension, b64 = data_element_encoded
            b = base64.b64decode(b64)
            return decode_state_data(b, type_identifier, extension)
        return data_element_encoded

    def as_bytes(self, data, extension=None):
        """Serialize the dictionary; returns a tuple (bytes, mime type).

        Raises Exception for extensions other than "json" and "djson".
        """
        if extension is None:
            extension = self.default_extension()
        if extension == "djson":
            entries = []
            for key, value in data.items():
                assert isinstance(key, str)
                # json.dumps escapes quotes, backslashes and control characters in the key,
                # which plain string interpolation did not (producing invalid JSON).
                key_text = json.dumps(str(key), ensure_ascii=False) + ":"
                entries.append("%-20s%s" % (key_text, self.encode_element(value)))
            text = "{\n" + ",\n".join(entries) + "\n}"
            return text.encode("utf-8"), mimetype_from_extension("djson")
        elif extension == "json":
            return json.dumps(data).encode("utf-8"), mimetype_from_extension("json")
        raise Exception(f"Unsupported file extension: {extension}")

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize a dictionary from bytes.

        Raises Exception for extensions other than "json" and "djson"
        (consistent with as_bytes; previously None was returned silently).
        """
        if extension is None:
            extension = self.default_extension()
        if extension == "djson":
            return {
                key: self.decode_element(value)
                for key, value in json.loads(b.decode("utf-8")).items()
            }
        elif extension == "json":
            return json.loads(b.decode("utf-8"))
        raise Exception(f"Unsupported file extension: {extension}")

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        return dict(
            description=f"Dictionary with {len(data)} items.",
            keys=sorted(str(k) for k in data.keys()),
        )
class JsonStateType(StateType):
    """JSON serializable data."""

    def identifier(self):
        return "generic"

    def default_extension(self):
        return "json"

    def is_type_of(self, data):
        return True

    def as_bytes(self, data, extension=None):
        """Serialize data as JSON, or as HTML for the "html"/"htm" extensions.

        Returns a tuple (bytes, mime type). In HTML mode a string is emitted as is,
        anything else is dumped as JSON inside a <pre> element.
        Raises Exception for any other extension.
        """
        if extension is None:
            extension = self.default_extension()
        if extension == "json":
            return json.dumps(data).encode("utf-8"), self.default_mimetype()
        elif extension in ["html", "htm"]:
            if isinstance(data, str):
                return data.encode("utf-8"), mimetype_from_extension("html")
            else:
                return (
                    f"<pre>{json.dumps(data)}</pre>".encode("utf-8"),
                    mimetype_from_extension("html"),
                )
        raise Exception(f"Unsupported file extension: {extension}")

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize data from UTF-8 encoded JSON; only the "json" extension is accepted."""
        if extension is None:
            extension = self.default_extension()
        assert extension == "json"
        return json.loads(b.decode("utf-8"))

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        """Describe the data by its Python type."""
        if isinstance(data, dict):
            return dict(
                description=f"Dictionary with {len(data)} items.",
                keys=sorted(str(k) for k in data.keys()),
            )
        # The original tested for dict a second time here, so this branch was unreachable.
        elif isinstance(data, list):
            return dict(description=f"Array with {len(data)} items.")
        elif isinstance(data, str):
            return dict(description=f"Text {len(data)} characters long.")
        # bool must be tested before int, since bool is a subclass of int.
        elif isinstance(data, bool):
            return dict(description=f"Bool {data}")
        elif isinstance(data, int):
            return dict(description=f"Integer {data}")
        elif isinstance(data, float):
            return dict(description=f"Float {data}")
        elif data is None:
            return dict(description="None")
        else:
            return dict(description=f"Data of type {type(data)}")
class PickleStateType(StateType):
    """Pickle-serializable data."""

    def identifier(self):
        return "pickle"

    def default_extension(self):
        return "pickle"

    def is_type_of(self, data):
        return True

    def as_bytes(self, data, extension=None):
        """Serialize data as pickle (default), JSON or HTML, depending on the extension.

        Returns a tuple (bytes, mime type). Raises Exception for unsupported extensions.
        """
        if extension is None:
            extension = self.default_extension()
        if extension in ["pkl", "pickle"]:
            return pickle.dumps(data), mimetype_from_extension("pickle")
        elif extension == "json":
            return json.dumps(data).encode("utf-8"), mimetype_from_extension("json")
        elif extension in ["html", "htm"]:
            if isinstance(data, str):
                return data.encode("utf-8"), mimetype_from_extension("html")
            else:
                return (
                    f"<pre>{json.dumps(data)}</pre>".encode("utf-8"),
                    mimetype_from_extension("html"),
                )
        raise Exception(f"Unsupported file extension: {extension}")

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize data from pickle (default) or JSON bytes.

        Raises Exception for unsupported extensions.
        """
        if extension is None:
            extension = self.default_extension()
        if extension in ["pkl", "pickle"]:
            # SECURITY NOTE(review): pickle.loads can execute arbitrary code;
            # only bytes from a trusted source should reach this point.
            return pickle.loads(b)
        elif extension == "json":
            return json.loads(b.decode("utf-8"))
        raise Exception(f"Unsupported file extension: {extension}")

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        """Describe the data by its Python type."""
        if isinstance(data, dict):
            return dict(
                description=f"Dictionary with {len(data)} items.",
                keys=sorted(str(k) for k in data.keys()),
            )
        # The original tested for dict a second time here, so this branch was unreachable.
        elif isinstance(data, list):
            return dict(description=f"Array with {len(data)} items.")
        elif isinstance(data, str):
            return dict(description=f"Text {len(data)} characters long.")
        # bool must be tested before int, since bool is a subclass of int.
        elif isinstance(data, bool):
            return dict(description=f"Bool {data}")
        elif isinstance(data, int):
            return dict(description=f"Integer {data}")
        elif isinstance(data, float):
            return dict(description=f"Float {data}")
        elif data is None:
            return dict(description="None")
        else:
            return dict(description=f"Data of type {type(data)}")
class BytesStateType(StateType):
    """Binary data"""

    def identifier(self):
        return "bytes"

    def default_extension(self):
        return "b"

    def default_mimetype(self):
        return "application/octet-stream"

    def is_type_of(self, data):
        return isinstance(data, bytes)

    def as_bytes(self, data, extension=None):
        """Return the data unchanged together with the mime type.

        Without an extension the default mime type of this state type is reported
        (as TextStateType does) instead of looking up None as an extension.
        """
        if extension is None:
            return data, self.default_mimetype()
        return data, mimetype_from_extension(extension)

    def from_bytes(self, b: bytes, extension=None):
        """Return the bytes unchanged; the extension is ignored."""
        return b

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        return dict(description=f"{len(data)} bytes")
class TextStateType(StateType):
    """Text data (string), serialized as UTF-8"""

    def identifier(self):
        return "text"

    def default_extension(self):
        return "txt"

    def default_mimetype(self):
        return "text/plain"

    def is_type_of(self, data):
        return isinstance(data, str)

    def as_bytes(self, data, extension=None):
        """Encode the text as UTF-8; returns a tuple (bytes, mime type).

        Without an extension the default mime type is used; otherwise the mime type
        is looked up from the extension with "text/plain" as the fallback.
        """
        # The extension only selects the mime type, so the unused local holding
        # the default extension was dropped.
        if extension is None:
            mime = self.default_mimetype()
        else:
            mime = mimetype_from_extension(extension, "text/plain")
        return data.encode("utf-8"), mime

    def from_bytes(self, b: bytes, extension=None):
        """Decode UTF-8 bytes to text; the extension is ignored."""
        return b.decode("utf-8")

    def copy(self, data):
        return data[:]

    def data_characteristics(self, data):
        return dict(description=f"Text {len(data)} characters long.")
Functions
def copy_state_data(data)
-
Helper function to get a deep copy of a state data.
Expand source code
def copy_state_data(data):
    """Helper function returning a copy of state data made by its state type's copy method."""
    state_type = state_types_registry().get(get_type_qualname(type(data)))
    return state_type.copy(data)
def data_characteristics(data)
-
Convenience function to return data characteristics for supplied data. Data characteristics must be a dictionary containing at least a "description" element with a string description of the data and a "type_identifier". Type identifier is duplicate of the type identifier found in metadata, but makes the data characteristics self-contained.
Expand source code
def data_characteristics(data):
    """Convenience function to return data characteristics for supplied data.

    Data characteristics are a dictionary containing at least a "description"
    element with a string description of the data and a "type_identifier".
    The type identifier duplicates the one found in metadata, but makes
    the data characteristics self-contained.

    Missing "description" and "type_identifier" entries are filled in with an empty
    string and the identifier of the state type respectively; values supplied
    by the state type are kept.

    Raises Exception if the state type does not return a dictionary.
    """
    st = state_types_registry().get(get_type_qualname(type(data)))
    ch = st.data_characteristics(data)
    if not isinstance(ch, dict):
        raise Exception(
            f"Data characteristics for {st.identifier()} must be a dictionary"
        )
    ch.setdefault("description", "")
    # The original looked up the misspelled key "type_ident", which made any
    # "type_identifier" provided by the state type get overwritten.
    ch.setdefault("type_identifier", st.identifier())
    return ch
def decode_state_data(b, type_identifier, extension=None)
-
Helper function to decode state data. Requires binary representation of the state data and state type identifier. Extension decides which data format is used for decoding. If not supplied, a default extension defined for the state type is used. Returns the decoded data, as produced by the from_bytes method of the state type.
Expand source code
def decode_state_data(b, type_identifier, extension=None):
    """Helper function to decode state data.

    Requires the binary representation of the state data and the state type identifier.
    Extension decides which data format is used for decoding; it is passed
    unchanged (including None) to from_bytes of the state type.
    Returns the value produced by from_bytes of the looked-up state type.
    """
    state_type = state_types_registry().get(type_identifier)
    decoded = state_type.from_bytes(b, extension=extension)
    return decoded
def encode_state_data(data, extension=None)
-
Helper function to encode state data. Extension decides which data format is used for encoding. If not supplied, a default extension defined for the state type is used. Returns a tuple with binary representation of the data, mime type and state type identifier.
Expand source code
def encode_state_data(data, extension=None):
    """Helper function to encode state data.

    The state type is looked up in the global registry by the type of data.
    Extension decides which data format is used for encoding; it is passed
    unchanged (including None) to as_bytes of the state type.
    Returns a tuple with the binary representation of the data, the mime type
    and the state type identifier.
    """
    state_type = state_types_registry().get(get_type_qualname(type(data)))
    serialized, mimetype = state_type.as_bytes(data, extension=extension)
    return serialized, mimetype, state_type.identifier()
def get_type_qualname(cls)
-
Get a string uniquely identifying the supplied class
Expand source code
def get_type_qualname(cls):
    """Return a string uniquely identifying the supplied class.

    A string argument is returned unchanged. For a class defined in the
    ``__main__`` module only the qualified name is used; for any other class
    the module name is prepended, separated by a dot.
    """
    if isinstance(cls, str):
        return cls
    module_name = cls.__module__
    qualified_name = cls.__qualname__
    return qualified_name if module_name == "__main__" else f"{module_name}.{qualified_name}"
def register_state_type(type_qualname, state_type)
-
Function to register new state type for a qualified type name type_qualname can be a string (module.ClassName) or a class/type object (not a data instance)
Expand source code
def register_state_type(type_qualname, state_type):
    """Register a new state type for a qualified type name in the global registry.

    type_qualname can be a string (module.ClassName) or a class/type object (not a data instance).
    """
    qualified_name = get_type_qualname(type_qualname)
    registry = state_types_registry()
    registry.register(qualified_name, state_type)
def state_type_from_type_identifier(type_identifier)
-
Convenience function to return a state type for supplied type identifier
Expand source code
def state_type_from_type_identifier(type_identifier):
    """Convenience function returning the state type for the supplied type identifier.

    Delegates to from_type_identifier of the global state types registry.
    """
    registry = state_types_registry()
    return registry.from_type_identifier(type_identifier)
def state_types_registry()
-
Returns the global state types registry (singleton)
Expand source code
def state_types_registry():
    """Return the global state types registry (singleton), creating it on first use."""
    global _state_types_registry
    registry = _state_types_registry
    if registry is None:
        registry = StateTypesRegistry()
        _state_types_registry = registry
    return registry
def type_identifier_of(data)
-
Convenience function to return a state type identifier for supplied data
Expand source code
def type_identifier_of(data):
    """Convenience function returning the state type identifier for the supplied data."""
    registry = state_types_registry()
    state_type = registry.get(get_type_qualname(type(data)))
    return state_type.identifier()
Classes
class BytesStateType
-
Binary data
Expand source code
class BytesStateType(StateType):
    """Binary data"""

    def identifier(self):
        return "bytes"

    def default_extension(self):
        return "b"

    def default_mimetype(self):
        return "application/octet-stream"

    def is_type_of(self, data):
        return isinstance(data, bytes)

    def as_bytes(self, data, extension=None):
        """Return the data unchanged together with the mime type.

        Without an extension the default mime type of this state type is reported
        (as TextStateType does) instead of looking up None as an extension.
        """
        if extension is None:
            return data, self.default_mimetype()
        return data, mimetype_from_extension(extension)

    def from_bytes(self, b: bytes, extension=None):
        """Return the bytes unchanged; the extension is ignored."""
        return b

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        return dict(description=f"{len(data)} bytes")
Ancestors
Inherited members
class DictStateType
-
JSON serializable data.
Expand source code
class DictStateType(StateType):
    """State type for dictionaries.

    Supports two formats: "json" (plain JSON, the default) and "djson", a JSON
    object in which non-primitive values are stored as
    [type identifier, extension, base64 data] lists.
    """

    def identifier(self):
        return "dictionary"

    def default_extension(self):
        return "json"

    def is_type_of(self, data):
        return isinstance(data, dict)

    def encode_element(self, data_element):
        """Encode a single dictionary value as a JSON text fragment for the djson format.

        int, float, str and None are dumped as plain JSON. Any other value is serialized
        by its registered state type (default extension) and stored base64-encoded.
        """
        if data_element is None or isinstance(data_element, (int, float, str)):
            return json.dumps(data_element)
        reg = state_types_registry()
        t = reg.get(get_type_qualname(type(data_element)))
        extension = t.default_extension()
        b, _mime = t.as_bytes(data_element, extension=extension)
        txt = base64.b64encode(b).decode("utf-8")
        return '[%-10s, %-4s, "%s"]' % (
            f'"{t.identifier()}"',
            f'"{extension}"',
            txt,
        )

    def decode_element(self, data_element_encoded):
        """Decode a single value read from the djson format.

        A list is interpreted as [type identifier, extension, base64 data] and decoded
        with decode_state_data; any other value is returned unchanged.
        """
        if isinstance(data_element_encoded, list):
            type_identifier, extension, b64 = data_element_encoded
            b = base64.b64decode(b64)
            return decode_state_data(b, type_identifier, extension)
        return data_element_encoded

    def as_bytes(self, data, extension=None):
        """Serialize the dictionary; returns a tuple (bytes, mime type).

        Raises Exception for extensions other than "json" and "djson".
        """
        if extension is None:
            extension = self.default_extension()
        if extension == "djson":
            entries = []
            for key, value in data.items():
                assert isinstance(key, str)
                # json.dumps escapes quotes, backslashes and control characters in the key,
                # which plain string interpolation did not (producing invalid JSON).
                key_text = json.dumps(str(key), ensure_ascii=False) + ":"
                entries.append("%-20s%s" % (key_text, self.encode_element(value)))
            text = "{\n" + ",\n".join(entries) + "\n}"
            return text.encode("utf-8"), mimetype_from_extension("djson")
        elif extension == "json":
            return json.dumps(data).encode("utf-8"), mimetype_from_extension("json")
        raise Exception(f"Unsupported file extension: {extension}")

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize a dictionary from bytes.

        Raises Exception for extensions other than "json" and "djson"
        (consistent with as_bytes; previously None was returned silently).
        """
        if extension is None:
            extension = self.default_extension()
        if extension == "djson":
            return {
                key: self.decode_element(value)
                for key, value in json.loads(b.decode("utf-8")).items()
            }
        elif extension == "json":
            return json.loads(b.decode("utf-8"))
        raise Exception(f"Unsupported file extension: {extension}")

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        return dict(
            description=f"Dictionary with {len(data)} items.",
            keys=sorted(str(k) for k in data.keys()),
        )
Ancestors
Methods
def decode_element(self, data_element_encoded)
-
Expand source code
def decode_element(self, data_element_encoded):
    """Decode a single value read from the djson format.

    A list is interpreted as [type identifier, extension, base64 data] and decoded
    with decode_state_data; any other value is returned unchanged.
    """
    if not isinstance(data_element_encoded, list):
        return data_element_encoded
    type_identifier, extension, encoded_payload = data_element_encoded
    payload = base64.b64decode(encoded_payload)
    return decode_state_data(payload, type_identifier, extension)
def encode_element(self, data_element)
-
Expand source code
def encode_element(self, data_element):
    """Encode a single dictionary value as a JSON text fragment for the djson format.

    int, float, str and None are dumped as plain JSON. Any other value is serialized
    by its registered state type (default extension) and stored base64-encoded in a
    [type identifier, extension, data] list.
    """
    is_primitive = data_element is None or isinstance(data_element, (int, float, str))
    if is_primitive:
        return json.dumps(data_element)
    state_type = state_types_registry().get(get_type_qualname(type(data_element)))
    extension = state_type.default_extension()
    serialized, _mimetype = state_type.as_bytes(data_element, extension=extension)
    encoded_payload = base64.b64encode(serialized).decode("utf-8")
    quoted_identifier = f'"{state_type.identifier()}"'
    quoted_extension = f'"{extension}"'
    return '[%-10s, %-4s, "%s"]' % (quoted_identifier, quoted_extension, encoded_payload)
Inherited members
class JsonStateType
-
JSON serializable data.
Expand source code
class JsonStateType(StateType):
    """JSON serializable data."""

    def identifier(self):
        return "generic"

    def default_extension(self):
        return "json"

    def is_type_of(self, data):
        return True

    def as_bytes(self, data, extension=None):
        """Serialize data as JSON, or as HTML for the "html"/"htm" extensions.

        Returns a tuple (bytes, mime type). In HTML mode a string is emitted as is,
        anything else is dumped as JSON inside a <pre> element.
        Raises Exception for any other extension.
        """
        if extension is None:
            extension = self.default_extension()
        if extension == "json":
            return json.dumps(data).encode("utf-8"), self.default_mimetype()
        elif extension in ["html", "htm"]:
            if isinstance(data, str):
                return data.encode("utf-8"), mimetype_from_extension("html")
            else:
                return (
                    f"<pre>{json.dumps(data)}</pre>".encode("utf-8"),
                    mimetype_from_extension("html"),
                )
        raise Exception(f"Unsupported file extension: {extension}")

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize data from UTF-8 encoded JSON; only the "json" extension is accepted."""
        if extension is None:
            extension = self.default_extension()
        assert extension == "json"
        return json.loads(b.decode("utf-8"))

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        """Describe the data by its Python type."""
        if isinstance(data, dict):
            return dict(
                description=f"Dictionary with {len(data)} items.",
                keys=sorted(str(k) for k in data.keys()),
            )
        # The original tested for dict a second time here, so this branch was unreachable.
        elif isinstance(data, list):
            return dict(description=f"Array with {len(data)} items.")
        elif isinstance(data, str):
            return dict(description=f"Text {len(data)} characters long.")
        # bool must be tested before int, since bool is a subclass of int.
        elif isinstance(data, bool):
            return dict(description=f"Bool {data}")
        elif isinstance(data, int):
            return dict(description=f"Integer {data}")
        elif isinstance(data, float):
            return dict(description=f"Float {data}")
        elif data is None:
            return dict(description="None")
        else:
            return dict(description=f"Data of type {type(data)}")
Ancestors
Inherited members
class PickleStateType
-
Pickle-serializable data.
Expand source code
class PickleStateType(StateType):
    """Pickle-serializable data."""

    def identifier(self):
        return "pickle"

    def default_extension(self):
        return "pickle"

    def is_type_of(self, data):
        return True

    def as_bytes(self, data, extension=None):
        """Serialize data as pickle (default), JSON or HTML, depending on the extension.

        Returns a tuple (bytes, mime type). Raises Exception for unsupported extensions.
        """
        if extension is None:
            extension = self.default_extension()
        if extension in ["pkl", "pickle"]:
            return pickle.dumps(data), mimetype_from_extension("pickle")
        elif extension == "json":
            return json.dumps(data).encode("utf-8"), mimetype_from_extension("json")
        elif extension in ["html", "htm"]:
            if isinstance(data, str):
                return data.encode("utf-8"), mimetype_from_extension("html")
            else:
                return (
                    f"<pre>{json.dumps(data)}</pre>".encode("utf-8"),
                    mimetype_from_extension("html"),
                )
        raise Exception(f"Unsupported file extension: {extension}")

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize data from pickle (default) or JSON bytes.

        Raises Exception for unsupported extensions.
        """
        if extension is None:
            extension = self.default_extension()
        if extension in ["pkl", "pickle"]:
            # SECURITY NOTE(review): pickle.loads can execute arbitrary code;
            # only bytes from a trusted source should reach this point.
            return pickle.loads(b)
        elif extension == "json":
            return json.loads(b.decode("utf-8"))
        raise Exception(f"Unsupported file extension: {extension}")

    def copy(self, data):
        return deepcopy(data)

    def data_characteristics(self, data):
        """Describe the data by its Python type."""
        if isinstance(data, dict):
            return dict(
                description=f"Dictionary with {len(data)} items.",
                keys=sorted(str(k) for k in data.keys()),
            )
        # The original tested for dict a second time here, so this branch was unreachable.
        elif isinstance(data, list):
            return dict(description=f"Array with {len(data)} items.")
        elif isinstance(data, str):
            return dict(description=f"Text {len(data)} characters long.")
        # bool must be tested before int, since bool is a subclass of int.
        elif isinstance(data, bool):
            return dict(description=f"Bool {data}")
        elif isinstance(data, int):
            return dict(description=f"Integer {data}")
        elif isinstance(data, float):
            return dict(description=f"Float {data}")
        elif data is None:
            return dict(description="None")
        else:
            return dict(description=f"Data of type {type(data)}")
Ancestors
Inherited members
class StateType
-
Abstract state type basis
Expand source code
class StateType(object):
    """Abstract state type basis.

    Subclasses are expected to override identifier, default_extension,
    as_bytes and from_bytes; these raise NotImplementedError here.
    """

    def identifier(self):
        """String identifier of the state type"""
        raise NotImplementedError(
            "State type class must define a state type identifier"
        )

    def default_extension(self):
        """Default file extension; determines the default data format

        Must be consistent with the default_mimetype.
        """
        raise NotImplementedError("State type class must define the default extension")

    def default_filename(self):
        """Default file name ("data." followed by the default extension)"""
        return "data." + self.default_extension()

    def default_mimetype(self):
        """Default mime type - must be consistent with the default_extension

        Looked up in MIMETYPES by the default extension; "text/plain" if not found.
        """
        return MIMETYPES.get(self.default_extension(), "text/plain")

    def is_type_of(self, data):
        """Returns true if data is of this state type (always False in the base class)"""
        return False

    def as_bytes(self, data, extension=None):
        """Serialize data as bytes.

        Data must be of this state type. Extension determines the serialization format.
        If none, default extension is used.
        Implementations in this module return a tuple (bytes, mime type).
        """
        raise NotImplementedError(
            "State type class must define serialization to bytes (as_bytes)"
        )

    def from_bytes(self, b: bytes, extension=None):
        """Deserialize data from bytes.

        Data must be a binary representation of this state type.
        Extension determines the serialization format. If none, default extension is used.
        """
        raise NotImplementedError(
            "State type class must define deserialization from bytes (from_bytes)"
        )

    def copy(self, data):
        """Create a deep copy of data by serializing and deserializing it.

        Data must be of this state type.
        """
        # as_bytes returns a (bytes, mime type) tuple; only the bytes belong in from_bytes.
        # The original sliced the tuple with [:] and handed the whole tuple over.
        serialized = self.as_bytes(data)[0]
        return self.from_bytes(serialized)

    def data_characteristics(self, data):
        """Create state-type-dependent data characteristics for supplied data.

        Returned data characteristics must be a dictionary containing at least a "description"
        element with a string description of the data and a "type_identifier".
        Type identifier is duplicate of the type identifier found in metadata, but makes
        the data characteristics self-contained.
        This method should not be called directly, but via the data_characteristics function,
        which might fix and validate some issues.
        """
        return dict(description="")
Subclasses
- StoredDataframeIteratorStateType
- DatafusionContextStateType
- DatafusionDataframeStateType
- liquer.ext.lq_hxl.HxlStateType
- MatplotlibFigureStateType
- OpenpyxlWorkbookStateType
- DataframeStateType
- PILImageStateType
- PolarsDataframeStateType
- PolarsLazyframeStateType
- PolarsSQLContextStateType
- PPTXPresentationStateType
- BytesStateType
- DictStateType
- JsonStateType
- PickleStateType
- TextStateType
Methods
def as_bytes(self, data, extension=None)
-
Serialize data as bytes. Data must be of this state type. Extension determines the serialization format. If none, default extension is used.
Expand source code
def as_bytes(self, data, extension=None):
    """Serialize data as bytes (abstract; always raises NotImplementedError here).

    Data must be of this state type. Extension determines the serialization format.
    If none, default extension is used.
    """
    message = "State type class must define serialization to bytes (as_bytes)"
    raise NotImplementedError(message)
def copy(self, data)
-
Create a deep copy of data. Data must be of this state type.
Expand source code
def copy(self, data):
    """Create a deep copy of data by serializing and deserializing it.

    Data must be of this state type.
    """
    # as_bytes returns a (bytes, mime type) tuple; only the bytes belong in from_bytes.
    # The original sliced the tuple with [:] and handed the whole tuple over.
    serialized = self.as_bytes(data)[0]
    return self.from_bytes(serialized)
def data_characteristics(self, data)
-
Create state-type-dependent data characteristics for supplied data. Returned data characteristics must be a dictionary containing at least a "description" element with a string description of the data and a "type_identifier". Type identifier is duplicate of the type identifier found in metadata, but makes the data characteristics self-contained.
This method should not be called directly, but via the data_characteristics function, which might fix and validate some issues.
Expand source code
def data_characteristics(self, data):
    """Build the state-type-dependent characteristics of the supplied data.

    The characteristics are expected to be a dictionary holding at least a
    "description" (a string describing the data) and a "type_identifier".
    The type identifier duplicates the one found in metadata, but makes the
    data characteristics self-contained.

    Do not call this method directly; go through the data_characteristics
    function, which might fix and validate some issues. This base version
    only supplies an empty description.
    """
    characteristics = {"description": ""}
    return characteristics
def default_extension(self)
-
Default file extension; determines the default data format. Must be consistent with the default_mimetype.
Expand source code
def default_extension(self):
    """Default file extension, which also fixes the default data format.

    It has to agree with default_mimetype. This abstract version always
    raises NotImplementedError.
    """
    message = "State type class must define the default extension"
    raise NotImplementedError(message)
def default_filename(self)
-
Default file name
Expand source code
def default_filename(self):
    """Default file name: "data." followed by the default extension."""
    extension = self.default_extension()
    return "data." + extension
def default_mimetype(self)
-
Default mime type - must be consistent with the default_extension
Expand source code
def default_mimetype(self):
    """Default mime type; it has to agree with default_extension.

    The default extension is looked up in MIMETYPES, and "text/plain" is
    returned when the extension is not listed there.
    """
    lookup = MIMETYPES.get
    return lookup(self.default_extension(), "text/plain")
def from_bytes(self, b: bytes, extension=None)
-
Deserialize data from bytes. Data must be a binary representation of this state type. Extension determines the serialization format. If none, default extension is used.
Expand source code
def from_bytes(self, b: bytes, extension=None):
    """Deserialize data from bytes.

    The bytes must be a binary representation of this state type. The
    serialization format is chosen by extension; when it is None, the
    default extension is used. This abstract version implements no format
    and always raises NotImplementedError.
    """
    message = "State type class must define deserialization from bytes (from_bytes)"
    raise NotImplementedError(message)
def identifier(self)
-
String identifier of the state type
Expand source code
def identifier(self):
    """String identifier of the state type.

    This abstract version always raises NotImplementedError.
    """
    message = "State type class must define a state type identifier"
    raise NotImplementedError(message)
def is_type_of(self, data)
-
Returns true if data is of this state type
Expand source code
def is_type_of(self, data):
    """Tell whether data is of this state type.

    This base version recognizes nothing: it returns False for any data.
    """
    recognized = False
    return recognized
class StateTypesRegistry
-
State type registry takes care of registering and lookup of state types. It is typically accessed as a singleton via state_type_registry() function.
default_state_type attribute is used if state type is not recognized.
Expand source code
class StateTypesRegistry(object):
    """State type registry takes care of registering and lookup of state types.

    It is typically accessed as a singleton via state_type_registry() function.

    The default_state_type attribute is used if a state type is not recognized.
    """

    def __init__(self):
        """Create a registry with the built-in state types registered."""
        # Maps both qualified type names and state type identifiers to
        # state type objects (see register).
        self.state_types_dictionary = {}
        self.register(bytes, BytesStateType())
        self.register(str, TextStateType())
        self.register(dict, DictStateType())
        self.register(type(None), JsonStateType())
        self.register(int, JsonStateType())
        self.register(float, JsonStateType())
        self.default_state_type = PickleStateType()

    def register(self, type_qualname, state_type):
        """Register a new state type for a qualified type name.

        The state type is stored under the name produced by
        get_type_qualname and under its own identifier. Returns the
        registry itself, so calls can be chained.
        """
        type_qualname = get_type_qualname(type_qualname)
        self.state_types_dictionary[type_qualname] = state_type
        self.state_types_dictionary[state_type.identifier()] = state_type
        return self

    def get(self, type_qualname):
        """Get state type object for a qualified type name.

        If the qualified type name is None or not recognized,
        default_state_type is returned.
        """
        if type_qualname is None:
            return self.default_state_type
        type_qualname = get_type_qualname(type_qualname)
        return self.state_types_dictionary.get(type_qualname, self.default_state_type)

    def from_type_identifier(self, type_identifier):
        """Get state type object for a state type identifier.

        If no registered state type has the given identifier,
        default_state_type is returned.
        """
        for state_type in self.state_types_dictionary.values():
            if state_type.identifier() == type_identifier:
                return state_type
        # Documented fallback for an unknown identifier; without it the
        # method would implicitly return None.
        return self.default_state_type
Methods
def from_type_identifier(self, type_identifier)
-
Get state type object for a state type identifier. If the state type identifier is not recognized, default_state_type is returned.
Expand source code
def from_type_identifier(self, type_identifier):
    """Get state type object for a state type identifier.

    If no registered state type has the given identifier,
    default_state_type is returned.
    """
    for state_type in self.state_types_dictionary.values():
        if state_type.identifier() == type_identifier:
            return state_type
    # Documented fallback for an unknown identifier; without it the method
    # would implicitly return None.
    return self.default_state_type
def get(self, type_qualname)
-
Get state type object for a qualified type name. If the qualified type name is not recognized, default_state_type is returned.
Expand source code
def get(self, type_qualname):
    """Look up the state type object registered for a qualified type name.

    default_state_type is returned when the name is None or when nothing
    is registered under it.
    """
    if type_qualname is not None:
        key = get_type_qualname(type_qualname)
        return self.state_types_dictionary.get(key, self.default_state_type)
    return self.default_state_type
def register(self, type_qualname, state_type)
-
Register a new state type for a qualified type name
Expand source code
def register(self, type_qualname, state_type):
    """Register a new state type for a qualified type name.

    The state type is stored twice: under the name produced by
    get_type_qualname and under the state type's own identifier.
    The registry itself is returned.
    """
    qualname = get_type_qualname(type_qualname)
    table = self.state_types_dictionary
    table[qualname] = state_type
    table[state_type.identifier()] = state_type
    return self
class TextStateType
-
Text data (string)
Expand source code
class TextStateType(StateType):
    """State type for text data (str), serialized as UTF-8 encoded bytes."""

    def identifier(self):
        """State type identifier: "text"."""
        return "text"

    def default_extension(self):
        """Default file extension: "txt"."""
        return "txt"

    def default_mimetype(self):
        """Default mime type: "text/plain"."""
        return "text/plain"

    def is_type_of(self, data):
        """Tell whether data is a str."""
        return isinstance(data, str)

    def as_bytes(self, data, extension=None):
        """Encode the text as UTF-8 and return it with its mime type.

        Without an extension the default mime type is reported; otherwise
        the mime type comes from mimetype_from_extension, called with
        "text/plain" as its second argument. The encoding is UTF-8
        regardless of the extension.
        """
        if extension is not None:
            mimetype = mimetype_from_extension(extension, "text/plain")
        else:
            # The resolved default extension is not used any further here.
            extension = self.default_extension()
            mimetype = self.default_mimetype()
        encoded = data.encode("utf-8")
        return encoded, mimetype

    def from_bytes(self, b: bytes, extension=None):
        """Decode UTF-8 bytes to text; the extension is not consulted."""
        text = b.decode("utf-8")
        return text

    def copy(self, data):
        """Return a copy of the text, made with a full slice."""
        duplicate = data[:]
        return duplicate

    def data_characteristics(self, data):
        """Describe the text by its length in characters."""
        length = len(data)
        return {"description": f"Text {length} characters long."}
Ancestors
Inherited members