Module liquer.ext.lq_openpyxl
Expand source code
from io import StringIO, BytesIO
import pandas as pd
from liquer.state_types import StateType, register_state_type
from liquer.constants import mimetype_from_extension
from liquer.commands import command, first_command
from openpyxl import Workbook, load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from tempfile import TemporaryDirectory
from pathlib import Path
from liquer.context import get_context
from itertools import islice
class OpenpyxlWorkbookStateType(StateType):
def identifier(self):
return "openpyxl_workbook"
def default_extension(self):
return "xlsx"
def is_type_of(self, data):
return isinstance(data, Workbook)
def as_bytes(self, data, extension=None):
if extension is None:
extension = self.default_extension()
assert self.is_type_of(data)
mimetype = mimetype_from_extension(extension)
if extension in ("xlsx", "xltx"):
with TemporaryDirectory() as tmpdir:
path = Path(tmpdir) / f"data.{extension}"
data.save(str(path))
b = path.read_bytes()
return b, mimetype
else:
raise Exception(
f"Serialization: file extension {extension} is not supported by openpyxl_workbook type."
)
def from_bytes(self, b: bytes, extension=None):
if extension is None:
extension = self.default_extension()
f = BytesIO()
f.write(b)
f.seek(0)
if extension in ("xlsx", "xltx"):
return load_workbook(f)
raise Exception(
f"Deserialization: file extension {extension} is not supported by openpyxl_workbook type."
)
def copy(self, data):
return self.from_bytes(self.as_bytes(data)[0])
def data_characteristics(self, data):
return dict(description=f"Excel workbook with {len(data.sheetnames)} sheets.")
OPENPYXL_WORKBOOK_STATE_TYPE = OpenpyxlWorkbookStateType()
register_state_type(Workbook, OPENPYXL_WORKBOOK_STATE_TYPE)
@command
def workbook(data, index=True, header=True, context=None):
"""Convert bytes or a dataframe to a workbook"""
context = get_context(context)
if type(data) == bytes:
context.info("Workbook from bytes")
return OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes(data)
elif isinstance(data, pd.DataFrame):
context.info("Workbook from pandas DataFrame")
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df, index=index, header=header):
ws.append(r)
return wb
elif isinstance(data, Workbook):
return data
raise Exception(f"Unsupported workbook type: {type(data)}")
@command
def workbook_sheet_df(wb, sheet=None, context=None):
"""Extract a workbook sheet as a data-frame"""
context = get_context(context)
if type(wb) == bytes:
wb = workbook(wb, context=context)
if sheet in ("", None):
context.info("Using active sheet")
ws = wb.active
else:
ws = wb[sheet]
try:
i = int(sheet)
sheet = wb.sheetnames[i]
context.info(f"Using sheet {i} with name '{sheet}'")
except:
pass
data = ws.values
cols = next(data)[1:]
data = list(data)
idx = [r[0] for r in data]
data = (islice(r, 1, None) for r in data)
df = pd.DataFrame(data, index=idx, columns=cols)
return df
Functions
def workbook(data, index=True, header=True, context=None)
-
Convert bytes or a dataframe to a workbook
Expand source code
@command def workbook(data, index=True, header=True, context=None): """Convert bytes or a dataframe to a workbook""" context = get_context(context) if type(data) == bytes: context.info("Workbook from bytes") return OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes(data) elif isinstance(data, pd.DataFrame): context.info("Workbook from pandas DataFrame") wb = Workbook() ws = wb.active for r in dataframe_to_rows(df, index=index, header=header): ws.append(r) return wb elif isinstance(data, Workbook): return data raise Exception(f"Unsupported workbook type: {type(data)}")
def workbook_sheet_df(wb, sheet=None, context=None)
-
Extract a workbook sheet as a data-frame
Expand source code
@command def workbook_sheet_df(wb, sheet=None, context=None): """Extract a workbook sheet as a data-frame""" context = get_context(context) if type(wb) == bytes: wb = workbook(wb, context=context) if sheet in ("", None): context.info("Using active sheet") ws = wb.active else: ws = wb[sheet] try: i = int(sheet) sheet = wb.sheetnames[i] context.info(f"Using sheet {i} with name '{sheet}'") except: pass data = ws.values cols = next(data)[1:] data = list(data) idx = [r[0] for r in data] data = (islice(r, 1, None) for r in data) df = pd.DataFrame(data, index=idx, columns=cols) return df
Classes
class OpenpyxlWorkbookStateType
-
Abstract state type basis
Expand source code
class OpenpyxlWorkbookStateType(StateType): def identifier(self): return "openpyxl_workbook" def default_extension(self): return "xlsx" def is_type_of(self, data): return isinstance(data, Workbook) def as_bytes(self, data, extension=None): if extension is None: extension = self.default_extension() assert self.is_type_of(data) mimetype = mimetype_from_extension(extension) if extension in ("xlsx", "xltx"): with TemporaryDirectory() as tmpdir: path = Path(tmpdir) / f"data.{extension}" data.save(str(path)) b = path.read_bytes() return b, mimetype else: raise Exception( f"Serialization: file extension {extension} is not supported by openpyxl_workbook type." ) def from_bytes(self, b: bytes, extension=None): if extension is None: extension = self.default_extension() f = BytesIO() f.write(b) f.seek(0) if extension in ("xlsx", "xltx"): return load_workbook(f) raise Exception( f"Deserialization: file extension {extension} is not supported by openpyxl_workbook type." ) def copy(self, data): return self.from_bytes(self.as_bytes(data)[0]) def data_characteristics(self, data): return dict(description=f"Excel workbook with {len(data.sheetnames)} sheets.")
Ancestors
Inherited members