Source code for metatable.metatable
"""
Table data structure that supports the introduction of user-defined workflow
combinators and the use of these combinators in concise workflow descriptions.
"""
from __future__ import annotations
from typing import Any, Union, Optional, Callable, Iterable
import doctest
import itertools
import symbolism
class metatable:
    """
    Class for the extensible metatable data structure.

    :param iterable: Iterable of rows corresponding to the data in this instance.
    :param name: Instance name.
    :param header: Flag indicating whether the first row of the data is a
        header row consisting of column names.

    >>> t = metatable([['a', 0], ['b', 1], ['c', 2]])
    >>> list(iter(t))
    [['a', 0], ['b', 1], ['c', 2]]

    All rows in an instance can be updated in-place using a symbolic
    representation of the transformation that must be applied to each
    row.

    >>> t = metatable([['char', 'num'], ['a', 0], ['b', 1]], header=True)
    >>> t.update({1: column(0)})
    [['char', 'num'], ['a', 'a'], ['b', 'b']]

    Find more examples under the entries for the :obj:`update` and
    :obj:`update_filter` methods.
    """
    @staticmethod
    def _eval(r: list, i: int, e: Union[column, type, symbolism.symbol]) -> Any:
        """
        Evaluation of a symbolic expression that may contain
        references to specific attributes/columns of a row.

        :param r: Row against which column references are resolved.
        :param i: Index of the row (the value substituted for :obj:`row`).
        :param e: Expression to evaluate.

        A :obj:`column` instance yields the row entry at its index (or ``None``
        if the index lies beyond the end of the row), the :obj:`row` class
        yields the row index, any other symbol is evaluated after its
        parameters have been evaluated recursively, and every other value is
        returned unchanged (*i.e.*, it is treated as a constant).
        """
        if isinstance(e, column):
            index = e.evaluate()
            return r[index] if index < len(r) else None

        if e is row:
            return i

        if isinstance(e, symbolism.symbol):
            # A symbol without parameters is evaluated directly; otherwise,
            # its instance is applied to the evaluated parameters.
            if len(e) == 0:
                return e.evaluate()
            return e.instance(*[metatable._eval(r, i, p) for p in e.parameters])

        return e

    @staticmethod
    def _upd(update_filter_index_row: tuple) -> list:
        """
        Internal method for the work to be completed for each row
        during an invocation of an update on the table.

        :param update_filter_index_row: Nested tuple of the form
            ``((update, filter, column_max), (index, row))`` in which
            ``column_max`` is ``None`` unless strict mode is enabled.

        Returns a list containing the updated row, or an empty list if the
        row was removed by the filter.
        """
        ((update, filter_, column_max), (index, row_)) = update_filter_index_row

        # Work on a copy so that the row object supplied by the caller is never
        # modified (this also permits rows that are tuples).
        row_ = list(row_)

        # Fill columns that are in the range but that have no expression
        # in the update tasks.
        row_.extend([None] * (max(update, default=-1) + 1 - len(row_)))

        # Expressions are evaluated against ``row_`` (the values before the
        # update) while results are written into the separate copy ``row__``.
        row__ = list(row_)

        # In strict mode, drop columns which do not appear in the update task.
        if column_max is not None:
            row__ = [v for (c, v) in enumerate(row__) if c <= column_max]

        drops = set() # Columns to drop once updates are evaluated.
        for (col_, upd) in update.items():
            if upd is drop:
                drops.add(col_)
            else:
                row__[col_] = metatable._eval(row_, index, upd)

        # Apply filter first and then drop columns.
        if filter_ is not None and not metatable._eval(row__, index, filter_):
            return [] # Row was filtered out.

        return [[v for (c, v) in enumerate(row__) if c not in drops]]

    def __init__(
            self: metatable,
            iterable: Iterable,
            name: Optional[str] = None,
            header: Optional[bool] = False
        ):
        """
        Constructor for a table instance that draws data from an iterable.

        >>> t = metatable([['a', 0], ['b', 1], ['c', 2]])
        >>> list(t)
        [['a', 0], ['b', 1], ['c', 2]]
        """
        self.iterable = iterable
        self.name = name
        self.header = header

    def __iter__(self: metatable) -> Iterable:
        """
        Return this instance as an iterable.

        >>> t = metatable([['a', 0], ['b', 1], ['c', 2]])
        >>> list(iter(t))
        [['a', 0], ['b', 1], ['c', 2]]
        """
        yield from self.iterable

    def map(
            self: metatable,
            function: Callable,
            iterable: Iterable,
            progress: Callable
        ) -> Iterable:
        """
        Internal method for mapping over the data in the table. This method
        can be redefined in derived classes to change how rows are processed
        (*e.g.*, to introduce multiprocessing).

        :param function: Function to apply to every item in the iterable
            (it must return a list of rows for each item).
        :param iterable: Iterable of items to which the function should be applied
            (this should normally be the instance itself).
        :param progress: Function that returns its iterable input and reports progress.

        >>> t = metatable([['a', 0], ['b', 1], ['c', 2]])
        >>> list(t.map(lambda row: [[row[1], row[0]]], t, lambda _: _))
        [[0, 'a'], [1, 'b'], [2, 'c']]
        """
        return (row for rows in progress(map(function, iterable)) for row in rows)

    def update_filter( # pylint: disable=too-many-arguments
            self: metatable,
            update: Union[dict, list, tuple],
            filter: Optional[symbolism.symbol], # pylint: disable=redefined-builtin
            header: Optional[list] = None,
            strict: Optional[bool] = False,
            progress: Optional[Callable] = (lambda *a, **ka: a[0])
        ) -> list:
        """
        Perform update-then-filter operations across the entire table, based on
        symbolic expressions for the update and filter task(s). The result of
        the operation is returned.

        :param update: Symbolic expression that represents an update operation
            (to be applied to every row). This is either a dictionary that maps
            column indices to expressions or a list/tuple of expressions (one
            per column, starting from the left-most one).
        :param filter: Symbolic expression that represents a filter predicate
            (to be tested for every row), or ``None`` to keep every row.
        :param header: Header row for the overall result of this method (only
            used if this instance was created with a header row).
        :param strict: Drop columns that do not explicitly appear in the update expression.
        :param progress: Function that returns its iterable input and reports progress.

        >>> t = metatable([['a', 0], ['b', 1], ['c', 2]])
        >>> t.update_filter({0: column(1)}, column(1) > symbolism.symbol(0))
        [[1, 1], [2, 2]]

        This instance is modified in-place, so iterating over it again yields
        the updated version.

        >>> list(t)
        [[1, 1], [2, 2]]

        This method can be used in combination with the :obj:`row` class to
        introduce the row index into a column during the update.

        >>> t = metatable([['a'], ['b'], ['c']])
        >>> t.update_filter({3: row}, column(3) < 2)
        [['a', None, None, 0], ['b', None, None, 1]]
        >>> list(t)
        [['a', None, None, 0], ['b', None, None, 1]]

        An empty update task leaves every row unchanged.

        >>> t = metatable([['a', 0], ['b', 1]])
        >>> t.update_filter({}, None)
        [['a', 0], ['b', 1]]
        """
        (rows_in, rows_out) = (iter(self), [])

        # If the update task is a list (representing the operation that yields
        # each column starting from the left-most one), convert it into a dictionary.
        if isinstance(update, (tuple, list)):
            update = dict(enumerate(update))

        # Determine the column with the highest index in the update task (an
        # empty update task references no columns, hence the default of -1).
        column_max = max(update, default=-1)

        if self.header:
            if header is None:
                # Update the header row since no replacement header is specified
                # (at most one row is consumed from the input for this purpose).
                for head in itertools.islice(rows_in, 1):
                    # Copy the header row so that the input data is not modified.
                    head = list(head)

                    # Fill columns that are in the range but that have no expression
                    # in the update tasks.
                    head.extend([None] * (column_max + 1 - len(head)))

                    # In strict mode, drop columns which do not appear in the update task.
                    if strict:
                        head = [v for (c, v) in enumerate(head) if c <= column_max]

                    # Drop columns in header as indicated in update specification.
                    drops = {col_ for (col_, upd) in update.items() if upd is drop}
                    rows_out.append([v for (c, v) in enumerate(head) if c not in drops])
            else:
                # A replacement header has been specified.
                rows_in = itertools.islice(rows_in, 1, None) # Skip the old header row.
                rows_out.append(header)

        rows_out.extend(self.map(
            metatable._upd,
            zip(
                itertools.repeat((update, filter, column_max if strict else None)),
                enumerate(rows_in)
            ),
            progress
        ))

        self.iterable = rows_out
        return rows_out

    def update(
            self: metatable,
            update: Union[dict, list, tuple],
            header: Optional[list] = None,
            strict: Optional[bool] = False,
            progress: Optional[Callable] = (lambda *a, **ka: a[0])
        ) -> list:
        """
        Update operation across the entire table, based on a symbolic expression
        for the update task(s).

        :param update: Symbolic expression that represents an update operation
            (to be applied to every row).
        :param header: Header row for the overall result of this method.
        :param strict: Drop columns that do not explicitly appear in the update expression.
        :param progress: Function that returns its iterable input and reports progress.

        >>> t = metatable([['a', 0], ['b', 1], ['c', 2]])
        >>> t.update({0: column(1)}) # Replace first-column value with second-column value.
        [[0, 0], [1, 1], [2, 2]]
        >>> list(t)
        [[0, 0], [1, 1], [2, 2]]

        If a header row is present (and should be preserved when performing the update),
        this can be indicated using the ``header`` argument of the constructor.

        >>> t = metatable([['char', 'num'], ['a', 0], ['b', 1]], header=True)
        >>> t.update({1: column(0)})
        [['char', 'num'], ['a', 'a'], ['b', 'b']]

        This method can be used in combination with the :obj:`drop` class in order
        to indicate that a column should be dropped during the update.

        >>> t.update({0: drop})
        [['num'], ['a'], ['b']]
        >>> t.update({0: drop})
        [[], [], []]
        >>> t = metatable([['a', 0, True], ['b', 1, True], ['c', 2, False]])
        >>> t.update([column(1), column(0), drop])
        [[0, 'a'], [1, 'b'], [2, 'c']]

        If the ``strict`` argument is assigned the value ``True``, then columns
        that do not explicitly appear in the update task specification are dropped.

        >>> t = metatable([['c', 'n', 'b'], ['a', 0, True], ['b', 1, True]], header=True)
        >>> t.update([column(1), column(0)], strict=True, header=['n', 'c'])
        [['n', 'c'], [0, 'a'], [1, 'b']]
        >>> t.update([column(1)], strict=True)
        [['n'], ['a'], ['b']]
        >>> t.update([column(0)], strict=True, header=['c'])
        [['c'], ['a'], ['b']]
        >>> t.update({2: 'x'})
        [['c', None, None], ['a', None, 'x'], ['b', None, 'x']]

        Other common operations (such as the functions pre-defined within the
        `symbolism <https://pypi.org/project/symbolism>`__ library) can be used
        to introduce a new computed column (in which the entry for that column
        in every row is computed using zero or more of the values from that row
        found in the existing columns).

        >>> t = metatable([['a', 0], ['b'], ['c', 2]])
        >>> t.update({2: symbolism.is_(column(1), None)})
        [['a', 0, False], ['b', None, True], ['c', 2, False]]
        """
        return self.update_filter(update, None, header, strict, progress)

class row: # pylint: disable=too-few-public-methods
    """
    Symbolic representation of a row index (for use with methods such as
    :obj:`metatable.update`). The class itself (not an instance of it) is
    placed in an update or filter expression wherever the index of the row
    being processed is required.

    >>> t = metatable([['a'], ['b'], ['c']])
    >>> t.update_filter({3: row}, column(3) < 2)
    [['a', None, None, 0], ['b', None, None, 1]]
    """

class drop: # pylint: disable=too-few-public-methods
    """
    Symbolic representation of a column drop operation (for use with methods
    such as :obj:`metatable.update`). The class itself (not an instance of it)
    is used as the update expression for every column that should be removed.

    >>> t = metatable([['char', 'num'], ['a', 0], ['b', 1]], header=True)
    >>> t.update({1: column(0)})
    [['char', 'num'], ['a', 'a'], ['b', 'b']]
    >>> t.update({0: drop})
    [['num'], ['a'], ['b']]
    """

class column(symbolism.symbol): # pylint: disable=too-few-public-methods
    """
    Symbolic representation of a column specifier, such as a numerical index
    or an attribute name (for use with methods such as :obj:`metatable.update`).

    >>> t = metatable([['a', 0], ['b', 1], ['c', 2]])
    >>> t.update_filter({0: column(1)}, column(1) > symbolism.symbol(0))
    [[1, 1], [2, 2]]
    """

# Run the examples embedded in the docstrings when executed as a script.
if __name__ == '__main__':
    doctest.testmod() # pragma: no cover