Source code for py2store.misc

"""Functions to read from and write to misc sources
"""

import os
import json
import pickle
import csv
import gzip
from io import StringIO

from py2store.stores.local_store import LocalBinaryStore
from py2store.slib.s_zipfile import FilesOfZip
from py2store.slib.s_configparser import ConfigReader, ConfigStore
from py2store.util import imdict


def csv_fileobj(
    csv_data, *args, **kwargs
):  # TODO: Use extended wraps func to inject
    """Serialize rows of data to CSV and return the result as encoded bytes.

    :param csv_data: Iterable of rows, each row being an iterable of fields.
    :param args: Positional arguments forwarded to ``csv.writer`` (e.g. a dialect).
    :param kwargs: Keyword arguments forwarded to ``csv.writer``
        (e.g. ``delimiter=';'``).
    :return: The CSV text, encoded with ``str.encode()`` defaults.

    >>> csv_fileobj([['a', 'b'], [1, 2]])
    b'a,b\\r\\n1,2\\r\\n'
    >>> csv_fileobj([['a', 'b'], [1, 2]], delimiter=';')
    b'a;b\\r\\n1;2\\r\\n'
    """
    fp = StringIO('')
    # Formatting options are writer options: ``writerows`` itself only accepts
    # the rows, so forwarding them there would raise a TypeError.
    writer = csv.writer(fp, *args, **kwargs)
    writer.writerows(csv_data)
    return fp.getvalue().encode()


def identity_method(x):
    """Return ``x`` unchanged (the no-op value transformation)."""
    return x


# TODO: Enhance default handling so users can have their own defaults (checking for local config file etc.)
# Note: If you're tempted to add third-party cases here (like yaml, pandas):
#   DO NOT!! Defaults must work only with builtins (or misc would be non-deterministic)
# Maps a key to the extension of that key, dot included (what os.path.splitext returns
# as its second item). It takes ``self`` first, i.e. it has the signature of a method.
dflt_func_key = lambda self, k: os.path.splitext(k)[1]
# The identity function wrapped as a staticmethod.
# NOTE(review): presumably meant to be assigned as a class attribute -- confirm where it is used.
dflt_dflt_incoming_val_trans = staticmethod(identity_method)

# Extension -> function applied to a value that was read, to turn it into a python object.
# The '.csv' and '.txt' entries call ``.decode()`` on the value, so they expect bytes.
# NOTE: the '.pkl'/'.pickle' entries unpickle whatever they are given; unpickling can
#   execute arbitrary code, so only read pickles from sources you trust.
dflt_incoming_val_trans_for_key = {
    '.bin': identity_method,
    '.csv': lambda v: list(csv.reader(StringIO(v.decode()))),
    '.txt': lambda v: v.decode(),
    '.pkl': lambda v: pickle.loads(v),
    '.pickle': lambda v: pickle.loads(v),
    '.json': lambda v: json.loads(v),
    '.zip': FilesOfZip,
    '.gzip': gzip.decompress,
    '.ini': lambda v: ConfigStore(
        v, interpolation=ConfigReader.ExtendedInterpolation(),
    ),
}

# Extension -> function applied to a python object before it is written.
# The '.csv', '.txt', '.pkl', '.pickle' and '.json' entries produce bytes.
# There is no '.zip' entry here (reading has one, writing does not).
# NOTE(review): the '.ini' entry builds a ConfigStore exactly like the incoming mapping does;
#   confirm that this is the object the underlying store is expected to receive.
dflt_outgoing_val_trans_for_key = {
    '.bin': identity_method,
    '.csv': csv_fileobj,
    '.txt': lambda v: v.encode(),
    '.pkl': lambda v: pickle.dumps(v),
    '.pickle': lambda v: pickle.dumps(v),
    '.json': lambda v: json.dumps(v).encode(),
    '.gzip': gzip.compress,
    '.ini': lambda v: ConfigStore(
        v, interpolation=ConfigReader.ExtendedInterpolation()
    ),
}

# Canonical extension -> other extensions that should be handled the same way.
synset_of_ext = {'.ini': {'.cnf', '.conf', '.config'}, '.gzip': ['.gz']}
# Register every synonym in both default mappings, pointing at the very same function as the
# canonical extension. A mapping that has no entry for the canonical extension is left alone.
for _user_this, _for_these_extensions in synset_of_ext.items():
    for _d in [
        dflt_incoming_val_trans_for_key,
        dflt_outgoing_val_trans_for_key,
    ]:
        if _user_this in _d:
            for _ext in _for_these_extensions:
                _d[_ext] = _d[_user_this]


# TODO: The different misc objects (function, class, default instance) should be more aligned


class MiscReaderMixin:
    """Mixin that transforms values on read, according to the key they're stored under.

    A "function key" is computed from the key (by default, its file extension) and used to
    look up the transformation to apply to the value fetched from the underlying store.
    When no transformation is registered for that function key, a default transformation
    (the identity, unless overridden) is applied.

    Warning: If used as a subclass, this mixin should (in general) be placed before the store

    >>> # make a reader that will wrap a dict
    >>> class MiscReader(MiscReaderMixin, dict):
    ...     def __init__(self, d,
    ...                         incoming_val_trans_for_key=None,
    ...                         dflt_incoming_val_trans=None,
    ...                         func_key=None):
    ...         dict.__init__(self, d)
    ...         MiscReaderMixin.__init__(self, incoming_val_trans_for_key, dflt_incoming_val_trans, func_key)
    ...
    >>>
    >>> incoming_val_trans_for_key = dict(
    ...     MiscReaderMixin._incoming_val_trans_for_key,  # take the existing defaults...
    ...     **{'.bin': lambda v: [ord(x) for x in v.decode()], # ... override how to handle the .bin extension
    ...      '.reverse_this': lambda v: v[::-1]  # add a new extension (and how to handle it)
    ...     })
    >>>
    >>> import pickle
    >>> d = {
    ...     'a.bin': b'abc123',
    ...     'a.reverse_this': b'abc123',
    ...     'a.csv': b'event,year\\n Magna Carta,1215\\n Guido,1956',
    ...     'a.txt': b'this is not a text',
    ...     'a.pkl': pickle.dumps(['text', [str, map], {'a list': [1, 2, 3]}]),
    ...     'a.json': '{"str": "field", "int": 42, "float": 3.14, "array": [1, 2], "nested": {"a": 1, "b": 2}}',
    ... }
    >>>
    >>> s = MiscReader(d=d, incoming_val_trans_for_key=incoming_val_trans_for_key)
    >>> list(s)
    ['a.bin', 'a.reverse_this', 'a.csv', 'a.txt', 'a.pkl', 'a.json']
    >>> s['a.bin']
    [97, 98, 99, 49, 50, 51]
    >>> s['a.reverse_this']
    b'321cba'
    >>> s['a.csv']
    [['event', 'year'], [' Magna Carta', '1215'], [' Guido', '1956']]
    >>> s['a.pkl']
    ['text', [<class 'str'>, <class 'map'>], {'a list': [1, 2, 3]}]
    >>> s['a.json']
    {'str': 'field', 'int': 42, 'float': 3.14, 'array': [1, 2], 'nested': {'a': 1, 'b': 2}}
    """

    def _func_key(self, k):
        # Default function key: the extension of the key, dot included
        return os.path.splitext(k)[1]

    _dflt_incoming_val_trans = staticmethod(identity_method)
    _incoming_val_trans_for_key = imdict(dflt_incoming_val_trans_for_key)

    def __init__(
        self,
        incoming_val_trans_for_key=None,
        dflt_incoming_val_trans=None,
        func_key=None,
    ):
        """Override, per instance, any of the class-level defaults.

        :param incoming_val_trans_for_key: Mapping from function key to the transformation
            to apply to values read under keys having that function key.
        :param dflt_incoming_val_trans: Transformation used when the function key has no entry.
        :param func_key: Callable computing the function key of a key (called with the key only).

        An argument left to ``None`` keeps the corresponding class-level default.
        """
        overrides = (
            ('_incoming_val_trans_for_key', incoming_val_trans_for_key),
            ('_dflt_incoming_val_trans', dflt_incoming_val_trans),
            ('_func_key', func_key),
        )
        for attr_name, given in overrides:
            if given is not None:
                setattr(self, attr_name, given)

    def __getitem__(self, k):
        """Fetch the value of ``k`` from the underlying store and return it transformed."""
        ext = self._func_key(k)
        transform = self._incoming_val_trans_for_key.get(
            ext, self._dflt_incoming_val_trans
        )
        raw = super().__getitem__(k)
        return transform(raw)
# Dropbox support is optional: record whether the helper could be imported, so that
# dropbox URLs are only special-cased when bytes_from_dropbox is actually available.
try:
    from py2store.examples.dropbox_w_urllib import bytes_from_dropbox
except Exception:
    _dropbox_as_special_case = False
else:
    _dropbox_as_special_case = True


# TODO: I'd really like to reuse MiscReaderMixin here! There's a lot of potential.
# TODO: For more flexibility, the default store should probably be a UriReader (that doesn't exist yet)
#  If store argument of get_obj was a type instead of an instance, or if MiscReaderMixin was a transformer, it would
#  be easier -- but would it make their individual concerns mixed?
#  Also, preset and postget (trans.wrap_kvs(...)) now exist. Let's use them here.
def get_obj(
    k,
    store=LocalBinaryStore(path_format=''),
    incoming_val_trans_for_key=imdict(dflt_incoming_val_trans_for_key),
    dflt_incoming_val_trans=identity_method,
    func_key=lambda k: os.path.splitext(k)[1],
):
    """A quick way to get an object, with default... everything (but the key, you know, a clue of what you want)

    :param k: The key of the object to get. A key starting with ``http://`` or ``https://``
        is downloaded instead of being looked up in ``store``.
    :param store: Where to fetch the raw value from (for non-URL keys).
    :param incoming_val_trans_for_key: Mapping from function key to the transformation to apply
        to the raw value. A falsy value (e.g. ``None``) is treated as an empty mapping.
    :param dflt_incoming_val_trans: Transformation used when the function key has no entry.
    :param func_key: Callable computing the function key of a key (default: its extension).
    :return: The transformed value.
    """
    if k.startswith(('http://', 'https://')):
        is_dropbox_url = k.startswith(
            ('http://www.dropbox.com', 'https://www.dropbox.com')
        )
        if _dropbox_as_special_case and is_dropbox_url:
            v = bytes_from_dropbox(k)
        else:
            import urllib.request

            with urllib.request.urlopen(k) as response:
                v = response.read()
    else:
        # being extra careful to only preprocess the key if the store is a local store
        if isinstance(store, LocalBinaryStore):
            if k.startswith('.'):  # covers both '.' and '..'
                k = os.path.abspath(k)
            elif k.startswith('~'):
                k = os.path.expanduser(k)
        v = store[k]

    # The function key is computed on the (possibly preprocessed) key
    transformations = incoming_val_trans_for_key or {}
    trans_func = transformations.get(func_key(k), dflt_incoming_val_trans)
    return trans_func(v)
# TODO: I'd really like to reuse MiscReaderMixin here! There's a lot of potential. # Same comment as for get_obj.
class MiscGetter:
    """
    An object to read (and only read) from a store (default local files) with automatic
    deserialization according to a property of the key (default: file extension).

    Item access is delegated to ``get_obj``, using the settings given at construction time.

    >>> from py2store.misc import get_obj, misc_objs_get
    >>> import os
    >>> import json
    >>>
    >>> pjoin = lambda *p: os.path.join(os.path.expanduser('~'), *p)
    >>> path = pjoin('tmp.json')
    >>> d = {'a': {'b': {'c': [1, 2, 3]}}}
    >>> json.dump(d, open(path, 'w'))  # putting a json file there, the normal way, so we can use it later
    >>>
    >>> k = path
    >>> t = get_obj(k)  # if you'd like to use a function
    >>> assert t == d
    >>> tt = misc_objs_get[k]  # if you'd like to use an object (note: can get, but nothing else (no list, set, del, etc))
    >>> assert tt == d
    >>> t
    {'a': {'b': {'c': [1, 2, 3]}}}
    """

    def __init__(
        self,
        store=LocalBinaryStore(path_format=''),
        incoming_val_trans_for_key=imdict(dflt_incoming_val_trans_for_key),
        dflt_incoming_val_trans=identity_method,
        func_key=lambda k: os.path.splitext(k)[1],
    ):
        """Remember the settings that every ``get_obj`` call made by this object will use."""
        self.store = store
        self.incoming_val_trans_for_key = incoming_val_trans_for_key
        self.dflt_incoming_val_trans = dflt_incoming_val_trans
        self.func_key = func_key

    def __getitem__(self, k):
        """Return the (deserialized) object stored under ``k``."""
        return get_obj(
            k,
            store=self.store,
            incoming_val_trans_for_key=self.incoming_val_trans_for_key,
            dflt_incoming_val_trans=self.dflt_incoming_val_trans,
            func_key=self.func_key,
        )

    def __iter__(self):
        # Disabling "manually" to avoid iteration falling back on __getitem__ with integers
        # To know more, see:
        #   https://stackoverflow.com/questions/37941523/pip-uninstall-no-files-were-found-to-uninstall
        #   https://www.python.org/dev/peps/pep-0234/
        message = (
            "By default, there's no iteration in MiscGetter. "
            'But feel free to subclass if you '
            'have a particular sense of what the iteration should yield!'
        )
        raise NotImplementedError(message)
misc_objs_get = MiscGetter() # TODO: Make this be more tightly couples with the actual default used in get_obj and MiscGetter (avoid misalignments) misc_objs_get.dflt_incoming_val_trans_for_key = dflt_incoming_val_trans_for_key
class MiscStoreMixin(MiscReaderMixin):
    r"""Mixin to transform incoming and outgoing vals according to the key they're under.

    Reading is handled as in ``MiscReaderMixin``. On writing, a "function key" is computed
    from the key (by default, its file extension) and used to look up the transformation to
    apply to the value before handing it to the underlying store. When no transformation is
    registered for that function key, the default outgoing transformation (the identity,
    unless ``dflt_outgoing_val_trans`` is given) is applied.

    Warning: If used as a subclass, this mixin should (in general) be placed before the store

    See also: preset and postget args from wrap_kvs decorator from py2store.trans.

    >>> # Make a class to wrap a dict with a layer that transforms written and read values
    >>> class MiscStore(MiscStoreMixin, dict):
    ...     def __init__(self, d,
    ...                         incoming_val_trans_for_key=None, outgoing_val_trans_for_key=None,
    ...                         dflt_incoming_val_trans=None, dflt_outgoing_val_trans=None,
    ...                         func_key=None):
    ...         dict.__init__(self, d)
    ...         MiscStoreMixin.__init__(self, incoming_val_trans_for_key, outgoing_val_trans_for_key,
    ...                                 dflt_incoming_val_trans, dflt_outgoing_val_trans, func_key)
    ...
    >>>
    >>> outgoing_val_trans_for_key = dict(
    ...     MiscStoreMixin._outgoing_val_trans_for_key,  # take the existing defaults...
    ...     **{'.bin': lambda v: ''.join([chr(x) for x in v]).encode(), # ... override how to handle the .bin extension
    ...        '.reverse_this': lambda v: v[::-1]  # add a new extension (and how to handle it)
    ...     })
    >>> ss = MiscStore(d={},  # store starts empty
    ...                incoming_val_trans_for_key={},  # overriding incoming trans so we can see the raw data later
    ...                outgoing_val_trans_for_key=outgoing_val_trans_for_key)
    ...
    >>> # here's what we're going to write in the store
    >>> data_to_write = {
    ...      'a.bin': [97, 98, 99, 49, 50, 51],
    ...      'a.reverse_this': b'321cba',
    ...      'a.csv': [['event', 'year'], [' Magna Carta', '1215'], [' Guido', '1956']],
    ...      'a.txt': 'this is not a text',
    ...      'a.pkl': ['text', [str, map], {'a list': [1, 2, 3]}],
    ...      'a.json': {'str': 'field', 'int': 42, 'float': 3.14, 'array': [1, 2], 'nested': {'a': 1, 'b': 2}}}
    >>> # write this data in our store
    >>> for k, v in data_to_write.items():
    ...     ss[k] = v
    >>> list(ss)
    ['a.bin', 'a.reverse_this', 'a.csv', 'a.txt', 'a.pkl', 'a.json']
    >>> # Looking at the contents (what was actually stored/written)
    >>> for k, v in ss.items():
    ...     if k != 'a.pkl':
    ...         print(f"{k}: {v}")
    ...     else:  # need to verify pickle data differently, since printing contents is problematic in doctest
    ...         assert pickle.loads(v) == data_to_write['a.pkl']
    a.bin: b'abc123'
    a.reverse_this: b'abc123'
    a.csv: b'event,year\r\n Magna Carta,1215\r\n Guido,1956\r\n'
    a.txt: b'this is not a text'
    a.json: b'{"str": "field", "int": 42, "float": 3.14, "array": [1, 2], "nested": {"a": 1, "b": 2}}'

    """

    # Fallback applied on write when the function key has no registered transformation.
    # This is the attribute that __init__ overrides and that __setitem__ reads.
    _dflt_outgoing_val_trans = staticmethod(identity_method)
    # Former (misnamed) spelling of the attribute above, kept so that existing
    # references to it keep resolving.
    _dflt_outgoing_val_trans_for_key = _dflt_outgoing_val_trans
    _outgoing_val_trans_for_key = dflt_outgoing_val_trans_for_key

    def __init__(
        self,
        incoming_val_trans_for_key=None,
        outgoing_val_trans_for_key=None,
        dflt_incoming_val_trans=None,
        dflt_outgoing_val_trans=None,
        func_key=None,
    ):
        """Override, per instance, any of the class-level defaults.

        :param incoming_val_trans_for_key: Function key -> transformation applied on read.
        :param outgoing_val_trans_for_key: Function key -> transformation applied on write.
        :param dflt_incoming_val_trans: Read transformation used when the function key has no entry.
        :param dflt_outgoing_val_trans: Write transformation used when the function key has no entry.
        :param func_key: Callable computing the function key of a key (called with the key only).

        An argument left to ``None`` keeps the corresponding class-level default.
        """
        super().__init__(
            incoming_val_trans_for_key, dflt_incoming_val_trans, func_key
        )
        if outgoing_val_trans_for_key is not None:
            self._outgoing_val_trans_for_key = outgoing_val_trans_for_key
        if dflt_outgoing_val_trans is not None:
            self._dflt_outgoing_val_trans = dflt_outgoing_val_trans

    def __setitem__(self, k, v):
        """Transform ``v`` according to the function key of ``k``, then store it under ``k``."""
        func_key = self._func_key(k)
        # Fall back on the attribute __init__ actually sets, so that a user-given
        # dflt_outgoing_val_trans is honored instead of being silently ignored.
        trans_func = self._outgoing_val_trans_for_key.get(
            func_key, self._dflt_outgoing_val_trans
        )
        return super().__setitem__(k, trans_func(v))
# TODO: I'd really like to reuse MiscStoreMixin here! There's a lot of potential.
#  If store argument of get_obj was a type instead of an instance, or if MiscReaderMixin was a transformer, it would
#  be easier -- but would it make their individual concerns mixed?
def set_obj(
    k,
    v,
    store=LocalBinaryStore(path_format=''),
    outgoing_val_trans_for_key=imdict(dflt_outgoing_val_trans_for_key),
    func_key=lambda k: os.path.splitext(k)[1],
    dflt_outgoing_val_trans=identity_method,
):
    """A quick way to set (write) an object, with default... everything (but the key and the value)

    :param k: The key to write under.
    :param v: The object to write.
    :param store: Where to write the transformed value.
    :param outgoing_val_trans_for_key: Mapping from function key to the transformation to apply
        to ``v`` before writing. A falsy value (e.g. ``None``) is treated as an empty mapping.
    :param func_key: Callable computing the function key of a key (default: its extension).
    :param dflt_outgoing_val_trans: Transformation used when the function key has no entry
        in ``outgoing_val_trans_for_key`` (default: write ``v`` as is).
    """
    # The fallback must be a callable: falling back on the mapping of transformations
    # itself would fail with "'dict' object is not callable" for any unregistered extension.
    trans_func = (outgoing_val_trans_for_key or {}).get(
        func_key(k), dflt_outgoing_val_trans
    )
    store[k] = trans_func(v)
# TODO: I'd really like to reuse MiscReaderMixin here! There's a lot of potential. # Same comment as above.
class MiscGetterAndSetter(MiscGetter):
    """
    An object to read and write (and nothing else) to a store (default local) with automatic
    (de)serialization according to a property of the key (default: file extension).

    Reading is inherited from ``MiscGetter``; writing is delegated to ``set_obj``.

    >>> from py2store.misc import set_obj, misc_objs  # the function and the object
    >>> import json
    >>> import os
    >>>
    >>> pjoin = lambda *p: os.path.join(os.path.expanduser('~'), *p)
    >>>
    >>> d = {'a': {'b': {'c': [1, 2, 3]}}}
    >>> misc_objs[pjoin('tmp.json')] = d
    >>> filepath = os.path.expanduser('~/tmp.json')
    >>> assert misc_objs[filepath] == d  # yep, it's there, and can be retrieved
    >>> assert json.load(open(filepath)) == d  # in case you don't believe it's an actual json file
    >>>
    >>> # using pickle
    >>> misc_objs[pjoin('tmp.pkl')] = d
    >>> assert misc_objs[pjoin('tmp.pkl')] == d
    >>>
    >>> # using txt
    >>> misc_objs[pjoin('tmp.txt')] = 'hello world!'
    >>> assert misc_objs[pjoin('tmp.txt')] == 'hello world!'
    >>>
    >>> # using csv
    >>> misc_objs[pjoin('tmp.csv')] = [[1,2,3], ['a','b','c']]
    >>> assert misc_objs[pjoin('tmp.csv')] == [['1','2','3'], ['a','b','c']]  # yeah, well, not numbers, but you deal with it
    >>>
    >>> # using bin
    ... misc_objs[pjoin('tmp.bin')] = b'let us pretend these are bytes of an audio waveform'
    >>> assert misc_objs[pjoin('tmp.bin')] == b'let us pretend these are bytes of an audio waveform'
    """

    def __init__(
        self,
        store=LocalBinaryStore(path_format=''),
        incoming_val_trans_for_key=imdict(dflt_incoming_val_trans_for_key),
        outgoing_val_trans_for_key=imdict(dflt_outgoing_val_trans_for_key),
        dflt_incoming_val_trans=identity_method,
        func_key=lambda k: os.path.splitext(k)[1],
    ):
        """Remember the settings used for reading (``get_obj``) and writing (``set_obj``)."""
        # The reading-related attributes are exactly those of the parent class
        super().__init__(
            store=store,
            incoming_val_trans_for_key=incoming_val_trans_for_key,
            dflt_incoming_val_trans=dflt_incoming_val_trans,
            func_key=func_key,
        )
        self.outgoing_val_trans_for_key = outgoing_val_trans_for_key

    def __setitem__(self, k, v):
        """Serialize ``v`` according to ``k`` and write it to the store."""
        return set_obj(
            k,
            v,
            store=self.store,
            outgoing_val_trans_for_key=self.outgoing_val_trans_for_key,
            func_key=self.func_key,
        )
# Module-level reader/writer instance, built with all of MiscGetterAndSetter's defaults
misc_objs = MiscGetterAndSetter()