Source code for pyexcel.book

"""
    pyexcel.readers
    ~~~~~~~~~~~~~~~~~~~

    Uniform interface for describing a excel book

    :copyright: (c) 2014-2015 by Onni Software Ltd.
    :license: New BSD License, see LICENSE for more details
"""
from .iterators import SheetIterator
from .sheets import Sheet
from .utils import to_dict, local_uuid
from ._compact import OrderedDict
from .presentation import outsource


[docs]class Book(object): """Read an excel book that has one or more sheets For csv file, there will be just one sheet """
[docs] def __init__(self, sheets={}, filename="memory", path=None): """Book constructor Selecting a specific book according to filename extension :param OrderedDict/dict sheets: a dictionary of data :param str filename: the physical file :param str path: the relative path or abosolute path :param set keywords: additional parameters to be passed on """ self.path = path self.filename = filename self.name_array = [] self.load_from_sheets(sheets)
def load_from_sheets(self, sheets): """Load content from existing sheets :param dict sheets: a dictionary of sheets. Each sheet is a list of lists """ self.sheets = OrderedDict() keys = sheets.keys() if not isinstance(sheets, OrderedDict): # if the end user does not care about the order # we put alphatical order keys = sorted(keys) for name in keys: self.sheets.update({name: self.get_sheet(sheets[name], name)}) self.name_array = list(self.sheets.keys()) def get_sheet(self, array, name): """Create a sheet from a list of lists""" return Sheet(array, name) def __iter__(self): return SheetIterator(self)
[docs] def number_of_sheets(self): """Return the number of sheets""" return len(self.name_array)
[docs] def sheet_names(self): """Return all sheet names""" return self.name_array
def sheet_by_name(self, name): """Get the sheet with the specified name""" return self.sheets[name] def sheet_by_index(self, index): """Get the sheet with the specified index""" if index < len(self.name_array): sheet_name = self.name_array[index] return self.sheets[sheet_name] def remove_sheet(self, sheet): """Remove a sheet""" if isinstance(sheet, int): if sheet < len(self.name_array): sheet_name = self.name_array[sheet] del self.sheets[sheet_name] self.name_array = list(self.sheets.keys()) else: raise IndexError elif isinstance(sheet, str): if sheet in self.name_array: del self.sheets[sheet] self.name_array = list(self.sheets.keys()) else: raise KeyError else: raise TypeError def __getitem__(self, key): """Override operator[]""" if type(key) == int: return self.sheet_by_index(key) else: return self.sheet_by_name(key) def __delitem__(self, other): """Override del book[index]""" self.remove_sheet(other) return self def __add__(self, other): """Override operator + example:: book3 = book1 + book2 book3 = book1 + book2["Sheet 1"] """ content = {} a = to_dict(self) for k in a.keys(): new_key = k if len(a.keys()) == 1: new_key = "%s_%s" % (self.filename, k) content[new_key] = a[k] if isinstance(other, Book): b = to_dict(other) for l in b.keys(): new_key = l if len(b.keys()) == 1: new_key = other.filename if new_key in content: uid = local_uuid() new_key = "%s_%s" % (l, uid) content[new_key] = b[l] elif isinstance(other, Sheet): new_key = other.name if new_key in content: uid = local_uuid() new_key = "%s_%s" % (other.name, uid) content[new_key] = other.to_array() else: raise TypeError c = Book() c.load_from_sheets(content) return c def __iadd__(self, other): """Operator overloading += example:: book += book2 book += book2["Sheet1"] """ if isinstance(other, Book): names = other.sheet_names() for name in names: new_key = name if len(names) == 1: new_key = other.filename if new_key in self.name_array: uid = local_uuid() new_key = "%s_%s" % (name, uid) self.sheets[new_key] = self.get_sheet(other[name].to_array(), new_key) elif isinstance(other, Sheet): new_key = other.name if new_key in self.name_array: uid = local_uuid() new_key = "%s_%s" % (other.name, uid) self.sheets[new_key] = self.get_sheet(other.to_array(), new_key) else: raise TypeError self.name_array = list(self.sheets.keys()) return self
[docs] def save_to(self, source): """Save to a writeable data source""" source.write_data(self)
[docs] def save_as(self, filename): """Save the content to a new file :param str filename: a file path """ from .sources import BookSource out_source = BookSource(file_name=filename) self.save_to(out_source)
[docs] def save_to_memory(self, file_type, stream, **keywords): """Save the content to a memory stream :param iostream stream: a memory stream """ self.save_as((file_type, stream), **keywords)
def save_to_django_models(self, models, initializers=None, mapdicts=None, batch_size=None): """Save to database table through django model :param models: a list of database models, that is accepted by :meth:`Sheet.save_to_django_model`. The sequence of tables matters when there is dependencies in between the tables. For example, **Car** is made by **Car Maker**. **Car Maker** table should be specified before **Car** table. """ from .sources import BookDjangoSource out_source = BookDjangoSource( models=models, initializers=initializers, mapdicts=mapdicts, batch_size=batch_size ) self.save_to(out_source)
[docs] def save_to_database(self, session, tables, initializers=None, mapdicts=None, auto_commit=True): """Save data in sheets to database tables :param session: database session :param tables: a list of database tables, that is accepted by :meth:`Sheet.save_to_database`. The sequence of tables matters when there is dependencies in between the tables. For example, **Car** is made by **Car Maker**. **Car Maker** table should be specified before **Car** table. """ from .sources import BookSQLSource out_source = BookSQLSource( session=session, tables=tables, initializers=initializers, mapdicts=mapdicts, auto_commit=auto_commit ) self.save_to(out_source)
[docs] def to_dict(self): """Convert the book to a dictionary""" from .utils import to_dict return to_dict(self)
def __repr__(self): return self.__str__() @outsource def __str__(self): ret = "" for sheet in self.sheets: ret += str(self.sheets[sheet]) ret += "\n" return ret.strip('\n')