"""
pyexcel.readers
~~~~~~~~~~~~~~~~~~~
Uniform interface for describing a excel book
:copyright: (c) 2014-2015 by Onni Software Ltd.
:license: New BSD License, see LICENSE for more details
"""
from .iterators import SheetIterator
from .sheets import Sheet, SheetStream
from .utils import to_dict, local_uuid
from ._compact import OrderedDict
class BookStream(object):
"""Read an excel book that has one or more sheets
For csv file, there will be just one sheet
"""
def __init__(self, sheets=None, filename="memory", path=None):
"""Book constructor
Selecting a specific book according to filename extension
:param OrderedDict/dict sheets: a dictionary of data
:param str filename: the physical file
:param str path: the relative path or abosolute path
:param set keywords: additional parameters to be passed on
"""
self.path = path
self.filename = filename
self.name_array = []
if sheets:
self.load_from_sheets(sheets)
else:
self.sheets = {}
def load_from_sheets(self, sheets):
"""Load content from existing sheets
:param dict sheets: a dictionary of sheets. Each sheet is
a list of lists
"""
if sheets is None:
return
self.sheets = OrderedDict()
keys = sheets.keys()
if not isinstance(sheets, OrderedDict):
# if the end user does not care about the order
# we put alphatical order
keys = sorted(keys)
for name in keys:
sheet = SheetStream(name, sheets[name])
# this sheets keep sheet order
self.sheets.update({name: sheet})
# this provide the convenience of access the sheet
self.__dict__[name] = sheet
self.name_array = list(self.sheets.keys())
def save_to(self, source):
"""Save to a writeable data source"""
source.write_data(self)
def to_book(self):
return Book(self.to_dict(),
filename=self.filename,
path=self.path)
def to_dict(self):
"""
Get book data structure as a dictionary
"""
the_dict = OrderedDict()
for sheet in self:
the_dict.update({sheet.name: sheet.payload})
return the_dict
def __iter__(self):
return SheetIterator(self)
def number_of_sheets(self):
"""Return the number of sheets"""
return len(self.name_array)
def __getitem__(self, index):
if index < len(self.name_array):
sheet_name = self.name_array[index]
return self.sheets[sheet_name]
[docs]class Book(object):
"""Read an excel book that has one or more sheets
For csv file, there will be just one sheet
"""
[docs] def __init__(self, sheets=None, filename="memory", path=None):
"""Book constructor
Selecting a specific book according to filename extension
:param OrderedDict/dict sheets: a dictionary of data
:param str filename: the physical file
:param str path: the relative path or abosolute path
:param set keywords: additional parameters to be passed on
"""
self.path = path
self.filename = filename
self.name_array = []
self.load_from_sheets(sheets)
@classmethod
def register_presentation(cls, file_type):
setattr(cls, file_type, property(presenter(file_type)))
setattr(cls, 'get_%s' % file_type, presenter(file_type))
def load_from_sheets(self, sheets):
"""Load content from existing sheets
:param dict sheets: a dictionary of sheets. Each sheet is
a list of lists
"""
self.sheets = OrderedDict()
if sheets is None:
return
keys = sheets.keys()
if not isinstance(sheets, OrderedDict):
# if the end user does not care about the order
# we put alphatical order
keys = sorted(keys)
for name in keys:
sheet = self.get_sheet(sheets[name], name)
# this sheets keep sheet order
self.sheets.update({name: sheet})
# this provide the convenience of access the sheet
self.__dict__[name] = sheet
self.name_array = list(self.sheets.keys())
def get_sheet(self, array, name):
"""Create a sheet from a list of lists"""
return Sheet(array, name)
def __iter__(self):
return SheetIterator(self)
[docs] def number_of_sheets(self):
"""Return the number of sheets"""
return len(self.name_array)
[docs] def sheet_names(self):
"""Return all sheet names"""
return self.name_array
def sheet_by_name(self, name):
"""Get the sheet with the specified name"""
return self.sheets[name]
def sheet_by_index(self, index):
"""Get the sheet with the specified index"""
if index < len(self.name_array):
sheet_name = self.name_array[index]
return self.sheets[sheet_name]
def remove_sheet(self, sheet):
"""Remove a sheet"""
if isinstance(sheet, int):
if sheet < len(self.name_array):
sheet_name = self.name_array[sheet]
del self.sheets[sheet_name]
self.name_array = list(self.sheets.keys())
else:
raise IndexError
elif isinstance(sheet, str):
if sheet in self.name_array:
del self.sheets[sheet]
self.name_array = list(self.sheets.keys())
else:
raise KeyError
else:
raise TypeError
def __getitem__(self, key):
"""Override operator[]"""
if isinstance(key, int):
return self.sheet_by_index(key)
else:
return self.sheet_by_name(key)
def __delitem__(self, other):
"""Override del book[index]"""
self.remove_sheet(other)
return self
def __add__(self, other):
"""Override operator +
example::
book3 = book1 + book2
book3 = book1 + book2["Sheet 1"]
"""
content = {}
current_dict = to_dict(self)
for k in current_dict.keys():
new_key = k
if len(current_dict.keys()) == 1:
new_key = "%s_%s" % (self.filename, k)
content[new_key] = current_dict[k]
if isinstance(other, Book):
other_dict = to_dict(other)
for l in other_dict.keys():
new_key = l
if len(other_dict.keys()) == 1:
new_key = other.filename
if new_key in content:
uid = local_uuid()
new_key = "%s_%s" % (l, uid)
content[new_key] = other_dict[l]
elif isinstance(other, Sheet):
new_key = other.name
if new_key in content:
uid = local_uuid()
new_key = "%s_%s" % (other.name, uid)
content[new_key] = other.to_array()
else:
raise TypeError
output = Book()
output.load_from_sheets(content)
return output
def __iadd__(self, other):
"""Operator overloading +=
example::
book += book2
book += book2["Sheet1"]
"""
if isinstance(other, Book):
names = other.sheet_names()
for name in names:
new_key = name
if len(names) == 1:
new_key = other.filename
if new_key in self.name_array:
uid = local_uuid()
new_key = "%s_%s" % (name, uid)
self.sheets[new_key] = self.get_sheet(other[name].to_array(),
new_key)
elif isinstance(other, Sheet):
new_key = other.name
if new_key in self.name_array:
uid = local_uuid()
new_key = "%s_%s" % (other.name, uid)
self.sheets[new_key] = self.get_sheet(other.to_array(), new_key)
else:
raise TypeError
self.name_array = list(self.sheets.keys())
return self
[docs] def save_to(self, source):
"""Save to a writeable data source"""
source.write_data(self)
[docs] def save_as(self, filename):
"""Save the content to a new file
:param str filename: a file path
"""
from .factory import SourceFactory
out_source = SourceFactory.get_writeable_book_source(
file_name=filename)
self.save_to(out_source)
[docs] def save_to_memory(self, file_type, stream=None, **keywords):
"""Save the content to a memory stream
:param file_type: what format the stream is in
:param stream: a memory stream. Note in Python 3, for csv and tsv
format, please pass an instance of StringIO. For xls,
xlsx, and ods, an instance of BytesIO.
"""
from .factory import SourceFactory
out_source = SourceFactory.get_writeable_book_source(
file_type=file_type,
file_stream=stream,
**keywords)
self.save_to(out_source)
return out_source.content
def save_to_django_models(self, models,
initializers=None, mapdicts=None,
batch_size=None):
"""Save to database table through django model
:param models: a list of database models, that is accepted by
:meth:`Sheet.save_to_django_model`. The sequence
of tables matters when there is dependencies in
between the tables. For example, **Car** is made
by **Car Maker**. **Car Maker** table should be
specified before **Car** table.
:param initializers: a list of intialization functions for your
tables and the sequence should match tables,
:param mapdicts: custom map dictionary for your data columns
and the sequence should match tables
"""
from .factory import SourceFactory
out_source = SourceFactory.get_writeable_book_source(
models=models,
initializers=initializers,
mapdicts=mapdicts,
batch_size=batch_size
)
self.save_to(out_source)
[docs] def save_to_database(self, session, tables,
initializers=None, mapdicts=None,
auto_commit=True):
"""Save data in sheets to database tables
:param session: database session
:param tables: a list of database tables, that is accepted by
:meth:`Sheet.save_to_database`. The sequence of tables
matters when there is dependencies in between the
tables. For example, **Car** is made by **Car Maker**.
**Car Maker** table should
be specified before **Car** table.
:param initializers: a list of intialization functions for your
tables and the sequence should match tables,
:param mapdicts: custom map dictionary for your data columns
and the sequence should match tables
:param auto_commit: by default, data is committed.
"""
from .factory import SourceFactory
out_source = SourceFactory.get_writeable_book_source(
session=session,
tables=tables,
initializers=initializers,
mapdicts=mapdicts,
auto_commit=auto_commit
)
self.save_to(out_source)
[docs] def to_dict(self):
"""Convert the book to a dictionary"""
from .utils import to_dict
return to_dict(self)
def __repr__(self):
return self.texttable
def __str__(self):
return self.texttable
def presenter(file_type=None):
def custom_presenter(self, **keywords):
from .factory import SourceFactory
memory_source = SourceFactory.get_writeable_book_source(
file_type=file_type,
**keywords)
self.save_to(memory_source)
return memory_source.content.getvalue()
return custom_presenter