"""
pyexcel.readers
~~~~~~~~~~~~~~~~~~~
Uniform interface for reading different excel file formats
:copyright: (c) 2014 by C. W.
:license: GPL v3
"""
from iterators import (HBRTLIterator,
HTLBRIterator,
VBRTLIterator,
VTLBRIterator,
RowIterator,
RowReverseIterator,
ColumnIterator,
ColumnReverseIterator,
SeriesColumnIterator)
from filters import (RowFilter,
ColumnIndexFilter)
[docs]class CSVReader:
"""
csv reader
"""
def __init__(self, file):
import csv
self.array = []
reader = csv.reader(open(file, 'rb'), dialect=csv.excel)
self.array.extend(reader)
[docs] def number_of_rows(self):
"""
Number of rows in the csv file
"""
return len(self.array)
[docs] def number_of_columns(self):
"""
Number of columns in the csv file
assuming the length of each row is uniform
"""
if len(self.array) > 1:
return len(self.array[0])
else:
return 0
[docs] def cell_value(self, row, column):
"""
Random access to the csv cells
"""
value = self.array[row][column]
try:
if "." in value:
return float(value)
else:
return int(value)
except ValueError:
return value
[docs]class XLSReader:
"""
xls reader
Currently only support first sheet in the file
"""
def __init__(self, file):
import xlrd
self.workbook = xlrd.open_workbook(file)
self.worksheet = self.workbook.sheet_by_index(0)
[docs] def number_of_rows(self):
"""
Number of rows in the xls file
"""
return self.worksheet.nrows
[docs] def number_of_columns(self):
"""
Number of columns in the xls file
"""
return self.worksheet.ncols
[docs] def cell_value(self, row, column):
"""
Random access to the xls cells
"""
return self.worksheet.cell_value(row, column)
[docs]class ODSReaderImp(CSVReader):
"""
ods reader
Currently only support first sheet in the file
"""
def __init__(self, file):
import ext.odsreader as odsreader
self.ods = odsreader.ODSReader(file)
keys = self.ods.SHEETS.keys()
self.array = self.ods.getSheet(keys[0])
[docs]class Reader:
"""
Wrapper class to unify csv, xls and xlsx reader
"""
def __init__(self, file):
"""
Reader constructor
Selecting a specific reader according to file extension
"""
if (file.endswith(".xlsm") or file.endswith(".xlsx") or file.endswith(".xls")):
self.reader = XLSReader(file)
elif file.endswith(".csv"):
self.reader = CSVReader(file)
elif file.endswith(".ods"):
self.reader = ODSReaderImp(file)
else:
raise NotImplementedError("can not open %s" % file)
def __iter__(self):
"""
Default iterator to go through each cell one by one from top row to
bottom row and from left to right
"""
return self.enumerate()
[docs] def enumerate(self):
"""
Default iterator to go through each cell one by one from top row to
bottom row and from left to right
"""
return HTLBRIterator(self)
[docs] def reverse(self):
"""
Reverse iterator to go through each cell one by one from
bottom row to top row and from right to left
"""
return HBRTLIterator(self)
[docs] def vertical(self):
"""
Default iterator to go through each cell one by one from
leftmost column to rightmost row and from top to bottom
"""
return VTLBRIterator(self)
[docs] def rvertical(self):
"""
Default iterator to go through each cell one by one from rightmost
column to leftmost row and from bottom to top
"""
return VBRTLIterator(self)
[docs] def rows(self):
"""
Returns a row iterator to go through each row from top to bottom
"""
return RowIterator(self)
[docs] def rrows(self):
"""
Returns a row iterator to go through each row from bottom to top
"""
return RowReverseIterator(self)
[docs] def columns(self):
"""
Returns a column iterator to go through each column from left to right
"""
return ColumnIterator(self)
[docs] def rcolumns(self):
"""
Returns a column iterator to go through each column from right to left
"""
return ColumnReverseIterator(self)
[docs] def number_of_rows(self):
"""
Number of rows in the data file
"""
return self.reader.number_of_rows()
[docs] def number_of_columns(self):
"""
Number of columns in the data file
"""
return self.reader.number_of_columns()
[docs] def cell_value(self, row, column):
"""
Random access to the data cells
"""
if row in self.row_range() and column in self.column_range():
return self.reader.cell_value(row, column)
else:
return None
[docs] def row_range(self):
"""
Utility function to get row range
"""
return range(0, self.reader.number_of_rows())
[docs] def column_range(self):
"""
Utility function to get column range
"""
return range(0, self.reader.number_of_columns())
[docs] def row_at(self, index):
"""
Returns an array that collects all data at the specified row
"""
if index in self.row_range():
cell_array = []
for i in self.column_range():
cell_array.append(self.cell_value(index, i))
return cell_array
else:
return None
[docs] def column_at(self, index):
"""
Returns an array that collects all data at the specified column
"""
if index in self.column_range():
cell_array = []
for i in self.row_range():
cell_array.append(self.cell_value(i, index))
return cell_array
else:
return None
[docs] def contains(self, predicate):
for r in self.rows():
if predicate(r):
return True
else:
return False
[docs]class FilterableReader(Reader):
"""
Reader that can be applied one filter
"""
_filter = None
[docs] def row_range(self):
if self._filter:
new_rows = self.reader.number_of_rows() - self._filter.rows()
return range(0, new_rows)
else:
return range(0, self.reader.number_of_rows())
[docs] def column_range(self):
if self._filter:
new_cols = self.reader.number_of_columns() - self._filter.columns()
return range(0, new_cols)
else:
return range(0, self.reader.number_of_columns())
[docs] def number_of_rows(self):
"""
Number of rows in the data file
"""
if self._filter:
return self.reader.number_of_rows() - self._filter.rows()
else:
return self.reader.number_of_rows()
[docs] def number_of_columns(self):
"""
Number of columns in the data file
"""
if self._filter:
return self.reader.number_of_columns() - self._filter.columns()
else:
return self.reader.number_of_columns()
[docs] def cell_value(self, row, column):
"""
Random access to the data cells
"""
if row in self.row_range() and column in self.column_range():
if self._filter:
new_row, new_column = self._filter.translate(row, column)
return self.reader.cell_value(new_row, new_column)
else:
return self.reader.cell_value(row, column)
else:
return None
[docs] def filter(self, afilter):
afilter.validate_filter(self)
self._filter = afilter
return self
[docs]class GenericSeriesReader(FilterableReader):
"""
For data with column headers
x y z
1 2 3
4 5 6
This class has a default filter that filter out
row 0 as headers. Extra functions were added
to return headers at row 0
"""
def __init__(self, reader):
self.reader = reader
# filter out the first row
self.filter(RowFilter([0]))
self.headers = None
def _headers(self):
self.headers = []
for i in self.column_range():
self.headers.append(self.reader.cell_value(0, i))
[docs] def series(self):
self._headers()
return self.headers
[docs] def named_column_at(self, name):
self._headers()
index = self.headers.index(name)
column_array = self.column_at(index)
return {name: column_array}
def __iter__(self):
return SeriesColumnIterator(self)
[docs]class StaticSeriesReader(GenericSeriesReader):
"""
Static Series Reader. No filters can be applied.
"""
def __init__(self, file):
reader = Reader(file)
GenericSeriesReader.__init__(self, reader)
[docs]class ColumnFilterableSeriesReader(GenericSeriesReader):
"""
Columns can be filtered but not rows
"""
def __init__(self, file):
self.reader = FilterableReader(file)
GenericSeriesReader.filter(self, RowFilter([0]))
self.headers = None
[docs] def filter(self, filter):
self.reader.filter(filter)
self._filter.validate_filter(self)
[docs]class SeriesReader(GenericSeriesReader):
"""
rows other than header row can be filtered. row number
has been shifted by 1 as header row is protected.
columns can be filtered.
"""
def __init__(self, file):
self.reader = ColumnFilterableSeriesReader(file)
[docs] def series(self):
return self.reader.series()
[docs] def named_column_at(self, name):
headers = self.series()
index = headers.index(name)
column_array = self.column_at(index)
return {name: column_array}
[docs] def filter(self, afilter):
if isinstance(afilter, ColumnIndexFilter):
self.reader.filter(afilter)
else:
GenericSeriesReader.filter(self, afilter)