Source code for pyexcel.filters

"""
    pyexcel.filters
    ~~~~~~~~~~~~~~~

    Filtering functions for pyexcel readers

    :copyright: (c) 2014-2015 by Onni Software Ltd.
    :license: New BSD License, see LICENSE for more details

    Design note for filter algorithm::

        #1 2 3 4 5 6 7  <- original index
        #  x     x
        #1   3 4   6 7  <- filtered index
        #1   2 3   4 5  <- actual index after filtering

    Design note for multiple filter algorithm::

        #    1 2 3 4 5 6 7 8 9
        f1     x       x
             1   2 3 4   5 6 7
        f2       x   x     x
             1     2     3   4
        f3         x
             1           2   3
"""
from ._compact import PY2


class IndexFilter:
    """A generic index filter

    Base class of every filter in this module. It stores the evaluation
    function and, by itself, filters nothing out.
    """
    def __init__(self, func):
        """Constructor

        :param Function func: an evaluation function. Subclasses filter
                              out every item for which it returns a true
                              value. It may be None.
        """
        self.eval_func = func
        # holds the original function while the filter is inverted
        self.shallow_eval_func = None
        # indices to be filtered out
        self.indices = None

    def invert(self):
        """Toggle between the evaluation function and its negation

        The first call stores the original function in
        ``shallow_eval_func`` and installs a negating wrapper as
        ``eval_func``. The next call restores the original function.
        Nothing happens when there is no evaluation function.

        :returns: the filter itself, so that calls can be chained
        """
        if self.eval_func:
            if self.shallow_eval_func is None:
                self.shallow_eval_func = self.eval_func
                # the wrapper looks the original up through self at call time
                self.eval_func = lambda value: not self.shallow_eval_func(value)
            else:
                self.eval_func = self.shallow_eval_func
                self.shallow_eval_func = None
        return self

    def rows(self):
        """Rows that were filtered out
        """
        return 0

    def columns(self):
        """Columns that were filtered out"""
        return 0

    def validate_filter(self, reader):
        """
        Find out which column index to be filtered

        The generic filter has nothing to work out; subclasses override
        this method.

        :param Matrix reader: a Matrix instance

        """
        pass

    def translate(self, row, column):
        """Map the row, column after filtering to the
        original ones before filtering

        The generic filter removes nothing (see :meth:`rows` and
        :meth:`columns`), so the position is returned unchanged.

        :param int row: row index after filtering
        :param int column: column index after filtering
        :returns: set of (row, column)
        """
        # Every override returns a 2-tuple; returning None here would
        # break callers that unpack the result.
        return row, column


class RegionFilter(IndexFilter):
    """Filter on both row index and column index

    The region described by the two slices is kept; once
    :meth:`validate_filter` has run, every row and column outside that
    region is filtered out.
    """
    def __init__(self, row_slice, column_slice):
        """Constructor

        A missing ``start`` is read as 0 and a missing ``step`` as 1, so
        plain slices such as ``slice(1, 4)`` are accepted. ``stop`` must
        be given.

        :param slice row_slice: row index range
        :param slice column_slice: column index range
        """
        # Initialise eval_func/shallow_eval_func/indices so that the
        # inherited invert() does not fail on missing attributes.
        IndexFilter.__init__(self, None)
        # The requested region is remembered separately because
        # validate_filter() overwrites row_indices/column_indices.
        self._kept_rows = self._expand(row_slice)
        self._kept_columns = self._expand(column_slice)
        # Until validate_filter() runs these hold the region itself;
        # afterwards they hold the indices that are filtered out.
        self.row_indices = list(self._kept_rows)
        self.column_indices = list(self._kept_columns)

    @staticmethod
    def _expand(a_slice):
        """Turn a slice into the list of indices it covers"""
        start = 0 if a_slice.start is None else a_slice.start
        step = 1 if a_slice.step is None else a_slice.step
        return list(range(start, a_slice.stop, step))

    def columns(self):
        """Columns that were filtered out"""
        return len(self.column_indices)

    def rows(self):
        """Rows that were filtered out"""
        return len(self.row_indices)

    def validate_filter(self, reader):
        """
        Find out which row and column indices to be filtered

        Everything in the reader's row and column ranges that lies
        outside the requested region is marked as filtered out. The
        result is computed from the stored region, so calling this
        method again gives the same answer instead of flipping it.

        :param Matrix reader: a Matrix instance
        """
        kept_rows = set(self._kept_rows)
        kept_columns = set(self._kept_columns)
        self.row_indices = [i for i in reader.row_range()
                            if i not in kept_rows]
        self.column_indices = [i for i in reader.column_range()
                               if i not in kept_columns]

    def translate(self, row, column):
        """Map the row, column after filtering to the
        original ones before filtering

        :param int row: row index after filtering
        :param int column: column index after filtering
        :returns: set of (new_row, new_column)
        """
        # Walking the removed indices in ascending order, each one at or
        # before the current position shifts that position by one.
        new_column = column
        for i in self.column_indices:
            if i <= new_column:
                new_column += 1
        new_row = row
        for i in self.row_indices:
            if i <= new_row:
                new_row += 1
        return new_row, new_column
class ColumnIndexFilter(IndexFilter):
    """A column filter that operates on column indices"""
    def columns(self):
        """Columns that were filtered out

        :returns: number of filtered columns, 0 when nothing has been
                  selected yet
        """
        # indices stays None until validate_filter() has run; guard it
        # the same way RowIndexFilter.rows() does.
        if self.indices:
            return len(self.indices)
        else:
            return 0

    def validate_filter(self, reader):
        """
        Find out which column index to be filtered

        Every index of ``reader.column_range()`` for which the
        evaluation function returns a true value is recorded.

        :param Matrix reader: a Matrix instance
        """
        self.indices = [i for i in reader.column_range()
                        if self.eval_func(i)]

    def translate(self, row, column):
        """Map the row, column after filtering to the
        original ones before filtering

        :param int row: row index after filtering
        :param int column: column index after filtering
        :returns: set of (row, new_column)
        """
        if self.indices:
            new_column = column
            # each removed index at or before the current position
            # pushes the original position one step further
            for i in self.indices:
                if i <= new_column:
                    new_column += 1
            return row, new_column
        else:
            return row, column
class ColumnFilter(ColumnIndexFilter):
    """Filters out a list of columns"""
    def __init__(self, indices):
        """Constructor

        :param list indices: a list of column indices to be filtered out
        """
        def is_listed(column_index):
            # membership is checked against the caller's own container
            return column_index in indices

        ColumnIndexFilter.__init__(self, is_listed)
class SingleColumnFilter(ColumnIndexFilter):
    """Filters out a single column index"""
    def __init__(self, index):
        """Constructor

        :param int index: the column index to be filtered out
        """
        def is_target(column_index):
            return column_index == index

        ColumnIndexFilter.__init__(self, is_target)
class OddColumnFilter(ColumnIndexFilter):
    """Filters out odd indexed columns

    Columns are counted from one:

    * column 0 is regarded as the first column -> this will be filtered out
    * column 1 is regarded as the second column
    """
    def __init__(self):
        def is_odd_position(column_index):
            # shift the zero based index to a one based position first
            return (column_index + 1) % 2 == 1

        ColumnIndexFilter.__init__(self, is_odd_position)
class EvenColumnFilter(ColumnIndexFilter):
    """Filters out even indexed columns

    Columns are counted from one:

    * column 0 is regarded as the first column
    * column 1 is regarded as the second column -> this will be filtered out
    """
    def __init__(self):
        def is_even_position(column_index):
            # shift the zero based index to a one based position first
            return (column_index + 1) % 2 == 0

        ColumnIndexFilter.__init__(self, is_even_position)
class RowIndexFilter(IndexFilter):
    """Filter out rows by its row index"""
    def rows(self):
        """number of rows to be filtered out"""
        return len(self.indices) if self.indices else 0

    def validate_filter(self, reader):
        """
        Find out which row index to be filtered

        Every index of ``reader.row_range()`` for which the evaluation
        function returns a true value is recorded.

        :param Matrix reader: a Matrix instance
        """
        flagged = []
        for row_index in reader.row_range():
            if self.eval_func(row_index):
                flagged.append(row_index)
        self.indices = flagged

    def translate(self, row, column):
        """Map the row, column after filtering to the
        original ones before filtering

        :param int row: row index after filtering
        :param int column: column index after filtering
        :returns: set of (new_row, column)
        """
        if not self.indices:
            return row, column
        original_row = row
        # each removed index at or before the current position pushes
        # the original position one step further
        for removed in self.indices:
            if removed <= original_row:
                original_row += 1
        return original_row, column
class RowFilter(RowIndexFilter):
    """Filters a list of rows"""
    def __init__(self, indices):
        """Constructor

        :param list indices: a list of row indices to be filtered out
        """
        def is_listed(row_index):
            # membership is checked against the caller's own container
            return row_index in indices

        RowIndexFilter.__init__(self, is_listed)
class SingleRowFilter(RowIndexFilter):
    """Filters out a single row"""
    def __init__(self, index):
        """Constructor

        :param int index: the row index to be filtered out
        """
        def is_target(row_index):
            return row_index == index

        RowIndexFilter.__init__(self, is_target)
class OddRowFilter(RowIndexFilter):
    """Filters out odd indexed rows

    row 0 is seen as the first row, so rows 0, 2, 4, ... are the ones
    filtered out
    """
    def __init__(self):
        def is_odd_position(row_index):
            # shift the zero based index to a one based position first
            return (row_index + 1) % 2 == 1

        RowIndexFilter.__init__(self, is_odd_position)
class EvenRowFilter(RowIndexFilter):
    """Filters out even indexed rows

    row 0 is seen as the first row, so rows 1, 3, 5, ... are the ones
    filtered out
    """
    def __init__(self):
        def is_even_position(row_index):
            # shift the zero based index to a one based position first
            return (row_index + 1) % 2 == 0

        RowIndexFilter.__init__(self, is_even_position)
class RowValueFilter(RowIndexFilter):
    """Filters out rows based on its row values

    .. note:: it takes time as it needs to go through all values
    """
    def validate_filter(self, reader):
        """
        Filter out the row indices

        This is what it does::

            new_indices = []
            index = 0
            for r in reader.rows():
                if self.eval_func(r):
                    new_indices.append(index)
                index += 1

        :param Matrix reader: a Matrix instance
        """
        self.indices = [index
                        for index, row in enumerate(reader.rows())
                        if self.eval_func(row)]


class NamedRowValueFilter(RowIndexFilter):
    """Filter out rows that satisfy a condition

    The evaluation function receives each row as a dictionary built by
    pairing ``reader.colnames`` with the row values.

    .. note:: it takes time as it needs to go through all values
    """
    def validate_filter(self, reader):
        """
        Filter out the row indices

        This is what it does::

            new_indices = []
            index = 0
            for r in reader.rows():
                if self.eval_func(dict(zip(reader.colnames, r))):
                    new_indices.append(index)
                index += 1

        :param Matrix reader: a Matrix instance
        """
        names = reader.colnames
        self.indices = [index
                        for index, row in enumerate(reader.rows())
                        if self.eval_func(dict(zip(names, row)))]


class SeriesRowValueFilter(NamedRowValueFilter):
    """Backward compatibility"""


class ColumnValueFilter(ColumnIndexFilter):
    """Filters out columns based on its column values

    .. note:: it takes time as it needs to go through all values
    """
    def validate_filter(self, reader):
        """
        Filter out the column indices

        This is what it does::

            new_indices = []
            index = 0
            for c in reader.columns():
                if self.eval_func(c):
                    new_indices.append(index)
                index += 1

        :param Matrix reader: a Matrix instance
        """
        self.indices = [index
                        for index, column in enumerate(reader.columns())
                        if self.eval_func(column)]


class NamedColumnValueFilter(ColumnIndexFilter):
    """Filter out columns that satisfy a condition

    The evaluation function receives each column as a dictionary built
    by pairing ``reader.rownames`` with the column values.

    .. note:: it takes time as it needs to go through all values
    """
    def validate_filter(self, reader):
        """
        Filter out the column indices

        This is what it does::

            new_indices = []
            index = 0
            for c in reader.columns():
                if self.eval_func(dict(zip(reader.rownames, c))):
                    new_indices.append(index)
                index += 1

        :param Matrix reader: a Matrix instance
        """
        names = reader.rownames
        self.indices = [index
                        for index, column in enumerate(reader.columns())
                        if self.eval_func(dict(zip(names, column)))]


class RowInFileFilter(RowValueFilter):
    """Filter rows by comparing them with the rows of another reader

    A row is filtered out when ``reader.contains`` returns a false
    value for an equality check against that row.
    """
    # NOTE(review): presumably contains() reports whether any row of the
    # other reader satisfies the given predicate -- confirm. If so, rows
    # missing from the other reader are the ones removed; verify that
    # this is the intended direction.
    def __init__(self, reader):
        """
        Constructor

        :param Matrix reader: a Matrix instance
        """
        def is_absent(row_a):
            def same_row(row_b):
                return row_a == row_b

            return not reader.contains(same_row)

        RowValueFilter.__init__(self, is_absent)