xls.py

#
#

Copyright 2014-2019 Álvaro Justen https://github.com/turicas/rows/

#

This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

#

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

#

You should have received a copy of the GNU Lesser General Public License along with this program. If not, see http://www.gnu.org/licenses/.

from __future__ import unicode_literals

import datetime
import os
from io import BytesIO

import xlrd
import xlwt

import rows.fields as fields
from rows.plugins.utils import create_table, prepare_to_export
from rows.utils import Source


CELL_TYPES = {
    xlrd.XL_CELL_BLANK: fields.TextField,
    xlrd.XL_CELL_DATE: fields.DatetimeField,
    xlrd.XL_CELL_ERROR: None,
    xlrd.XL_CELL_TEXT: fields.TextField,
    xlrd.XL_CELL_BOOLEAN: fields.BoolField,
    xlrd.XL_CELL_EMPTY: None,
    xlrd.XL_CELL_NUMBER: fields.FloatField,
}
#

TODO: add more formatting styles for other types such as currency TODO: styles may be influenced by locale

FORMATTING_STYLES = {
    fields.DateField: xlwt.easyxf(num_format_str="yyyy-mm-dd"),
    fields.DatetimeField: xlwt.easyxf(num_format_str="yyyy-mm-dd hh:mm:ss"),
    fields.PercentField: xlwt.easyxf(num_format_str="0.00%"),
}
#
def _python_to_xls(field_types):
#
    def convert_value(field_type, value):
        data = {}
        if field_type in FORMATTING_STYLES:
            data["style"] = FORMATTING_STYLES[field_type]

        if field_type in (
            fields.BinaryField,
            fields.BoolField,
            fields.DateField,
            fields.DatetimeField,
            fields.DecimalField,
            fields.FloatField,
            fields.IntegerField,
            fields.PercentField,
            fields.TextField,
        ):
            return value, data

        else:  # don't know this field
            return field_type.serialize(value), data
#
    def convert_row(row):
        return [
            convert_value(field_type, value)
            for field_type, value in zip(field_types, row)
        ]

    return convert_row
#

Return the cell value of the table passed by argument, based in row and column.

def cell_value(sheet, row, col):
#
    cell = sheet.cell(row, col)
    field_type = CELL_TYPES[cell.ctype]
#

TODO: this approach will not work if using locale

    value = cell.value

    if field_type is None:
        return None

    elif field_type is fields.TextField:
        if cell.ctype != xlrd.XL_CELL_BLANK:
            return value
        else:
            return ""

    elif field_type is fields.DatetimeField:
        if value == 0.0:
            return None

        try:
            time_tuple = xlrd.xldate_as_tuple(value, sheet.book.datemode)
        except xlrd.xldate.XLDateTooLarge:
            return None
        value = field_type.serialize(datetime.datetime(*time_tuple))
        return value.split("T00:00:00")[0]

    elif field_type is fields.BoolField:
        if value == 0:
            return False
        elif value == 1:
            return True

    elif cell.xf_index is None:
        return value  # TODO: test

    else:
        book = sheet.book
        xf = book.xf_list[cell.xf_index]
        fmt = book.format_map[xf.format_key]

        if fmt.format_str.endswith("%"):
#

TODO: we may optimize this approach: we’re converting to string and the library is detecting the type when we could just say to the library this value is PercentField

            if value is not None:
                try:
                    decimal_places = len(fmt.format_str[:-1].split(".")[-1])
                except IndexError:
                    decimal_places = 2
                return "{}%".format(str(round(value * 100, decimal_places)))
            else:
                return None

        elif type(value) == float and int(value) == value:
            return int(value)

        else:
            return value
#
def get_table_start(sheet):
    empty_cell_type = xlrd.empty_cell.ctype
    start_column, start_row = 0, 0
    for col in range(sheet.ncols):
        if any(cell for cell in sheet.col(col) if cell.ctype != empty_cell_type):
            start_column = col
            break
    for row in range(sheet.nrows):
        if any(cell for cell in sheet.row(row) if cell.ctype != empty_cell_type):
            start_row = row
            break
    return start_row, start_column
#
def sheet_names(filename_or_fobj):
#

TODO: setup/teardown must be methods of a class so we can reuse them

    source = Source.from_file(filename_or_fobj, mode="rb", plugin_name="xls")
    source.fobj.close()
    devnull = open(os.devnull, mode="w")
    book = xlrd.open_workbook(source.uri, formatting_info=False, logfile=devnull)
    result = book.sheet_names()
    del book
    devnull.close()
    return result
#

Return a rows.Table created from imported XLS file.

def import_from_xls(
    filename_or_fobj,
    sheet_name=None,
    sheet_index=0,
    start_row=None,
    start_column=None,
    end_row=None,
    end_column=None,
    *args,
    **kwargs
):
#
    source = Source.from_file(filename_or_fobj, mode="rb", plugin_name="xls")
    source.fobj.close()
    devnull = open(os.devnull, mode="w")
    book = xlrd.open_workbook(source.uri, formatting_info=True, logfile=devnull)

    if sheet_name is not None:
        sheet = book.sheet_by_name(sheet_name)
    else:
        sheet = book.sheet_by_index(sheet_index)
#

TODO: may re-use Excel data types

#

Get header and rows xlrd library reads rows and columns starting from 0 and ending on sheet.nrows/ncols - 1. rows accepts the same pattern The xlrd library reads rows and columns starting from 0 and ending on sheet.nrows/ncols - 1. rows also uses 0-based indexes, so no transformation is needed

    min_row, min_column = get_table_start(sheet)
    max_row, max_column = sheet.nrows - 1, sheet.ncols - 1
#

TODO: consider adding a parameter ignore_padding=True and when it’s True, consider start_row starting from min_row and start_column starting from min_col.

    start_row = max(start_row if start_row is not None else min_row, min_row)
    end_row = min(end_row if end_row is not None else max_row, max_row)
    start_column = max(
        start_column if start_column is not None else min_column, min_column
    )
    end_column = min(end_column if end_column is not None else max_column, max_column)

    table_rows = [
        [
            cell_value(sheet, row_index, column_index)
            for column_index in range(start_column, end_column + 1)
        ]
        for row_index in range(start_row, end_row + 1)
    ]

    devnull.close()
    meta = {"imported_from": "xls", "source": source, "name": sheet.name}
    return create_table(table_rows, meta=meta, *args, **kwargs)
#

Export the rows.Table to XLS file and return the saved file.

def export_to_xls(table, filename_or_fobj=None, sheet_name="Sheet1", *args, **kwargs):
#
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(sheet_name)

    prepared_table = prepare_to_export(table, *args, **kwargs)

    field_names = next(prepared_table)
    for column_index, field_name in enumerate(field_names):
        sheet.write(0, column_index, field_name)

    _convert_row = _python_to_xls([table.fields.get(field) for field in field_names])
    for row_index, row in enumerate(prepared_table, start=1):
        for column_index, (value, data) in enumerate(_convert_row(row)):
            sheet.write(row_index, column_index, value, **data)

    return_result = False
    if filename_or_fobj is None:
        filename_or_fobj = BytesIO()
        return_result = True

    source = Source.from_file(filename_or_fobj, mode="wb", plugin_name="xls")
    workbook.save(source.fobj)
    source.fobj.flush()

    if return_result:
        source.fobj.seek(0)
        result = source.fobj.read()
    else:
        result = source.fobj

    if source.should_close:
        source.fobj.close()

    return result