Skip to content

Commit

Permalink
Temporarily fix openpyxl read_worksheets issue
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Mar 1, 2024
1 parent ba951fa commit 6026fe9
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@
from Orange.data import Compression, open_compressed, detect_encoding, \
isnastr, guess_data_type, sanitize_variable
from Orange.data.io_base import FileFormatBase, Flags, DataTableMixin, PICKLE_PROTOCOL
from Orange.misc.openpyxl_patch import read_worksheets

from Orange.util import flatten


# Temporary fix for the file-not-closed issue until openpyxl ships a release with the fix
openpyxl.reader.excel.ExcelReader.read_worksheets = read_worksheets


# Support values longer than 128K (i.e. text contents features)
csv.field_size_limit(100*1024*1024)

Expand Down
6 changes: 6 additions & 0 deletions Orange/misc/_distmatrix_xlsx.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import numpy as np
import openpyxl

from Orange.misc.openpyxl_patch import read_worksheets


# Temporary fix for the file-not-closed issue until openpyxl ships a release with the fix
openpyxl.reader.excel.ExcelReader.read_worksheets = read_worksheets


def read_matrix(filename, sheet_name=None):
sheet = _get_sheet(filename, sheet_name)
Expand Down
100 changes: 100 additions & 0 deletions Orange/misc/openpyxl_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import warnings

from openpyxl.cell import MergedCell
from openpyxl.comments.comment_sheet import CommentSheet
from openpyxl.drawing.spreadsheet_drawing import SpreadsheetDrawing
from openpyxl.packaging.relationship import (
get_rels_path,
RelationshipList,
get_dependents,
)
from openpyxl.pivot.table import TableDefinition
from openpyxl.reader.drawings import find_images
from openpyxl.worksheet._read_only import ReadOnlyWorksheet
from openpyxl.worksheet._reader import WorksheetReader
from openpyxl.xml.constants import COMMENTS_NS
from openpyxl.xml.functions import fromstring
from openpyxl.worksheet.table import Table


# Temporary fix for the file-not-closed issue until openpyxl ships a release with the fix
# https://foss.heptapod.net/openpyxl/openpyxl/-/merge_requests/
# 436#7922bd5f66e11e4ca4539f093b2680a25c1f80db
def read_worksheets(self):
    """Read all worksheets of the workbook, always closing the sheet file.

    Intended to be assigned over
    ``openpyxl.reader.excel.ExcelReader.read_worksheets``, so ``self`` is
    the reader instance the method is called on.

    For every ``(sheet, rel)`` pair yielded by ``self.parser.find_sheets()``:

    * entries whose target is not in ``self.valid_files`` are skipped;
    * chartsheets are delegated to ``self.read_chartsheet``;
    * in read-only mode a ``ReadOnlyWorksheet`` is appended to the workbook;
    * otherwise the sheet is created and parsed, and its comments, legacy
      drawing link, tables, charts, images and pivot tables are attached.

    The archive member holding the sheet XML is opened in a ``with`` block,
    so its handle is released even when parsing raises.
    """
    # pylint: disable=too-many-branches
    comment_warning = (
        "Cell '{0}':{1} is part of a merged range but has a comment which will "
        "be removed because merged cells cannot contain any data."
    )
    for sheet, rel in self.parser.find_sheets():
        if rel.target not in self.valid_files:
            continue

        if "chartsheet" in rel.Type:
            self.read_chartsheet(sheet, rel)
            continue

        # Relationships of the sheet; stays empty when the sheet has no
        # .rels part in the archive.
        rels_path = get_rels_path(rel.target)
        rels = RelationshipList()
        if rels_path in self.valid_files:
            rels = get_dependents(self.archive, rels_path)

        if self.read_only:
            ws = ReadOnlyWorksheet(self.wb, sheet.name, rel.target, self.shared_strings)
            ws.sheet_state = sheet.state
            self.wb._sheets.append(ws)  # pylint: disable=protected-access
            continue

        # The context manager guarantees the handle is closed on every exit
        # path, including an exception raised while binding the sheet.
        with self.archive.open(rel.target) as fh:
            ws = self.wb.create_sheet(sheet.name)
            ws._rels = rels  # pylint: disable=protected-access
            ws_parser = WorksheetReader(
                ws, fh, self.shared_strings, self.data_only, self.rich_text
            )
            ws_parser.bind_all()

        # assign any comments to cells
        for r in rels.find(COMMENTS_NS):
            src = self.archive.read(r.target)
            comment_sheet = CommentSheet.from_tree(fromstring(src))
            for ref, comment in comment_sheet.comments:
                try:
                    ws[ref].comment = comment
                except AttributeError:
                    # The assignment failed; warn when the target is a
                    # merged cell, otherwise the comment is dropped silently.
                    c = ws[ref]
                    if isinstance(c, MergedCell):
                        warnings.warn(comment_warning.format(ws.title, c.coordinate))
                        continue

        # preserve link to VML file if VBA
        if self.wb.vba_archive and ws.legacy_drawing:
            ws.legacy_drawing = rels.get(ws.legacy_drawing).target
        else:
            ws.legacy_drawing = None

        for t in ws_parser.tables:
            src = self.archive.read(t)
            xml = fromstring(src)
            table = Table.from_tree(xml)
            ws.add_table(table)

        # pylint: disable=protected-access
        drawings = rels.find(SpreadsheetDrawing._rel_type)
        # Distinct loop name so the outer ``rel`` is not shadowed.
        for drawing_rel in drawings:
            charts, images = find_images(self.archive, drawing_rel.target)
            for c in charts:
                ws.add_chart(c, c.anchor)
            for im in images:
                ws.add_image(im, im.anchor)

        pivot_rel = rels.find(TableDefinition.rel_type)
        pivot_caches = self.parser.pivot_caches
        for r in pivot_rel:
            pivot_path = r.Target
            src = self.archive.read(pivot_path)
            tree = fromstring(src)
            pivot = TableDefinition.from_tree(tree)
            pivot.cache = pivot_caches[pivot.cacheId]
            ws.add_pivot(pivot)

        ws.sheet_state = sheet.state
17 changes: 17 additions & 0 deletions Orange/misc/tests/test_openpyxl_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import unittest

import openpyxl


class TestRemoveTemporarySolution(unittest.TestCase):
    def test_remove_openpyxl_temp_solution(self):
        """
        Passes while the installed openpyxl version is at most 3.1.2.

        When this test starts to fail revert https://github.com/biolab/orange3/pull/6737
        """
        # Compare dotted version components numerically, element by element.
        installed = list(map(int, openpyxl.__version__.split(".")))
        newest_allowed = [3, 1, 2]
        self.assertLessEqual(installed, newest_allowed)


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 6026fe9

Please sign in to comment.