Skip to content

Commit

Permalink
Fix NaN handling for strings (ref biolab#6670)
Browse files Browse the repository at this point in the history
In fixing this, string handling was switched from fixed-length to variable-length strings; see
https://docs.h5py.org/en/stable/special.html#variable-length-strings
  • Loading branch information
stuart-cls committed Sep 27, 2024
1 parent 9a52609 commit 2ef7b8d
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import h5py
import numpy as np
import pandas as pd

import xlrd
import xlsxwriter
Expand Down Expand Up @@ -601,6 +602,8 @@ def parse(attr):
f.create_dataset("Y", data=data.Y)
if data.metas.size:
for i, attr in enumerate(data.domain.metas):
col_type = 'S' if isinstance(attr, StringVariable) else 'f'
col_type = h5py.string_dtype() if isinstance(attr, StringVariable) else 'f'
col_data = data.metas[:, [i]].astype(col_type)
f.create_dataset(f'metas/{i}', data=col_data)
if col_type is not 'f':
col_data[pd.isnull(col_data)] = ""
f.create_dataset(f'metas/{i}', data=col_data, dtype=col_type)

0 comments on commit 2ef7b8d

Please sign in to comment.