Skip to content

Commit

Permalink
Reduce memory by saving data to temp files. Add data containers.
Browse files Browse the repository at this point in the history
  • Loading branch information
mcara committed Jul 20, 2022
1 parent 05887e2 commit e9db52c
Show file tree
Hide file tree
Showing 8 changed files with 394 additions and 83 deletions.
12 changes: 10 additions & 2 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,16 @@ Release Notes
.. 0.2.2 (unreleased)
==================
0.2.1 (unreleased)
==================
0.3.0 (20-July-2022)
====================

- Added data containers module and updated main code to support these containers
with the purpose of minimizing memory usage by writing/loading data
arrays to temporary files when needed. [#21]


0.2.1 (08-July-2022)
====================

- Updated code to reduce warnings with latest ``numpy`` versions. [#16]

Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Content
:maxdepth: 2

source/match
source/containers
source/lsq_optimizer
source/utils
source/LICENSE.rst
Expand Down
11 changes: 11 additions & 0 deletions docs/source/containers.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
***************
Data Containers
***************

.. moduleauthor:: Mihai Cara <[email protected]>

.. currentmodule:: wiimatch.containers

.. automodule:: wiimatch.containers
:members:
:undoc-members:
1 change: 1 addition & 0 deletions wiimatch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
from . import match # noqa: F401
from . import lsq_optimizer # noqa: F401
from . import utils # noqa: F401
from . import containers # noqa: F401
208 changes: 208 additions & 0 deletions wiimatch/containers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
"""
Data containers for accessing image data (i.e., `numpy.ndarray`)
uniformly whether they are kept in memory, as memory mapped files (load),
or stored to/loaded from a file as whole arrays.
:Author: Mihai Cara (contact: [email protected])
:License: :doc:`LICENSE`
"""

import abc
import tempfile

import numpy as np


# Public API of this module: the abstract base class followed by the three
# concrete containers defined below.
__all__ = ['WMData', 'WMInMemoryData', 'WMMappedData', 'WMMemMappedData']


class WMData(abc.ABC):
    """ Abstract interface shared by all data containers.

    Concrete containers expose the linked array through the ``data``
    property (readable and writable) and its dimensions through the
    read-only ``shape`` property.

    """
    kind = 'mapped'
    """ Storage hint - one of ``'mapped'``, ``'file'``, or ``'in-memory'`` -
    that calling code may consult for performance optimization. """

    @property
    @abc.abstractmethod
    def shape(self):
        """ Tuple describing the shape of the linked data. """

    @property
    @abc.abstractmethod
    def data(self):
        """ Linked data (get/set).

        Parameters
        ----------
        data : object
            Data to be set.

        """

    @data.setter
    @abc.abstractmethod
    def data(self, data):
        # Abstract: concrete containers decide how the data are stored.
        return None


class WMInMemoryData(WMData):
    """ Accessor for `numpy.ndarray` data that are held in memory. """

    kind = 'in-memory'
    """ Storage hint - one of ``'mapped'``, ``'file'``, or ``'in-memory'`` -
    that calling code may consult for performance optimization. """

    def __init__(self, data):
        super().__init__()
        # Route through the property setter so the input is converted
        # to an array exactly as for any later assignment.
        self.data = data

    @property
    def shape(self):
        """ Tuple describing the shape of the linked array. """
        return self._data.shape

    @property
    def data(self):
        """ Linked `numpy.ndarray` (get/set).

        Parameters
        ----------
        data : object
            Data to be set. It is converted with `numpy.asarray` before
            being stored.

        """
        return self._data

    @data.setter
    def data(self, data):
        array = np.asarray(data)
        self._data = array


class WMMappedData(WMData):
    """ Data container for arrays stored in temporary files.

    The array is written to the file with `numpy.save` every time ``data``
    is set and is read back as a whole with `numpy.load` every time ``data``
    is accessed. This is best suited when array data are needed in memory
    all at once and, when the array is not needed, it can be kept in a file.

    To access small segments of data, use :class:`WMMemMappedData`.

    Parameters
    ----------
    data : object
        Data to be stored. It is converted with `numpy.asarray`.

    tmpfile : file-like object, optional
        An already opened, seekable binary file to store the data in. When
        provided, the file is managed by the caller and is *not* closed by
        this container. When `None`, a `tempfile.NamedTemporaryFile` is
        created and closed when the container is deleted.

    prefix : str, optional
        Passed as ``prefix`` to `tempfile.NamedTemporaryFile`. Used only
        when ``tmpfile`` is `None`.

    suffix : str, optional
        Passed as ``suffix`` to `tempfile.NamedTemporaryFile`. Used only
        when ``tmpfile`` is `None`.

    tmpdir : str, optional
        Passed as ``dir`` to `tempfile.NamedTemporaryFile`. Used only
        when ``tmpfile`` is `None`.

    Raises
    ------
    RuntimeError
        If the temporary file object could not be created.

    """

    kind = 'file'
    """ Hints to how data are stored: ``'mapped'``, ``'file'``, or
    ``'in-memory'``. May be used by code for performance optimization. """

    def __init__(self, data, tmpfile=None, prefix='tmp_wiimatch_',
                 suffix='.npy', tmpdir=''):
        super().__init__()
        # Define both attributes before anything can raise so that __del__
        # always finds a consistent state, even for a half-built instance.
        self._tmp = None
        self._close = False

        if tmpfile is None:
            # NOTE(review): the default tmpdir='' is forwarded unchanged as
            # ``dir``; this looks like it places the file in the current
            # working directory rather than the system temp dir - confirm.
            self._tmp = tempfile.NamedTemporaryFile(
                prefix=prefix,
                suffix=suffix,
                dir=tmpdir
            )
            self._close = True
            if not self._tmp:
                raise RuntimeError("Unable to create temporary file.")
        else:
            # temp file managed by the caller
            self._tmp = tmpfile

        self.data = data

    @property
    def data(self):
        """ Sets/gets linked `numpy.ndarray`.

        Getting loads the whole array from the file; setting overwrites the
        file content with the new array.

        Parameters
        ----------
        data : object
            Data to be set.

        """
        self._tmp.seek(0)
        return np.load(self._tmp)

    @data.setter
    def data(self, data):
        data = np.asarray(data)
        self._tmp.seek(0)
        np.save(self._tmp, data)
        # Drop leftovers of a previously stored, larger array so that the
        # file does not keep occupying disk space it no longer needs.
        self._tmp.truncate()
        # Record the shape only once the data were written successfully.
        self._data_shape = data.shape

    def __del__(self):
        # getattr() guards against instances whose __init__ did not run to
        # completion (e.g., the temporary file could not be created).
        if getattr(self, '_close', False):
            tmp = getattr(self, '_tmp', None)
            if tmp is not None:
                tmp.close()

    @property
    def shape(self):
        """ Returns a tuple describing the shape of linked data. """
        return self._data_shape


class WMMemMappedData(WMData):
    """ Data container for arrays stored in memory-mapped temporary files.

    The array is copied into a `numpy.memmap` backed by the file when
    ``data`` is set, and ``data`` returns that memory map. This is best
    suited for accessing small segments of the data at a time.

    To access entire data arrays, use :class:`WMMappedData`.

    Parameters
    ----------
    data : object
        Data to be stored. It is converted with `numpy.asarray`.

    tmpfile : file-like object, optional
        An already opened binary file to back the memory map. When
        provided, the file is managed by the caller and is *not* closed by
        this container. When `None`, a `tempfile.NamedTemporaryFile` is
        created and closed when the container is deleted.

    prefix : str, optional
        Passed as ``prefix`` to `tempfile.NamedTemporaryFile`. Used only
        when ``tmpfile`` is `None`.

    suffix : str, optional
        Passed as ``suffix`` to `tempfile.NamedTemporaryFile`. Used only
        when ``tmpfile`` is `None`.

    tmpdir : str, optional
        Passed as ``dir`` to `tempfile.NamedTemporaryFile`. Used only
        when ``tmpfile`` is `None`.

    Raises
    ------
    RuntimeError
        If the temporary file object could not be created.

    """

    kind = 'mapped'
    """ Hints to how data are stored: ``'mapped'``, ``'file'``, or
    ``'in-memory'``. May be used by code for performance optimization. """

    def __init__(self, data, tmpfile=None, prefix='tmp_wiimatch_',
                 suffix='.npy', tmpdir=''):
        super().__init__()
        # Define all attributes before anything can raise so that __del__
        # always finds a consistent state, even for a half-built instance.
        self._tmp = None
        self._close = False
        self._data = None

        if tmpfile is None:
            # NOTE(review): the default tmpdir='' is forwarded unchanged as
            # ``dir``; this looks like it places the file in the current
            # working directory rather than the system temp dir - confirm.
            self._tmp = tempfile.NamedTemporaryFile(
                prefix=prefix,
                suffix=suffix,
                dir=tmpdir
            )
            self._close = True
            if not self._tmp:
                raise RuntimeError("Unable to create temporary file.")
        else:
            # temp file managed by the caller
            self._tmp = tmpfile

        self.data = data

    @property
    def data(self):
        """ Sets/gets linked `numpy.ndarray`.

        Getting returns the `numpy.memmap` backed by the file; setting
        creates a new memory map with the shape and data type of the input
        and copies the input into it.

        Parameters
        ----------
        data : object
            Data to be set.

        """
        return self._data

    @data.setter
    def data(self, data):
        data = np.asarray(data)
        self._tmp.seek(0)
        # Pass the complete dtype object (not ``dtype.type``) so that byte
        # order, string width, datetime units, and structured fields of the
        # input are preserved in the memory map.
        mapped = np.memmap(self._tmp, dtype=data.dtype, mode='w+',
                           shape=data.shape)
        mapped[...] = data[...]
        # Replace the previous state only once the copy has succeeded.
        self._data = mapped
        self._data_shape = data.shape

    def __del__(self):
        # getattr() guards against instances whose __init__ did not run to
        # completion (e.g., the temporary file could not be created).
        if getattr(self, '_close', False):
            # Release the memory map before closing the file that backs it.
            self._data = None
            tmp = getattr(self, '_tmp', None)
            if tmp is not None:
                tmp.close()

    @property
    def shape(self):
        """ Returns a tuple describing the shape of linked data. """
        return self._data_shape
Loading

0 comments on commit e9db52c

Please sign in to comment.