class AddColumnOfRandom(Noisifier):
    """Add a column of uniform random numbers to a dataframe.

    The stage copies the "input" table, inserts a new first column named by
    the ``col_name`` option and filled with draws from the uniform
    distribution on [0, 1), and stores the copy as "output".  The input
    table itself is left untouched.
    """

    name = "AddColumnOfRandom"
    config_options = Noisifier.config_options.copy()
    config_options.update(
        col_name=Param(str, "chaos_bunny", msg="Name of the column with random numbers"),
    )

    def __init__(self, args, comm=None):
        """Perform the standard Noisifier initialization.

        Parameters
        ----------
        args : object
            Stage arguments, forwarded unchanged to ``Noisifier.__init__``.
        comm : object, optional
            Communicator, forwarded unchanged to ``Noisifier.__init__``.
        """
        Noisifier.__init__(self, args, comm=comm)
        # Filled in by _initNoiseModel(); run() always calls that first.
        self.noiseModel = None

    def _initNoiseModel(self):  # pragma: no cover
        """Build the random number generator from the configured seed.

        A private ``numpy.random.Generator`` is used instead of
        ``np.random.seed`` so that running this stage does not reset the
        process-wide NumPy random state that other code may rely on.
        """
        self.noiseModel = np.random.default_rng(self.config.seed)

    def _addNoise(self):  # pragma: no cover
        """Write a copy of the input with the random column prepended.

        Raises
        ------
        ValueError
            Raised by ``DataFrame.insert`` if the input already has a column
            named ``col_name``.
        """
        data = self.get_data('input')
        # Work on a copy so the table held by the 'input' handle is not mutated.
        with_random = data.copy()
        draws = self.noiseModel.uniform(size=len(with_random))
        with_random.insert(0, self.config.col_name, draws)
        self.add_data("output", with_random)
Note that if a galaxy fails any of the cuts on any one of its columns, that @@ -14,7 +14,7 @@ class QuantityCut(Degrader): """ name = "QuantityCut" - config_options = Degrader.config_options.copy() + config_options = Selector.config_options.copy() config_options.update(cuts=dict) def __init__(self, args, comm=None): @@ -23,7 +23,7 @@ def __init__(self, args, comm=None): Performs standard Degrader initialization as well as defining the cuts to be applied. """ - Degrader.__init__(self, args, comm=comm) + Selector.__init__(self, args, comm=comm) self.cuts = None self.set_cuts(self.config["cuts"]) @@ -82,7 +82,7 @@ def set_cuts(self, cuts: dict): else: raise TypeError(bad_cut_msg) - def run(self): + def _select(self): """Applies cuts. Notes @@ -97,7 +97,7 @@ def run(self): columns = set(self.cuts.keys()).intersection(data.columns) if len(columns) == 0: # pragma: no cover - self.add_data("output", data) + return np.ones(len(data), dtype=int) else: # generate a pandas query from the cuts query = [ @@ -105,9 +105,10 @@ def run(self): for col in columns ] query = " & ".join(query) - - out_data = data.query(query) - self.add_data("output", out_data) + out_indices = data.query(query).index.values + out_mask = np.zeros(len(data), dtype=int) + out_mask[out_indices] = 1 + return out_mask def __repr__(self): # pragma: no cover """Pretty print this object.""" diff --git a/src/rail/creation/degrader.py b/src/rail/creation/degrader.py index 8527c299..9c1578eb 100644 --- a/src/rail/creation/degrader.py +++ b/src/rail/creation/degrader.py @@ -8,7 +8,7 @@ from rail.core.data import PqHandle -class Degrader(RailStage): +class Degrader(RailStage): # pragma: no cover """Base class Degraders, which apply various degradations to synthetic photometric data. diff --git a/src/rail/creation/noisifier.py b/src/rail/creation/noisifier.py new file mode 100644 index 00000000..4042cdad --- /dev/null +++ b/src/rail/creation/noisifier.py @@ -0,0 +1,83 @@ +"""Abstract base class defining a noisifier. 
+ +The key feature here is the run adds noise to the catalog. +Intended subclasses are noisifier that adds LSST noise / other telescope noise +""" + +from rail.core.stage import RailStage +from rail.core.data import PqHandle + + +class Noisifier(RailStage): + """Base class Noisifier, which adds noise to the input catalog + + Noisifier take "input" data in the form of pandas dataframes in Parquet + files and provide as "output" another pandas dataframes written to Parquet + files. + """ + + name = 'Noisifier' + config_options = RailStage.config_options.copy() + config_options.update(seed=1337) + inputs = [('input', PqHandle)] + outputs = [('output', PqHandle)] + + def __init__(self, args, comm=None): + """Initialize Noisifier that can add noise to photometric data""" + RailStage.__init__(self, args, comm=comm) + + + def _initNoiseModel(self): # pragma: no cover + raise NotImplementedError("Noisifier._initNoiseModel()") + + def _addNoise(self): # pragma: no cover + raise NotImplementedError("Noisifier._addNoise()") + + def __call__(self, sample, seed: int = None): + """The main interface method for ``Noisifier``. + + Adds noise to the input catalog + + This will attach the input to this `Noisifier` + + Then it will call the _initNoiseModel() and _addNoise(), which need to be + implemented by the sub-classes. + + The _initNoiseModel() method will initialize the noise model of the sub-classes, and + store the noise model as self.noiseModel + + The _addNoise() method will add noise to the flux and magnitude of the column of the + catalog. + + The finalize() method will check the end results (like preserving number of rows) + + Finally, this will return a PqHandle providing access to that output + data. 
class Selector(RailStage):
    """Base class for stages that apply a selection to a catalog.

    A Selector takes "input" data in the form of a pandas dataframe in a
    Parquet file and provides as "output" another pandas dataframe written
    to a Parquet file.

    Subclasses implement ``_select()``, which returns a mask with one entry
    per input row: 1 means the row is selected, 0 means it is rejected.
    """

    name = 'Selector'
    config_options = RailStage.config_options.copy()
    config_options.update(
        drop_rows=Param(bool, True, msg="Drop rows that are not selected from the output table"),
    )
    inputs = [('input', PqHandle)]
    outputs = [('output', PqHandle)]

    def __init__(self, args, comm=None):
        """Initialize a Selector that can apply a selection to a catalog."""
        RailStage.__init__(self, args, comm=comm)

    def __call__(self, sample):
        """The main interface method for ``Selector``.

        Applies the selection to the input catalog.

        This will attach the input to this `Selector`, then call ``run()``,
        which obtains the selection mask from ``_select()`` (1 means
        selected, 0 means rejected).

        If ``drop_rows`` is True, the rejected rows are removed from the
        output catalog.  Otherwise all rows are kept and the mask is added
        to the output as a column named ``flag``.

        Finally, this will return a PqHandle providing access to that output
        data.

        Parameters
        ----------
        sample : table-like
            The sample to be selected

        Returns
        -------
        output_data : PqHandle
            A handle giving access to a table with selected sample
        """
        self.set_data('input', sample)
        self.run()
        self.finalize()
        return self.get_handle('output')

    def run(self):
        """Apply the mask from ``_select()`` and store the result as "output"."""
        data = self.get_data('input')
        selection_mask = self._select()
        if self.config['drop_rows']:
            # Keep only the rows whose mask entry is non-zero.
            out_data = data[selection_mask.astype(bool)]
        else:
            # Keep every row and record the decision in a leading 'flag'
            # column; insert on a copy so the input table is not modified.
            out_data = data.copy()
            out_data.insert(0, 'flag', selection_mask)
        self.add_data("output", out_data)

    def _select(self):  # pragma: no cover
        """Return the selection mask; must be implemented by subclasses.

        ``run()`` calls ``.astype(bool)`` on the returned mask and uses it
        either to index the input rows or as the ``flag`` column.
        """
        raise NotImplementedError("Selector._select()")
def test_add_random(data):
    """Make sure AddColumnOfRandom adds one well-formed random column."""
    add_random = AddColumnOfRandom.make_stage()

    test_data = add_random(data, seed=1234).data
    random_col = test_data[add_random.config.col_name]

    # one random number per input row
    assert len(random_col) == len(data.data)
    # the stage draws with numpy's default uniform bounds, i.e. [0, 1)
    assert ((random_col >= 0) & (random_col < 1)).all()
    # the stage adds a column to a copy, so every input column must survive
    assert set(data.data.columns).issubset(test_data.columns)

    # Clean up the stage's output file, as the QuantityCut test in this
    # module does, so repeated test runs do not leave artifacts behind.
    out_path = add_random.get_output(add_random.get_aliased_tag("output"), final_name=True)
    if os.path.exists(out_path):
        os.remove(out_path)