From 22c47611679c7bc2d6314a4e6e6367e8e9f53a4b Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sat, 3 Aug 2024 21:04:31 +0530 Subject: [PATCH 1/9] Add initial backfitting with posterior sampling --- preliz/ppls/pymc_io.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index 007bf4ab..91dc5af3 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -3,6 +3,7 @@ # pylint: disable=protected-access from sys import modules +import arviz as az import numpy as np try: @@ -209,3 +210,17 @@ def non_constant_parents(var_): parents.append(variable.owner.inputs[0]) return parents + + +def parse_backfitting(model, data): + posterior = az.extract(data, group="posterior") + + model_info = get_model_information(model)[2] + parsed_info = [(dist, var) for var, dist in model_info.items()] + print(posterior, parsed_info) + new_priors = [] + for dist, var in parsed_info: + dist._fit_mle(posterior[var].values) + new_priors.append((dist, var)) + new_model = "\n".join(f"{var} = {new_prior}" for new_prior, var in new_priors) + return new_model From df2bf6fb68256f09efa4c65f0817a264c541db1b Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sat, 3 Aug 2024 21:09:47 +0530 Subject: [PATCH 2/9] Add initial backfitting with posterior sampling --- preliz/ppls/pymc_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index 91dc5af3..5f84b95a 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -217,7 +217,7 @@ def parse_backfitting(model, data): model_info = get_model_information(model)[2] parsed_info = [(dist, var) for var, dist in model_info.items()] - print(posterior, parsed_info) + new_priors = [] for dist, var in parsed_info: dist._fit_mle(posterior[var].values) From 7d3da90182d4a8f3fe9d001895f0fa0c7a5d2198 Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sun, 4 Aug 2024 19:48:56 +0530 Subject: [PATCH 3/9] Fix the 'non_constant_parents' using ancestors --- preliz/ppls/pymc_io.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index 5f84b95a..d93d9135 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -8,6 +8,7 @@ try: from pytensor.tensor import vector, TensorConstant + from pytensor.graph.basic import ancestors from pymc import logp, compile_pymc from pymc.util import is_transformed_name, get_untransformed_name except ModuleNotFoundError: @@ -118,15 +119,11 @@ def get_model_information(model): # pylint: disable=too-many-locals pymc_to_preliz = get_pymc_to_preliz() rvs_to_values = model.rvs_to_values - for r_v in model.free_RVs: - if not non_constant_parents(r_v): - free_rvs.append(r_v) - for r_v in model.free_RVs: r_v_eval = r_v.eval() size = r_v_eval.size shape = r_v_eval.shape - nc_parents = non_constant_parents(r_v) + nc_parents = non_constant_parents(r_v, model.free_RVs) name = r_v.owner.op.name dist = pymc_to_preliz[name] @@ -202,13 +199,14 @@ def reshape_params(model, var_info, p_model, params): return value -def non_constant_parents(var_): +def non_constant_parents(var_, free_rvs): """Find the parents of a variable that are not constant.""" parents = [] for variable in var_.get_parents()[0].inputs[2:]: if not isinstance(variable, TensorConstant): - parents.append(variable.owner.inputs[0]) - + for free_rv in free_rvs: + if free_rv in list(ancestors([variable])) and free_rv not in parents: + parents.append(free_rv) return parents From 73e20a7a95ee6f4d5aad90dd6c3d153e77fcb5c7 Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Fri, 9 Aug 2024 16:17:35 +0530 Subject: [PATCH 4/9] Add additional parameter, update the code --- preliz/ppls/pymc_io.py | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index d93d9135..2300a6b4 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -3,18 +3,19 @@ # pylint: disable=protected-access from sys import modules +import preliz as pz import arviz as az import numpy as np try: from pytensor.tensor import vector, TensorConstant from pytensor.graph.basic import ancestors - from pymc import logp, compile_pymc + from pymc import logp, compile_pymc, sample from pymc.util import is_transformed_name, get_untransformed_name except ModuleNotFoundError: pass -from preliz.internal.optimization import get_distributions +from preliz.internal.optimization import get_distributions, fit_to_sample def backfitting(prior, p_model, var_info2): @@ -210,12 +211,40 @@ def non_constant_parents(var_, free_rvs): return parents -def parse_backfitting(model, data): - posterior = az.extract(data, group="posterior") +def parse_backfitting(model, data, alternative=None): + """ + Get the model information for fitting the samples from prior into user provided model's prior. + We need to "backfit" because we can not use arbitrary samples as priors. - model_info = get_model_information(model)[2] - parsed_info = [(dist, var) for var, dist in model_info.items()] + Parameters + ---------- + model : A PyMC model + A probabilistic model + + data : np.ndarray + Samples which needs to be fitted using "backfitting" + + alternative : list, defaults to None + Users can add the model variables to consider alternative distributions while fitting samples + """ + + data = np.asarray(data) + with model: + idata = sample(random_seed=4124) + posterior = az.extract(idata, group="posterior") + model_info = get_model_information(model)[2] + if alternative: + parsed_info = [] + for key in model_info.keys(): + if key in alternative: + dists_extra = [pz.Gamma(), pz.Normal(), pz.HalfNormal(), model_info[key]] + idx = np.argmin(fit_to_sample(dists_extra, data, data.min(), data.max()).losses) + parsed_info.append((dists_extra[idx], key)) + else: + parsed_info.append((model_info[key], key)) + else: + parsed_info = [(dist, var) for var, dist in model_info.items()] new_priors = [] for dist, var in parsed_info: dist._fit_mle(posterior[var].values) From 934b9bca507d6b2c849bad0a3ef0629bbc854c89 Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sat, 10 Aug 2024 02:15:13 +0530 Subject: [PATCH 5/9] Update `posterior_to_prior` --- preliz/ppls/pymc_io.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index 2300a6b4..90f2cca7 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -211,7 +211,7 @@ def non_constant_parents(var_, free_rvs): return parents -def parse_backfitting(model, data, alternative=None): +def posterior_to_prior(model, idata, alternative=None): """ Get the model information for fitting the samples from prior into user provided model's prior. We need to "backfit" because we can not use arbitrary samples as priors. @@ -221,33 +221,31 @@ def parse_backfitting(model, data, alternative=None): model : A PyMC model A probabilistic model - data : np.ndarray - Samples which needs to be fitted using "backfitting" + idata : Inference Data + InferenceData from which to extract the data. - alternative : list, defaults to None + alternative : "auto", list, dict, defaults to None Users can add the model variables to consider alternative distributions while fitting samples """ - data = np.asarray(data) - with model: - idata = sample(random_seed=4124) posterior = az.extract(idata, group="posterior") model_info = get_model_information(model)[2] - if alternative: - parsed_info = [] - for key in model_info.keys(): - if key in alternative: - dists_extra = [pz.Gamma(), pz.Normal(), pz.HalfNormal(), model_info[key]] - idx = np.argmin(fit_to_sample(dists_extra, data, data.min(), data.max()).losses) - parsed_info.append((dists_extra[idx], key)) - else: - parsed_info.append((model_info[key], key)) - else: - parsed_info = [(dist, var) for var, dist in model_info.items()] + parsed_info = [(dist, var) for var, dist in model_info.items()] new_priors = [] + for dist, var in parsed_info: - dist._fit_mle(posterior[var].values) - new_priors.append((dist, var)) + dists = [model_info[var]] + + if alternative == "auto": + dists += [pz.Normal(), pz.HalfNormal(), pz.Gamma()] + elif isinstance(alternative, list): + dists += alternative + elif isinstance(alternative, dict): + dists += alternative.get(var, []) + # Take the dist with the least penalization term + idx = pz.mle(dists, posterior[var].values)[0] + new_priors.append((dists[idx[0]], var)) + new_model = "\n".join(f"{var} = {new_prior}" for new_prior, var in new_priors) return new_model From e6ad294355feef2076791b6833cb7ba022035692 Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sat, 10 Aug 2024 02:17:52 +0530 Subject: [PATCH 6/9] Add posterior as a parameter --- preliz/ppls/pymc_io.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index 90f2cca7..637533dd 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -211,7 +211,7 @@ def non_constant_parents(var_, free_rvs): return parents -def posterior_to_prior(model, idata, alternative=None): +def posterior_to_prior(model, posterior, alternative=None): """ Get the model information for fitting the samples from prior into user provided model's prior. We need to "backfit" because we can not use arbitrary samples as priors. @@ -221,15 +221,14 @@ def posterior_to_prior(model, idata, alternative=None): model : A PyMC model A probabilistic model - idata : Inference Data - InferenceData from which to extract the data. + posterior : Posterior samples + InferenceData from with the posterior group alternative : "auto", list, dict, defaults to None Users can add the model variables to consider alternative distributions while fitting samples """ - posterior = az.extract(idata, group="posterior") model_info = get_model_information(model)[2] parsed_info = [(dist, var) for var, dist in model_info.items()] new_priors = [] From d556bef3dab7875af2664dea98b4f950c81047bb Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sat, 10 Aug 2024 02:22:41 +0530 Subject: [PATCH 7/9] Removed unused imports --- preliz/ppls/pymc_io.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index 637533dd..d0ed1c95 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -4,18 +4,17 @@ from sys import modules import preliz as pz -import arviz as az import numpy as np try: from pytensor.tensor import vector, TensorConstant from pytensor.graph.basic import ancestors - from pymc import logp, compile_pymc, sample + from pymc import logp, compile_pymc from pymc.util import is_transformed_name, get_untransformed_name except ModuleNotFoundError: pass -from preliz.internal.optimization import get_distributions, fit_to_sample +from preliz.internal.optimization import get_distributions def backfitting(prior, p_model, var_info2): From 5ff447cbd71af934a3615b4b96e8cf5862866a00 Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sat, 10 Aug 2024 02:27:39 +0530 Subject: [PATCH 8/9] Realign imports --- preliz/ppls/pymc_io.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index d0ed1c95..f2204a11 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -3,7 +3,6 @@ # pylint: disable=protected-access from sys import modules -import preliz as pz import numpy as np try: @@ -15,6 +14,8 @@ pass from preliz.internal.optimization import get_distributions +from preliz.distributions import Gamma, Normal, HalfNormal +from preliz.unidimensional.mle import mle def backfitting(prior, p_model, var_info2): @@ -236,13 +237,13 @@ def posterior_to_prior(model, posterior, alternative=None): dists = [model_info[var]] if alternative == "auto": - dists += [pz.Normal(), pz.HalfNormal(), pz.Gamma()] + dists += [Normal(), HalfNormal(), Gamma()] elif isinstance(alternative, list): dists += alternative elif isinstance(alternative, dict): dists += alternative.get(var, []) # Take the dist with the least penalization term - idx = pz.mle(dists, posterior[var].values)[0] + idx = mle(dists, posterior[var].values)[0] new_priors.append((dists[idx[0]], var)) new_model = "\n".join(f"{var} = {new_prior}" for new_prior, var in new_priors) From a7bca32c83bf8b7b854aad1eca4db228becdad73 Mon Sep 17 00:00:00 2001 From: rohanbabbar04 Date: Sat, 10 Aug 2024 02:43:07 +0530 Subject: [PATCH 9/9] Update docs, add condition for fit_mle --- preliz/ppls/pymc_io.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/preliz/ppls/pymc_io.py b/preliz/ppls/pymc_io.py index f2204a11..484e7841 100644 --- a/preliz/ppls/pymc_io.py +++ b/preliz/ppls/pymc_io.py @@ -213,8 +213,9 @@ def non_constant_parents(var_, free_rvs): def posterior_to_prior(model, posterior, alternative=None): """ - Get the model information for fitting the samples from prior into user provided model's prior. - We need to "backfit" because we can not use arbitrary samples as priors. + Updates the priors of a probabilistic model by fitting them to posterior data, using either predefined or + user-specified alternative distributions. It selects the best-fitting distribution for each variable based + on maximum likelihood estimation (MLE). The result is a model with priors better aligned to the observed data. Parameters ---------- @@ -242,9 +243,12 @@ def posterior_to_prior(model, posterior, alternative=None): dists += alternative elif isinstance(alternative, dict): dists += alternative.get(var, []) - # Take the dist with the least penalization term - idx = mle(dists, posterior[var].values)[0] - new_priors.append((dists[idx[0]], var)) + if len(dists) == 1: + dists[0]._fit_mle(posterior[var].values) + new_priors.append((dists[0], var)) + else: + idx = mle(dists, posterior[var].values, plot=False)[0] + new_priors.append((dists[idx[0]], var)) new_model = "\n".join(f"{var} = {new_prior}" for new_prior, var in new_priors) return new_model