Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add backfitting with posterior sampling #508

Merged
merged 9 commits into from
Aug 9, 2024
54 changes: 46 additions & 8 deletions preliz/ppls/pymc_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
# pylint: disable=protected-access
from sys import modules

import preliz as pz
import numpy as np

try:
from pytensor.tensor import vector, TensorConstant
from pytensor.graph.basic import ancestors
from pymc import logp, compile_pymc
from pymc.util import is_transformed_name, get_untransformed_name
except ModuleNotFoundError:
Expand Down Expand Up @@ -117,15 +119,11 @@ def get_model_information(model): # pylint: disable=too-many-locals
pymc_to_preliz = get_pymc_to_preliz()
rvs_to_values = model.rvs_to_values

for r_v in model.free_RVs:
if not non_constant_parents(r_v):
free_rvs.append(r_v)

for r_v in model.free_RVs:
r_v_eval = r_v.eval()
size = r_v_eval.size
shape = r_v_eval.shape
nc_parents = non_constant_parents(r_v)
nc_parents = non_constant_parents(r_v, model.free_RVs)

name = r_v.owner.op.name
dist = pymc_to_preliz[name]
Expand Down Expand Up @@ -201,11 +199,51 @@ def reshape_params(model, var_info, p_model, params):
return value


def non_constant_parents(var_):
def non_constant_parents(var_, free_rvs):
"""Find the parents of a variable that are not constant."""
parents = []
for variable in var_.get_parents()[0].inputs[2:]:
if not isinstance(variable, TensorConstant):
parents.append(variable.owner.inputs[0])

for free_rv in free_rvs:
if free_rv in list(ancestors([variable])) and free_rv not in parents:
parents.append(free_rv)
return parents


def posterior_to_prior(model, posterior, alternative=None):
"""
Get the model information for fitting the samples from prior into user provided model's prior.
aloctavodia marked this conversation as resolved.
Show resolved Hide resolved
We need to "backfit" because we can not use arbitrary samples as priors.

Parameters
----------
model : A PyMC model
A probabilistic model

posterior : Posterior samples
InferenceData from with the posterior group

alternative : "auto", list, dict, defaults to None
Users can add the model variables to consider alternative distributions while fitting samples

"""

model_info = get_model_information(model)[2]
parsed_info = [(dist, var) for var, dist in model_info.items()]
new_priors = []

for dist, var in parsed_info:
dists = [model_info[var]]

if alternative == "auto":
dists += [pz.Normal(), pz.HalfNormal(), pz.Gamma()]
elif isinstance(alternative, list):
dists += alternative
elif isinstance(alternative, dict):
dists += alternative.get(var, [])
# Take the dist with the least penalization term
rohanbabbar04 marked this conversation as resolved.
Show resolved Hide resolved
idx = pz.mle(dists, posterior[var].values)[0]
aloctavodia marked this conversation as resolved.
Show resolved Hide resolved
new_priors.append((dists[idx[0]], var))

new_model = "\n".join(f"{var} = {new_prior}" for new_prior, var in new_priors)
return new_model
Loading