Merge pull request #62 from smuellerd/main

Updated benchmark to remove NAs in activities

PauBadiaM authored Aug 2, 2023
2 parents 7752273 + 1b9823b commit f74a3e9
Showing 2 changed files with 88 additions and 28 deletions.
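In short: the benchmark helpers previously passed raw activity estimates straight to compute_metric; with this change, NaN activities (e.g. from a method that could not score a given experiment or source) are first masked out of both the activity and ground-truth vectors, and class imbalance is recomputed on the filtered data. A minimal standalone sketch of that pattern, with invented values (plain NumPy, not decoupler's API):

import numpy as np

act = np.array([0.9, np.nan, 0.2, np.nan, 0.7])  # activity scores, some NaN
grt = np.array([1., 1., 0., 1., 0.])             # binary ground truth

# Mask NaNs out of both vectors before scoring
nan_mask = np.isnan(act)
act_f, grt_f = act[~nan_mask], grt[~nan_mask]

# Class imbalance is recomputed on the filtered vectors, so it shifts
# when the dropped entries are mostly one class
ci_before = np.sum(grt) / len(grt)     # 0.6
ci_after = np.sum(grt_f) / len(grt_f)  # ~0.33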
49 changes: 47 additions & 2 deletions decoupler/tests/test_utilsbenchmark.py
@@ -182,8 +182,21 @@ def test_append_by_experiment():
 
     append_by_experiment(df, grpby_i=None, grp=None, act=act, grt=grt, srcs=srcs,
                          mthds=mthds, metrics=metrics, min_exp=1)
+
+    act_na = act.astype(float)
+    act_na[1,0,0] = np.nan
+    act_na[1,1,0] = np.nan
+    act_na[1,2,0] = np.nan
+
+    df_na = []
+
+    append_by_experiment(df_na, grpby_i=None, grp=None, act=act_na, grt=grt, srcs=srcs,
+                         mthds=mthds, metrics=metrics, min_exp=1)
+
     assert len(df) == 2
     assert df[0][5] < df[1][5]
+    assert df[0][5] < df_na[0][5]  # check improvement of performance due to removal of NAs
+    assert df[0][6] < df_na[0][6]  # check change of class imbalance due to removal of NAs
 
 
 def test_append_by_source():
@@ -216,7 +229,39 @@ def test_append_by_source():
     append_by_source(df, grpby_i=None, grp=None, act=act, grt=grt, srcs=srcs,
                      mthds=mthds, metrics=metrics, min_exp=1)
     assert len(df) == 4
-    assert df[0][5] < df[1][5]
+    assert df[0][5] < df[2][5]
+
+    act_na = act.astype(float)
+    act_na[1,4,0] = np.nan
+
+    df_na = []
+
+    append_by_source(df_na, grpby_i=None, grp=None, act=act_na, grt=grt, srcs=srcs,
+                     mthds=mthds, metrics=metrics, min_exp=1)
+
+    assert len(df_na) == 3
+    assert df_na[0][2] == 'T1'
+
+    act_na[1,0,0] = np.nan
+
+    df_na_2 = []
+    append_by_source(df_na_2, grpby_i=None, grp=None, act=act_na, grt=grt, srcs=srcs,
+                     mthds=mthds, metrics=metrics, min_exp=1)
+
+    assert len(df_na_2) == 2
+
+    act_na_3 = act.astype(float)
+    act_na_3[1,0,0] = np.nan
+
+    df_na_3 = []
+
+    append_by_source(df_na_3, grpby_i=None, grp=None, act=act_na_3, grt=grt, srcs=srcs,
+                     mthds=mthds, metrics=metrics, min_exp=1)
+
+    assert len(df_na_3) == 3
+    assert df_na_3[0][2] == 'T5'
+
+
 
 
 def test_append_metrics_scores():
@@ -260,7 +305,7 @@ def test_append_metrics_scores():
     append_metrics_scores(df, grpby_i=None, grp=None, act=act, grt=grt, srcs=srcs,
                           mthds=mthds, metrics=metrics, by='source', min_exp=1)
     assert len(df) == 4
-    assert df[0][5] < df[1][5]
+    assert df[0][5] < df[2][5]
 
 
 def test_check_groupby():
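A note for reading the assertions above: each row appended by the benchmark helpers has the layout [grpby_i, grp, src, mth, metric, score, ci] (see utils_benchmark.py below), so df[i][5] is the metric score and df[i][6] the class imbalance. A hypothetical row, with values invented purely for illustration:

# Row layout from utils_benchmark.py: [grpby_i, grp, src, mth, metric, score, ci]
row = [None, None, 'T1', 'mlm', 'auroc', 0.87, 0.33]  # hypothetical values
score, ci = row[5], row[6]
assert score == 0.87 and ci == 0.33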
67 changes: 41 additions & 26 deletions decoupler/utils_benchmark.py
@@ -123,43 +123,58 @@ def append_by_experiment(df, grpby_i, grp, act, grt, srcs, mthds, metrics, min_e
     # Flatten act by method
     act, grt = act.reshape(-1, act.shape[-1]).T, grt.flatten()
 
-    # Compute Class Imbalance
-    ci = np.sum(grt) / len(grt)
-
     # Compute per method and metric
     for m in range(len(mthds)):
         mth = mthds[m]
         for metric in metrics:
-            scores = compute_metric(act[m], grt, metric, pi0=pi0, n_iter=n_iter, seed=seed)
+            # identify activity scores with NAs in each method
+            act_i = act[m]
+            nan_mask = np.isnan(act_i)
+            # Remove NAs from activity matrix and ground truth
+            act_i = act_i[~nan_mask]
+            grt_i = grt[~nan_mask]
+            # Compute Class Imbalance
+            ci = np.sum(grt_i) / len(grt_i)
+            # Compute metrics
+            scores = compute_metric(act_i, grt_i, metric, pi0=pi0, n_iter=n_iter, seed=seed)
             for score in scores:
                 row = [grpby_i, grp, None, mth, metric, score, ci]
                 df.append(row)
 
 
 def append_by_source(df, grpby_i, grp, act, grt, srcs, mthds, metrics, min_exp=5, pi0=0.5,
                      n_iter=1000, seed=42):
-
-    # Remove sources with less than min_exp
-    src_msk = np.sum(grt > 0., axis=0) >= min_exp
-    act, grt = act[:, src_msk, :], grt[:, src_msk]
-    srcs = srcs[src_msk]
-
-    # Compute per source, method and metric
-    for s in range(len(srcs)):
-        src = srcs[s]
-        tmp_grt = grt[:, s]
-
-        # Compute Class Imbalance
-        ci = np.sum(tmp_grt) / len(tmp_grt)
-
-        for m in range(len(mthds)):
-            mth = mthds[m]
-            tmp_act = act[:, s, m]
-            for metric in metrics:
-                scores = compute_metric(tmp_act, tmp_grt, metric, pi0=pi0, n_iter=n_iter, seed=seed)
-                for score in scores:
-                    row = [grpby_i, grp, src, mth, metric, score, ci]
-                    df.append(row)
+
+    for m in range(len(mthds)):
+        mth = mthds[m]
+        act_i = act[:,:,m]
+        nan_mask = np.isnan(act_i)
+
+        grt_i = grt.copy()
+        grt_i[nan_mask] = np.nan
+
+        # Remove sources with less than min_exp
+        src_msk = np.sum(grt_i > 0., axis=0) >= min_exp
+        act_i, grt_i = act[:, src_msk, :], grt_i[:, src_msk]
+        srcs_method = srcs[src_msk]
+
+        # Compute per source, method and metric
+        for s in range(len(srcs_method)):
+            src = srcs_method[s]
+            tmp_grt = grt_i[:, s]
+            nan_mask = np.isnan(tmp_grt)
+
+            grt_source = tmp_grt[~nan_mask]
+            act_source = act_i[:, s, m][~nan_mask]
+
+            # Compute Class Imbalance
+            ci = np.sum(grt_source) / len(grt_source)
+            if ci != 0. and ci != 1.:
+                for metric in metrics:
+                    scores = compute_metric(act_source, grt_source, metric, pi0=pi0, n_iter=n_iter, seed=seed)
+                    for score in scores:
+                        row = [grpby_i, grp, src, mth, metric, score, ci]
+                        df.append(row)
 
 
 def append_metrics_scores(df, grpby_i, grp, act, grt, srcs, mthds, metrics, by, min_exp=5, pi0=0.5,
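The new ci != 0. and ci != 1. guard exists because per-method NaN removal can leave a source with a single-class ground truth, where ranking metrics such as AUROC are undefined. A quick illustration of the failure mode it avoids, using scikit-learn only as a stand-in for compute_metric (assumed available; not part of this diff):

import numpy as np
from sklearn.metrics import roc_auc_score

grt_source = np.array([1., 1., 1.])     # only positives left after NaN removal
act_source = np.array([0.2, 0.9, 0.4])

ci = np.sum(grt_source) / len(grt_source)  # 1.0, so the guard skips this source
if ci != 0. and ci != 1.:
    print(roc_auc_score(grt_source, act_source))
else:
    # roc_auc_score would raise ValueError ("Only one class present in y_true")
    print('skipped single-class source')

This is also why rows disappear in the tests above as more activities are set to NaN: an affected source either falls below min_exp or becomes single-class for that method, and is skipped rather than scored.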
