Skip to content

Outlier fit #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/sccala/gen_testdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def generate_observed_data(
hubble=False,
calib=False,
h0=70.0,
outl_frac=0.0,
):
if rng is None:
rng = np.random.default_rng()
Expand All @@ -36,6 +37,11 @@ def generate_observed_data(
col = rng.normal(loc=col_range[0], scale=col_range[1])
ae = np.absolute(rng.normal(loc=ae_range[0], scale=ae_range[1]))

# Optional outlier generation
if outl_frac > 0 and rng.uniform() < outl_frac:
vel = np.absolute(rng.normal(loc=vel_range[0], scale=5000e3))
col = rng.normal(loc=col_range[0], scale=0.5)
ae = np.absolute(rng.normal(loc=ae_range[0], scale=0.5))
vel_obs = rng.normal(loc=vel, scale=verr)
col_obs = rng.normal(loc=col, scale=cerr)
ae_obs = rng.normal(loc=ae, scale=aeerr)
Expand Down Expand Up @@ -120,6 +126,7 @@ def gen_testdata(
sig_cut_nom=None,
calib_m_cut_nom=None,
calib_sig_cut_nom=None,
outl_frac=0.0,
):
"""
Function generating simulated datasets for standardisation
Expand Down Expand Up @@ -173,6 +180,8 @@ def gen_testdata(
calib_m_cut_nom : float
calib_sig_cut_nom : float
Parameters for the nominal detection values exported to the data.
outl_frac : float
Fraction of outliers to be generated. Default: 0.0

Returns
-------
Expand Down Expand Up @@ -214,6 +223,7 @@ def gen_testdata(
rng=rng,
hubble=hubble,
h0=h0,
outl_frac=outl_frac,
)
detection_prob = detection_probability(mag, m_cut=m_cut, sigma_cut=sigma_cut)
detected = rng.uniform() < detection_prob
Expand Down Expand Up @@ -302,6 +312,7 @@ def gen_testdata(
hubble=hubble,
calib=True,
h0=h0,
outl_frac=outl_frac,
)
detection_prob = detection_probability(
mag, m_cut=calib_m_cut, sigma_cut=calib_sigma_cut
Expand Down Expand Up @@ -463,6 +474,7 @@ def main(args):
calib_sigma_cut=args.calib_sigma_cut,
calib_m_cut_nom=args.calib_m_cut_nom,
calib_sig_cut_nom=args.calib_sig_cut_nom,
outl_frac=args.outl_frac,
)

return data
Expand Down Expand Up @@ -564,6 +576,12 @@ def cli():
type=float,
help="Nominal sigma cut for the exported calibrator data. Default: 0.5",
)
parser.add_argument(
"--outl_frac",
type=float,
help="Fraction of outliers to be generated. Default: 0.0",
default=0.0,
)

args = parser.parse_args()

Expand Down
16 changes: 14 additions & 2 deletions src/sccala/sccala_scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,19 @@ def main(args):
blind=args.unblind,
blindkey=args.blindkey,
)
if args.outlier and args.classic:
raise ValueError("Outlier model is not available for classic SCM")

model = args.model
if not args.classic:
if model == "hubble":
model = models.HubbleSCM()
model = models.HubbleSCMOutlier() if args.outlier else models.HubbleSCM()
elif model == "hubble-free":
model = models.HubbleFreeSCM()
model = (
models.HubbleFreeSCMOutlier()
if args.outlier
else models.HubbleFreeSCM()
)
elif model == "hubble-nh":
model = models.NHHubbleSCM()
elif model == "hubble-nh-simple":
Expand Down Expand Up @@ -52,6 +58,7 @@ def main(args):
save_warmup=args.save_warmup,
quiet=False,
classic=args.classic,
outlier=args.outlier,
output_dir=args.output_dir,
test_data=args.test_data,
selection_effects=args.selection_effects,
Expand Down Expand Up @@ -144,6 +151,11 @@ def cli():
action="store_true",
help="If flag is given, classical SCM is used instead of extended SCM",
)
parser.add_argument(
"--outlier",
action="store_true",
help="If flag is given, outlier model will be used.",
)
parser.add_argument(
"--unblind",
action="store_false",
Expand Down
91 changes: 73 additions & 18 deletions src/sccala/scmlib/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


from sccala.utillib.aux import NumpyEncoder
from sccala.utillib.const import VS_INIT, RV_INIT, VTRUE_INIT


class SCM_Model:
Expand Down Expand Up @@ -141,9 +142,9 @@ def __init__(self):
def set_initial_conditions(self, init=None):
if init is None:
self.init = {
"vs": 7500e3,
"rv": 1000e3,
"v_true": [7500e3] * self.data["sn_idx"],
"vs": VS_INIT,
"rv": RV_INIT,
"v_true": [VTRUE_INIT] * self.data["sn_idx"],
}
else:
self.init = init
Expand All @@ -156,6 +157,29 @@ def print_results(self, df, blind=True):
return


class HubbleFreeSCMOutlier(SCM_Model):
    """SCM model variant named "hubble-free-scm-outlier".

    Its result summary includes an ``outl_frac`` parameter in addition to
    the standardisation parameters.
    """

    def __init__(self):
        # The name is assigned before the base initialiser runs, as in the
        # other models in this module.
        self.__name__ = "hubble-free-scm-outlier"
        super().__init__()

    def set_initial_conditions(self, init=None):
        """Store the sampler starting values on ``self.init``.

        Parameters
        ----------
        init : dict or None
            Starting values to use verbatim. If None, defaults are built
            from VS_INIT, RV_INIT and VTRUE_INIT, the latter repeated
            ``self.data["sn_idx"]`` times.
        """
        if init is not None:
            self.init = init
            return

        self.init = {
            "vs": VS_INIT,
            "rv": RV_INIT,
            "v_true": [VTRUE_INIT] * self.data["sn_idx"],
        }
        return

    def print_results(self, df, blind=True):
        """Print mean and standard deviation of each reported parameter.

        Parameters
        ----------
        df : mapping of parameter name to samples
            Indexed by parameter name; each entry is passed to
            ``np.mean`` and ``np.std``.
        blind : bool
            Accepted for interface compatibility; not used by this model.
        """
        for name in ("alpha", "beta", "gamma", "sigma_int", "Mi", "outl_frac"):
            centre = np.mean(df[name])
            spread = np.std(df[name])
            print("%s = %.2g +/- %.2g" % (name, centre, spread))
        return


class HubbleSCM(SCM_Model):
def __init__(self):
self.__name__ = "hubble-scm"
Expand All @@ -165,14 +189,14 @@ def __init__(self):
def set_initial_conditions(self, init=None):
if init is None:
self.init = {
"vs": 7500e3,
"rv": 1000e3,
"v_true": [7500e3] * self.data["sn_idx"],
"calib_v_true": [7500e3] * self.data["calib_sn_idx"],
"vs": VS_INIT,
"rv": RV_INIT,
"v_true": [VTRUE_INIT] * self.data["sn_idx"],
"calib_v_true": [VTRUE_INIT] * self.data["calib_sn_idx"],
}
for i in range(self.data["num_calib_dset"]):
self.init["calib_vs.%d" % (i + 1)] = 7500e3
self.init["calib_rv.%d" % (i + 1)] = 1000e3
self.init["calib_vs.%d" % (i + 1)] = VS_INIT
self.init["calib_rv.%d" % (i + 1)] = RV_INIT
else:
self.init = init
return
Expand All @@ -187,6 +211,37 @@ def print_results(self, df, blind=True):
return


class HubbleSCMOutlier(SCM_Model):
    """SCM model variant named "hubble-scm-outlier" with ``hubble`` enabled.

    Its result summary includes an ``outl_frac`` parameter and, when not
    blinded, ``H0``.
    """

    def __init__(self):
        # The name is assigned before the base initialiser runs; the hubble
        # flag is set only afterwards, matching the original ordering.
        self.__name__ = "hubble-scm-outlier"
        super().__init__()
        self.hubble = True

    def set_initial_conditions(self, init=None):
        """Store the sampler starting values on ``self.init``.

        Parameters
        ----------
        init : dict or None
            Starting values to use verbatim. If None, defaults are built
            from VS_INIT, RV_INIT and VTRUE_INIT, including one
            ``calib_vs.<k>`` / ``calib_rv.<k>`` pair (1-based ``k``) for
            each of the ``self.data["num_calib_dset"]`` calibrator sets.
        """
        if init is not None:
            self.init = init
            return

        self.init = {
            "vs": VS_INIT,
            "rv": RV_INIT,
            "v_true": [VTRUE_INIT] * self.data["sn_idx"],
            "calib_v_true": [VTRUE_INIT] * self.data["calib_sn_idx"],
        }
        # Calibrator dataset entries are numbered starting at 1.
        for dset in range(1, self.data["num_calib_dset"] + 1):
            self.init["calib_vs.%d" % dset] = VS_INIT
            self.init["calib_rv.%d" % dset] = RV_INIT
        return

    def print_results(self, df, blind=True):
        """Print mean and standard deviation of each reported parameter.

        Parameters
        ----------
        df : mapping of parameter name to samples
            Indexed by parameter name; each entry is passed to
            ``np.mean`` and ``np.std``.
        blind : bool
            If true, ``H0`` is left out of the printed summary.
        """
        names = ["alpha", "beta", "gamma", "sigma_int", "Mi"]
        if not blind:
            names.append("H0")
        names.append("outl_frac")

        for name in names:
            centre = np.mean(df[name])
            spread = np.std(df[name])
            print("%s = %.4g +/- %.4g" % (name, centre, spread))
        return


class ClassicNHHubbleFreeSCM(SCM_Model):
def __init__(self):
self.__name__ = "classic-nh-hubble-free-scm"
Expand Down Expand Up @@ -240,9 +295,9 @@ def __init__(self):
def set_initial_conditions(self, init=None):
if init is None:
self.init = {
"vs": 7500e3,
"rv": 1000e3,
"v_true": [7500e3] * self.data["sn_idx"],
"vs": VS_INIT,
"rv": RV_INIT,
"v_true": [VTRUE_INIT] * self.data["sn_idx"],
}
else:
self.init = init
Expand All @@ -264,14 +319,14 @@ def __init__(self):
def set_initial_conditions(self, init=None):
if init is None:
self.init = {
"vs": 7500e3,
"rv": 1000e3,
"v_true": [7500e3] * self.data["sn_idx"],
"calib_v_true": [7500e3] * self.data["calib_sn_idx"],
"vs": VS_INIT,
"rv": RV_INIT,
"v_true": [VTRUE_INIT] * self.data["sn_idx"],
"calib_v_true": [VTRUE_INIT] * self.data["calib_sn_idx"],
}
for i in range(self.data["num_calib_dset"]):
self.init["calib_vs.%d" % (i + 1)] = 7500e3
self.init["calib_rv.%d" % (i + 1)] = 1000e3
self.init["calib_vs.%d" % (i + 1)] = VS_INIT
self.init["calib_rv.%d" % (i + 1)] = RV_INIT
else:
self.init = init
return
Expand Down
22 changes: 14 additions & 8 deletions src/sccala/scmlib/models/classic-hubble-free-scm.stan
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ parameters {
}
transformed parameters{
array[sn_idx] real mag_true;
array[sn_idx] real sn_log_like;
array[sn_idx] real mean;
array[sn_idx] real v_mi;
real sigma_int;
Expand All @@ -36,17 +37,25 @@ transformed parameters{
v_mi[i] = (errors[i][1,1] + sigma_int^2) + sigma_cut^2 + (alpha * rv / (vs * log10()))^2 + (beta * rc)^2;
}
}

for (i in 1:sn_idx) {
sn_log_like[i] = multi_normal_lpdf(obs[i] | [mag_true[i], v_true[i], c_true[i]]', errors[i] + diag_matrix([sigma_int^2, 0, 0]'));
if (use_selection != 0) {
sn_log_like[i] += normal_lcdf(mag_cut | obs[i][1], sigma_cut)
- log(normal_cdf(mag_cut | mean[i], sqrt(v_mi[i])) + 0.0001);
}
}
}
model {
Mi ~ uniform(-30,0);
alpha ~ uniform(-20,20);
beta ~ uniform(-20,20);
log_sigma ~ uniform(-3,0);

vs ~ cauchy(7500e3,1500e3);
vs ~ cauchy(0.75,0.15);
cs ~ cauchy(0,0.5);

rv ~ normal(0,1500e3);
rv ~ normal(0,0.15);
rc ~ normal(0,0.5);

v_true ~ normal(vs,rv);
Expand All @@ -55,12 +64,9 @@ model {
mag_cut ~ normal(m_cut_nom,0.5);
sigma_cut ~ normal(sig_cut_nom,0.25);

for (i in 1:sn_idx) {
target += multi_normal_lpdf(obs[i] | [mag_true[i], v_true[i], c_true[i]]', errors[i] + [[sigma_int^2, 0, 0], [0, 0, 0], [0, 0, 0]]);
if (use_selection != 0) {
target += normal_lcdf(mag_cut | obs[i][1], sigma_cut)
- log(normal_cdf(mag_cut | mean[i], sqrt(v_mi[i])) + 0.0001);
outl_frac ~ lognormal(-3,0.25);

}
for (i in 1:sn_idx) {
target += sn_log_like[i];
}
}
Loading