diff --git a/batchglm/api/models/__init__.py b/batchglm/api/models/__init__.py index 6521a82a..5a3142c9 100644 --- a/batchglm/api/models/__init__.py +++ b/batchglm/api/models/__init__.py @@ -1,3 +1,4 @@ from . import glm_nb from . import glm_norm from . import glm_beta +from . import glm_bern diff --git a/batchglm/api/models/glm_bern.py b/batchglm/api/models/glm_bern.py new file mode 100644 index 00000000..94736a89 --- /dev/null +++ b/batchglm/api/models/glm_bern.py @@ -0,0 +1,2 @@ +from batchglm.models.glm_bern import InputData, Model, Simulator +from batchglm.train.tf.glm_bern import Estimator \ No newline at end of file diff --git a/batchglm/api/utils/random.py b/batchglm/api/utils/random.py index 7a5993bb..60cdae3e 100644 --- a/batchglm/api/utils/random.py +++ b/batchglm/api/utils/random.py @@ -1 +1 @@ -from batchglm.utils.random import NegativeBinomial, Normal, Beta +from batchglm.utils.random import NegativeBinomial, Normal, Bernoulli, Beta diff --git a/batchglm/models/glm_bern/__init__.py b/batchglm/models/glm_bern/__init__.py new file mode 100644 index 00000000..efcf833d --- /dev/null +++ b/batchglm/models/glm_bern/__init__.py @@ -0,0 +1,4 @@ +from .model import Model, Model_XArray +from .external import InputData +from .simulator import Simulator +from .estimator import AbstractEstimator, EstimatorStoreXArray \ No newline at end of file diff --git a/batchglm/models/glm_bern/estimator.py b/batchglm/models/glm_bern/estimator.py new file mode 100644 index 00000000..26ff2fa6 --- /dev/null +++ b/batchglm/models/glm_bern/estimator.py @@ -0,0 +1,30 @@ +import abc + +from .model import Model, Model_XArray +from .external import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS + + +class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta): + r""" + Estimator base class for generalized linear models (GLMs) with + bernoulli noise. + """ + + @classmethod + def param_shapes(cls) -> dict: + return ESTIMATOR_PARAMS + + +class EstimatorStoreXArray(_EstimatorStore_XArray_GLM, AbstractEstimator, Model_XArray): + + def __init__(self, estim: AbstractEstimator): + input_data = estim.input_data + # to_xarray triggers the get function of these properties and thereby + # causes evaluation of the properties that have not been computed during + # training, such as the hessian. 
+ params = estim.to_xarray( + ["a_var", "b_var", "loss", "log_likelihood", "gradients", "fisher_inv"], + coords=input_data.data + ) + + Model_XArray.__init__(self, input_data, params) \ No newline at end of file diff --git a/batchglm/models/glm_bern/external.py b/batchglm/models/glm_bern/external.py new file mode 100644 index 00000000..bb52b9f2 --- /dev/null +++ b/batchglm/models/glm_bern/external.py @@ -0,0 +1,11 @@ +from batchglm.models.base import SparseXArrayDataArray, SparseXArrayDataSet +from batchglm.models.base_glm import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS +from batchglm.models.base_glm import InputData, INPUT_DATA_PARAMS +from batchglm.models.base_glm import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params +from batchglm.models.base_glm import _Simulator_GLM +from batchglm.models.base_glm import closedform_glm_mean, closedform_glm_scale + +import batchglm.data as data_utils +import batchglm.utils.random as rand_utils +from batchglm.utils.numeric import weighted_mean, weighted_variance +from batchglm.utils.linalg import groupwise_solve_lm \ No newline at end of file diff --git a/batchglm/models/glm_bern/model.py b/batchglm/models/glm_bern/model.py new file mode 100644 index 00000000..748ab9ba --- /dev/null +++ b/batchglm/models/glm_bern/model.py @@ -0,0 +1,83 @@ +import abc +try: + import anndata +except ImportError: + anndata = None +import xarray as xr +import numpy as np + +from .external import InputData +from .external import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params + +# Define distribution parameters: +MODEL_PARAMS = MODEL_PARAMS.copy() +MODEL_PARAMS.update({ + "mu": ("observations", "features"), + "r": ("observations", "features"), +}) + +class Model(_Model_GLM, metaclass=abc.ABCMeta): + """ + Generalized Linear Model (GLM) with bernoulli noise. 
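+
+    Uses the logit link for the location model and the identity link for the
+    scale model; a minimal sketch of this link pair (numpy only, the names
+    `p` and `eta` are ad hoc):
+
+        >>> import numpy as np
+        >>> p = np.array([0.25, 0.5, 0.75])
+        >>> eta = np.log(p / (1 - p))               # link_loc
+        >>> np.allclose(1 / (1 + np.exp(-eta)), p)  # inverse_link_loc
+        True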
+ """ + + @classmethod + def param_shapes(cls) -> dict: + return MODEL_PARAMS + + def link_loc(self, data): + return np.log(data/(1-data)) + + def inverse_link_loc(self, data): + return 1/(1+np.exp(-data)) + + def link_scale(self, data): + return data + + def inverse_link_scale(self, data): + return data + + @property + def eta_loc(self) -> xr.DataArray: + # TODO: take this switch out once xr.dataset slicing yields dataarray with loc_names coordinate: + if isinstance(self.par_link_loc, xr.DataArray): + eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params") + else: + eta = np.matmul(self.design_loc.values, self.par_link_loc) + + if self.size_factors is not None: + assert False, "size factors not allowed" + return eta + + @property + def mu(self) -> xr.DataArray: + return self.location + + @property + def r(self) -> xr.DataArray: + return self.scale + + +def model_from_params(*args, **kwargs) -> Model: + (input_data, params) = _model_from_params(*args, **kwargs) + return Model_XArray(input_data, params) + + +class Model_XArray(_Model_XArray_GLM, Model): + _input_data: InputData + params: xr.Dataset + + def __init__(self, input_data: InputData, params: xr.Dataset): + super(_Model_XArray_GLM, self).__init__(input_data=input_data, params=params) + super(Model, self).__init__() + + def __str__(self): + return "[%s.%s object at %s]: data=%s" % ( + type(self).__module__, + type(self).__name__, + hex(id(self)), + self.params + ) + + def __repr__(self): + return self.__str__() diff --git a/batchglm/models/glm_bern/simulator.py b/batchglm/models/glm_bern/simulator.py new file mode 100644 index 00000000..de96456e --- /dev/null +++ b/batchglm/models/glm_bern/simulator.py @@ -0,0 +1,47 @@ +import numpy as np + +from .model import Model +from .external import rand_utils, _Simulator_GLM + + +class Simulator(_Simulator_GLM, Model): + """ + Simulator for Generalized Linear Models (GLMs) with bernoulli noise. + Uses logit linker function. + """ + + def __init__( + self, + num_observations=1000, + num_features=100 + ): + Model.__init__(self) + _Simulator_GLM.__init__( + self, + num_observations=num_observations, + num_features=num_features + ) + + def generate_params( + self, + rand_fn_ave=lambda shape: np.random.uniform(0.3, 0.4, shape), + rand_fn=None, + rand_fn_loc=lambda shape: np.random.uniform(0.4, 0.6, shape), + rand_fn_scale=lambda shape: np.zeros(shape), + ): + self._generate_params( + self, + rand_fn_ave=rand_fn_ave, + rand_fn=rand_fn, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale, + ) + + def generate_data(self): + """ + Sample random data based on bernoulli distribution and parameters. + """ + self.data["X"] = ( + self.param_shapes()["X"], + rand_utils.Bernoulli(mean=self.mu).sample() + ) diff --git a/batchglm/models/glm_bern/utils.py b/batchglm/models/glm_bern/utils.py new file mode 100644 index 00000000..82533f62 --- /dev/null +++ b/batchglm/models/glm_bern/utils.py @@ -0,0 +1,37 @@ +from typing import Union + +import numpy as np +import xarray as xr + +from .external import closedform_glm_mean +from .external import SparseXArrayDataArray + + +def closedform_bern_glm_logitmu( + X: Union[xr.DataArray, SparseXArrayDataArray], + design_loc, + constraints_loc, + size_factors=None, + link_fn=lambda data: np.log(data/(1-data)), + inv_link_fn=lambda data: 1/(1+np.exp(-data)) +): + r""" + Calculates a closed-form solution for the `mu` parameters of bernoulli GLMs. 
+
+    :param X: The sample data
+    :param design_loc: design matrix for location
+    :param constraints_loc: tensor (all parameters x dependent parameters)
+        Tensor that encodes how the complete parameter set, which includes dependent
+        parameters, arises from the independent parameters: all = .
+        This form of constraints is used in vector generalized linear models (VGLMs).
+    :param size_factors: size factors for X
+    :return: tuple: (groupwise_means, mu, rmsd)
+    """
+    return closedform_glm_mean(
+        X=X,
+        dmat=design_loc,
+        constraints=constraints_loc,
+        size_factors=size_factors,
+        link_fn=link_fn,
+        inv_link_fn=inv_link_fn
+    )
\ No newline at end of file
diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py
index d7a9dd18..3afdb0b8 100644
--- a/batchglm/models/glm_beta/simulator.py
+++ b/batchglm/models/glm_beta/simulator.py
@@ -24,10 +24,10 @@ def __init__(
 
     def generate_params(
             self,
-            rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape),
+            rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.3, shape),
             rand_fn=None,
-            rand_fn_loc=lambda shape: np.random.uniform(0.05, 0.15, shape),
-            rand_fn_scale=lambda shape: np.random.uniform(1e5, 2*1e5, shape),
+            rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape),
+            rand_fn_scale=lambda shape: np.random.uniform(1e2, 2e3, shape),
     ):
         self._generate_params(
             self,
diff --git a/batchglm/models/glm_beta/utils.py b/batchglm/models/glm_beta/utils.py
index 900735e6..a3c8592d 100644
--- a/batchglm/models/glm_beta/utils.py
+++ b/batchglm/models/glm_beta/utils.py
@@ -35,7 +35,6 @@ def closedform_beta_glm_logitmean(
         dmat=design_loc,
         constraints=constraints_loc,
         size_factors=size_factors,
-        weights=None,
         link_fn=link_fn,
         inv_link_fn=inv_link_fn
     )
diff --git a/batchglm/train/tf/base_glm_all/estimator.py b/batchglm/train/tf/base_glm_all/estimator.py
index 9ac5f4a2..1cc5a321 100644
--- a/batchglm/train/tf/base_glm_all/estimator.py
+++ b/batchglm/train/tf/base_glm_all/estimator.py
@@ -73,6 +73,8 @@ def __init__(
             from .external_norm import EstimatorGraph
         elif noise_model == "beta":
             from .external_beta import EstimatorGraph
+        elif noise_model == "bern":
+            from .external_bern import EstimatorGraph
         else:
             raise ValueError("noise model %s was not recognized" % noise_model)
         self.noise_model = noise_model
@@ -356,6 +358,8 @@ def finalize(self):
             from .external_norm import EstimatorStoreXArray
         elif self.noise_model == "beta":
             from .external_beta import EstimatorStoreXArray
+        elif self.noise_model == "bern":
+            from .external_bern import EstimatorStoreXArray
         else:
             raise ValueError("noise model not recognized")
diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py
index 5cea4784..20fb9ab9 100644
--- a/batchglm/train/tf/base_glm_all/estimator_graph.py
+++ b/batchglm/train/tf/base_glm_all/estimator_graph.py
@@ -66,6 +66,8 @@ def __init__(
             from .external_norm import ReducibleTensors
         elif noise_model == "beta":
             from .external_beta import ReducibleTensors
+        elif noise_model == "bern":
+            from .external_bern import ReducibleTensors
         else:
             raise ValueError("noise model not recognized")
         self.noise_model = noise_model
@@ -252,6 +254,8 @@ def __init__(
             from .external_norm import ReducibleTensors
         elif noise_model == "beta":
             from .external_beta import ReducibleTensors
+        elif noise_model == "bern":
+            from .external_bern import ReducibleTensors
         else:
             raise ValueError("noise model not recognized")
         self.noise_model = noise_model
@@ -433,6 +437,8 @@ def __init__(
             from .external_norm import ModelVars
elif noise_model == "beta": from .external_beta import ModelVars + elif noise_model == "bern": + from .external_bern import ModelVars else: raise ValueError("noise model not recognized") self.noise_model = noise_model diff --git a/batchglm/train/tf/base_glm_all/external_bern.py b/batchglm/train/tf/base_glm_all/external_bern.py new file mode 100644 index 00000000..6d8e071a --- /dev/null +++ b/batchglm/train/tf/base_glm_all/external_bern.py @@ -0,0 +1,6 @@ +from batchglm.train.tf.glm_bern import EstimatorGraph +from batchglm.train.tf.glm_bern import BasicModelGraph, ModelVars, ProcessModel +from batchglm.train.tf.glm_bern import Hessians, FIM, Jacobians, ReducibleTensors + +from batchglm.models.glm_bern import AbstractEstimator, EstimatorStoreXArray, InputData, Model +from batchglm.models.glm_bern.utils import closedform_bern_glm_logitmu \ No newline at end of file diff --git a/batchglm/train/tf/base_glm_all/reducible_tensors.py b/batchglm/train/tf/base_glm_all/reducible_tensors.py index bbd9461f..dbe20689 100644 --- a/batchglm/train/tf/base_glm_all/reducible_tensors.py +++ b/batchglm/train/tf/base_glm_all/reducible_tensors.py @@ -37,6 +37,8 @@ def assemble_tensors(self, idx, data): from .external_norm import BasicModelGraph elif self.noise_model == "beta": from .external_beta import BasicModelGraph + elif self.noise_model == "bern": + from .external_bern import BasicModelGraph else: raise ValueError("noise model %s was not recognized" % self.noise_model) diff --git a/batchglm/train/tf/glm_bern/__init__.py b/batchglm/train/tf/glm_bern/__init__.py new file mode 100644 index 00000000..4db081bb --- /dev/null +++ b/batchglm/train/tf/glm_bern/__init__.py @@ -0,0 +1,7 @@ +from .estimator import Estimator +from .estimator_graph import EstimatorGraph +from .model import BasicModelGraph, ModelVars, ProcessModel +from .hessians import Hessians +from .fim import FIM +from .jacobians import Jacobians +from .reducible_tensors import ReducibleTensors diff --git a/batchglm/train/tf/glm_bern/estimator.py b/batchglm/train/tf/glm_bern/estimator.py new file mode 100644 index 00000000..11dd5a8a --- /dev/null +++ b/batchglm/train/tf/glm_bern/estimator.py @@ -0,0 +1,238 @@ +import logging +from typing import Union + +import numpy as np +import tensorflow as tf + +from .external import AbstractEstimator, EstimatorAll, ESTIMATOR_PARAMS, InputData, Model +from .external import closedform_bern_glm_logitmu +from .external import SparseXArrayDataArray +from .estimator_graph import EstimatorGraph +from .model import ProcessModel +from .training_strategies import TrainingStrategies + +logger = logging.getLogger("batchglm") + + +class Estimator(EstimatorAll, AbstractEstimator, ProcessModel): + """ + Estimator for Generalized Linear Models (GLMs) with bernoulli noise. + Uses a logit linker function. + """ + + def __init__( + self, + input_data: InputData, + batch_size: int = 500, + graph: tf.Graph = None, + init_model: Model = None, + init_a: Union[np.ndarray, str] = "AUTO", + init_b: Union[np.ndarray, str] = "AUTO", + quick_scale: bool = False, + model: EstimatorGraph = None, + provide_optimizers: dict = { + "gd": True, + "adam": True, + "adagrad": True, + "rmsprop": True, + "nr": True, + "nr_tr": True, + "irls": True, + "irls_gd": True, + "irls_tr": True, + "irls_gd_tr": True, + }, + provide_batched: bool = False, + provide_fim: bool = False, + provide_hessian: bool = False, + optim_algos: list = [], + extended_summary=False, + dtype="float64" + ): + """ + Performs initialisation and creates a new estimator. 
+
+        :param input_data: InputData
+            The input data
+        :param batch_size: int
+            Size of mini-batches used.
+        :param graph: (optional) tf.Graph
+        :param init_model: (optional)
+            If provided, this model will be used to initialize this Estimator.
+        :param init_a: (Optional)
+            Low-level initial values for a. Can be:
+
+            - str:
+                * "auto": automatically choose best initialization
+                * "random": initialize with random values
+                * "standard": initialize intercept with observed mean
+                * "init_model": initialize with another model (see `init_model` parameter)
+                * "closed_form": try to initialize with closed form
+            - np.ndarray: direct initialization of 'a'
+        :param init_b: (won't be used)
+        :param quick_scale: (won't be used)
+        :param model: EstimatorGraph
+            EstimatorGraph to use. Basically for debugging.
+        :param provide_optimizers:
+
+            E.g. {"gd": False, "adam": False, "adagrad": False, "rmsprop": False,
+                "nr": False, "nr_tr": True,
+                "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False}
+        :param provide_batched: bool
+            Whether mini-batched optimizers should be provided.
+        :param provide_fim: Whether to compute fisher information matrix during training
+            Either supply provide_fim and provide_hessian or optim_algos.
+        :param provide_hessian: Whether to compute hessians during training
+            Either supply provide_fim and provide_hessian or optim_algos.
+        :param optim_algos: Algorithms that you want to use on this object. Depending on that,
+            the hessian and/or fisher information matrix are computed.
+            Either supply provide_fim and provide_hessian or optim_algos.
+        :param extended_summary: Include detailed information in the summaries.
+            Will increase runtime of summary writer, use only for debugging.
+        :param dtype: Precision used in tensorflow.
+        """
+        self.TrainingStrategies = TrainingStrategies
+
+        self._input_data = input_data
+        self._train_loc = True
+        self._train_scale = True
+
+        (init_a, init_b) = self.init_par(
+            input_data=input_data,
+            init_a=init_a,
+            init_b=init_b,
+            init_model=init_model
+        )
+        init_a = init_a.astype(dtype)
+        init_b = init_b.astype(dtype)
+        if quick_scale:
+            self._train_scale = False
+
+        if len(optim_algos) > 0:
+            if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]):
+                provide_hessian = True
+            if np.any([x.lower() in ["irls", "irls_tr", "irls_gd", "irls_gd_tr"] for x in optim_algos]):
+                provide_fim = True
+
+        EstimatorAll.__init__(
+            self=self,
+            input_data=input_data,
+            batch_size=batch_size,
+            graph=graph,
+            init_a=init_a,
+            init_b=init_b,
+            model=model,
+            provide_optimizers=provide_optimizers,
+            provide_batched=provide_batched,
+            provide_fim=provide_fim,
+            provide_hessian=provide_hessian,
+            extended_summary=extended_summary,
+            noise_model="bern",
+            dtype=dtype
+        )
+
+    @classmethod
+    def param_shapes(cls) -> dict:
+        return ESTIMATOR_PARAMS
+
+    def init_par(
+            self,
+            input_data,
+            init_a,
+            init_b,
+            init_model
+    ):
+        r"""
+        standard:
+        Only initialise intercept and keep other coefficients as zero.
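+
+            For bernoulli noise this sets the intercept to the logit of the
+            per-feature mean of X, as in the "standard" branch below
+            (illustrative numpy sketch):
+
+                >>> import numpy as np
+                >>> X = np.random.binomial(1, 0.3, size=(1000, 5))
+                >>> intercept = np.log(X.mean(0) / (1 - X.mean(0)))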
+ + closed-form: + Initialize with Maximum Likelihood / Maximum of Momentum estimators + + Idea: + $$ + \theta &= f(x) \\ + \Rightarrow f^{-1}(\theta) &= x \\ + &= (D \cdot D^{+}) \cdot x \\ + &= D \cdot (D^{+} \cdot x) \\ + &= D \cdot x' = f^{-1}(\theta) + $$ + """ + + size_factors_init = input_data.size_factors + if size_factors_init is not None: + size_factors_init = np.expand_dims(size_factors_init, axis=1) + size_factors_init = np.broadcast_to( + array=size_factors_init, + shape=[input_data.num_observations, input_data.num_features] + ) + + if init_model is None: + groupwise_means = None + init_a_str = None + if isinstance(init_a, str): + init_a_str = init_a.lower() + # Chose option if auto was chosen + if init_a.lower() == "auto": + init_a = "closed_form" + + if init_a.lower() == "closed_form": + groupwise_means, init_a, rmsd_a = closedform_bern_glm_logitmu( + X=input_data.X, + design_loc=input_data.design_loc, + constraints_loc=input_data.constraints_loc.values, + size_factors=size_factors_init, + link_fn=lambda mu: np.log(self.np_clip_param(mu, "mu")/(1-self.np_clip_param(mu, "mu"))) + ) + + # train mu, if the closed-form solution is inaccurate + self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) + + if input_data.size_factors is not None: + if np.any(input_data.size_factors != 1): + self._train_loc = True + + logger.debug("Using closed-form MLE initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) + elif init_a.lower() == "standard": + if isinstance(input_data.X, SparseXArrayDataArray): + overall_means = input_data.X.mean(dim="observations") + else: + overall_means = input_data.X.mean(dim="observations").values # directly calculate the mean + overall_means = self.np_clip_param(overall_means, "mu") + + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + init_a[0, :] = np.log(overall_means/(1-overall_means)) + self._train_loc = True + + logger.debug("Using standard initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) + elif init_a.lower() == "all_zero": + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + self._train_loc = True + + logger.debug("Using all_zero initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) + else: + raise ValueError("init_a string %s not recognized" % init_a) + + else: + if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): + my_loc_names = set(input_data.loc_names.values) + my_loc_names = my_loc_names.intersection(set(init_model.input_data.loc_names.values)) + + init_loc = np.zeros([input_data.num_loc_params, input_data.num_features]) + for parm in my_loc_names: + init_idx = np.where(init_model.input_data.loc_names == parm)[0] + my_idx = np.where(input_data.loc_names == parm)[0] + init_loc[my_idx] = init_model.a_var[init_idx] + + init_a = init_loc + logger.debug("Using initialization based on input model for mean") + init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) + + return init_a, init_b + + @property + def input_data(self) -> InputData: + return self._input_data diff --git a/batchglm/train/tf/glm_bern/estimator_graph.py b/batchglm/train/tf/glm_bern/estimator_graph.py new file mode 100644 index 00000000..8e609600 --- /dev/null +++ b/batchglm/train/tf/glm_bern/estimator_graph.py @@ -0,0 +1,12 @@ +import logging + +from .model import ProcessModel +from .external import EstimatorGraphAll + +logger = logging.getLogger(__name__) + + +class 
EstimatorGraph(ProcessModel, EstimatorGraphAll): + """ + Full class. + """ diff --git a/batchglm/train/tf/glm_bern/external.py b/batchglm/train/tf/glm_bern/external.py new file mode 100644 index 00000000..3fcd24ee --- /dev/null +++ b/batchglm/train/tf/glm_bern/external.py @@ -0,0 +1,20 @@ +import batchglm.data as data_utils + +from batchglm.models.base.input import SparseXArrayDataSet, SparseXArrayDataArray +from batchglm.models.glm_bern import AbstractEstimator, EstimatorStoreXArray, InputData, Model +from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale +from batchglm.models.glm_bern.utils import closedform_bern_glm_logitmu + +import batchglm.train.tf.ops as op_utils +import batchglm.train.tf.train as train_utils +from batchglm.train.tf.base import TFEstimatorGraph, MonitoredTFEstimator + +from batchglm.train.tf.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BasicModelGraphGLM +from batchglm.train.tf.base_glm import ESTIMATOR_PARAMS, ProcessModelGLM, ModelVarsGLM +from batchglm.train.tf.base_glm import HessiansGLM, FIMGLM, JacobiansGLM + +from batchglm.train.tf.base_glm_all import EstimatorAll, EstimatorGraphAll, FIMGLMALL, HessianGLMALL, JacobiansGLMALL, ReducableTensorsGLMALL + +import batchglm.utils.random as rand_utils +from batchglm.utils.linalg import groupwise_solve_lm +from batchglm import pkg_constants diff --git a/batchglm/train/tf/glm_bern/fim.py b/batchglm/train/tf/glm_bern/fim.py new file mode 100644 index 00000000..c0d20183 --- /dev/null +++ b/batchglm/train/tf/glm_bern/fim.py @@ -0,0 +1,26 @@ +import tensorflow as tf + +import logging + +from .external import FIMGLMALL + +logger = logging.getLogger(__name__) + + +class FIM(FIMGLMALL): + + def _weight_fim_aa( + self, + loc, + scale + ): + const = - loc * (1-loc) + + return const + + def _weight_fim_bb( + self, + loc, + scale + ): + return tf.zeros_like(loc) diff --git a/batchglm/train/tf/glm_bern/hessians.py b/batchglm/train/tf/glm_bern/hessians.py new file mode 100644 index 00000000..5362cf0a --- /dev/null +++ b/batchglm/train/tf/glm_bern/hessians.py @@ -0,0 +1,37 @@ +import tensorflow as tf + +import logging + +from .external import HessianGLMALL + +logger = logging.getLogger(__name__) + + +class Hessians(HessianGLMALL): + + def _weight_hessian_ab( + self, + X, + loc, + scale, + ): + return tf.zeros_like(loc) + + def _weight_hessian_aa( + self, + X, + loc, + scale, + ): + const = - loc * (1-loc) + return const + + def _weight_hessian_bb( + self, + X, + loc, + scale, + ): + return tf.zeros_like(loc) + + diff --git a/batchglm/train/tf/glm_bern/jacobians.py b/batchglm/train/tf/glm_bern/jacobians.py new file mode 100644 index 00000000..be92bd80 --- /dev/null +++ b/batchglm/train/tf/glm_bern/jacobians.py @@ -0,0 +1,35 @@ +import logging + +import tensorflow as tf + +from .external import JacobiansGLMALL + +logger = logging.getLogger(__name__) + + +class Jacobians(JacobiansGLMALL): + + def _weights_jac_a( + self, + X, + loc, + scale, + ): + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + one_minus_X = - tf.sparse.add(X, -1) + Xdense = tf.sparse.to_dense(X) + else: + one_minus_X = 1 - X + Xdense = X + + const = Xdense*(1-loc) - (one_minus_X)*loc + + return const + + def _weights_jac_b( + self, + X, + loc, + scale, + ): + return tf.zeros_like(loc) diff --git a/batchglm/train/tf/glm_bern/model.py b/batchglm/train/tf/glm_bern/model.py new file mode 100644 index 00000000..398ce5e2 --- /dev/null +++ 
b/batchglm/train/tf/glm_bern/model.py @@ -0,0 +1,133 @@ +import logging + +import tensorflow as tf + +import numpy as np + +from .external import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM +from .external import pkg_constants + +logger = logging.getLogger(__name__) + + +class ProcessModel(ProcessModelGLM): + + def param_bounds( + self, + dtype + ): + if isinstance(dtype, tf.DType): + dmin = dtype.min + dmax = dtype.max + dtype = dtype.as_numpy_dtype + else: + dtype = np.dtype(dtype) + dmin = np.finfo(dtype).min + dmax = np.finfo(dtype).max + dtype = dtype.type + + zero = np.nextafter(0, np.inf, dtype=dtype) + one = np.nextafter(1, -np.inf, dtype=dtype) + + sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) + bounds_min = { + "a_var": np.log(zero/(1-zero)) / sf, + "b_var": np.log(zero) / sf, + "eta_loc": np.log(zero/(1-zero)) / sf, + "eta_scale": np.log(zero) / sf, + "mu": np.nextafter(0, np.inf, dtype=dtype), + "r": np.nextafter(0, np.inf, dtype=dtype), + "probs": dtype(0), + "log_probs": np.log(zero), + } + bounds_max = { + "a_var": np.log(one/(1-one)) / sf, + "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + "eta_loc": np.log(one/(1-one)) / sf, + "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + "mu": one, + "r": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "probs": dtype(1), + "log_probs": dtype(0), + } + return bounds_min, bounds_max + + +class ModelVars(ProcessModel, ModelVarsGLM): + """ + Full class. + """ + + +class BasicModelGraph(ProcessModel, BasicModelGraphGLM): + + def __init__( + self, + X, + design_loc, + design_scale, + constraints_loc, + constraints_scale, + a_var, + b_var, + dtype, + size_factors=None + ): + a_var = self.tf_clip_param(a_var, "a_var") + b_var = self.tf_clip_param(b_var, "b_var") + + if constraints_loc is not None: + eta_loc = tf.matmul(design_loc, tf.matmul(constraints_loc, a_var)) + else: + eta_loc = tf.matmul(design_loc, a_var) + + if size_factors is not None: + eta_loc = tf.add(eta_loc, tf.log(size_factors)) + + eta_loc = self.tf_clip_param(eta_loc, "eta_loc") + + if constraints_scale is not None: + eta_scale = tf.matmul(design_scale, tf.matmul(constraints_scale, b_var)) + else: + eta_scale = tf.matmul(design_scale, b_var) + + eta_scale = self.tf_clip_param(eta_scale, "eta_scale") + + # Inverse linker functions: + model_loc = 1 / (1 + tf.exp(-eta_loc)) + model_scale = eta_scale + + # Log-likelihood: + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + one_minus_X = -tf.sparse.add(X, -1) + Xdense = tf.sparse.to_dense(X) + else: + one_minus_X = 1 - X + Xdense = X + + log_probs = Xdense*tf.log(model_loc) + (one_minus_X)*tf.log(1 - model_loc) + log_probs = self.tf_clip_param(log_probs, "log_probs") + + # Variance: + sigma2 = model_loc*(1-model_loc) + + self.X = X + self.design_loc = design_loc + self.design_scale = design_scale + self.constraints_loc = constraints_loc + self.constraints_scale = constraints_scale + self.a_var = a_var + self.b_var = b_var + self.size_factors = size_factors + self.dtype = dtype + + self.eta_loc = eta_loc + self.eta_scale = eta_scale + self.model_loc = model_loc + self.model_scale = model_scale + self.mu = model_loc + self.r = model_scale + + self.log_probs = log_probs + + self.sigma2 = sigma2 \ No newline at end of file diff --git a/batchglm/train/tf/glm_bern/reducible_tensors.py b/batchglm/train/tf/glm_bern/reducible_tensors.py new file mode 100644 index 00000000..862ccaf8 --- /dev/null +++ b/batchglm/train/tf/glm_bern/reducible_tensors.py @@ -0,0 
+1,13 @@
+import logging
+
+from .external import ReducableTensorsGLMALL
+from .hessians import Hessians
+from .jacobians import Jacobians
+from .fim import FIM
+
+logger = logging.getLogger("batchglm")
+
+
+class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL):
+    """
+    """
diff --git a/batchglm/train/tf/glm_bern/training_strategies.py b/batchglm/train/tf/glm_bern/training_strategies.py
new file mode 100644
index 00000000..d9e57377
--- /dev/null
+++ b/batchglm/train/tf/glm_bern/training_strategies.py
@@ -0,0 +1,27 @@
+from enum import Enum
+
+
+class TrainingStrategies(Enum):
+
+    AUTO = None
+    DEFAULT = [
+        {
+            "convergence_criteria": "all_converged",
+            "use_batching": False,
+            "optim_algo": "irls_gd_tr",
+        },
+    ]
+    IRLS = [
+        {
+            "convergence_criteria": "all_converged",
+            "use_batching": False,
+            "optim_algo": "irls_gd_tr",
+        },
+    ]
+    IRLS_BATCHED = [
+        {
+            "convergence_criteria": "all_converged",
+            "use_batching": True,
+            "optim_algo": "irls_gd_tr",
+        },
+    ]
diff --git a/batchglm/train/tf/glm_beta/estimator.py b/batchglm/train/tf/glm_beta/estimator.py
index 7dba0586..1b6468ba 100644
--- a/batchglm/train/tf/glm_beta/estimator.py
+++ b/batchglm/train/tf/glm_beta/estimator.py
@@ -11,6 +11,7 @@
 from .model import ProcessModel
 from .training_strategies import TrainingStrategies
 
+logger = logging.getLogger("batchglm")
 
 class Estimator(EstimatorAll, AbstractEstimator, ProcessModel):
 
@@ -105,7 +106,7 @@ def __init__(
 
         self._input_data = input_data
         self._train_loc = True
-        self._train_scale = not quick_scale
+        self._train_scale = True
 
         (init_a, init_b) = self.init_par(
             input_data=input_data,
@@ -115,6 +116,8 @@ def __init__(
         )
         init_a = init_a.astype(dtype)
         init_b = init_b.astype(dtype)
+        if quick_scale:
+            self._train_scale = False
 
         if len(optim_algos) > 0:
             if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]):
@@ -224,7 +227,7 @@ def init_par(
                 design_scale=input_data.design_scale[:, [0]],
                 constraints=input_data.constraints_scale[[0], [0]].values,
                 size_factors=size_factors_init,
-                groupwise_means=groupwise_means,
+                groupwise_means=None,
                 link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize"))
             )
         init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]])
diff --git a/batchglm/train/tf/glm_beta/hessians.py b/batchglm/train/tf/glm_beta/hessians.py
index d4bbb165..537a50f1 100644
--- a/batchglm/train/tf/glm_beta/hessians.py
+++ b/batchglm/train/tf/glm_beta/hessians.py
@@ -15,17 +15,17 @@ def _weight_hessian_aa(
         loc,
         scale,
     ):
-        one_minus_loc = 1 - loc
+        one_minus_loc = tf.ones_like(loc) - loc
         loc_times_scale = loc * scale
         one_minus_loc_times_scale = one_minus_loc * scale
         scalar_one = tf.constant(1, shape=(), dtype=self.dtype)
 
         if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue):
-            const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -1))
+            const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)))
         else:
-            const1 = tf.log(X / (1 - X))
+            const1 = tf.log(X / (tf.ones_like(X) - 
X)) - const2 = (1 - 2 * loc) * (- tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1) + const2 = (tf.ones_like(loc) - 2 * loc) * (- tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1) const3 = loc * one_minus_loc_times_scale * (- tf.polygamma(scalar_one, loc_times_scale) - tf.polygamma(scalar_one, one_minus_loc_times_scale)) const = loc * one_minus_loc_times_scale * (const2 + const3) return const @@ -36,15 +36,15 @@ def _weight_hessian_ab( loc, scale, ): - one_minus_loc = 1 - loc + one_minus_loc = tf.ones_like(loc) - loc loc_times_scale = loc * scale one_minus_loc_times_scale = one_minus_loc * scale scalar_one = tf.constant(1, shape=(), dtype=self.dtype) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -1)) + const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) else: - const1 = tf.log(X / (1 - X)) + const1 = tf.log(X / (tf.ones_like(X) - X)) const2 = - tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1 const3 = scale * (- tf.polygamma(scalar_one, loc_times_scale) * loc + one_minus_loc * tf.polygamma(scalar_one, one_minus_loc_times_scale)) @@ -59,18 +59,20 @@ def _weight_hessian_bb( loc, scale, ): - one_minus_loc = 1 - loc + one_minus_loc = tf.ones_like(loc) - loc loc_times_scale = loc * scale one_minus_loc_times_scale = one_minus_loc * scale scalar_one = tf.constant(1, shape=(), dtype=self.dtype) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -1)) + one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) + Xdense = tf.sparse.to_dense(X) else: - const1 = tf.log(X / (1 - X)) + one_minus_X = tf.ones_like(X) - X + Xdense = X - const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\ - - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ + const2 = loc * (tf.log(Xdense) - tf.digamma(loc_times_scale))\ + - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(one_minus_X)) \ + tf.digamma(scale) const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\ + tf.polygamma(scalar_one, scale)\ diff --git a/batchglm/train/tf/glm_beta/jacobians.py b/batchglm/train/tf/glm_beta/jacobians.py index 1d97149d..1eec6172 100644 --- a/batchglm/train/tf/glm_beta/jacobians.py +++ b/batchglm/train/tf/glm_beta/jacobians.py @@ -15,11 +15,11 @@ def _weights_jac_a( loc, scale, ): - one_minus_loc = 1 - loc + one_minus_loc = tf.ones_like(loc) - loc if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X)/-tf.sparse.add(X, -1)) + const1 = tf.log(tf.sparse.to_dense(X)/-tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) else: - const1 = tf.log(X/(1-X)) + const1 = tf.log(X/(tf.ones_like(X)-X)) const2 = - tf.digamma(loc*scale) + tf.digamma(one_minus_loc*scale) + const1 const = const2 * scale * loc * one_minus_loc return const @@ -31,12 +31,12 @@ def _weights_jac_b( scale, ): if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - one_minus_X = - tf.sparse.add(X, -1) + one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) Xdense = tf.sparse.to_dense(X) else: - one_minus_X = 1 - X + one_minus_X = tf.ones_like(X) - X Xdense = X - one_minus_loc = 1 - loc - const = scale * (tf.digamma(scale) - tf.digamma(loc*scale)*loc - 
tf.digamma(one_minus_loc*scale)*one_minus_loc\
+        one_minus_loc = tf.ones_like(loc) - loc
+        const = scale * (tf.digamma(scale) - tf.digamma(loc*scale)*loc - tf.digamma(one_minus_loc*scale)*one_minus_loc
                          + loc * tf.log(Xdense) + one_minus_loc * tf.log(one_minus_X))
         return const
diff --git a/batchglm/train/tf/glm_beta/model.py b/batchglm/train/tf/glm_beta/model.py
index a647f1d6..04330a85 100644
--- a/batchglm/train/tf/glm_beta/model.py
+++ b/batchglm/train/tf/glm_beta/model.py
@@ -35,8 +35,8 @@ def param_bounds(
             "b_var": np.log(zero) / sf,
             "eta_loc": np.log(zero/(1-zero)) / sf,
             "eta_scale": np.log(zero) / sf,
-            "mean": np.nextafter(0, np.inf, dtype=dtype),
-            "samplesize": np.nextafter(0, np.inf, dtype=dtype),
+            "mean": zero,
+            "samplesize": zero,
             "probs": dtype(0),
             "log_probs": np.log(zero),
         }
@@ -47,8 +47,8 @@ def param_bounds(
             "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
             "mean": one,
             "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf,
-            "probs": dtype(1),
-            "log_probs": dtype(0),
+            "probs": np.nextafter(dmax, -np.inf, dtype=dtype) / sf,
+            "log_probs": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
         }
         return bounds_min, bounds_max
 
@@ -91,27 +91,25 @@ def __init__(
         eta_scale = self.tf_clip_param(eta_scale, "eta_scale")
 
         # Inverse linker functions:
-        model_loc = 1/(1+tf.exp(-eta_loc))
+        model_loc = tf.ones_like(eta_loc)/(tf.ones_like(eta_loc)+tf.exp(-eta_loc))
         model_scale = tf.exp(eta_scale)
 
         # Log-likelihood:
         if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue):
-            one_minus_X = -tf.sparse.add(X, -1)
+            one_minus_X = -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=dtype))
             Xdense = tf.sparse.to_dense(X)
         else:
-            one_minus_X = 1 - X
+            one_minus_X = tf.ones_like(X) - X
             Xdense = X
 
-        one_minus_loc = 1 - model_loc
-        log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale)\
-                    - tf.lgamma(one_minus_loc * model_scale)\
-                    + (model_scale * model_loc - 1) * tf.log(Xdense)\
-                    + (one_minus_loc * model_scale - 1) * tf.log(one_minus_X)
-
+        one_minus_loc = tf.ones_like(model_loc) - model_loc
+        log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale) - tf.lgamma(one_minus_loc * model_scale) \
+                    + (model_scale * model_loc - tf.ones_like(model_loc)) * tf.log(Xdense) \
+                    + (one_minus_loc * model_scale - tf.ones_like(model_loc)) * tf.log(one_minus_X)
         log_probs = self.tf_clip_param(log_probs, "log_probs")
 
         # Variance:
-        sigma2 = (model_loc * one_minus_loc) / (1 + model_scale)
+        sigma2 = (model_loc * one_minus_loc) / (tf.ones_like(model_loc) + model_scale)
 
         self.X = X
         self.design_loc = design_loc
diff --git a/batchglm/train/tf/glm_norm/model.py b/batchglm/train/tf/glm_norm/model.py
index 7b7c2eb5..d0a84da9 100644
--- a/batchglm/train/tf/glm_norm/model.py
+++ b/batchglm/train/tf/glm_norm/model.py
@@ -44,8 +44,8 @@ def param_bounds(
             "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
             "mean": np.nextafter(dmax, -np.inf, dtype=dtype) / sf,
             "sd": np.nextafter(dmax, -np.inf, dtype=dtype) / sf,
-            "probs": dtype(1),
-            "log_probs": dtype(0),
+            "probs": np.nextafter(dmax, -np.inf, dtype=dtype) / sf,
+            "log_probs": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf,
         }
         return bounds_min, bounds_max
 
diff --git a/batchglm/unit_test/base_glm/__init__.py b/batchglm/unit_test/base_glm/__init__.py
index a2ad1f12..e2200222 100644
--- a/batchglm/unit_test/base_glm/__init__.py
+++ b/batchglm/unit_test/base_glm/__init__.py
@@ -1,5 +1,4 @@
 from .test_acc_glm import Test_Accuracy_GLM, _Test_Accuracy_GLM_Estim
-from .test_acc_analytic_glm import Test_AccuracyAnalytic_GLM, _Test_AccuracyAnalytic_GLM_Estim
 from .test_acc_constrained_vglm import Test_AccuracyConstrained_VGLM, _Test_AccuracyConstrained_VGLM_Estim
 from .test_acc_sizefactors_glm import Test_AccuracySizeFactors_GLM, _Test_AccuracySizeFactors_GLM_Estim
 from .test_graph_glm import Test_Graph_GLM, _Test_Graph_GLM_Estim
diff --git a/batchglm/unit_test/base_glm/test_acc_glm.py b/batchglm/unit_test/base_glm/test_acc_glm.py
index 5531e396..8c1c4cce 100644
--- a/batchglm/unit_test/base_glm/test_acc_glm.py
+++ b/batchglm/unit_test/base_glm/test_acc_glm.py
@@ -38,7 +38,7 @@ def estimate(
         self.estimator.train_sequence(training_strategy=[
             {
                 "learning_rate": lr,
-                "convergence_criteria": "all_converged_ll",
+                "convergence_criteria": "all_converged",
                 "stopping_criteria": acc,
                 "use_batching": batched,
                 "optim_algo": algo,
diff --git a/batchglm/unit_test/glm_all/external.py b/batchglm/unit_test/glm_all/external.py
index be8b1be9..3865a988 100644
--- a/batchglm/unit_test/glm_all/external.py
+++ b/batchglm/unit_test/glm_all/external.py
@@ -1,5 +1,4 @@
 from batchglm.unit_test.base_glm import Test_Accuracy_GLM, _Test_Accuracy_GLM_Estim
-from batchglm.unit_test.base_glm import Test_AccuracyAnalytic_GLM, _Test_AccuracyAnalytic_GLM_Estim
 from batchglm.unit_test.base_glm import Test_AccuracyConstrained_VGLM, _Test_AccuracyConstrained_VGLM_Estim
 from batchglm.unit_test.base_glm import Test_AccuracySizeFactors_GLM, _Test_AccuracySizeFactors_GLM_Estim
 from batchglm.unit_test.base_glm import Test_Graph_GLM, _Test_Graph_GLM_Estim
diff --git a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py
index 30cd3a6a..e50acc23 100644
--- a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py
+++ b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py
@@ -36,6 +36,10 @@ def __init__(
             from batchglm.api.models.glm_nb import Estimator, InputData
         elif noise_model=="norm":
             from batchglm.api.models.glm_norm import Estimator, InputData
+        elif noise_model=="beta":
+            from batchglm.api.models.glm_beta import Estimator, InputData
+        elif noise_model=="bern":
+            from batchglm.api.models.glm_bern import Estimator, InputData
         else:
             raise ValueError("noise_model not recognized")
 
@@ -83,9 +87,15 @@ def eval_estimation_a(
         if self.noise_model == "nb":
             threshold_dev = 1e-2
             threshold_std = 1e-1
-        elif self.noise_model == "norm":
+        elif self.noise_model=="norm":
+            threshold_dev = 1e-2
+            threshold_std = 1e-1
+        elif self.noise_model=="beta":
             threshold_dev = 1e-2
             threshold_std = 1e-1
+        elif self.noise_model=="bern":
+            threshold_dev = 1e-1
+            threshold_std = 1e-1
         else:
             raise ValueError("noise_model not recognized")
 
@@ -121,9 +131,14 @@ def eval_estimation_b(
         elif self.noise_model == "norm":
             threshold_dev = 1e-2
             threshold_std = 1e-1
+        elif self.noise_model == "beta":
+            threshold_dev = 1e-2
+            threshold_std = 1e-1
+        elif self.noise_model == "bern":
+            threshold_dev = 1e-2
+            threshold_std = 1e-1
         else:
             raise ValueError("noise_model not recognized")
-
         if init_b == "standard":
             mean_dev = np.mean(estimator_store.b[0, :] - self.simulator.b[0, :])
             std_dev = np.std(estimator_store.b[0, :] - self.simulator.b[0, :])
@@ -157,6 +172,10 @@ def get_simulator(self):
             from batchglm.api.models.glm_nb import Simulator
elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="beta": + from batchglm.api.models.glm_beta import Simulator + elif self.noise_model=="bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -178,10 +205,33 @@ def get_estimator(self, train_scale, sparse, init_a, init_b): def simulate_complex(self): self.sim = self.get_simulator() self.sim.generate_sample_description(num_batches=1, num_conditions=2) + + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model=="nb": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="norm": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) + rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + elif self.noise_model=="bern": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) + rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) + else: + raise ValueError("noise_model not recognized") + self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2 * 1e5, shape), - rand_fn_loc=lambda shape: np.random.uniform(1, 3, shape), - rand_fn_scale=lambda shape: np.random.uniform(1, 3, shape) + rand_fn_ave=rand_fn_ave, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale ) self.sim.generate_data() @@ -194,10 +244,32 @@ def rand_fn_standard(shape): theta[0, :] = np.random.uniform(5, 20, shape[1]) return theta + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model=="nb": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.ones(shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) + elif self.noise_model=="norm": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.ones(shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: 0.5*np.ones(shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) + elif self.noise_model=="bern": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: 0.5*np.ones(shape) + rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) + else: + raise ValueError("noise_model not recognized") + self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2 * 1e5, shape), - rand_fn_loc=lambda shape: np.ones(shape), - rand_fn_scale=lambda shape: rand_fn_standard(shape) + rand_fn_ave=rand_fn_ave, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale ) self.sim.generate_data() @@ -270,7 +342,7 @@ class Test_AccuracyAnalytic_GLM_NORM( """ def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR), + logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) 
logger.error("Test_AccuracyAnalytic_GLM_NORM.test_a_closed_b_closed()") @@ -289,6 +361,62 @@ def test_a_standard_b_standard(self): self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") +class Test_AccuracyAnalytic_GLM_BETA( + Test_AccuracyAnalytic_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for beta distributed noise. + """ + + def test_a_closed_b_closed(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_closed_b_closed()") + + self.noise_model = "beta" + self.simulate_complex() + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + + def test_a_standard_b_standard(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_standard_b_standard()") + + self.noise_model = "beta" + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + +class Test_AccuracyAnalytic_GLM_BERN( + Test_AccuracyAnalytic_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for bernoulli distributed noise. + """ + + def test_a_closed_b_closed(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_closed_b_closed()") + + self.noise_model = "bern" + self.simulate_complex() + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + + def test_a_standard_b_standard(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_standard_b_standard()") + + self.noise_model = "bern" + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + #self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + if __name__ == '__main__': unittest.main() diff --git a/batchglm/unit_test/glm_all/test_acc_glm_all.py b/batchglm/unit_test/glm_all/test_acc_glm_all.py index 1377b4e8..8a76eb1a 100644 --- a/batchglm/unit_test/glm_all/test_acc_glm_all.py +++ b/batchglm/unit_test/glm_all/test_acc_glm_all.py @@ -29,6 +29,10 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData + elif noise_model=="beta": + from batchglm.api.models.glm_beta import Estimator, InputData + elif noise_model=="bern": + from batchglm.api.models.glm_bern import Estimator, InputData else: raise ValueError("noise_model not recognized") @@ -57,7 +61,9 @@ def __init__( provide_optimizers=provide_optimizers, provide_batched=True, init_a="standard", - init_b="standard" + init_b="standard", + provide_fim = True, + provide_hessian = True, ) super().__init__( estimator=estimator, @@ -102,6 +108,10 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="beta": + from 
batchglm.api.models.glm_beta import Simulator
+        elif self.noise_model=="bern":
+            from batchglm.api.models.glm_bern import Simulator
         else:
             raise ValueError("noise_model not recognized")
 
@@ -177,7 +187,7 @@ class Test_Accuracy_GLM_NORM(
     def test_full_norm(self):
         logging.getLogger("tensorflow").setLevel(logging.ERROR)
         logging.getLogger("batchglm").setLevel(logging.WARNING)
-        logger.error("Test_Accuracy_GLM_NB.test_full_norm()")
+        logger.error("Test_Accuracy_GLM_NORM.test_full_norm()")
 
         self.noise_model = "norm"
         self.simulate()
@@ -187,13 +197,70 @@ def test_full_norm(self):
     def test_batched_norm(self):
         logging.getLogger("tensorflow").setLevel(logging.ERROR)
         logging.getLogger("batchglm").setLevel(logging.WARNING)
-        logger.error("Test_Accuracy_GLM_NB.test_batched_norm()")
+        logger.error("Test_Accuracy_GLM_NORM.test_batched_norm()")
 
         self.noise_model = "norm"
         self.simulate()
         self._test_batched(sparse=False)
         self._test_batched(sparse=True)
 
+class Test_Accuracy_GLM_BETA(
+    Test_Accuracy_GLM_ALL,
+    unittest.TestCase
+):
+    """
+    Test whether optimizers yield exact results for beta distributed noise.
+    """
+
+    def test_full_beta(self):
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logger.error("Test_Accuracy_GLM_BETA.test_full_beta()")
+
+        self.noise_model = "beta"
+        self.simulate()
+        self._test_full(sparse=False)
+        self._test_full(sparse=True)
+
+    def test_batched_beta(self):
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logger.error("Test_Accuracy_GLM_BETA.test_batched_beta()")
+
+        self.noise_model = "beta"
+        self.simulate()
+        self._test_batched(sparse=False)
+        self._test_batched(sparse=True)
+
+
+class Test_Accuracy_GLM_BERN(
+    Test_Accuracy_GLM_ALL,
+    unittest.TestCase
+):
+    """
+    Test whether optimizers yield exact results for bernoulli distributed noise.
+ """ + + def test_full_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Accuracy_GLM_BERN.test_full_bern()") + + self.noise_model = "bern" + self.simulate() + self._test_full(sparse=False) + self._test_full(sparse=True) + + def test_batched_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Accuracy_GLM_BERN.test_batched_bern()") + + self.noise_model = "bern" + self.simulate() + self._test_batched(sparse=False) + self._test_batched(sparse=True) + if __name__ == '__main__': unittest.main() diff --git a/batchglm/unit_test/glm_all/test_graph_glm_all.py b/batchglm/unit_test/glm_all/test_graph_glm_all.py index 0d9aebdf..3c585cbc 100644 --- a/batchglm/unit_test/glm_all/test_graph_glm_all.py +++ b/batchglm/unit_test/glm_all/test_graph_glm_all.py @@ -30,8 +30,10 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData - elif noise_model=="beta": + elif noise_model == "beta": from batchglm.api.models.glm_beta import Estimator, InputData + elif noise_model=="bern": + from batchglm.api.models.glm_bern import Estimator, InputData else: raise ValueError("noise_model not recognized") @@ -103,6 +105,8 @@ def get_simulator(self): from batchglm.api.models.glm_norm import Simulator elif self.noise_model=="beta": from batchglm.api.models.glm_beta import Simulator + elif self.noise_model=="bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -194,6 +198,7 @@ def test_batched_norm(self): self._test_batched(sparse=False) self._test_batched(sparse=True) + class Test_Graph_GLM_BETA( Test_Graph_GLM_ALL, unittest.TestCase @@ -221,5 +226,32 @@ def test_batched_beta(self): self._test_batched(sparse=True) +class Test_Graph_GLM_BERN( + Test_Graph_GLM_ALL, + unittest.TestCase +): + """ + Test whether training graphs work for bernoulli noise. 
+ """ + + def test_full_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Graph_GLM_BERN.test_full_bern()") + + self.noise_model = "bern" + self._test_full(sparse=False) + self._test_full(sparse=True) + + def test_batched_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Graph_GLM_BERN.test_batched_bern()") + + self.noise_model = "bern" + self._test_batched(sparse=False) + self._test_batched(sparse=True) + + if __name__ == '__main__': unittest.main() diff --git a/batchglm/unit_test/glm_all/test_hessians_glm_all.py b/batchglm/unit_test/glm_all/test_hessians_glm_all.py index 7e7b74a5..1c5d5e11 100644 --- a/batchglm/unit_test/glm_all/test_hessians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_hessians_glm_all.py @@ -30,6 +30,8 @@ def simulate(self): from batchglm.api.models.glm_norm import Simulator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -53,6 +55,8 @@ def get_hessians( from batchglm.api.models.glm_norm import Estimator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Estimator + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Estimator else: raise ValueError("noise_model not recognized") @@ -84,6 +88,8 @@ def _test_compute_hessians(self, sparse): from batchglm.api.models.glm_norm import Simulator, InputData elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator, InputData + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Simulator, InputData else: raise ValueError("noise_model not recognized") @@ -127,12 +133,12 @@ def _test_compute_hessians(self, sparse): logging.getLogger("batchglm").info("run time observation batch-wise analytic solution: %f" % t_analytic) logging.getLogger("batchglm").info("run time tensorflow solution: %f" % t_tf) - logging.getLogger("batchglm").info("MAD: %f" % np.max(np.abs((h_tf - h_analytic)))) + logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs(h_tf - h_analytic))) - #i = 1 - #print(h_tf[i, :, :]) - #print(h_analytic[i, :, :]) - #print(h_tf[i, :, :] - h_analytic[i, :, :]) + # i = 1 + # print("\n h_tf: \n", h_tf[i, :, :]) + # print("\n h_analytic: \n", h_analytic[i, :, :]) + # print("\n difference: \n", (h_tf[i, :, :] - h_analytic[i, :, :])) # Make sure that hessians are not all zero which might make evaluation of equality difficult. 
assert np.sum(np.abs(h_analytic)) > 1e-10, \
@@ -183,6 +189,19 @@ def test_compute_hessians_beta(self):
 
         return True
 
+class Test_Hessians_GLM_BERN(Test_Hessians_GLM_ALL, unittest.TestCase):
+
+    def test_compute_hessians_bern(self):
+        logging.getLogger("tensorflow").setLevel(logging.ERROR)
+        logging.getLogger("batchglm").setLevel(logging.WARNING)
+        logging.getLogger("batchglm").error("Test_Hessians_GLM_BERN.test_compute_hessians_bern()")
+
+        self.noise_model = "bern"
+        self._test_compute_hessians(sparse=False)
+        #self._test_compute_hessians(sparse=True) # TODO tf>=1.13 waiting for tf.sparse.expand_dims to work
+
+        return True
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py
index dde1a746..1dd668b7 100644
--- a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py
+++ b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py
@@ -30,6 +30,8 @@ def simulate(self):
             from batchglm.api.models.glm_norm import Simulator
         elif self.noise_model == "beta":
             from batchglm.api.models.glm_beta import Simulator
+        elif self.noise_model == "bern":
+            from batchglm.api.models.glm_bern import Simulator
         else:
             raise ValueError("noise_model not recognized")
 
@@ -53,6 +55,8 @@ def get_jacs(
             from batchglm.api.models.glm_norm import Estimator
         elif self.noise_model == "beta":
             from batchglm.api.models.glm_beta import Estimator
+        elif self.noise_model == "bern":
+            from batchglm.api.models.glm_bern import Estimator
         else:
             raise ValueError("noise_model not recognized")
 
@@ -98,6 +102,8 @@ def compare_jacs(
             from batchglm.api.models.glm_norm import InputData
         elif self.noise_model == "beta":
             from batchglm.api.models.glm_beta import InputData
+        elif self.noise_model == "bern":
+            from batchglm.api.models.glm_bern import InputData
         else:
             raise ValueError("noise_model not recognized")
 
@@ -132,6 +138,10 @@ def compare_jacs(
         t1_tf = time.time()
         t_tf = t1_tf - t0_tf
 
+
+        # print("J_analytic: ", J_analytic)
+        # print("J_tf: ", J_tf)
+
         # Make sure that jacobians are not all zero which might make evaluation of equality difficult.
         assert np.sum(np.abs(J_analytic)) > 1e-10, \
             "jacobians too small to perform test: %f" % np.sum(np.abs(J_analytic))
@@ -145,7 +155,8 @@ def compare_jacs(
 
         #print(J_analytic)
         #print((J_tf - J_analytic) / J_tf)
-        mrad = np.max(np.abs((J_tf - J_analytic) / J_tf))
+        # mrad = np.max(np.abs((J_tf - J_analytic) / J_tf))
+        mrad = np.max(np.abs(J_tf - J_analytic))
         assert mrad < 1e-12, mrad
 
         return True
@@ -177,6 +188,7 @@ def test_compute_jacobians_norm(self):
         self._test_compute_jacobians(sparse=False)
         #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet.
 
+
 class Test_Jacobians_GLM_BETA(Test_Jacobians_GLM_ALL, unittest.TestCase):
 
     def test_compute_jacobians_beta(self):
@@ -189,5 +201,17 @@ def test_compute_jacobians_beta(self):
         #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet.
 
 
+class Test_Jacobians_GLM_BERN(Test_Jacobians_GLM_ALL, unittest.TestCase):
+
+    def test_compute_jacobians_bern(self):
+        logging.getLogger("tensorflow").setLevel(logging.INFO)
+        logging.getLogger("batchglm").setLevel(logging.INFO)
+        logging.getLogger("batchglm").error("Test_Jacobians_GLM_BERN.test_compute_jacobians_bern()")
+
+        self.noise_model = "bern"
+        self._test_compute_jacobians(sparse=False)
+        #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet.
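+
+
+# Illustrative sketch (numpy only, not wired into the test suite): the
+# analytic bernoulli jacobian weight checked above, X*(1 - loc) - (1 - X)*loc
+# as implemented in glm_bern.jacobians, algebraically reduces to X - loc.
+def _bern_jac_weight_identity_sketch():
+    import numpy as np
+    X = np.random.binomial(1, 0.4, size=(100, 3)).astype(float)
+    loc = np.full_like(X, 0.4)
+    assert np.allclose(X * (1 - loc) - (1 - X) * loc, X - loc)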
diff --git a/batchglm/utils/random.py b/batchglm/utils/random.py
index 1a6c6d54..f3fcd8ec 100644
--- a/batchglm/utils/random.py
+++ b/batchglm/utils/random.py
@@ -197,4 +197,29 @@ def sample(self, size=None):
             b=self.q,
             size=size
         )
-        return random_data
\ No newline at end of file
+        return random_data
+
+
+class Bernoulli:
+    r"""
+    Bernoulli distribution.
+    """
+
+    p: np.ndarray
+
+    def __init__(self, mean):
+        self.p = mean
+
+    def sample(self, size=None):
+        """
+        Sample random data of shape `size` from all distributions.
+
+        :param size: Number (or shape) of samples to draw per distribution.
+        :return: numpy array containing the sampled data
+        """
+        random_data = np.random.binomial(
+            n=1,
+            p=self.p,
+            size=size
+        )
+        return random_data
\ No newline at end of file
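The Bernoulli sampler above mirrors the existing NegativeBinomial, Normal and Beta helpers and is re-exported through batchglm.api.utils.random earlier in this patch. A minimal usage sketch, assuming only what the class itself defines; the toy probabilities and sample shape are illustrative:

import numpy as np
from batchglm.api.utils.random import Bernoulli

# Hypothetical per-feature success probabilities:
p = np.array([0.1, 0.5, 0.9])

dist = Bernoulli(mean=p)
x = dist.sample(size=(10000, 3))  # 0/1 draws, one column per probability

# The column means should approach p for large sample sizes:
print(x.mean(axis=0))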
diff --git a/tutorials/glm_norm.ipynb b/tutorials/glm_norm.ipynb
index be2af931..3b4cca32 100644
--- a/tutorials/glm_norm.ipynb
+++ b/tutorials/glm_norm.ipynb
@@ -76,19 +76,19 @@
    [numeric cell-output diff omitted: re-generated simulated data matrix, "Dimensions without coordinates: observations, features"]
@@ -110,19 +110,19 @@
    [numeric cell-output diff omitted: a second re-generated simulated (observations x features) matrix, "Dimensions without coordinates: observations, features"]
@@ -144,16 +144,16 @@
    [numeric cell-output diff omitted: re-generated simulated location parameters, coordinates design_loc_params 'Intercept' ... 'batch[T.3]']
@@ -177,11 +177,11 @@
    [numeric cell-output diff omitted: re-generated simulated scale parameters, coordinates design_scale_params 'Intercept' ... 'batch[T.3]']
@@ -219,7 +219,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -227,14 +227,14 @@
    "     init_a = \"standard\", init_b = \"standard\", \n",
    "     provide_optimizers = {\n",
    "         \"gd\": True, \"adam\": True, \"adagrad\": True, \"rmsprop\": True,\n",
-   "         \"nr\": True, \"nr_tr\": True,\n",
-   "         \"irls\": True, \"irls_gd\": True, \"irls_tr\": True, \"irls_gd_tr\": True,\n",
-   "     })"
+   "         \"nr\": False, \"nr_tr\": False,\n",
+   "         \"irls\": False, \"irls_gd\": False, \"irls_tr\": False, \"irls_gd_tr\": False,\n",
+   "     }, provide_hessian = False)"
   ]
  },
  {
   "cell_type": "code",
-  "execution_count": 11,
+  "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
@@ -253,27 +253,15 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 12,
+  "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     [14 log lines omitted: "nr_tr" optimizer progress, loss 650.374732 -> 650.182757, all 100 models converged within 13 steps]
+     "INFO:tensorflow:Step: 0 loss: 615.682423 models converged 0\n",
+     "INFO:tensorflow:Step: 1 loss: 615.682423, converged 100 in 0.227 sec., updated 100, {f: 100, g: 100, x: 100}\n"
     ]
    }
   ],
@@ -281,17 +269,17 @@
   "estimator.train_sequence(training_strategy=[\n",
   "    {\n",
   "        \"learning_rate\": 1,\n",
-  "        \"convergence_criteria\": \"all_converged_ll\",\n",
+  "        \"convergence_criteria\": \"all_converged\",\n",
   "        \"stopping_criteria\": 1e-6,\n",
   "        \"use_batching\": False,\n",
-  "        \"optim_algo\": \"nr_tr\",\n",
+  "        \"optim_algo\": \"gd\",\n",
   "    },\n",
   "])"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": 13,
+ "execution_count": 18,
  "metadata": {},
  "outputs": [],
  "source": [
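Unescaped for readability, the two changed source cells correspond to roughly the following plain Python. The opening line of the first cell, which constructs the Estimator and passes the input data, lies outside the hunk, so the input_data argument is an assumption here:

# Sketch of the edited notebook cells; arguments before init_a are assumed.
estimator = Estimator(
    input_data,                       # assumed: defined in an earlier cell
    init_a="standard", init_b="standard",
    provide_optimizers={
        "gd": True, "adam": True, "adagrad": True, "rmsprop": True,
        "nr": False, "nr_tr": False,
        "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False,
    },
    provide_hessian=False,
)

estimator.train_sequence(training_strategy=[
    {
        "learning_rate": 1,
        "convergence_criteria": "all_converged",
        "stopping_criteria": 1e-6,
        "use_batching": False,
        "optim_algo": "gd",
    },
])

With the Newton-type and IRLS optimizers and the hessian disabled, plain gradient descent is the only second-order-free path left, which is consistent with the much shorter training log above.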
@@ -300,320 +288,78 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 14,
+  "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-      [scalar output omitted: "array(-84.127032)", the previous value of np.mean(store.a - sim.a)]
+      [numeric output omitted: the re-run cell prints the full difference array store.a - sim.a, coordinates design_loc_params 'Intercept' ... 'batch[T.3]']
      ]
     },
-    "execution_count": 14,
+    "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-   "np.mean(store.a - sim.a)"
+   "store.a - sim.a"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
-  [scalar output omitted: "array(-0.637152)", the previous value of np.mean(store.b - sim.b)]
- ],
+ "outputs": [],
  "source": [
   "np.mean(store.b - sim.b)"
  ]
 },
 {
  "cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
  "metadata": {},
- "outputs": [
-  [numeric output omitted: full printout of the fitted location parameters store.a as a (design_loc_params x features) array]
- ],
+ "outputs": [],
+ "outputs": [], "source": [ "store.a" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "array([[1.363856e+05, 1.866230e+05, 1.615200e+05, ..., 1.825047e+05,\n", - " 1.812539e+05, 1.928044e+05],\n", - " [1.504359e+02, 1.926565e+02, 1.503140e+02, ..., 1.998073e+02,\n", - " 1.363696e+02, 1.546470e+02],\n", - " [1.771764e+02, 1.630434e+02, 1.008328e+02, ..., 1.960234e+02,\n", - " 1.361614e+02, 1.567535e+02],\n", - " [1.909409e+02, 1.566499e+02, 1.457898e+02, ..., 1.685650e+02,\n", - " 1.806504e+02, 1.750800e+02],\n", - " [1.820867e+02, 1.572823e+02, 1.671585e+02, ..., 1.501432e+02,\n", - " 1.783009e+02, 1.863868e+02]])\n", - "Coordinates:\n", - " * design_loc_params (design_loc_params) object 'Intercept' ... 'batch[T.3]'\n", - "Dimensions without coordinates: features" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sim.a" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "array([[ 3.819261, 3.583939, 2.574651, ..., 3.879411, 3.496541, 3.304978],\n", - " [-2.134151, -2.023047, -0.49702 , ..., -1.757583, -1.907441, -0.709235],\n", - " [-2.230739, -2.041629, -0.979552, ..., -1.617446, -0.634581, -2.297003],\n", - " [-1.176945, -2.103288, -2.280364, ..., -1.595779, -2.055217, -1.737918],\n", - " [-2.279375, -1.399903, -1.978251, ..., -2.204245, -2.113975, -1.644247]])\n", - "Coordinates:\n", - " * design_scale_params (design_scale_params) object 'Intercept' ... 'batch[T.3]'\n", - "Dimensions without coordinates: features" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "store.b - sim.b" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "array([[1.957463, 2.038514, 2.301491, ..., 1.100939, 1.967538, 1.471212],\n", - " [2.152351, 2.031339, 0.49702 , ..., 1.757583, 1.929907, 0.709235],\n", - " [2.210487, 2.02778 , 0.979552, ..., 1.617446, 0.610368, 2.297003],\n", - " [1.15282 , 2.089364, 2.280364, ..., 1.595779, 2.034945, 1.737918],\n", - " [2.258974, 1.385695, 1.978251, ..., 2.204245, 2.094227, 1.644247]])\n", - "Coordinates:\n", - " * design_scale_params (design_scale_params) object 'Intercept' ... 'batch[T.3]'\n", - "Dimensions without coordinates: features" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sim.b" ]