diff --git a/diffxpy/pkg_constants.py b/diffxpy/pkg_constants.py index 12dfea2..48cabe0 100644 --- a/diffxpy/pkg_constants.py +++ b/diffxpy/pkg_constants.py @@ -15,4 +15,4 @@ BATCHGLM_BACKEND = "tf1" BATCHGLM_FEATUREWISE = True -BATCHGLM_AUTOGRAD = True +BATCHGLM_AUTOGRAD = False diff --git a/diffxpy/testing/tests.py b/diffxpy/testing/tests.py index c378494..a8c9445 100644 --- a/diffxpy/testing/tests.py +++ b/diffxpy/testing/tests.py @@ -194,6 +194,8 @@ def _fit( constructor_args = {} if quick_scale is not None: constructor_args["quick_scale"] = quick_scale + if batch_size is not None and backend != "tf2": + constructor_args["batch_size"] = batch_size # Backend-specific constructor arguments: if backend.lower() in ["tf1"]: constructor_args['provide_optimizers'] = { diff --git a/diffxpy/unit_test/test_compare_numpy_tf2.py b/diffxpy/unit_test/test_compare_numpy_tf2.py new file mode 100644 index 0000000..8ddb4f1 --- /dev/null +++ b/diffxpy/unit_test/test_compare_numpy_tf2.py @@ -0,0 +1,81 @@ +import unittest +import logging +import diffxpy.api as de +import numpy as np + + +class TestBackendsNb(unittest.TestCase): + + """ + Negative binomial noise model unit tests that test whether the wald test results + in the same logfoldchange, p- and q-values and the coefficents are the same after + fitting when using the same simulated data. + """ + + def __init__(self, *args, **kwargs): + + super(TestBackendsNb, self).__init__(*args, **kwargs) + + from batchglm.api.models.numpy.glm_nb import Simulator + self.sim = Simulator(num_observations=10000, num_features=200) + self.sim.generate_sample_description(num_batches=0, num_conditions=4) + self.sim.generate_params() + self.sim.generate_data() + + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("diffxpy").setLevel(logging.WARNING) + + self.numpy_results = de.test.wald( + data=self.sim.input_data, + sample_description=self.sim.sample_description, + factor_loc_totest="condition", + formula_loc="~ 1 + condition + batch", + noise_model='nb', + backend='numpy' + ) + _ = self.numpy_results.summary() + + self.tf2_results = de.test.wald( + data=self.sim.input_data, + sample_description=self.sim.sample_description, + factor_loc_totest="condition", + formula_loc="~ 1 + condition + batch", + noise_model='nb', + backend='tf2' + ) + _ = self.tf2_results.summary() + + """ + Test with numpy: + """ + + def test_coeff_similarity(self): + + a_var_max_diff = np.max(np.abs(self.tf2_results.model_estim.a_var-self.numpy_results.model_estim.a_var)) + b_var_max_diff = np.max(np.abs(self.tf2_results.model_estim.b_var-self.numpy_results.model_estim.b_var)) + assert a_var_max_diff < 1e-6 and b_var_max_diff < 1e-6, \ + ("a_var_max_diff: %f, b_var_max_diff: %f", a_var_max_diff, b_var_max_diff) + + return True + + def test_logfoldchange_similarity(self): + max_diff = np.max(np.abs(self.tf2_results.summary()['log2fc'].values-self.numpy_results.summary()['log2fc'].values)) + assert max_diff < 1e-12, ("log_fold_change difference: %f > 1e-12", max_diff) + + return True + + def test_pval_similarity(self): + max_diff = np.max(np.abs(self.tf2_results.pval-self.numpy_results.pval)) + assert max_diff < 1e-12, ("p-val difference: %f > 1e-12", max_diff) + + return True + + def test_qval_similarity(self): + max_diff = np.max(np.abs(self.tf2_results.summary()['qval'].values-self.numpy_results.summary()['qval'].values)) + assert max_diff < 1e-12, ("q-val difference: %f > 1e-12", max_diff) + + return True + +if __name__ == '__main__': + unittest.main() diff --git a/diffxpy/unit_test/test_fit.py b/diffxpy/unit_test/test_fit.py index 99c5430..39d3bce 100644 --- a/diffxpy/unit_test/test_fit.py +++ b/diffxpy/unit_test/test_fit.py @@ -17,7 +17,7 @@ def _test_model_fit( """ Test if de.wald() generates a uniform p-value distribution if it is given data simulated based on the null model. Returns the p-value - of the two-side Kolmgorov-Smirnov test for equality of the observed + of the two-side Kolmgorov-Smirnov test for equality of the observed p-value distribution and a uniform distribution. :param n_cells: Number of cells to simulate (number of observations per test). @@ -34,7 +34,7 @@ def _test_model_fit( raise ValueError("noise model %s not recognized" % noise_model) sim = Simulator(num_observations=n_cells, num_features=n_genes) - sim.generate_sample_description(num_batches=0, num_conditions=0) + sim.generate_sample_description(num_batches=1, num_conditions=1) sim.generate_params(rand_fn_scale=rand_fn_scale) sim.generate_data() @@ -77,7 +77,7 @@ def _test_model_fit_partition( raise ValueError("noise model %s not recognized" % noise_model) sim = Simulator(num_observations=n_cells, num_features=n_genes) - sim.generate_sample_description(num_batches=0, num_conditions=0) + sim.generate_sample_description(num_batches=1, num_conditions=1) sim.generate_params(rand_fn_scale=rand_fn_scale) sim.generate_data() @@ -121,7 +121,7 @@ def _test_residuals_fit( raise ValueError("noise model %s not recognized" % noise_model) sim = Simulator(num_observations=n_cells, num_features=n_genes) - sim.generate_sample_description(num_batches=0, num_conditions=0) + sim.generate_sample_description(num_batches=1, num_conditions=1) sim.generate() random_sample_description = pd.DataFrame({ @@ -209,23 +209,23 @@ def test_residuals_fit( noise_model="nb" ) - +""" class TestFitNorm(_TestFit, unittest.TestCase): - """ + Normal noise model unit tests that tests whether model fit relay works. - """ + def test_model_fit( self, n_cells: int = 2000, n_genes: int = 2 ): - """ + Test if model fit for "norm" noise model works. :param n_cells: Number of cells to simulate (number of observations per test). :param n_genes: Number of genes to simulate (number of tests). - """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) @@ -242,12 +242,12 @@ def test_model_fit_partition( n_cells: int = 2000, n_genes: int = 2 ): - """ + Test if partitioned model fit for "norm" noise model works. :param n_cells: Number of cells to simulate (number of observations per test). :param n_genes: Number of genes to simulate (number of tests). - """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) @@ -264,12 +264,12 @@ def test_residuals_fit( n_cells: int = 2000, n_genes: int = 2 ): - """ + Test if residual fit for "norm" noise model works. :param n_cells: Number of cells to simulate (number of observations per test). :param n_genes: Number of genes to simulate (number of tests). - """ + logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) logging.getLogger("diffxpy").setLevel(logging.WARNING) @@ -280,7 +280,7 @@ def test_residuals_fit( n_genes=n_genes, noise_model="norm" ) - +""" if __name__ == '__main__': unittest.main()