From eb8dd9df574f76761164256bf46ad529c68b5811 Mon Sep 17 00:00:00 2001 From: Danila Date: Tue, 2 Jul 2024 20:27:17 -0700 Subject: [PATCH] Update notebooks with the upcoming changes in v0.3 --- .../notebooks/annotations_management.ipynb | 34 +- docs/source/notebooks/axes.ipynb | 16 +- docs/source/notebooks/nuances.ipynb | 291 ++++++++++++++---- docs/source/notebooks/quickstart_mudata.ipynb | 26 +- 4 files changed, 261 insertions(+), 106 deletions(-) diff --git a/docs/source/notebooks/annotations_management.ipynb b/docs/source/notebooks/annotations_management.ipynb index 5252406..3ad518f 100644 --- a/docs/source/notebooks/annotations_management.ipynb +++ b/docs/source/notebooks/annotations_management.ipynb @@ -2059,7 +2059,7 @@ { "data": { "text/html": [ - "
MuData object with n_obs × n_vars = 60 × 100\n",
+       "
MuData object with n_obs × n_vars = 60 × 100 (shared var) \n",
        "  3 modalities\n",
        "    mod1:\t10 x 100\n",
        "      obs:\t'dataset'\n",
@@ -2069,7 +2069,7 @@
        "      obs:\t'dataset', 'species', 'reference'
" ], "text/plain": [ - "MuData object with n_obs × n_vars = 60 × 100\n", + "MuData object with n_obs × n_vars = 60 × 100 (shared var) \n", " 3 modalities\n", " mod1:\t10 x 100\n", " obs:\t'dataset'\n", @@ -2211,7 +2211,7 @@ { "data": { "text/html": [ - "
MuData object with n_obs × n_vars = 10 × 100\n",
+       "
MuData object with n_obs × n_vars = 10 × 100 (shared obs and var) \n",
        "  2 modalities\n",
        "    raw:\t10 x 100\n",
        "      obs:\t'status'\n",
@@ -2220,7 +2220,7 @@
        "      var:\t'filtered'
" ], "text/plain": [ - "MuData object with n_obs × n_vars = 10 × 100\n", + "MuData object with n_obs × n_vars = 10 × 100 (shared obs and var) \n", " 2 modalities\n", " raw:\t10 x 100\n", " obs:\t'status'\n", @@ -2337,10 +2337,10 @@ { "data": { "text/html": [ - "
MuData object with n_obs × n_vars = 10 × 300\n",
+       "
MuData object with n_obs × n_vars = 10 × 300 (shared obs and var) \n",
        "  obs:\t'dataset'\n",
        "  2 modalities\n",
-       "    mod1:\tMuData object with n_obs × n_vars = 10 × 300\n",
+       "    mod1:\tMuData object with n_obs × n_vars = 10 × 100 (shared obs and var) \n",
        "      2 modalities\n",
        "        raw:\t10 x 100\n",
        "          obs:\t'status'\n",
@@ -2350,10 +2350,10 @@
        "    mod2:\t10 x 200
" ], "text/plain": [ - "MuData object with n_obs × n_vars = 10 × 300\n", + "MuData object with n_obs × n_vars = 10 × 300 (shared obs and var) \n", " obs:\t'dataset'\n", " 2 modalities\n", - " mod1:\tMuData object with n_obs × n_vars = 10 × 300\n", + " mod1:\tMuData object with n_obs × n_vars = 10 × 100 (shared obs and var) \n", " 2 modalities\n", " raw:\t10 x 100\n", " obs:\t'status'\n", @@ -2383,13 +2383,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'mod1': MuData object with n_obs × n_vars = 10 × 100\n", - " 2 modalities\n", - " raw:\t10 x 100\n", - " obs:\t'status'\n", - " qced:\t8 x 50\n", - " obs:\t'status', 'filtered'\n", - " var:\t'filtered', 'mod2': AnnData object with n_obs × n_vars = 10 × 200}\n" + "MuData\n", + "├─ mod1 MuData [shared obs and var] (10 × 100)\n", + "│ ├─ raw AnnData (10 x 100)\n", + "│ └─ qced AnnData (8 x 50)\n", + "└─ mod2 AnnData (10 x 200)\n" ] } ], @@ -2514,9 +2512,9 @@ ], "metadata": { "kernelspec": { - "display_name": "ad0.9 (issue126)", + "display_name": "Python 3.12 (main)", "language": "python", - "name": "issue126" + "name": "main" }, "language_info": { "codemirror_mode": { @@ -2528,7 +2526,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/docs/source/notebooks/axes.ipynb b/docs/source/notebooks/axes.ipynb index 6999d93..dcf1abe 100644 --- a/docs/source/notebooks/axes.ipynb +++ b/docs/source/notebooks/axes.ipynb @@ -59,7 +59,7 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install mudata" + "%pip install mudata" ] }, { @@ -127,7 +127,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mudata/_core/mudata.py:445: UserWarning: Cannot join columns with the same name because var_names are intersecting.\n", + "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mudata/src/mudata/_core/mudata.py:869: UserWarning: Cannot join columns with the same name because var_names are intersecting.\n", " warnings.warn(\n" ] }, @@ -314,13 +314,13 @@ { "data": { "text/html": [ - "
MuData object with n_obs × n_vars = 600 × 1000\n",
+       "
MuData object with n_obs × n_vars = 600 × 1000 (shared var) \n",
        "  2 modalities\n",
        "    dat1:\t100 x 1000\n",
        "    dat2:\t500 x 1000
" ], "text/plain": [ - "MuData object with n_obs × n_vars = 600 × 1000\n", + "MuData object with n_obs × n_vars = 600 × 1000 (shared var) \n", " 2 modalities\n", " dat1:\t100 x 1000\n", " dat2:\t500 x 1000" @@ -441,13 +441,13 @@ { "data": { "text/html": [ - "
MuData object with n_obs × n_vars = 100 × 900\n",
+       "
MuData object with n_obs × n_vars = 100 × 900 (shared obs and var) \n",
        "  2 modalities\n",
        "    raw:\t100 x 900\n",
        "    preproc:\t100 x 300
" ], "text/plain": [ - "MuData object with n_obs × n_vars = 100 × 900\n", + "MuData object with n_obs × n_vars = 100 × 900 (shared obs and var) \n", " 2 modalities\n", " raw:\t100 x 900\n", " preproc:\t100 x 300" @@ -466,7 +466,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -480,7 +480,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.12.3" }, "toc": { "base_numbering": 1, diff --git a/docs/source/notebooks/nuances.ipynb b/docs/source/notebooks/nuances.ipynb index 24404b6..e48daf7 100644 --- a/docs/source/notebooks/nuances.ipynb +++ b/docs/source/notebooks/nuances.ipynb @@ -36,7 +36,7 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install mudata" + "%pip install mudata" ] }, { @@ -45,7 +45,7 @@ "metadata": {}, "outputs": [], "source": [ - "import mudata as md\n", + "import mudata\n", "from mudata import MuData, AnnData" ] }, @@ -83,6 +83,13 @@ "mod2 = AnnData(X=np.dot(z, w2.T))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -138,9 +145,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mudata/_core/mudata.py:404: UserWarning: Cannot join columns with the same name because var_names are intersecting.\n", + "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mudata/src/mudata/_core/mudata.py:869: UserWarning: Cannot join columns with the same name because var_names are intersecting.\n", " warnings.warn(\n", - "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mudata/_core/mudata.py:852: UserWarning: Modality names will be prepended to var_names since there are identical var_names in different modalities.\n", + "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mudata/src/mudata/_core/mudata.py:1478: UserWarning: Modality names will be prepended to var_names since there are identical var_names in different modalities.\n", " warnings.warn(\n" ] } @@ -156,13 +163,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Variable names in AnnData objects" + " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "### Variable names in AnnData objects\n", + "\n", "In the example above it is worth pointing out that `.var_names_make_unique()` is an in-place operation, just as [the same method](https://anndata.readthedocs.io/en/stable/anndata.AnnData.var_names_make_unique.html) is in `anndata`.\n", "\n", "Hence original AnnData objects' `.var_names` have also been modified:" @@ -194,35 +203,41 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Update" + "## Update\n", + "\n", + "> ***NB:** If individual modalities are changed, updating the MuData object containing it might be required.*\n", + "\n", + "Modalities in ``MuData`` objects are full-featured ``AnnData`` objects. Hence they can be operated individually, and their ``MuData`` parent will have to be updated to fetch this information." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "> ***NB:** If individual modalities are changed, updating the MuData object containing it might be required.*" + ">**NB:** Starting from `v0.3`, `mudata` will be adopting a more flexible approach to metadata management: updating global index with `.update()` will become independent from managing columns, which can now be done with `.pull_obs()`/`.pull_var()` and `.push_obs()`/`.push_var()`." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Modalities in ``MuData`` objects are full-featured ``AnnData`` objects. Hence they can be operated individually, and their ``MuData`` parent will have to be updated to fetch this information." + "See more about annotations management in [in the respective tutorial](./annotations_management.ipynb)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Observations" + " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Consider the following example: a new column has been added to a modality-specific metadata table:" + "### Filtering data\n", + "\n", + "In rare cases some observations (or variables) can be dropped from all the contained modalities:" ] }, { @@ -231,14 +246,17 @@ "metadata": {}, "outputs": [], "source": [ - "mod1.obs[\"mod1_profiled\"] = True" + "smaller_mdata = MuData({\n", + " \"mod1\": mod1[:900].copy(),\n", + " \"mod2\": mod2[:900].copy(),\n", + "})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "While `mdata` includes `mod1` as its first modality, it currently does not know about this change:" + "While `smaller_mdata` includes `mod1` and `mod2` as its modalities, it currently does not know about this change:" ] }, { @@ -248,8 +266,17 @@ "outputs": [ { "data": { + "text/html": [ + "
MuData object with n_obs × n_vars = 900 × 300\n",
+       "  2 modalities\n",
+       "    mod1:\t900 x 100\n",
+       "    mod2:\t900 x 200
" + ], "text/plain": [ - "Index([], dtype='object')" + "MuData object with n_obs × n_vars = 900 × 300\n", + " 2 modalities\n", + " mod1:\t900 x 100\n", + " mod2:\t900 x 200" ] }, "execution_count": 8, @@ -258,14 +285,14 @@ } ], "source": [ - "mdata.obs.columns" + "smaller_mdata" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "`.update()` method will fetch these updates and propagate them to the global `.obs` table." + "`.update()` method will fetch these updates:" ] }, { @@ -273,54 +300,19 @@ "execution_count": 9, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['mod1:mod1_profiled'], dtype='object')\n" - ] - }, { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mod1:mod1_profiled
0True
1True
\n", - "
" + "
MuData object with n_obs × n_vars = 900 × 300\n",
+       "  2 modalities\n",
+       "    mod1:\t900 x 100\n",
+       "    mod2:\t900 x 200
" ], "text/plain": [ - " mod1:mod1_profiled\n", - "0 True\n", - "1 True" + "MuData object with n_obs × n_vars = 900 × 300\n", + " 2 modalities\n", + " mod1:\t900 x 100\n", + " mod2:\t900 x 200" ] }, "execution_count": 9, @@ -328,17 +320,182 @@ "output_type": "execute_result" } ], + "source": [ + "smaller_mdata.update()\n", + "smaller_mdata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice the global dimensions are now correctly reflected in the `MuData` object." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Observations annotations\n", + "\n", + "Consider the following example: a new column has been added to a modality-specific metadata table:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "mod1.obs[\"mod1_profiled\"] = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "While `mdata` includes `mod1` as its first modality, nothing has changed at the global level of the annotation:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index([], dtype='object')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdata.obs.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`.update()` method will only sync the `obs_names`:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# default from v0.4\n", + "mudata.set_options(pull_on_update=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index([], dtype='object')\n" + ] + } + ], "source": [ "mdata.update()\n", - "print(mdata.obs.columns)\n", - "mdata.obs.head(2)" + "print(mdata.obs.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we need the annotation at the global level, we can copy it from the all the underlying modalities:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['mod1:mod1_profiled'], dtype='object')\n" + ] + } + ], + "source": [ + "mdata.pull_obs()\n", + "print(mdata.obs.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "del mdata.obs[\"mod1:mod1_profiled\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "As `MuData` objects are designed with shared observations by default, this annotation is automatically prefixed by the modality that originated this annotation." + "As `MuData` objects are designed with shared observations by default, this annotation is automatically prefixed by the modality that originated this annotation.\n", + "\n", + "There is however flexibility when it comes to using prefixes for observations annotations that are specific to individual modalities:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['mod1_profiled'], dtype='object')\n" + ] + } + ], + "source": [ + "mdata.pull_obs(prefix_unique=False)\n", + "print(mdata.obs.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " ] }, { @@ -357,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -365,12 +522,12 @@ "mod2.var[\"assay\"] = \"B\"\n", "\n", "# Will fetch these values\n", - "mdata.update()" + "mdata.pull_var()" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -431,7 +588,7 @@ "mod2:88 B" ] }, - "execution_count": 11, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -458,7 +615,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -472,7 +629,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.12.3" }, "toc": { "base_numbering": 1, diff --git a/docs/source/notebooks/quickstart_mudata.ipynb b/docs/source/notebooks/quickstart_mudata.ipynb index 0026ac7..bf6946f 100644 --- a/docs/source/notebooks/quickstart_mudata.ipynb +++ b/docs/source/notebooks/quickstart_mudata.ipynb @@ -31,7 +31,7 @@ "metadata": {}, "outputs": [], "source": [ - "! pip install mudata" + "%pip install mudata" ] }, { @@ -482,11 +482,11 @@ "\n", " B bool numpy.ndarray \n", "
Distances
.obsp0 elements
No distances
A
1000 × 100
AnnData object 1000 obs × 100 var
Matrix
.X
\n", - " float32    numpy.ndarray\n", + " float64    numpy.ndarray\n", "
Layers
.layers0 elements
No layers
Metadata
.obs0 elements
No metadata
Embeddings
.obsm0 elements
No embeddings
Distances
.obsp0 elements
No distances
Miscellaneous
.uns1 elements
\n", " \n", "
misc dict 1 element adata: True
B
1000 × 50
AnnData object 1000 obs × 50 var
Matrix
.X
\n", - " float32    numpy.ndarray\n", + " float64    numpy.ndarray\n", "
Layers
.layers0 elements
No layers
Metadata
.obs0 elements
No metadata
Embeddings
.obsm0 elements
No embeddings
Distances
.obsp0 elements
No distances
Miscellaneous
.uns0 elements
No miscellaneous

" ], "text/plain": [ @@ -543,14 +543,14 @@ { "data": { "text/html": [ - "
MuData object with n_obs × n_vars = 1000 × 150 backed at '/var/folders/xt/tvy3s7w17vn1b700k_351pz00000gp/T/muon_getting_started_m8own7bb.h5mu'\n",
+       "
MuData object with n_obs × n_vars = 1000 × 150 backed at '/var/folders/6f/ht39xtd54tv4n5p35ccnpgf80000gn/T/muon_getting_started_x3o0r9iz.h5mu'\n",
        "  2 modalities\n",
        "    A:\t1000 x 100\n",
        "      uns:\t'misc'\n",
        "    B:\t1000 x 50
" ], "text/plain": [ - "MuData object with n_obs × n_vars = 1000 × 150 backed at '/var/folders/xt/tvy3s7w17vn1b700k_351pz00000gp/T/muon_getting_started_m8own7bb.h5mu'\n", + "MuData object with n_obs × n_vars = 1000 × 150 backed at '/var/folders/6f/ht39xtd54tv4n5p35ccnpgf80000gn/T/muon_getting_started_x3o0r9iz.h5mu'\n", " 2 modalities\n", " A:\t1000 x 100\n", " uns:\t'misc'\n", @@ -690,21 +690,21 @@ "details[open] > .summary-mod::before {\n", " content: '○';\n", "}\n", - "MuData object 1000 obs × 150 var in 2 modalities
backed at /var/folders/xt/tvy3s7w17vn1b700k_351pz00000gp/T/muon_getting_started_m8own7bb.h5mu
Metadata
.obs0 elements
No metadata
Embeddings & mappings
.obsm2 elements
\n", + "MuData object 1000 obs × 150 var in 2 modalities
backed at /var/folders/6f/ht39xtd54tv4n5p35ccnpgf80000gn/T/muon_getting_started_x3o0r9iz.h5mu
Metadata
.obs0 elements
No metadata
Embeddings & mappings
.obsm2 elements
\n", " \n", " \n", "\n", " \n", - "
A bool numpy.ndarray
B bool numpy.ndarray
Distances
.obsp0 elements
No distances
A
1000 × 100
AnnData object 1000 obs × 100 var
backed at /var/folders/xt/tvy3s7w17vn1b700k_351pz00000gp/T/muon_getting_started_m8own7bb.h5mu
Matrix
.X
\n", - " float32    h5py._hl.dataset.Dataset\n", + "
Distances
.obsp0 elements
No distances
A
1000 × 100
AnnData object 1000 obs × 100 var
backed at /var/folders/6f/ht39xtd54tv4n5p35ccnpgf80000gn/T/muon_getting_started_x3o0r9iz.h5mu
Matrix
.X
\n", + " float64    h5py._hl.dataset.Dataset\n", "
Layers
.layers0 elements
No layers
Metadata
.obs0 elements
No metadata
Embeddings
.obsm0 elements
No embeddings
Distances
.obsp0 elements
No distances
Miscellaneous
.uns1 elements
\n", " \n", - "
misc dict 1 element adata: True
B
1000 × 50
AnnData object 1000 obs × 50 var
backed at /var/folders/xt/tvy3s7w17vn1b700k_351pz00000gp/T/muon_getting_started_m8own7bb.h5mu
Matrix
.X
\n", - " float32    h5py._hl.dataset.Dataset\n", + "
B
1000 × 50
AnnData object 1000 obs × 50 var
backed at /var/folders/6f/ht39xtd54tv4n5p35ccnpgf80000gn/T/muon_getting_started_x3o0r9iz.h5mu
Matrix
.X
\n", + " float64    h5py._hl.dataset.Dataset\n", "
Layers
.layers0 elements
No layers
Metadata
.obs0 elements
No metadata
Embeddings
.obsm0 elements
No embeddings
Distances
.obsp0 elements
No distances
Miscellaneous
.uns0 elements
No miscellaneous

" ], "text/plain": [ - "MuData object with n_obs × n_vars = 1000 × 150 backed at '/var/folders/xt/tvy3s7w17vn1b700k_351pz00000gp/T/muon_getting_started_m8own7bb.h5mu'\n", + "MuData object with n_obs × n_vars = 1000 × 150 backed at '/var/folders/6f/ht39xtd54tv4n5p35ccnpgf80000gn/T/muon_getting_started_x3o0r9iz.h5mu'\n", " 2 modalities\n", " A:\t1000 x 100\n", " uns:\t'misc'\n", @@ -818,7 +818,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -832,7 +832,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.12.3" } }, "nbformat": 4,