diff --git a/docs/source/api.rst b/docs/source/api.rst
index 4da63fd1..cb2e1892 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -33,6 +33,7 @@ Please refer to the `Single Cell Best Practices Book`_ for more details.
     hvg_intersect
     hvg_batch
     score_cell_cycle
+    get_cell_cycle_genes
     reduce_data
 
 
diff --git a/scib/preprocessing.py b/scib/preprocessing.py
index 1fd8496e..209f3df2 100644
--- a/scib/preprocessing.py
+++ b/scib/preprocessing.py
@@ -1,7 +1,9 @@
 import logging
 import re
 import tempfile
+from typing import Literal
 
+import anndata as ad
 import numpy as np
 import pandas as pd
 import scanpy as sc
@@ -642,8 +644,24 @@ def reduce_data(
         sc.tl.umap(adata)
 
 
-# Cell Cycle
-def score_cell_cycle(adata, organism="mouse"):
+def score_cell_cycle(
+    adata: ad.AnnData,
+    organism: Literal[
+        "mouse",
+        "mus musculus",
+        "mus_musculus",
+        "human",
+        "homo sapiens",
+        "homo_sapiens",
+        "c_elegans",
+        "c elegans",
+        "caenorhabditis elegans",
+        "caenorhabditis_elegans",
+        "zebrafish",
+        "danio rerio",
+        "danio_rerio",
+    ] = "mouse",
+):
     """Score cell cycle score given an organism
 
     Wrapper function for `scanpy.tl.score_genes_cell_cycle`_
@@ -653,43 +671,111 @@ def score_cell_cycle(adata, organism="mouse"):
     Tirosh et al. cell cycle marker genes downloaded from
     https://raw.githubusercontent.com/theislab/scanpy_usage/master/180209_cell_cycle/data/regev_lab_cell_cycle_genes.txt
 
-    For human, mouse genes are capitalised and used directly. This is under the assumption that cell cycle genes are
-    well conserved across species.
+    See more on gene sets in :func:`~scib.preprocessing.get_cell_cycle_genes`.
+
+    This function picks gene IDs or gene names of the cell cycle genes, depending on what is present in the adata object.
 
     :param adata: anndata object containing
     :param organism: organism of gene names to match cell cycle genes
     :return: tuple of ``(s_genes, g2m_genes)`` of S-phase genes and G2- and M-phase genes scores
     """
-    import pathlib
-
-    root = pathlib.Path(__file__).parent
-
-    cc_files = {
-        "mouse": [
-            root / "resources/s_genes_tirosh.txt",
-            root / "resources/g2m_genes_tirosh.txt",
-        ],
-        "human": [
-            root / "resources/s_genes_tirosh_hm.txt",
-            root / "resources/g2m_genes_tirosh_hm.txt",
-        ],
-    }
 
-    with open(cc_files[organism][0]) as f:
-        s_genes = [x.strip() for x in f.readlines() if x.strip() in adata.var.index]
-    with open(cc_files[organism][1]) as f:
-        g2m_genes = [x.strip() for x in f.readlines() if x.strip() in adata.var.index]
+    def filter_genes(adata: ad.AnnData, df: pd.DataFrame, columns: list = None) -> list:
+        if columns is None:
+            columns = ["gene_name", "gene_id"]
+        elif isinstance(columns, str):
+            columns = [columns]
+        # collect, per identifier column, the entries that occur in adata.var_names
+        n_genes = 0
+        for col in columns:
+            _genes = [g for g in df[col] if g in adata.var_names]
+            if len(_genes) > n_genes:  # pick largest overlapping set
+                n_genes = len(_genes)
+                genes = _genes
+        # ``genes`` is only bound once a column matched, so fail before returning it
+        if n_genes == 0:
+            # pick random genes for error message
+            rand_genes = np.random.choice(adata.var_names, 10)
+            raise ValueError(
+                f"cell cycle genes not in adata\n organism: {organism}\n varnames: {rand_genes}\n cell cycle genes:\n {df}"
+            )
+        return genes
 
-    if (len(s_genes) == 0) or (len(g2m_genes) == 0):
-        rand_choice = np.random.randint(1, adata.n_vars, 10)
-        rand_genes = adata.var_names[rand_choice].tolist()
-        raise ValueError(
-            f"cell cycle genes not in adata\n organism: {organism}\n varnames: {rand_genes}"
-        )
+    # get gene sets
+    gene_map = get_cell_cycle_genes(organism)
 
+    # filter gene sets across data
+    s_genes = filter_genes(adata, gene_map.query("phase == 'S'"))
+    g2m_genes = filter_genes(adata, gene_map.query("phase == 'G2/M'"))
+
+    # compute scores
     sc.tl.score_genes_cell_cycle(adata, s_genes=s_genes, g2m_genes=g2m_genes)
 
 
+def get_cell_cycle_genes(
+    organism: Literal[
+        "mouse",
+        "mus musculus",
+        "mus_musculus",
+        "human",
+        "homo sapiens",
+        "homo_sapiens",
+        "c_elegans",
+        "c elegans",
+        "caenorhabditis elegans",
+        "caenorhabditis_elegans",
+        "zebrafish",
+        "danio rerio",
+        "danio_rerio",
+    ]
+) -> pd.DataFrame:
+    """
+    Get cell cycle genes for a given organism
+
+    Tirosh et al. cell cycle marker genes downloaded from
+    https://raw.githubusercontent.com/theislab/scanpy_usage/master/180209_cell_cycle/data/regev_lab_cell_cycle_genes.txt
+
+    The list is used as is for human; for mouse, the gene names are capitalised and used directly. This is under the
+    assumption that cell cycle genes are well conserved across species
+
+    For organisms other than human or mouse, orthology-mapped datasets from Tinyatlas were used:
+    https://github.com/hbc/tinyatlas/tree/master/cell_cycle
+
+    :param organism: organism of gene names to match cell cycle genes (matched case-insensitively)
+    :return: gene table read from the organism's TSV file, with at least ``phase``, ``gene_id`` and ``gene_name``
+    """
+    from pathlib import Path
+
+    organism_map = {
+        "mouse": "mus_musculus",
+        "mus musculus": "mus_musculus",
+        "human": "homo_sapiens",
+        "homo sapiens": "homo_sapiens",
+        "c_elegans": "caenorhabditis_elegans",
+        "caenorhabditis elegans": "caenorhabditis_elegans",
+        "c elegans": "caenorhabditis_elegans",
+        "zebrafish": "danio_rerio",
+        "danio rerio": "danio_rerio",
+    }
+    # additionally map each canonical name to itself so that it is accepted as input too
+    organism_map |= {x: x for x in organism_map.values()}
+
+    # get lower-case organism name
+    organism = organism.lower()
+    # NOTE(review): both checks below rely on ``assert`` and are skipped when Python runs with -O
+    assert (
+        organism in organism_map
+    ), f"organism '{organism}' not supported. Supported organisms: {list(organism_map.keys())}"
+
+    # get organism name needed for retrieving correct file
+    organism = organism_map[organism]
+
+    # read gene sets
+    gene_file = Path(__file__).parent / "resources" / f"cell_cycle_genes_{organism}.tsv"
+    assert gene_file.exists(), f"{gene_file} doesn't exist"
+    return pd.read_table(gene_file)
+
+
 def save_seurat(adata, path, batch, hvgs=None):
     """Save an ``anndata`` object to file as a Seurat object
 
diff --git a/scib/resources/cell_cycle_genes_caenorhabditis_elegans.tsv b/scib/resources/cell_cycle_genes_caenorhabditis_elegans.tsv
new file mode 100644
index 00000000..a8f78d1f
--- /dev/null
+++ b/scib/resources/cell_cycle_genes_caenorhabditis_elegans.tsv
@@ -0,0 +1,18 @@
+phase	modified	gene_id	gene_name
+G2/M	2024-07-04	WBGene00006974	zen-4
+G2/M	2024-07-04	WBGene00000257	bmk-1
+G2/M	2024-07-04	WBGene00000405	cdk-1
+G2/M	2024-07-04	WBGene00000099	air-2
+S	2024-07-04	WBGene00011912	T22C1.1
+S	2024-07-04	WBGene00004338	rfc-2
+S	2024-07-04	WBGene00004297	rad-51
+S	2024-07-04	WBGene00003154	mcm-2
+S	2024-07-04	WBGene00013241	ung-1
+S	2024-07-04	WBGene00009372	evl-18
+S	2024-07-04	WBGene00000382	cdc-6
+S	2024-07-04	WBGene00003418	msh-2
+S	2024-07-04	WBGene00003156	mcm-4
+S	2024-07-04	WBGene00009287	psf-2
+S	2024-07-04	WBGene00022141	chaf-2
+S	2024-07-04	WBGene00000794	crn-1
+S	2024-07-04	WBGene00022455	tyms-1
diff --git a/scib/resources/cell_cycle_genes_danio_rerio.tsv b/scib/resources/cell_cycle_genes_danio_rerio.tsv
new file mode 100644
index 00000000..92486f05
--- /dev/null
+++ b/scib/resources/cell_cycle_genes_danio_rerio.tsv
@@ -0,0 +1,47 @@
+phase	modified	gene_id	gene_name
+G2/M	2018-10-19	ENSDARG00000078654	tpx2
+G2/M	2018-10-19	ENSDARG00000075621	birc5a
+G2/M	2018-10-19	ENSDARG00000001313	g2e3
+G2/M	2018-10-19	ENSDARG00000061187	cbx5
+G2/M	2018-10-19	ENSDARG00000056621	ctcf
+G2/M	2018-10-19	ENSDARG00000041361	ttk
+G2/M	2018-10-19	ENSDARG00000038882	smc4
+G2/M	2018-10-19	ENSDARG00000005619	nek2
+G2/M	2018-10-19	ENSDARG00000055133	cenpf
+G2/M	2018-10-19	ENSDARG00000117089	CKS2
+G2/M	2018-10-19	ENSDARG00000024488	top2a
+G2/M	2018-10-19	ENSDARG00000043137	cdca8
+G2/M	2018-10-19	ENSDARG00000002403	nusap1
+G2/M	2018-10-19	ENSDARG00000010948	kif11
+G2/M	2018-10-19	ENSDARG00000054804	anp32e
+G2/M	2018-10-19	ENSDARG00000014013	lbr
+G2/M	2018-10-19	ENSDARG00000036180	ccnb2
+G2/M	2018-10-19	ENSDARG00000029722	hmgb2a
+G2/M	2018-10-19	ENSDARG00000087554	cdk1
+G2/M	2018-10-19	ENSDARG00000007971	cks1b
+G2/M	2018-10-19	ENSDARG00000102674	ckap5
+S	2018-10-19	ENSDARG00000057683	mcm6
+S	2018-10-19	ENSDARG00000043720	cdc45
+S	2018-10-19	ENSDARG00000018022	msh2
+S	2018-10-19	ENSDARG00000019507	mcm5
+S	2018-10-19	ENSDARG00000045308	pola1
+S	2018-10-19	ENSDARG00000040041	mcm4
+S	2018-10-19	ENSDARG00000035957	gmnn
+S	2018-10-19	ENSDARG00000037188	rpa2
+S	2018-10-19	ENSDARG00000057738	hells
+S	2018-10-19	ENSDARG00000057323	e2f8
+S	2018-10-19	ENSDARG00000002304	gins2
+S	2018-10-19	ENSDARG00000054155	pcna
+S	2018-10-19	ENSDARG00000039208	nasp
+S	2018-10-19	ENSDARG00000074410	brip1
+S	2018-10-19	ENSDARG00000019907	dscc1
+S	2018-10-19	ENSDARG00000023002	dtl
+S	2018-10-19	ENSDARG00000077620	cdca7a
+S	2018-10-19	ENSDARG00000056473	chaf1b
+S	2018-10-19	ENSDARG00000056414	usp1
+S	2018-10-19	ENSDARG00000100558	slbp
+S	2018-10-19	ENSDARG00000014017	rrm1
+S	2018-10-19	ENSDARG00000011404	fen1
+S	2018-10-19	ENSDARG00000056832	exo1
+S	2018-10-19	ENSDARG00000042894	tyms
+S	2018-10-19	ENSDARG00000103409	uhrf1
diff --git a/scib/resources/cell_cycle_genes_homo_sapiens.tsv b/scib/resources/cell_cycle_genes_homo_sapiens.tsv
new file mode 100644
index 00000000..ae8c97a6
--- /dev/null
+++ b/scib/resources/cell_cycle_genes_homo_sapiens.tsv
@@ -0,0 +1,98 @@
+gene_name	gene_id	phase
+MCM5	ENSG00000100297	S
+PCNA	ENSG00000132646	S
+TYMS	ENSG00000176890	S
+FEN1	ENSG00000168496	S
+MCM2	ENSG00000073111	S
+MCM4	ENSG00000104738	S
+RRM1	ENSG00000167325	S
+UNG	ENSG00000076248	S
+GINS2	ENSG00000131153	S
+MCM6	ENSG00000076003	S
+CDCA7	ENSG00000144354	S
+DTL	ENSG00000143476	S
+PRIM1	ENSG00000198056	S
+UHRF1	ENSG00000276043	S
+MLF1IP	ENSG00000151725	S
+HELLS	ENSG00000119969	S
+RFC2	ENSG00000049541	S
+RPA2	ENSG00000117748	S
+NASP	ENSG00000132780	S
+RAD51AP1	ENSG00000111247	S
+GMNN	ENSG00000112312	S
+WDR76	ENSG00000092470	S
+SLBP	ENSG00000163950	S
+CCNE2	ENSG00000175305	S
+UBR7	ENSG00000012963	S
+POLD3	ENSG00000077514	S
+MSH2	ENSG00000095002	S
+ATAD2	ENSG00000156802	S
+RAD51	ENSG00000051180	S
+RRM2	ENSG00000171848	S
+CDC45	ENSG00000093009	S
+CDC6	ENSG00000094804	S
+EXO1	ENSG00000174371	S
+TIPIN	ENSG00000075131	S
+DSCC1	ENSG00000136982	S
+BLM	ENSG00000197299	S
+CASP8AP2	ENSG00000118412	S
+USP1	ENSG00000162607	S
+CLSPN	ENSG00000092853	S
+POLA1	ENSG00000101868	S
+CHAF1B	ENSG00000159259	S
+BRIP1	ENSG00000136492	S
+E2F8	ENSG00000129173	S
+HMGB2	ENSG00000164104	G2/M
+CDK1	ENSG00000170312	G2/M
+NUSAP1	ENSG00000137804	G2/M
+UBE2C	ENSG00000175063	G2/M
+BIRC5	ENSG00000089685	G2/M
+TPX2	ENSG00000088325	G2/M
+TOP2A	ENSG00000131747	G2/M
+NDC80	ENSG00000080986	G2/M
+CKS2	ENSG00000123975	G2/M
+NUF2	ENSG00000143228	G2/M
+CKS1B	ENSG00000173207	G2/M
+MKI67	ENSG00000148773	G2/M
+TMPO	ENSG00000120802	G2/M
+CENPF	ENSG00000117724	G2/M
+TACC3	ENSG00000013810	G2/M
+FAM64A	ENSG00000129195	G2/M
+SMC4	ENSG00000113810	G2/M
+CCNB2	ENSG00000157456	G2/M
+CKAP2L	ENSG00000169607	G2/M
+CKAP2	ENSG00000136108	G2/M
+AURKB	ENSG00000178999	G2/M
+BUB1	ENSG00000169679	G2/M
+KIF11	ENSG00000138160	G2/M
+ANP32E	ENSG00000143401	G2/M
+TUBB4B	ENSG00000188229	G2/M
+GTSE1	ENSG00000075218	G2/M
+KIF20B	ENSG00000138182	G2/M
+HJURP	ENSG00000123485	G2/M
+CDCA3	ENSG00000111665	G2/M
+HN1	ENSG00000189159	G2/M
+CDC20	ENSG00000117399	G2/M
+TTK	ENSG00000112742	G2/M
+CDC25C	ENSG00000158402	G2/M
+KIF2C	ENSG00000142945	G2/M
+RANGAP1	ENSG00000100401	G2/M
+NCAPD2	ENSG00000010292	G2/M
+DLGAP5	ENSG00000126787	G2/M
+CDCA2	ENSG00000184661	G2/M
+CDCA8	ENSG00000134690	G2/M
+ECT2	ENSG00000114346	G2/M
+KIF23	ENSG00000137807	G2/M
+HMMR	ENSG00000072571	G2/M
+AURKA	ENSG00000087586	G2/M
+PSRC1	ENSG00000134222	G2/M
+ANLN	ENSG00000011426	G2/M
+LBR	ENSG00000143815	G2/M
+CKAP5	ENSG00000175216	G2/M
+CENPE	ENSG00000138778	G2/M
+CTCF	ENSG00000102974	G2/M
+NEK2	ENSG00000117650	G2/M
+G2E3	ENSG00000092140	G2/M
+GAS2L3	ENSG00000139354	G2/M
+CBX5	ENSG00000094916	G2/M
+CENPA	ENSG00000115163	G2/M
diff --git a/scib/resources/cell_cycle_genes_mus_musculus.tsv b/scib/resources/cell_cycle_genes_mus_musculus.tsv
new file mode 100644
index 00000000..3938d978
--- /dev/null
+++ b/scib/resources/cell_cycle_genes_mus_musculus.tsv
@@ -0,0 +1,98 @@
+gene_name	gene_id	phase
+Mcm5	ENSMUSG00000005410	S
+Pcna	ENSMUSG00000027342	S
+Tyms	ENSMUSG00000025747	S
+Fen1	ENSMUSG00000024742	S
+Mcm2	ENSMUSG00000002870	S
+Mcm4	ENSMUSG00000022673	S
+Rrm1	ENSMUSG00000030978	S
+Ung	ENSMUSG00000029591	S
+Gins2	ENSMUSG00000031821	S
+Mcm6	ENSMUSG00000026355	S
+Cdca7	ENSMUSG00000055612	S
+Dtl	ENSMUSG00000037474	S
+Prim1	ENSMUSG00000025395	S
+Uhrf1	ENSMUSG00000001228	S
+Mlf1ip	ENSMUSG00000031629	S
+Hells	ENSMUSG00000025001	S
+Rfc2	ENSMUSG00000023104	S
+Rpa2	ENSMUSG00000028884	S
+Nasp	ENSMUSG00000028693	S
+Rad51ap1	ENSMUSG00000030346	S
+Gmnn	ENSMUSG00000006715	S
+Wdr76	ENSMUSG00000027242	S
+Slbp	ENSMUSG00000004642	S
+Ccne2	ENSMUSG00000028212	S
+Ubr7	ENSMUSG00000041712	S
+Pold3	ENSMUSG00000030726	S
+Msh2	ENSMUSG00000024151	S
+Atad2	ENSMUSG00000022360	S
+Rad51	ENSMUSG00000027323	S
+Rrm2	ENSMUSG00000020649	S
+Cdc45	ENSMUSG00000000028	S
+Cdc6	ENSMUSG00000017499	S
+Exo1	ENSMUSG00000039748	S
+Tipin	ENSMUSG00000032397	S
+Dscc1	ENSMUSG00000022422	S
+Blm	ENSMUSG00000030528	S
+Casp8ap2	ENSMUSG00000028282	S
+Usp1	ENSMUSG00000028560	S
+Clspn	ENSMUSG00000042489	S
+Pola1	ENSMUSG00000006678	S
+Chaf1b	ENSMUSG00000022945	S
+Brip1	ENSMUSG00000034329	S
+E2f8	ENSMUSG00000046179	S
+Hmgb2	ENSMUSG00000054717	G2/M
+Cdk1	ENSMUSG00000019942	G2/M
+Nusap1	ENSMUSG00000027306	G2/M
+Ube2c	ENSMUSG00000001403	G2/M
+Birc5	ENSMUSG00000017716	G2/M
+Tpx2	ENSMUSG00000027469	G2/M
+Top2a	ENSMUSG00000020914	G2/M
+Ndc80	ENSMUSG00000024056	G2/M
+Cks2	ENSMUSG00000062248	G2/M
+Nuf2	ENSMUSG00000026683	G2/M
+Cks1b	ENSMUSG00000028044	G2/M
+Mki67	ENSMUSG00000031004	G2/M
+Tmpo	ENSMUSG00000019961	G2/M
+Cenpf	ENSMUSG00000026605	G2/M
+Tacc3	ENSMUSG00000037313	G2/M
+Fam64a	ENSMUSG00000020808	G2/M
+Smc4	ENSMUSG00000034349	G2/M
+Ccnb2	ENSMUSG00000032218	G2/M
+Ckap2l	ENSMUSG00000048327	G2/M
+Ckap2	ENSMUSG00000037725	G2/M
+Aurkb	ENSMUSG00000020897	G2/M
+Bub1	ENSMUSG00000027379	G2/M
+Kif11	ENSMUSG00000012443	G2/M
+Anp32e	ENSMUSG00000015749	G2/M
+Tubb4b	ENSMUSG00000036752	G2/M
+Gtse1	ENSMUSG00000022385	G2/M
+Kif20b	ENSMUSG00000024795	G2/M
+Hjurp	ENSMUSG00000044783	G2/M
+Cdca3	ENSMUSG00000023505	G2/M
+Hn1	ENSMUSG00000020737	G2/M
+Cdc20	ENSMUSG00000006398	G2/M
+Ttk	ENSMUSG00000038379	G2/M
+Cdc25c	ENSMUSG00000044201	G2/M
+Kif2c	ENSMUSG00000028678	G2/M
+Rangap1	ENSMUSG00000022391	G2/M
+Ncapd2	ENSMUSG00000038252	G2/M
+Dlgap5	ENSMUSG00000037544	G2/M
+Cdca2	ENSMUSG00000048922	G2/M
+Cdca8	ENSMUSG00000028873	G2/M
+Ect2	ENSMUSG00000027699	G2/M
+Kif23	ENSMUSG00000032254	G2/M
+Hmmr	ENSMUSG00000020330	G2/M
+Aurka	ENSMUSG00000027496	G2/M
+Psrc1	ENSMUSG00000068744	G2/M
+Anln	ENSMUSG00000036777	G2/M
+Lbr	ENSMUSG00000004880	G2/M
+Ckap5	ENSMUSG00000040549	G2/M
+Cenpe	ENSMUSG00000045328	G2/M
+Ctcf	ENSMUSG00000005698	G2/M
+Nek2	ENSMUSG00000026622	G2/M
+G2e3	ENSMUSG00000035293	G2/M
+Gas2l3	ENSMUSG00000074802	G2/M
+Cbx5	ENSMUSG00000009575	G2/M
+Cenpa	ENSMUSG00000029177	G2/M
diff --git a/scib/resources/convert_genes.py b/scib/resources/convert_genes.py
new file mode 100644
index 00000000..670a2568
--- /dev/null
+++ b/scib/resources/convert_genes.py
@@ -0,0 +1,112 @@
+import requests
+from tqdm import tqdm
+
+
+def get_gene_name_from_ensembl(gene_ids: list, species: str):
+    """Return the Ensembl display name per gene ID, falling back to the ID itself."""
+    base_url = "https://rest.ensembl.org"
+    gene_names = []
+    for gene_id in tqdm(gene_ids):
+        response = requests.get(
+            f"{base_url}/lookup/id/{gene_id}?expand=1;species={species}",
+            headers={"Content-Type": "application/json"},
+            timeout=30,  # fail instead of hanging forever on a stalled connection
+        )
+
+        if response.status_code == 200:
+            # entries without a ``display_name`` keep their gene ID
+            gene_names.append(response.json().get("display_name", gene_id))
+        else:
+            print(f"Error: {response.status_code}, skipping gene {gene_id}...")
+            gene_names.append(gene_id)
+
+    return gene_names
+
+
+def get_gene_id_from_ensembl(gene_names: list, species: str):
+    """Map gene symbols to Ensembl gene IDs via the Ensembl REST ``xrefs/symbol`` endpoint.
+
+    Returns one entry per symbol: the ``id`` of the first hit, ``"Not Found"`` when the
+    lookup returns no hit, or the symbol itself when the request fails.
+    """
+    base_url = "https://rest.ensembl.org"
+    gene_ids = []
+    for gene_name in tqdm(gene_names):
+        response = requests.get(
+            f"{base_url}/xrefs/symbol/{species}/{gene_name}?expand=1",
+            headers={"Content-Type": "application/json"},
+            timeout=30,  # fail instead of hanging forever on a stalled connection
+        )
+        if response.status_code != 200:
+            print(f"Error: {response.status_code}, skipping gene {gene_name}...")
+            gene_ids.append(gene_name)
+            continue
+
+        hits = response.json()
+        # only the first hit is used; an empty result is recorded as "Not Found"
+        gene_ids.append(hits[0].get("id", gene_name) if hits else "Not Found")
+
+    return gene_ids
+
+
+if __name__ == "__main__":
+    from pathlib import Path
+
+    import pandas as pd
+    # the generated cell_cycle_genes_<organism>.tsv files are written next to this script
+    root = Path(__file__).parent
+
+    cc_files = {
+        "mus_musculus": "https://raw.githubusercontent.com/theislab/scanpy_usage/master/180209_cell_cycle/data/regev_lab_cell_cycle_genes.txt",
+        "homo_sapiens": "https://raw.githubusercontent.com/theislab/scanpy_usage/master/180209_cell_cycle/data/regev_lab_cell_cycle_genes.txt",
+        "caenorhabditis_elegans": "https://raw.githubusercontent.com/hbc/tinyatlas/refs/heads/master/cell_cycle/Caenorhabditis_elegans.csv",
+        "danio_rerio": "https://raw.githubusercontent.com/hbc/tinyatlas/refs/heads/master/cell_cycle/Danio_rerio.csv",
+    }
+
+    # Tirosh mouse and human
+    # processed according to https://github.com/scverse/scanpy_usage/blob/master/180209_cell_cycle/cell_cycle.ipynb
+    for organism in ["mus_musculus", "homo_sapiens"]:
+        print(f"Organism: {organism}")
+
+        # read file
+        gene_names = pd.read_csv(cc_files[organism], header=None)[0]
+
+        if organism == "mus_musculus":
+            gene_names = gene_names.str.capitalize()
+
+        # convert gene names
+        gene_ids = get_gene_id_from_ensembl(gene_names, species=organism)
+
+        # create gene map
+        gene_map = pd.DataFrame(dict(gene_name=gene_names, gene_id=gene_ids))
+
+        # set cell cycle phase: the first 43 genes are S phase, all remaining ones G2/M
+        gene_map["phase"] = "G2/M"
+        gene_map.loc[gene_map.index[:43], "phase"] = "S"
+
+        # write to file
+        print(gene_map)
+        gene_map.to_csv(
+            root / f"cell_cycle_genes_{organism}.tsv", sep="\t", index=False
+        )
+
+    # Tinyatlas gene sets
+    # https://github.com/hbc/tinyatlas/tree/master/cell_cycle
+    for organism in ["caenorhabditis_elegans", "danio_rerio"]:
+        print(f"Organism: {organism}")
+
+        # read file
+        gene_map = pd.read_csv(cc_files[organism])
+        gene_map["gene_id"] = gene_map["geneID"]
+        del gene_map["geneID"]
+
+        # get gene names
+        gene_map["gene_name"] = get_gene_name_from_ensembl(
+            gene_map["gene_id"], species=organism
+        )
+
+        # write to file
+        print(gene_map)
+        gene_map.to_csv(
+            root / f"cell_cycle_genes_{organism}.tsv", sep="\t", index=False
+        )
diff --git a/setup.cfg b/setup.cfg
index 23e359b4..d33184f4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -66,7 +66,7 @@ zip_safe = False
 
 [options.package_data]
 scib =
-	resources/*.txt
+	resources/*
 	knn_graph/*
 
 [options.extras_require]
diff --git a/tests/conftest.py b/tests/conftest.py
index 972cf45a..87faf04d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -97,3 +97,29 @@ def adata_clustered(adata_neighbors):
         adata_obj, label_key="celltype", cluster_key="cluster", verbose=True
     )
     yield adata_obj
+
+
+DATASETS = {  # download URLs, keyed by the organism name passed to ``score_cell_cycle``
+    "c_elegans": "https://github.com/Munfred/wormcells-data/releases/download/cao2017/cao2017.h5ad",
+    "zebrafish": "https://figshare.com/ndownloader/files/27265280",
+    # from https://cellrank.readthedocs.io/en/stable/_modules/cellrank/datasets.html
+}
+
+
+@pytest.fixture()
+def adata_from_url(request):
+    dataset_name = request.param
+    url = DATASETS[dataset_name]
+    # read ``<dataset_name>.h5ad``; ``url`` is handed to scanpy as ``backup_url``
+    adata = sc.read(f"{dataset_name}.h5ad", backup_url=url)
+    assert adata is not None
+    adata.uns["dataset_name"] = dataset_name
+    # use gene IDs as variable names whenever the dataset provides them
+    if "gene_id" in adata.var.columns:
+        adata.var_names = adata.var["gene_id"]
+    # zebrafish: lower-case the names, then keep only the first occurrence of each name
+    if dataset_name == "zebrafish":
+        adata.var_names = adata.var_names.str.lower()
+        adata = adata[:, ~adata.var_names.duplicated()].copy()
+
+    yield adata
diff --git a/tests/preprocessing/test_gene_scoring.py b/tests/preprocessing/test_gene_scoring.py
new file mode 100644
index 00000000..e3b26135
--- /dev/null
+++ b/tests/preprocessing/test_gene_scoring.py
@@ -0,0 +1,44 @@
+import pytest
+import scanpy as sc
+
+import scib
+
+
+def test_mouse(adata_paul15):
+    """Scoring adds the score and phase columns; the ``mus musculus`` alias is accepted too."""
+    assert "S_score" not in adata_paul15.obs.columns
+    assert "G2M_score" not in adata_paul15.obs.columns
+    assert "phase" not in adata_paul15.obs.columns
+
+    scib.pp.score_cell_cycle(
+        adata_paul15,
+        organism="mouse",
+    )
+    assert "S_score" in adata_paul15.obs.columns
+    assert "G2M_score" in adata_paul15.obs.columns
+    assert "phase" in adata_paul15.obs.columns
+
+    scib.pp.score_cell_cycle(
+        adata_paul15,
+        organism="mus musculus",
+    )
+
+
+def test_human(adata_paul15):
+    scib.pp.score_cell_cycle(
+        sc.datasets.pbmc68k_reduced(),
+        organism="human",
+    )
+    with pytest.raises(ValueError):  # human gene set is expected not to match paul15
+        scib.pp.score_cell_cycle(
+            adata_paul15,
+            organism="human",
+        )
+
+
+@pytest.mark.parametrize("adata_from_url", ["c_elegans", "zebrafish"], indirect=True)
+def test_organism(adata_from_url):
+    scib.pp.score_cell_cycle(
+        adata_from_url,
+        organism=adata_from_url.uns["dataset_name"],  # organism name doubles as dataset key
+    )