Skip to content

Commit

Permalink
handle input dataframe index datatype is number-like issue, #303, #305
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhuoqing Fang authored and Zhuoqing Fang committed Mar 6, 2025
1 parent 6c4deb7 commit 267b005
Showing 1 changed file with 17 additions and 3 deletions.
20 changes: 17 additions & 3 deletions gseapy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,22 @@ def _read_file(self, path: str) -> pd.DataFrame:

return rank_metric.select_dtypes(include=[np.number]).reset_index()

def _reset_index(self, rank_metric: pd.DataFrame):
"""
check gene ids type
"""
# handle index is already gene_names
if rank_metric.index.dtype == "O":
# Try to check if all elements can be converted to numbers
try:
# is_string_numbers = True, don't reset index
pd.to_numeric(rank_metric.index)
except (ValueError, TypeError):
# Contains non-numeric strings, likely gene names
# is_string_numbers = False
rank_metric = rank_metric.reset_index()
return rank_metric

def _load_data(self, exprs: Union[str, pd.Series, pd.DataFrame]) -> pd.DataFrame:
"""
helper function to read data
Expand All @@ -281,9 +297,7 @@ def _load_data(self, exprs: Union[str, pd.Series, pd.DataFrame]) -> pd.DataFrame
# handle dataframe with gene_name as index.
self._logger.debug("Input data is a DataFrame with gene names")
# handle index is already gene_names
if not isinstance(rank_metric.index, pd.RangeIndex):
rank_metric = rank_metric.reset_index()

rank_metric = self._reset_index(rank_metric)
# if rank_metric.columns.dtype != "O":
rank_metric.columns = rank_metric.columns.astype(str)

Expand Down

0 comments on commit 267b005

Please sign in to comment.