Add pygmt.read to read a dataset/grid/image into pandas.DataFrame/xarray.DataArray

seisman · seisman · commit cef4cdfa497f · 2024-12-05T11:24:10.000+08:00
diff --git a/doc/api/index.rst b/doc/api/index.rst
@@ -172,6 +172,7 @@ Input/output
     :toctree: generated
 
     load_dataarray
+    read
 
 GMT Defaults
 ------------
diff --git a/pygmt/__init__.py b/pygmt/__init__.py
@@ -54,6 +54,7 @@
     makecpt,
     nearneighbor,
     project,
+    read,
     select,
     sph2grd,
     sphdistance,
diff --git a/pygmt/datasets/load_remote_dataset.py b/pygmt/datasets/load_remote_dataset.py
@@ -6,10 +6,9 @@
 from typing import Any, Literal, NamedTuple
 
 import xarray as xr
-from pygmt.clib import Session
 from pygmt.exceptions import GMTInvalidInput
-from pygmt.helpers import build_arg_list, kwargs_to_strings
-from pygmt.src import which
+from pygmt.helpers import kwargs_to_strings
+from pygmt.src import read, which
 
 
 class Resolution(NamedTuple):
@@ -443,14 +442,7 @@ def _load_remote_dataset(
 
     fname = f"@{prefix}_{resolution}_{reg}"
     kind = "image" if name in {"earth_day", "earth_night"} else "grid"
-    kwdict = {"R": region, "T": {"grid": "g", "image": "i"}[kind]}
-    with Session() as lib:
-        with lib.virtualfile_out(kind=kind) as voutgrd:
-            lib.call_module(
-                module="read",
-                args=[fname, voutgrd, *build_arg_list(kwdict)],
-            )
-            grid = lib.virtualfile_to_raster(kind=kind, outgrid=None, vfname=voutgrd)
+    grid = read(fname, kind=kind, region=region)
 
     # Full path to the grid if not tiled grids.
     source = which(fname, download="a") if not resinfo.tiled else None
diff --git a/pygmt/datasets/samples.py b/pygmt/datasets/samples.py
@@ -8,8 +8,7 @@
 import pandas as pd
 import xarray as xr
 from pygmt.exceptions import GMTInvalidInput
-from pygmt.io import load_dataarray
-from pygmt.src import which
+from pygmt.src import read, which
 
 
 def _load_japan_quakes() -> pd.DataFrame:
@@ -203,8 +202,7 @@ def _load_earth_relief_holes() -> xr.DataArray:
         The Earth relief grid. Coordinates are latitude and longitude in degrees. Relief
         is in meters.
     """
-    fname = which("@earth_relief_20m_holes.grd", download="c")
-    return load_dataarray(fname, engine="netcdf4")
+    return read("@earth_relief_20m_holes.grd", kind="grid")
 
 
 class GMTSampleData(NamedTuple):
diff --git a/pygmt/helpers/testing.py b/pygmt/helpers/testing.py
@@ -7,9 +7,9 @@
 import string
 from pathlib import Path
 
+import xarray as xr
 from pygmt.exceptions import GMTImageComparisonFailure
-from pygmt.io import load_dataarray
-from pygmt.src import which
+from pygmt.src import read
 
 
 def check_figures_equal(*, extensions=("png",), tol=0.0, result_dir="result_images"):
@@ -144,17 +144,16 @@ def wrapper(*args, ext="png", request=None, **kwargs):
     return decorator
 
 
-def load_static_earth_relief():
+def load_static_earth_relief() -> xr.DataArray:
     """
     Load the static_earth_relief file for internal testing.
 
     Returns
     -------
-    data : xarray.DataArray
+    data
         A grid of Earth relief for internal tests.
     """
-    fname = which("@static_earth_relief.nc", download="c")
-    return load_dataarray(fname)
+    return read("@static_earth_relief.nc", kind="grid")  # type: ignore[return-value]
 
 
 def skip_if_no(package):
diff --git a/pygmt/src/__init__.py b/pygmt/src/__init__.py
@@ -41,6 +41,7 @@
 from pygmt.src.plot3d import plot3d
 from pygmt.src.project import project
 from pygmt.src.psconvert import psconvert
+from pygmt.src.read import read
 from pygmt.src.rose import rose
 from pygmt.src.select import select
 from pygmt.src.shift_origin import shift_origin
diff --git a/pygmt/src/read.py b/pygmt/src/read.py
@@ -0,0 +1,88 @@
+"""
+Read data from files.
+"""
+
+from typing import Literal
+
+import pandas as pd
+import xarray as xr
+from pygmt.clib import Session
+from pygmt.helpers import build_arg_list, fmt_docstring, kwargs_to_strings, use_alias
+
+
+@fmt_docstring
+@use_alias(R="region")
+@kwargs_to_strings(R="sequence")
+def read(
+    file,
+    kind: Literal["dataset", "grid", "image"],
+    **kwargs,
+) -> pd.DataFrame | xr.DataArray:
+    """
+    Read a dataset, grid, or image from a file and return the appropriate object.
+
+    For datasets, it returns a :class:`pandas.DataFrame`. For grids and images, it
+    returns a :class:`xarray.DataArray`.
+
+    Parameters
+    ----------
+    file
+        The file name to read.
+    kind
+        The kind of data to read. Valid values are ``"dataset"``, ``"grid"``, and
+        ``"image"``.
+    {region}
+
+    For datasets, the following keyword arguments are supported:
+
+    column_names
+        A list of column names.
+    header
+        Row number containing column names. ``header=None`` means not to parse the
+        column names from table header. Ignored if the row number is larger than the
+        number of headers in the table.
+    dtype
+        Data type. Can be a single type for all columns or a dictionary mapping column
+        names to types.
+    index_col
+        Column to set as index.
+
+    Returns
+    -------
+    Return type depends on the ``kind`` argument:
+
+    - ``"dataset"``: :class:`pandas.DataFrame`
+    - ``"grid"`` or ``"image"``: :class:`xarray.DataArray`
+
+    Examples
+    --------
+    Read a dataset into a :class:`pandas.DataFrame` object:
+
+    >>> from pygmt import read
+    >>> df = read("@hotspots.txt", kind="dataset")
+    >>> type(df)
+    <class 'pandas.core.frame.DataFrame'>
+
+    Read a grid into an :class:`xarray.DataArray` object:
+
+    >>> dataarray = read("@earth_relief_01d", kind="grid")
+    >>> type(dataarray)
+    <class 'xarray.core.dataarray.DataArray'>
+    """
+    code = {"dataset": "d", "grid": "g", "image": "i"}[kind]
+
+    # ``region`` is optional, so look up its alias "R" without assuming it is set.
+    # Pop it so that only the dataset options are forwarded below.
+    kwdict = {"R": kwargs.pop("R", None), "T": code}
+
+    with Session() as lib:
+        with lib.virtualfile_out(kind=kind) as voutfile:
+            lib.call_module("read", args=[file, voutfile, *build_arg_list(kwdict)])
+
+            # Convert while the output virtual file is still open; it is closed when
+            # the ``virtualfile_out`` context exits.
+            match kind:
+                case "dataset":
+                    return lib.virtualfile_to_dataset(vfname=voutfile, **kwargs)
+                case "grid" | "image":
+                    return lib.virtualfile_to_raster(vfname=voutfile, kind=kind)