import pandas as pd
import requests
from io import StringIO


VARIABLE_MAP = {
    'SWGDN': 'ghi',
    'SWGDNCLR': 'ghi_clear',
    'ALBEDO': 'albedo',
    'T2M': 'temp_air',
    'T2MDEW': 'temp_dew',
    'PS': 'pressure',
    'TOTEXTTAU': 'aod550',
}


def get_merra2(latitude, longitude, start, end, username, password, dataset,
               variables, map_variables=True):
    """
    Retrieve MERRA-2 time-series irradiance and meteorological data from
    NASA's GESDISC data archive.

    MERRA-2 [1]_ offers modeled data for many atmospheric quantities at hourly
    resolution on a 0.5° x 0.625° global grid.

    Access must be granted to the GESDISC data archive before EarthData
    credentials will work. See [2]_ for instructions.

    Parameters
    ----------
    latitude : float
        In decimal degrees, north is positive (ISO 19115).
    longitude : float
        In decimal degrees, east is positive (ISO 19115).
    start : datetime like or str
        First timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    end : datetime like or str
        Last timestamp of the requested period. If a timezone is not
        specified, UTC is assumed.
    username : str
        NASA EarthData username.
    password : str
        NASA EarthData password.
    dataset : str
        Dataset name (with version), e.g. "M2T1NXRAD.5.12.4".
    variables : list of str
        List of variable names to retrieve. See the documentation of the
        specific dataset you are accessing for options.
    map_variables : bool, default True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` are in different years when converted to UTC.

    Returns
    -------
    data : pd.DataFrame
        Time series data. The index corresponds to the middle of the interval.
    meta : dict
        Metadata.

    Notes
    -----
    The following datasets provide quantities useful for PV modeling:

    - M2T1NXRAD.5.12.4: SWGDN, SWGDNCLR, ALBEDO
    - M2T1NXSLV.5.12.4: T2M, U10M, V10M, T2MDEW, PS
    - M2T1NXAER.5.12.4: TOTEXTTAU

    Note that MERRA-2 does not currently provide DNI or DHI.

    References
    ----------
    .. [1] https://gmao.gsfc.nasa.gov/gmao-products/merra-2/
    .. [2] https://disc.gsfc.nasa.gov/earthdata-login
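
    Examples
    --------
    A minimal, illustrative request (placeholder coordinates and credentials;
    requires an EarthData account with GESDISC access approved):

    >>> data, meta = get_merra2(  # doctest: +SKIP
    ...     latitude=40.0, longitude=-105.2,
    ...     start='2020-06-01', end='2020-06-07',
    ...     username='<earthdata-username>', password='<earthdata-password>',
    ...     dataset='M2T1NXRAD.5.12.4',
    ...     variables=['SWGDN', 'SWGDNCLR', 'ALBEDO'])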
    """

    # general API info here:
    # https://docs.unidata.ucar.edu/tds/5.0/userguide/netcdf_subset_service_ref.html  # noqa: E501

    def _to_utc_dt_notz(dt):
        dt = pd.to_datetime(dt)
        if dt.tzinfo is None:  # convert everything to UTC
            dt = dt.tz_localize("UTC")
        else:
            dt = dt.tz_convert("UTC")
        return dt.tz_localize(None)  # drop tz so that isoformat() is clean

    start = _to_utc_dt_notz(start)
    end = _to_utc_dt_notz(end)

    if (year := start.year) != end.year:
        raise ValueError("start and end must be in the same year (in UTC)")

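    # The THREDDS server at GES DISC exposes each MERRA-2 collection as
    # per-year NcML aggregations, which is why requests spanning multiple
    # years are rejected above; the URL below follows that layout.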
    url = (
        "https://goldsmr4.gesdisc.eosdis.nasa.gov/thredds/ncss/grid/"
        f"MERRA2_aggregation/{dataset}/{dataset}_Aggregation_{year}.ncml"
    )

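    # NCSS "grid as point" request: the server extracts a time series for the
    # grid cell closest to the requested coordinates and, with accept=csv,
    # returns it as CSV with units embedded in the column headers
    # (e.g. T2M[unit="K"]).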
    parameters = {
        'var': ",".join(variables),
        'latitude': latitude,
        'longitude': longitude,
        'time_start': start.isoformat() + "Z",
        'time_end': end.isoformat() + "Z",
        'accept': 'csv',
    }

    auth = (username, password)

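    # GES DISC sits behind Earthdata Login (URS): the first request follows
    # the login redirects using the session's credentials, and the second
    # request fetches the data from the resolved URL.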
    with requests.Session() as session:
        session.auth = auth
        login = session.request('get', url, params=parameters)
        response = session.get(login.url, auth=auth, params=parameters)

    response.raise_for_status()

    content = response.content.decode('utf-8')
    buffer = StringIO(content)
    df = pd.read_csv(buffer)

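    # MERRA-2 hourly time-averaged collections are stamped at the center of
    # each averaging interval (00:30, 01:30, ... UTC), so the index already
    # represents the middle of the interval as noted in the docstring.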
    df.index = pd.to_datetime(df['time'])

    meta = {}
    meta['dataset'] = dataset
    meta['station'] = df['station'].values[0]
    meta['latitude'] = df['latitude[unit="degrees_north"]'].values[0]
    meta['longitude'] = df['longitude[unit="degrees_east"]'].values[0]

    # drop the non-data columns
    dropcols = ['time', 'station', 'latitude[unit="degrees_north"]',
                'longitude[unit="degrees_east"]']
    df = df.drop(columns=dropcols)

    # column names are like T2M[unit="K"] by default. extract the unit
    # for the metadata, then rename col to just T2M
    units = {}
    rename = {}
    for col in df.columns:
        name, _ = col.split("[", maxsplit=1)
        unit = col.split('"')[1]
        units[name] = unit
        rename[col] = name

    meta['units'] = units
    df = df.rename(columns=rename)

    if map_variables:
        df = df.rename(columns=VARIABLE_MAP)

    return df, meta