Update the benchmark FIM lookup and more

supathdhitalGEO · supathdhitalGEO · commit 3d73b0092330 · 2026-02-05T12:13:18.000-06:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
     "notebook>=6.5.7",
     "geocube<=0.7.1",
     "geopandas>=0.14.3",
-    "fimeval>=0.1.56"
+    "fimeval>=0.1.59"
 ]
 
 [project.optional-dependencies]
diff --git a/src/fimserve/fimevaluation/fims_setup.py b/src/fimserve/fimevaluation/fims_setup.py
@@ -6,7 +6,7 @@
 from collections import defaultdict
 
 # Internal utilities
-from .utilis import (
+from .utils import (
     load_catalog_core,
     download_fim_assets,
     _to_date,
@@ -15,6 +15,7 @@
     _record_hour_or_none,
     format_records_for_print,
     find_fims,
+    _record_huc8_list,
 )
 
 from ..datadownload import DownloadHUC8, setup_directories
@@ -42,7 +43,7 @@ def _ensure_roots(self):
         self._roots_initialized = True
 
     def availability(self, HUCID: str) -> str:
-        from .utilis import availability as _avail
+        from .utils import availability as _avail
 
         return _avail(HUCID)
 
@@ -171,7 +172,7 @@ def process(
                     r
                     for r in records
                     if str(r.get("file_name", "")).strip() == fname
-                    and str(r.get("huc8", "")).strip() == str(huc8).strip()
+                    and str(huc8).strip() in set(_record_huc8_list(r))
                 ]
                 cand_any_huc = [
                     r for r in records if str(r.get("file_name", "")).strip() == fname
@@ -348,7 +349,7 @@ def process(
 
                 # Ensure/generate OWP HAND FIM for that event time and copy to all matching site folders
                 owp_src_copied_any = False
-                if user_dt:
+                if ensure_owp and user_dt:
                     for site in dl_by_site.keys():
                         folder = inputs_root / f"HUC{huc8}_{site}"
                         owp_path = self._ensure_owp_to(
@@ -370,7 +371,7 @@ def process(
                             "downloads": dl_records,
                             "owp_path": (
                                 str(folder / f"NWM_{label}_{huc8}_inundation.tif")
-                                if owp_src_copied_any
+                                if (ensure_owp and owp_src_copied_any)
                                 else None
                             ),
                         }
@@ -379,9 +380,10 @@ def process(
             msg_bits = [
                 f"Downloaded {total_downloaded} benchmark item(s) into '{inputs_root}'."
             ]
-            msg_bits.append(
-                "OWP HAND FIMs ensured per event (based on benchmark timestamps)."
-            )
+            if ensure_owp:
+                msg_bits.append(
+                    "OWP HAND FIMs ensured per event (based on benchmark timestamps)."
+                )
 
             return {
                 "status": "ok",
@@ -517,25 +519,41 @@ def _generate_owp(self, huc8: str, user_dt: str) -> Optional[Path]:
 def fim_lookup(
     HUCID: str,
     date_input: Optional[str] = None,
-    file_name: Optional[str] = None,
-    run_handfim: bool = False,
-    out_dir: Optional[str] = None,  # Directory to place downloaded/generated files
     start_date: Optional[str] = None,
     end_date: Optional[str] = None,
+    file_name: Optional[str] = None,
+    run_handfim: bool = False,
+    out_dir: Optional[str] = None,
 ) -> str:
     """
-    - run_handfim=False (default): show a formatted benchmark list.
-    - run_handfim=True: run the OWP HAND process (copy/generate), DO NOT print the benchmark summary;
-      just return the operational message from the process step.
+    Behavior:
+      - If file_name is provided: ALWAYS download the benchmark assets (tif + gpkg) into out_dir (or CWD),
+        regardless of run_handfim.
+      - If run_handfim=True: additionally ensure/generate OWP HAND FIM (copied into the same folder(s)).
+      - If file_name is not provided:
+          * run_handfim=False -> listing mode (query/pretty print)
+          * run_handfim=True  -> process mode (download strict matches + ensure OWP)
     """
     svc = FIMService()
 
-    # List-only mode
+    # If filename is provided, always download benchmark assets.
+    if file_name:
+        rep = svc.process(
+            huc8=HUCID,
+            date_input=date_input,
+            ensure_owp=run_handfim,
+            generate_owp_if_missing=run_handfim,
+            out_dir=out_dir,
+            file_name=file_name,
+        )
+        return rep.get("message", "")
+
+    # If No filename provided: preserve original behavior
     if not run_handfim:
         q = svc.query(
             HUCID=HUCID,
             date_input=date_input,
-            file_name=file_name,
+            file_name=None,
             start_date=start_date,
             end_date=end_date,
         )
@@ -545,7 +563,6 @@ def fim_lookup(
                 "No benchmark FIMs were matched with the information you provided.\n"
                 f"(HUC={HUCID}"
                 f"{', date='+date_input if date_input else ''}"
-                f"{', file_name='+file_name if file_name else ''}"
                 f"{', range=['+str(start_date)+' , '+str(end_date)+']' if (start_date or end_date) else ''})"
             )
 
@@ -557,18 +574,16 @@ def fim_lookup(
             filt.append(f"date '{date_input}'")
         if start_date or end_date:
             filt.append(f"range [{start_date or '-∞'} , {end_date or '∞'}]")
-        if file_name:
-            filt.append(f"file '{file_name}'")
         prefix = header + (" for " + ", ".join(filt) + ":\n" if filt else ":\n")
         return prefix + txt
 
-    # Run/ensure OWP mode
+    #  If run_handfim=True, no filename: process strict matches with date and ensure OWP
     rep = svc.process(
         huc8=HUCID,
         date_input=date_input,
         ensure_owp=True,
         generate_owp_if_missing=True,
         out_dir=out_dir,
-        file_name=file_name,
+        file_name=None,
     )
-    return rep.get("message", "")
+    return rep.get("message", "")
diff --git a/src/fimserve/fimevaluation/utils.py b/src/fimserve/fimevaluation/utils.py
@@ -143,10 +143,7 @@ def _context_str(
 
     return ", ".join(parts) if parts else "your filters"
 
-
-def format_records_for_print(
-    records: List[Dict[str, Any]], context: Optional[str] = None
-) -> str:
+def format_records_for_print(records: List[Dict[str, Any]], context: Optional[str] = None) -> str:
     if not records:
         ctx = context or "your filters"
         return f"Benchmark FIMs were not matched for {ctx}."
@@ -159,7 +156,7 @@ def format_records_for_print(
 
     blocks: List[str] = []
     for r in records:
-        tier = r.get("tier") or r.get("quality") or "Unknown"
+        tier = _tier_label(r)
         date_str = _pretty_date_for_print(r)
         res = r.get("resolution_m")
         res_txt = f"{res}m" if res is not None else "NA"
@@ -194,8 +191,89 @@ def _download(bucket: str, key: str, dest_path: str) -> str:
     _S3.download_file(bucket, key, dest_path)
     return dest_path
 
+def _clean_huc8_token(value: Any) -> str:
+    """Return ``value`` as text with surrounding whitespace and quote characters removed."""
+    return str(value).strip().strip("'").strip('"')
+
+
+def _record_huc8_list(rec: Dict[str, Any]) -> List[str]:
+    """
+    Return the HUC8 codes stored under ``rec["huc8"]`` as a list of clean strings.
+
+    Accepted shapes of the ``huc8`` value:
+      - missing / ``None``                  -> ``[]``
+      - list / tuple / set of codes         -> one cleaned string per element
+      - stringified list, e.g. "['03020201','03020202']"
+                                            -> parsed, then cleaned per element
+      - bracketed text that is not a valid Python literal, e.g. "[03020201, 03020202]"
+                                            -> split on commas, then cleaned
+      - any other scalar (plain string, int, ...) -> single-element list
+
+    ``None`` elements and tokens that are empty after cleaning are dropped in
+    every case, so the result never contains an empty string.
+    """
+    # Function-scope import keeps this helper self-contained.
+    import ast
+
+    v = rec.get("huc8")
+    if v is None:
+        return []
+
+    if isinstance(v, (list, tuple, set)):
+        items = v
+    elif isinstance(v, str):
+        s = v.strip()
+        if s.startswith("[") and s.endswith("]"):
+            try:
+                parsed = ast.literal_eval(s)
+            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
+                # Only the errors literal_eval is documented to raise on malformed
+                # input; e.g. unquoted codes with leading zeros are a SyntaxError.
+                parsed = None
+            if isinstance(parsed, (list, tuple, set)):
+                items = parsed
+            else:
+                # Fallback: treat the bracket contents as comma-separated tokens.
+                items = s[1:-1].split(",")
+        else:
+            items = [s]
+    else:
+        items = [v]
+
+    # Single normalization pass shared by every input shape.
+    cleaned = (_clean_huc8_token(x) for x in items if x is not None)
+    return [token for token in cleaned if token]
+
+
+def _tier_label(rec: Dict[str, Any]) -> str:
+    """
+    Build the printable tier label for a benchmark record.
+
+    The first non-blank value among the ``tier``, ``quality`` and ``HWM``
+    keys (checked in that order) is used. Values mentioning "tier" followed
+    by a number (``Tier_2``, ``tier2``, ``Tier-2``, ``Tier 2``) and bare
+    digit strings are rendered as ``"Tier <n>"``; a "tier" value without a
+    number is returned with underscores turned into spaces; anything else is
+    returned as-is. ``"Unknown"`` is returned when no key holds a value.
+    """
+    label = ""
+    for key in ("tier", "quality", "HWM"):  # HWM = High Water Marks
+        candidate = rec.get(key)
+        if candidate is None:
+            continue
+        stripped = str(candidate).strip()
+        if stripped:
+            label = stripped
+            break
+
+    if not label:
+        return "Unknown"
+
+    # Collapse spacing/hyphen variants so one pattern covers every spelling.
+    folded = label.lower().replace(" ", "").replace("-", "_")
+    if "tier" not in folded:
+        return f"Tier {label}" if label.isdigit() else label
+
+    found = re.search(r"tier[_ ]*(\d+)", folded)
+    if found is None:
+        return label.replace("_", " ").strip()
+    return f"Tier {found.group(1)}"
 
-# Search FIMs record in database
+#finding the benchmark FIMs
 def find_fims(
     records: List[Dict[str, Any]],
     huc8: str,
@@ -222,7 +300,7 @@ def find_fims(
       - Else (date with hour, or no date): fall back to strict behavior
     """
     huc8 = str(huc8).strip()
-    recs = [r for r in records if str(r.get("huc8", "")).strip() == huc8]
+    recs = [r for r in records if huc8 in set(_record_huc8_list(r))]
 
     if file_name:
         fname = file_name.strip()
@@ -268,7 +346,7 @@ def find_fims(
 
     if date_input and _to_hour_or_none(date_input) is None:
         target_day = _to_date(date_input)
-        out = []
+        out: List[Dict[str, Any]] = []
         for r in recs:
             r_day = _record_day(r)
             if r_day == target_day:
@@ -291,7 +369,7 @@ def find_fims(
 
 def summarize_huc_availability(records: List[Dict[str, Any]], huc8: str) -> str:
     huc8 = str(huc8).strip()
-    recs = [r for r in records if str(r.get("huc8", "")).strip() == huc8]
+    recs = [r for r in records if huc8 in set(_record_huc8_list(r))]
     if not recs:
         return f"No benchmark FIMs on HUC {huc8}."
 
@@ -302,11 +380,12 @@ def summarize_huc_availability(records: List[Dict[str, Any]], huc8: str) -> str:
             with_raw.append(r)
 
     if not with_raw:
-        rps = sorted(
-            {str(r.get("return_period")) for r in recs if r.get("return_period")}
-        )
+        rps = sorted({str(r.get("return_period")) for r in recs if r.get("return_period")})
         if rps:
-            return f"No real flood-based benchmarks on HUC {huc8}. Only synthetic return periods available: {', '.join(rps)}."
+            return (
+                f"No real flood-based benchmarks on HUC {huc8}. "
+                f"Only synthetic return periods available: {', '.join(rps)}."
+            )
         return f"No real flood-based benchmarks on HUC {huc8}."
 
     day_set, hour_set = set(), set()
@@ -375,13 +454,16 @@ def download_fim_assets(record: Dict[str, Any], dest_dir: str) -> Dict[str, Any]
 def build_huc_event_dict(records: List[Dict[str, Any]]) -> Dict[str, List[str]]:
     d: Dict[str, List[str]] = {}
     for r in records:
-        huc = str(r.get("huc8"))
+        hucs = _record_huc8_list(r)
+        if not hucs:
+            continue
         day = _record_day(r)
         if not day:
             continue
         hour = _record_hour_or_none(r)
         ts = day.isoformat() if hour is None else f"{day:%Y-%m-%d} {hour:02d}:00:00"
-        d.setdefault(huc, []).append(ts)
+        for huc in hucs:
+            d.setdefault(str(huc), []).append(ts)
     for k in list(d.keys()):
         d[k] = sorted(set(d[k]))
     return d
@@ -412,7 +494,7 @@ def bmFIMFindandDownload(
     catalog = load_catalog_core()
     records = catalog.get("records", [])
 
-    # STRICT set (used for status/logic/downloads)
+    # STRICT set
     strict_matches = find_fims(
         records,
         huc8=HUC8,
@@ -423,7 +505,7 @@ def bmFIMFindandDownload(
         relaxed_for_print=False,
     )
 
-    # RELAXED set (printing only)
+    # RELAXED set
     relaxed_records_for_print = find_fims(
         records,
         huc8=HUC8,
diff --git a/tests/test_evalutionhandfim.py b/tests/test_evalutionhandfim.py
@@ -5,13 +5,13 @@
 # Look for the benchmark FIM data for the HUC8 and event date
 def test_bm_fimlookup():
     out = fm.fim_lookup(
-        HUCID="07070005",
-        date_input="2019-05-30 23:00:00",  # If user is more specific then they can pass date (with hour if known) along with HUC8
-        run_handfim=True,  # It will look for the OWP HAND FIM for the mentioned HUC8 and date; if not found it will download and generate the OWP HAND FIM
-        file_name="S1A_9_6m_20190530T23573_910244W430506N_BM.tif",  # If user passes a specific filename, it will download that and assume it is the right benchmark
-        out_dir="./FIMserv/test",  # If user wants to save the benchmark FIM in a specific directory
-        # start_date="2024-06-20",  # If user is not sure of the exact date then they can pass a range of dates
+        HUCID="10240011",
+        # date_input="2019-06-15",  # If user is more specific then they can pass date (with hour if known) along with HUC8
+        # start_date="2017-06-20", #If user is not sure of the exact date then they can pass a range of dates
         # end_date="2024-06-25",
+        run_handfim=True,  #It will look for the owp hand fim for the mentioned HUC8 and date, if not found it will download and generate the owp hand fim; default is False
+        file_name= "S1A_9_2m_20190615T00210_945738W393944N_BM.tif", #If user pass the specific filename, it will download that and assume that this is the right benchmark, else based on exact match of date it will look for the benchmark
+        out_dir= '../test_FIMeval', #Required if user wants to download the benchmark FIM data
     )
     print(out)
 
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -34,7 +34,7 @@ dependencies = [`
`34`	`34`	`"notebook>=6.5.7",`
`35`	`35`	`"geocube<=0.7.1",`
`36`	`36`	`"geopandas>=0.14.3",`
`37`		`- "fimeval>=0.1.56"`
	`37`	`+ "fimeval>=0.1.59"`
`38`	`38`	`]`
`39`	`39`
`40`	`40`	`[project.optional-dependencies]`