From fcfd340a2473955cee1162eec8b1736ba041557f Mon Sep 17 00:00:00 2001 From: Jaychaware Date: Mon, 28 Jul 2025 13:38:29 +0530 Subject: [PATCH 01/11] Initial changes --- db_init.sql | 49 +- server/app/db/dbmodels.py | 27 +- server/app/db/dbqueries.py | 117 ++++- .../db/migrations/001_create_label_index.sql | 124 +++++ server/app/main.py | 263 +++++++++- server/app/utils.py | 492 ++++++++++++++++++ ui/src/components/ArtifactPTable/index.jsx | 28 +- 7 files changed, 1089 insertions(+), 11 deletions(-) create mode 100644 server/app/db/migrations/001_create_label_index.sql diff --git a/db_init.sql b/db_init.sql index cb67ee88b..63e8b9118 100644 --- a/db_init.sql +++ b/db_init.sql @@ -1,7 +1,46 @@ -CREATE TABLE IF NOT EXISTS registered_servers( - id SERIAL, - server_name VARCHAR(255) NOT NULL, - host_info VARCHAR(255) NOT NULL PRIMARY KEY, - last_sync_time BIGINT DEFAULT NULL +-- Label indexing table for full-text search of CSV label content +CREATE TABLE IF NOT EXISTS label_index ( + id SERIAL PRIMARY KEY, + file_name VARCHAR(255) NOT NULL, + file_path TEXT NOT NULL, + row_index INTEGER NOT NULL, + content TEXT NOT NULL, + -- PostgreSQL automatically uses EXTENDED strategy: + -- - Allows compression AND out-of-line storage + -- - Compresses first, then moves to TOAST if still large + metadata JSONB, + search_vector TSVECTOR, + created_at BIGINT NOT NULL, + updated_at BIGINT NOT NULL, + + -- Unique constraint to prevent duplicate entries + CONSTRAINT unique_label_file_row UNIQUE (file_name, row_index) ); +-- Create indexes for performance +CREATE INDEX IF NOT EXISTS idx_label_index_file_name ON label_index(file_name); +CREATE INDEX IF NOT EXISTS idx_label_index_created_at ON label_index(created_at); + +-- Create GIN index for full-text search (most important for performance) +CREATE INDEX IF NOT EXISTS idx_label_index_search_vector ON label_index USING gin(search_vector); + +-- Create a trigger to automatically update the search_vector column +CREATE OR REPLACE FUNCTION update_label_search_vector() RETURNS trigger AS $$ +BEGIN + NEW.search_vector := to_tsvector('english', COALESCE(NEW.content, '')); + NEW.updated_at := EXTRACT(EPOCH FROM NOW()) * 1000; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Create the trigger (only if table exists) +DO $$ +BEGIN + IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'label_index') THEN + DROP TRIGGER IF EXISTS trigger_update_label_search_vector ON label_index; + CREATE TRIGGER trigger_update_label_search_vector + BEFORE INSERT OR UPDATE ON label_index + FOR EACH ROW EXECUTE FUNCTION update_label_search_vector(); + END IF; +END $$; + diff --git a/server/app/db/dbmodels.py b/server/app/db/dbmodels.py index b11372746..bd833e134 100644 --- a/server/app/db/dbmodels.py +++ b/server/app/db/dbmodels.py @@ -11,8 +11,10 @@ Index, UniqueConstraint, MetaData, - SmallInteger + SmallInteger, + JSON ) +from sqlalchemy.dialects.postgresql import TSVECTOR metadata = MetaData() @@ -185,4 +187,27 @@ # Unique Constraint UniqueConstraint("artifact_id", "execution_id", "type", name="uniqueevent") +) + + +# Label indexing table for PostgreSQL full-text search +label_index = Table( + "label_index", metadata, + Column("id", Integer, primary_key=True, nullable=False), + Column("file_name", String(255), nullable=False), + Column("file_path", Text, nullable=False), + Column("row_index", Integer, nullable=False), + Column("content", Text, nullable=False), + Column("metadata", JSON), + Column("search_vector", TSVECTOR), + Column("created_at", BigInteger, 
nullable=False), + Column("updated_at", BigInteger, nullable=False), + + # Indexes for performance + Index("idx_label_index_file_name", "file_name"), + Index("idx_label_index_search_vector", "search_vector", postgresql_using="gin"), + Index("idx_label_index_created_at", "created_at"), + + # Unique constraint to prevent duplicate entries + UniqueConstraint("file_name", "row_index", name="unique_label_file_row") ) \ No newline at end of file diff --git a/server/app/db/dbqueries.py b/server/app/db/dbqueries.py index 792dd835b..eb7c68dd9 100644 --- a/server/app/db/dbqueries.py +++ b/server/app/db/dbqueries.py @@ -1,7 +1,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from fastapi import Depends from server.app.db.dbconfig import get_db -from sqlalchemy import select, func, text, String, bindparam, case, distinct +from sqlalchemy import select, func, String, distinct, text from server.app.db.dbmodels import ( artifact, artifactproperty, @@ -13,6 +13,7 @@ execution, executionproperty, event, + label_index, ) async def register_server_details(db: AsyncSession, server_name: str, host_info: str): @@ -312,3 +313,117 @@ async def fetch_executions( "total_items": total_record, "items": [dict(row) for row in rows] } + + +async def search_labels_in_artifacts(db: AsyncSession, filter_value: str, pipeline_name: str = None, limit: int = 50): + """ + Search for artifacts that have labels matching the filter value. + This function searches within label CSV content using PostgreSQL full-text search. + Works with or without explicit labels_uri properties. + """ + try: + # First, try to search labels directly and return any matching content + # This approach works even if artifacts don't have labels_uri properties + base_query = """ + SELECT DISTINCT + li.file_name as label_file, + li.content as matching_content, + li.metadata as label_metadata, + li.row_index, + ts_rank(li.search_vector, plainto_tsquery('english', :filter_value)) as relevance_score + FROM label_index li + WHERE li.search_vector @@ plainto_tsquery('english', :filter_value) + ORDER BY relevance_score DESC + LIMIT :limit + """ + + params = {"filter_value": filter_value, "limit": limit} + + result = await db.execute(text(base_query), params) + label_results = result.mappings().all() + + # Convert label results to a format compatible with artifact results + converted_results = [] + for label_result in label_results: + converted_results.append({ + 'artifact_id': None, # No specific artifact ID + 'name': f"Label Match: {label_result['label_file']}", + 'uri': f"label://{label_result['label_file']}#{label_result['row_index']}", + 'type_id': None, + 'create_time_since_epoch': None, + 'last_update_time_since_epoch': None, + 'label_file': label_result['label_file'], + 'matching_content': label_result['matching_content'], + 'label_metadata': label_result['label_metadata'], + 'relevance_score': float(label_result['relevance_score']) + }) + + return converted_results + + except Exception as e: + print(f"Label search error: {e}") + return [] + + +async def fetch_artifacts_with_label_search( + db: AsyncSession, + pipeline_name: str, + artifact_type: str, + filter_value: str, + active_page: int = 1, + page_size: int = 5, + sort_column: str = "name", + sort_order: str = "ASC" +): + """ + Enhanced artifact search that includes label content search. + This combines regular artifact search with label content search. 
+ """ + # First, get regular artifact search results + artifact_results = await fetch_artifacts( + db, pipeline_name, artifact_type, filter_value, + active_page, page_size, sort_column, sort_order + ) + + # If filter_value is provided, also search in labels + if filter_value and filter_value.strip(): + try: + label_results = await search_labels_in_artifacts(db, filter_value, pipeline_name, 50) + + # Add label search results as separate items (since they don't correspond to existing artifacts) + if active_page == 1 and label_results: # Only add on first page + added_count = 0 + max_additional = max(0, page_size - len(artifact_results['items'])) + + for label_result in label_results: + if added_count < max_additional: + # Create a pseudo-artifact item from label search result + # Make sure all fields have non-null values that frontend expects + enhanced_item = { + 'artifact_id': f"label_{label_result['label_file']}_{label_result.get('row_index', 0)}", + 'name': f"{label_result['label_file']} (Row {label_result.get('row_index', 0) + 1})", + 'uri': label_result.get('uri', f"label://{label_result['label_file']}"), + 'type_id': 'Label', + 'create_time_since_epoch': 0, # Use 0 instead of None + 'last_update_time_since_epoch': 0, # Use 0 instead of None + 'artifact_properties': [], # Empty array instead of None + 'execution': '', # Empty string instead of None + 'label_match': True, + 'matching_label_content': label_result['matching_content'], + 'label_file': label_result['label_file'], + 'label_metadata': label_result.get('label_metadata', '{}'), + 'relevance_score': float(label_result['relevance_score']) + } + artifact_results['items'].append(enhanced_item) + added_count += 1 + + # Update total count if we added items + if added_count > 0: + artifact_results['total_items'] += added_count + print(f"Added {added_count} label search results to artifacts") + + except Exception as e: + print(f"Error in label search integration: {e}") + # Continue with regular results if label search fails + + return artifact_results diff --git a/server/app/db/migrations/001_create_label_index.sql b/server/app/db/migrations/001_create_label_index.sql new file mode 100644 index 000000000..fcb302bea --- /dev/null +++ b/server/app/db/migrations/001_create_label_index.sql @@ -0,0 +1,124 @@ +-- Migration: Create label_index table for full-text search of CSV label content +-- This migration adds the label_index table to support searching CSV label content +-- through the existing artifact search functionality + +-- Create the label_index table +CREATE TABLE IF NOT EXISTS label_index ( + id SERIAL PRIMARY KEY, + file_name VARCHAR(255) NOT NULL, + file_path TEXT NOT NULL, + row_index INTEGER NOT NULL, + content TEXT NOT NULL, + metadata JSONB, + search_vector TSVECTOR, + created_at BIGINT NOT NULL, + updated_at BIGINT NOT NULL, + + -- Unique constraint to prevent duplicate entries + CONSTRAINT unique_label_file_row UNIQUE (file_name, row_index) +); + +-- Create indexes for performance +CREATE INDEX IF NOT EXISTS idx_label_index_file_name ON label_index(file_name); +CREATE INDEX IF NOT EXISTS idx_label_index_created_at ON label_index(created_at); + +-- Create GIN index for full-text search (most important for performance) +CREATE INDEX IF NOT EXISTS idx_label_index_search_vector ON label_index USING gin(search_vector); + +-- Create a trigger to automatically update the search_vector column +-- This trigger will populate the tsvector column whenever content is inserted or updated +CREATE OR REPLACE FUNCTION 
update_label_search_vector() RETURNS trigger AS $$ +BEGIN + NEW.search_vector := to_tsvector('english', COALESCE(NEW.content, '')); + NEW.updated_at := EXTRACT(EPOCH FROM NOW()) * 1000; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Create the trigger +DROP TRIGGER IF EXISTS trigger_update_label_search_vector ON label_index; +CREATE TRIGGER trigger_update_label_search_vector + BEFORE INSERT OR UPDATE ON label_index + FOR EACH ROW EXECUTE FUNCTION update_label_search_vector(); + +-- Create a function to search labels with ranking +CREATE OR REPLACE FUNCTION search_labels(search_query TEXT, result_limit INTEGER DEFAULT 10) +RETURNS TABLE ( + file_name VARCHAR(255), + row_index INTEGER, + content TEXT, + metadata JSONB, + relevance_score REAL +) AS $$ +BEGIN + RETURN QUERY + SELECT + li.file_name, + li.row_index, + li.content, + li.metadata, + ts_rank(li.search_vector, plainto_tsquery('english', search_query))::REAL as relevance_score + FROM label_index li + WHERE li.search_vector @@ plainto_tsquery('english', search_query) + ORDER BY relevance_score DESC + LIMIT result_limit; +END; +$$ LANGUAGE plpgsql; + +-- Create a function to find artifacts with matching labels +CREATE OR REPLACE FUNCTION find_artifacts_with_label_matches(search_query TEXT, result_limit INTEGER DEFAULT 10) +RETURNS TABLE ( + artifact_id INTEGER, + artifact_name VARCHAR(255), + artifact_uri TEXT, + label_file VARCHAR(255), + matching_content TEXT, + relevance_score REAL +) AS $$ +BEGIN + RETURN QUERY + SELECT DISTINCT + a.id as artifact_id, + a.name as artifact_name, + a.uri as artifact_uri, + li.file_name as label_file, + li.content as matching_content, + ts_rank(li.search_vector, plainto_tsquery('english', search_query))::REAL as relevance_score + FROM artifact a + JOIN artifactproperty ap ON a.id = ap.artifact_id + JOIN label_index li ON SPLIT_PART(ap.string_value, ':', 1) = li.file_name + WHERE ap.name = 'labels_uri' + AND li.search_vector @@ plainto_tsquery('english', search_query) + ORDER BY relevance_score DESC + LIMIT result_limit; +END; +$$ LANGUAGE plpgsql; + +-- Add comments for documentation +COMMENT ON TABLE label_index IS 'Stores indexed CSV label content for full-text search integration with artifact search'; +COMMENT ON COLUMN label_index.file_name IS 'Name of the CSV label file (without path)'; +COMMENT ON COLUMN label_index.file_path IS 'Full path to the CSV label file'; +COMMENT ON COLUMN label_index.row_index IS 'Row number within the CSV file (0-based)'; +COMMENT ON COLUMN label_index.content IS 'Concatenated content of the CSV row for search'; +COMMENT ON COLUMN label_index.metadata IS 'Additional metadata about the label entry (JSON format)'; +COMMENT ON COLUMN label_index.search_vector IS 'PostgreSQL tsvector for full-text search'; +COMMENT ON COLUMN label_index.created_at IS 'Timestamp when the record was created (milliseconds since epoch)'; +COMMENT ON COLUMN label_index.updated_at IS 'Timestamp when the record was last updated (milliseconds since epoch)'; + +-- Grant permissions (adjust as needed for your setup) +-- GRANT SELECT, INSERT, UPDATE, DELETE ON label_index TO your_app_user; +-- GRANT USAGE, SELECT ON SEQUENCE label_index_id_seq TO your_app_user; + +-- Example usage queries (for testing): +-- +-- 1. Search for labels containing "data": +-- SELECT * FROM search_labels('data', 5); +-- +-- 2. Find artifacts with labels containing "training": +-- SELECT * FROM find_artifacts_with_label_matches('training', 10); +-- +-- 3. 
Manual search with custom ranking: +-- SELECT file_name, content, ts_rank(search_vector, plainto_tsquery('english', 'your_search_term')) as rank +-- FROM label_index +-- WHERE search_vector @@ plainto_tsquery('english', 'your_search_term') +-- ORDER BY rank DESC; diff --git a/server/app/main.py b/server/app/main.py index 848e84dc7..298cc1dbb 100644 --- a/server/app/main.py +++ b/server/app/main.py @@ -2,6 +2,7 @@ import io import time import zipfile +import csv from fastapi import FastAPI, Request, HTTPException, Query, UploadFile, File, Depends from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse, PlainTextResponse, StreamingResponse @@ -34,7 +35,9 @@ register_server_details, get_registered_server_details, get_sync_status, - update_sync_status + update_sync_status, + fetch_artifacts_with_label_search, + search_labels_in_artifacts ) from pathlib import Path import os @@ -49,12 +52,52 @@ ExecutionRequest, ) import httpx +import logging from jsonpath_ng.ext import parse from cmflib.cmf_federation import update_mlmd +from server.app.db.dbconfig import DATABASE_URL, async_session +from server.app.utils import ( + auto_reindex_if_needed, + search_labels, + get_label_stats, + index_csv_labels, + index_csv_labels_with_hash +) server_store_path = "/cmf-server/data/postgres_data" query = CmfQuery(is_server=True) +async def initialize_label_search(): + """Initialize label search functionality on startup""" + try: + logger = logging.getLogger(__name__) + logger.info("Initializing label search functionality...") + + # Check if labels directory exists and has CSV files + labels_dir = Path("/cmf-server/data/labels") + if labels_dir.exists(): + csv_files = list(labels_dir.glob("*.csv")) + if csv_files: + logger.info(f"Found {len(csv_files)} CSV label files") + + # Check if we need to index (if no records exist) + stats = await get_label_stats(DATABASE_URL) + if stats['total_records'] == 0: + logger.info("Indexing label files...") + result = await index_csv_labels(DATABASE_URL) + logger.info(f"Indexed {result.get('total_records', 0)} records from {result.get('total_files', 0)} files") + else: + logger.info(f"Label search ready: {stats['total_records']} records indexed") + else: + logger.info("No CSV label files found in /cmf-server/data/labels") + else: + logger.info("Labels directory not found, label search will be available when files are added") + + except Exception as e: + logger = logging.getLogger(__name__) + logger.warning(f"Label search initialization failed: {e}") + logger.info("Label search will be available once configured properly") + #global variables dict_of_art_ids = {} dict_of_exe_ids = {} @@ -71,6 +114,10 @@ async def lifespan(app: FastAPI): dict_of_exe_ids = await async_api(get_all_exe_ids, query) # loaded artifact ids into memory dict_of_art_ids = await async_api(get_all_artifact_ids, query, dict_of_exe_ids) + + # Initialize label search functionality + await initialize_label_search() + yield dict_of_art_ids.clear() dict_of_exe_ids.clear() @@ -133,6 +180,16 @@ async def mlmd_push(info: MLMDPushRequest): # async function await update_global_exe_dict(pipeline_name) await update_global_art_dict(pipeline_name) + + # Auto-reindex labels after artifact push + try: + logger = logging.getLogger(__name__) + reindex_result = await auto_reindex_if_needed(DATABASE_URL) + if reindex_result['status'] == 'reindexed': + logger.info(f"Auto-reindexed after artifact push: {reindex_result['message']}") + except Exception as e: + logger = logging.getLogger(__name__) 
+ logger.warning(f"Auto-reindex after push failed: {e}") finally: lock_counts[pipeline_name] -= 1 # Decrement the reference count after lock released if lock_counts[pipeline_name] == 0: #if lock_counts of pipeline is zero means lock is release from it @@ -171,6 +228,7 @@ async def get_artifacts( query_params: ArtifactRequest = Depends(), db: AsyncSession = Depends(get_db) ): + start_time = time.time() filter_value = query_params.filter_value active_page = query_params.active_page @@ -178,8 +236,38 @@ async def get_artifacts( sort_order = query_params.sort_order record_per_page = query_params.record_per_page - """Retrieve paginated artifacts with filtering, sorting, and full-text search.""" - return await fetch_artifacts(db, pipeline_name, artifact_type, filter_value, active_page, record_per_page, sort_field, sort_order) + """Retrieve paginated artifacts with filtering, sorting, and full-text search including label content.""" + + # Auto-reindex labels if needed (only on first page to avoid performance issues) + reindex_time = 0 + if active_page == 1: + try: + logger = logging.getLogger(__name__) + reindex_start = time.time() + reindex_result = await auto_reindex_if_needed(DATABASE_URL) + reindex_time = time.time() - reindex_start + if reindex_result['status'] == 'reindexed': + logger.info(f"{reindex_result['message']}") + except Exception as e: + logger = logging.getLogger(__name__) + logger.warning(f"Auto-reindex failed: {e}") + + query_start = time.time() + result = await fetch_artifacts_with_label_search(db, pipeline_name, artifact_type, filter_value, active_page, record_per_page, sort_field, sort_order) + query_time = time.time() - query_start + + total_time = time.time() - start_time + + # Add timing information to the response + result["timing"] = { + "total_time_ms": round(total_time * 1000, 2), + "query_time_ms": round(query_time * 1000, 2), + "reindex_time_ms": round(reindex_time * 1000, 2) if reindex_time > 0 else 0, + "filter_value": filter_value, + "has_label_search": bool(filter_value and filter_value.strip()) + } + + return result # api to display executions available in mlmd file[from postgres] @@ -796,3 +884,172 @@ async def artifact_lineage(request: Request, pipeline_name: str): else: return None """ + +# Label Search Management Endpoints +@app.post("/api/labels/reindex") +async def reindex_labels(): + """Reindex all label files - useful when CSV files are updated""" + try: + result = await index_csv_labels_with_hash(DATABASE_URL) + + return { + "status": "success", + "message": f"Reindexed {result['total_files']} files with {result.get('total_records', 0)} records", + "details": result + } + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Reindexing failed: {str(e)}") + +@app.get("/api/labels/status") +async def get_label_search_status(): + """Get the current status of label search functionality""" + try: + # Check labels directory + labels_dir = Path("/cmf-server/data/labels") + csv_files = list(labels_dir.glob("*.csv")) if labels_dir.exists() else [] + + # Check database + stats = await get_label_stats(DATABASE_URL) + + return { + "status": "active" if stats['total_records'] > 0 else "inactive", + "labels_directory": str(labels_dir), + "csv_files_found": len(csv_files), + "indexed_files": stats['total_files'], + "indexed_records": stats['total_records'], + "files": [f.name for f in csv_files] + } + + except Exception as e: + return { + "status": "error", + "error": str(e) + } + +@app.post("/api/labels/test") +async def test_label_search(): + 
"""Test label search functionality with sample data""" + try: + # Create sample test data + sample_data = [ + {"id": 1, "category": "training", "type": "dataset", "accuracy": 0.95}, + {"id": 2, "category": "validation", "type": "dataset", "accuracy": 0.87}, + {"id": 3, "category": "test", "type": "model", "performance": "high"}, + ] + + # Create temporary CSV file + labels_dir = Path("/cmf-server/data/labels") + labels_dir.mkdir(parents=True, exist_ok=True) + + test_file = labels_dir / "test_labels.csv" + with open(test_file, 'w', newline='') as csvfile: + fieldnames = sample_data[0].keys() + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(sample_data) + + # Index the test file + result = await index_csv_labels(DATABASE_URL) + + # Test search + search_results = await search_labels(DATABASE_URL, "training", 5) + + return { + "status": "success", + "message": "Label search test completed successfully", + "indexing_result": result, + "search_results": search_results, + "test_file": str(test_file) + } + + except Exception as e: + return { + "status": "error", + "message": f"Label search test failed: {str(e)}" + } + +@app.get("/api/labels/search") +async def search_label_content(query: str = Query(..., description="Search query"), limit: int = Query(10, description="Maximum results")): + """Search label content using PostgreSQL full-text search""" + start_time = time.time() + + try: + + search_start = time.time() + results = await search_labels(DATABASE_URL, query, limit) + search_time = time.time() - search_start + + total_time = time.time() - start_time + + return { + "status": "success", + "query": query, + "results": results, + "total_results": len(results), + "timing": { + "total_time_ms": round(total_time * 1000, 2), + "search_time_ms": round(search_time * 1000, 2), + "query": query, + "limit": limit + } + } + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}") + +@app.get("/api/labels/search-direct") +async def search_labels_direct(query: str = Query(..., description="Search query"), limit: int = Query(10, description="Maximum results")): + """Direct label search that returns label matches as pseudo-artifacts""" + start_time = time.time() + + try: + + db_start = time.time() + async with async_session() as db: + search_start = time.time() + results = await search_labels_in_artifacts(db, query, None, limit) + search_time = time.time() - search_start + db_time = time.time() - db_start + + total_time = time.time() - start_time + + return { + "status": "success", + "query": query, + "results": results, + "total_results": len(results), + "timing": { + "total_time_ms": round(total_time * 1000, 2), + "db_time_ms": round(db_time * 1000, 2), + "search_time_ms": round(search_time * 1000, 2), + "query": query, + "limit": limit, + "uses_sqlalchemy_core": True + } + } + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Direct label search failed: {str(e)}") + +@app.get("/api/labels/health") +async def label_search_health(): + """Health check for label search functionality""" + try: + stats = await get_label_stats(DATABASE_URL) + + return { + "status": "healthy" if stats['status'] == 'success' else "unhealthy", + "service": "label-search-postgres", + "version": "1.0.0", + "database": "postgresql", + "indexed_files": stats['total_files'], + "indexed_records": stats['total_records'] + } + + except Exception as e: + return { + "status": "unhealthy", + "service": "label-search-postgres", 
+ "error": str(e) + } diff --git a/server/app/utils.py b/server/app/utils.py index c85d4a69d..bea2997e9 100644 --- a/server/app/utils.py +++ b/server/app/utils.py @@ -1,3 +1,23 @@ +# Standard library imports +import asyncio +import csv +import json +import time +import os +import hashlib +import re +import logging +from pathlib import Path +from typing import List, Dict, Any, Optional + +# Third-party imports +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy import text + +# Set up logger +logger = logging.getLogger(__name__) + + def modify_arti_name(arti_name, type): # artifact_name optimization based on artifact type.["Dataset","Model","Metrics"] try: @@ -64,4 +84,476 @@ def modify_arti_name(arti_name, type): print(f"Error parsing artifact name: {e}") name = arti_name # Fallback to the original arti_name in case of error return name + + +# Label Search Utility Functions + +async def index_csv_labels(database_url: str, labels_directory: str = "/cmf-server/data/labels") -> Dict[str, Any]: + """ + Index CSV label files into PostgreSQL for full-text search + """ + try: + + labels_dir = Path(labels_directory) + if not labels_dir.exists(): + return {"status": "error", "message": f"Labels directory not found: {labels_directory}"} + + # Look for both .csv files and files without extension (assuming they're CSV) + csv_files = list(labels_dir.glob("*.csv")) + + # Also include files without extension that might be CSV files + for file_path in labels_dir.iterdir(): + if file_path.is_file() and not file_path.suffix and file_path.name not in [f.stem for f in csv_files]: + # Try to detect if it's a CSV file by reading first few lines + try: + with open(file_path, 'r', encoding='utf-8') as f: + first_line = f.readline().strip() + # Simple heuristic: if it contains commas or common CSV patterns, treat as CSV + if ',' in first_line or any(keyword in first_line.lower() for keyword in ['file', 'name', 'id', 'type', 'size']): + csv_files.append(file_path) + logger.info(f"Detected CSV-like file without extension: {file_path.name}") + except: + pass # Skip files that can't be read + + if not csv_files: + return {"status": "warning", "message": "No CSV files found", "indexed_files": [], "total_files": 0} + + engine = create_async_engine(database_url, echo=False) + indexed_files = [] + total_records = 0 + + async with engine.begin() as conn: + for csv_file in csv_files: + try: + # Clear existing data for this file + await conn.execute( + text("DELETE FROM label_index WHERE file_name = :file_name"), + {"file_name": csv_file.name} + ) + + # Process CSV file + records = [] + with open(csv_file, 'r', encoding='utf-8') as csvfile: + # Detect CSV format with fallback + sample = csvfile.read(1024) + csvfile.seek(0) + + # Try to detect delimiter, with fallbacks + delimiter = ',' # Default fallback + try: + sniffer = csv.Sniffer() + detected_delimiter = sniffer.sniff(sample).delimiter + delimiter = detected_delimiter + except: + # Fallback: try common delimiters + for test_delimiter in [',', '\t', ';', '|']: + if test_delimiter in sample: + delimiter = test_delimiter + break + + reader = csv.DictReader(csvfile, delimiter=delimiter) + + for row_index, row in enumerate(reader): + # Create searchable content + content_parts = [] + metadata = {} + + for key, value in row.items(): + if value and value.strip(): + content_parts.append(f"{key}: {value.strip()}") + metadata[key] = value.strip() + + content = " | ".join(content_parts) + + if content.strip(): + records.append({ + 'file_name': csv_file.name, + 
'file_path': str(csv_file), + 'row_index': row_index, + 'content': content, + 'metadata': json.dumps(metadata), + 'created_at': int(time.time() * 1000), + 'updated_at': int(time.time() * 1000) + }) + + # Insert records + if records: + insert_query = text(""" + INSERT INTO label_index (file_name, file_path, row_index, content, metadata, created_at, updated_at) + VALUES (:file_name, :file_path, :row_index, :content, :metadata, :created_at, :updated_at) + """) + await conn.execute(insert_query, records) + + indexed_files.append({ + 'file_name': csv_file.name, + 'records_indexed': len(records), + 'status': 'success' + }) + total_records += len(records) + logger.info(f"Indexed {len(records)} records from {csv_file.name}") + else: + indexed_files.append({ + 'file_name': csv_file.name, + 'records_indexed': 0, + 'status': 'empty' + }) + + except Exception as e: + logger.error(f"Error indexing {csv_file.name}: {e}") + indexed_files.append({ + 'file_name': csv_file.name, + 'records_indexed': 0, + 'status': 'error', + 'error': str(e) + }) + + await engine.dispose() + + success_count = sum(1 for f in indexed_files if f['status'] == 'success') + return { + 'status': 'success', + 'message': f'Indexed {success_count}/{len(csv_files)} files successfully', + 'indexed_files': indexed_files, + 'total_files': len(csv_files), + 'total_records': total_records + } + + except Exception as e: + logger.error(f"Label indexing failed: {e}") + return {"status": "error", "message": str(e)} + +async def search_labels(database_url: str, query: str, limit: int = 10) -> List[Dict[str, Any]]: + """ + Search indexed labels using PostgreSQL full-text search + """ + try: + + engine = create_async_engine(database_url, echo=False) + + async with engine.begin() as conn: + result = await conn.execute(text(""" + SELECT file_name, row_index, content, metadata, + ts_rank(search_vector, plainto_tsquery('english', :query)) as relevance_score + FROM label_index + WHERE search_vector @@ plainto_tsquery('english', :query) + ORDER BY relevance_score DESC + LIMIT :limit + """), {"query": query, "limit": limit}) + + results = [ + { + "file_name": row[0], + "row_index": row[1], + "content": row[2], + "metadata": json.loads(row[3]) if row[3] else {}, + "relevance_score": float(row[4]) + } + for row in result.fetchall() + ] + + await engine.dispose() + return results + + except Exception as e: + logger.error(f"Label search failed: {e}") + return [] + +async def get_label_stats(database_url: str) -> Dict[str, Any]: + """ + Get statistics about indexed labels + """ + try: + + engine = create_async_engine(database_url, echo=False) + + async with engine.begin() as conn: + # Get total records and files + result = await conn.execute(text(""" + SELECT + COUNT(*) as total_records, + COUNT(DISTINCT file_name) as total_files + FROM label_index + """)) + stats = result.fetchone() + + # Get recent files + result = await conn.execute(text(""" + SELECT file_name, COUNT(*) as record_count, MAX(updated_at) as last_updated + FROM label_index + GROUP BY file_name + ORDER BY last_updated DESC + LIMIT 10 + """)) + recent_files = [ + { + 'file_name': row[0], + 'record_count': row[1], + 'last_updated': row[2] + } + for row in result.fetchall() + ] + + await engine.dispose() + + return { + 'total_records': stats[0] if stats else 0, + 'total_files': stats[1] if stats else 0, + 'recent_files': recent_files, + 'status': 'success' + } + + except Exception as e: + logger.error(f"Error getting label stats: {e}") + return { + 'status': 'error', + 'error': str(e), + 
'total_records': 0, + 'total_files': 0, + 'recent_files': [] + } + +async def auto_reindex_if_needed(database_url: str, labels_directory: str = "/cmf-server/data/labels") -> Dict[str, Any]: + """ + Automatically reindex labels if files have been modified or content has changed. + Handles MD5 hash filenames and detects content changes. + """ + try: + + labels_dir = Path(labels_directory) + if not labels_dir.exists(): + return {"status": "no_directory", "message": "Labels directory does not exist"} + + # Get all potential CSV files (with and without .csv extension) + csv_files = list(labels_dir.glob("*.csv")) + + # Also check files without extension (including MD5 hash names) + for file_path in labels_dir.iterdir(): + if file_path.is_file() and not file_path.suffix: + # Check if it's an MD5 hash (32 hex characters) + is_md5_hash = bool(re.match(r'^[a-f0-9]{32}$', file_path.name)) + + try: + with open(file_path, 'r', encoding='utf-8') as f: + first_line = f.readline().strip() + # For MD5 hash files, be more lenient in detection + if is_md5_hash or ',' in first_line or any(keyword in first_line.lower() for keyword in ['file', 'name', 'id', 'type', 'size']): + csv_files.append(file_path) + if is_md5_hash: + logger.info(f"Detected MD5 hash file: {file_path.name}") + except: + pass + + if not csv_files: + return {"status": "no_files", "message": "No CSV files found"} + + # Check if we need to reindex based on content changes + engine = create_async_engine(database_url, echo=False) + needs_reindex = False + files_to_reindex = [] + + async with engine.begin() as conn: + for csv_file in csv_files: + # Calculate content hash for change detection + try: + with open(csv_file, 'rb') as f: + content_hash = hashlib.md5(f.read()).hexdigest() + except: + continue + + # Check if file exists in index and get its content hash + result = await conn.execute(text(""" + SELECT + MAX(updated_at) as last_indexed, + COUNT(*) as record_count + FROM label_index + WHERE file_name = :file_name + """), {"file_name": csv_file.name}) + + row = result.fetchone() + last_indexed = row[0] if row else None + record_count = row[1] if row else 0 + + # Get stored content hash from metadata if available + result = await conn.execute(text(""" + SELECT metadata->>'content_hash' as stored_hash + FROM label_index + WHERE file_name = :file_name + LIMIT 1 + """), {"file_name": csv_file.name}) + + stored_hash_row = result.fetchone() + stored_hash = stored_hash_row[0] if stored_hash_row else None + + # Check if reindexing is needed + file_mtime = int(os.path.getmtime(csv_file) * 1000) + + if (not last_indexed or + record_count == 0 or + stored_hash != content_hash or + file_mtime > (last_indexed + 60000)): # 1 minute grace period + + needs_reindex = True + files_to_reindex.append({ + 'file': csv_file, + 'reason': 'new' if not last_indexed else 'content_changed' if stored_hash != content_hash else 'modified' + }) + logger.info(f"File {csv_file.name} needs reindexing - {files_to_reindex[-1]['reason']}") + + await engine.dispose() + + if needs_reindex: + logger.info(f"Auto-reindexing {len(files_to_reindex)} files...") + result = await index_csv_labels_with_hash(database_url, labels_directory) + return { + "status": "reindexed", + "message": f"Auto-reindexed {result.get('total_files', 0)} files", + "files_reindexed": [f['file'].name for f in files_to_reindex], + "details": result + } + else: + return { + "status": "up_to_date", + "message": "All label files are up to date" + } + + except Exception as e: + logger.error(f"Auto-reindex failed: 
{e}") + return {"status": "error", "message": str(e)} + +async def index_csv_labels_with_hash(database_url: str, labels_directory: str = "/cmf-server/data/labels") -> Dict[str, Any]: + """ + Enhanced version of index_csv_labels that stores content hashes for change detection + """ + try: + + labels_dir = Path(labels_directory) + if not labels_dir.exists(): + return {"status": "error", "message": f"Labels directory not found: {labels_directory}"} + + # Get all potential CSV files (including MD5 hash files) + csv_files = list(labels_dir.glob("*.csv")) + + # Also include files without extension + for file_path in labels_dir.iterdir(): + if file_path.is_file() and not file_path.suffix: + is_md5_hash = bool(re.match(r'^[a-f0-9]{32}$', file_path.name)) + try: + with open(file_path, 'r', encoding='utf-8') as f: + first_line = f.readline().strip() + if is_md5_hash or ',' in first_line or any(keyword in first_line.lower() for keyword in ['file', 'name', 'id', 'type', 'size']): + csv_files.append(file_path) + except: + pass + + if not csv_files: + return {"status": "warning", "message": "No CSV files found", "indexed_files": [], "total_files": 0} + + engine = create_async_engine(database_url, echo=False) + indexed_files = [] + total_records = 0 + + async with engine.begin() as conn: + for csv_file in csv_files: + try: + # Calculate content hash + with open(csv_file, 'rb') as f: + content_hash = hashlib.md5(f.read()).hexdigest() + + # Clear existing data for this file + await conn.execute( + text("DELETE FROM label_index WHERE file_name = :file_name"), + {"file_name": csv_file.name} + ) + + # Process CSV file + records = [] + with open(csv_file, 'r', encoding='utf-8') as csvfile: + # Detect CSV format with fallback + sample = csvfile.read(1024) + csvfile.seek(0) + + # Try to detect delimiter, with fallbacks + delimiter = ',' # Default fallback + try: + sniffer = csv.Sniffer() + detected_delimiter = sniffer.sniff(sample).delimiter + delimiter = detected_delimiter + except: + # Fallback: try common delimiters + for test_delimiter in [',', '\t', ';', '|']: + if test_delimiter in sample: + delimiter = test_delimiter + break + + reader = csv.DictReader(csvfile, delimiter=delimiter) + + for row_index, row in enumerate(reader): + # Create searchable content + content_parts = [] + metadata = {"content_hash": content_hash} # Store content hash + + for key, value in row.items(): + if value and value.strip(): + content_parts.append(f"{key}: {value.strip()}") + metadata[key] = value.strip() + + content = " | ".join(content_parts) + + if content.strip(): + records.append({ + 'file_name': csv_file.name, + 'file_path': str(csv_file), + 'row_index': row_index, + 'content': content, + 'metadata': json.dumps(metadata), + 'created_at': int(time.time() * 1000), + 'updated_at': int(time.time() * 1000) + }) + + # Insert records + if records: + insert_query = text(""" + INSERT INTO label_index (file_name, file_path, row_index, content, metadata, created_at, updated_at) + VALUES (:file_name, :file_path, :row_index, :content, :metadata, :created_at, :updated_at) + """) + await conn.execute(insert_query, records) + + indexed_files.append({ + 'file_name': csv_file.name, + 'records_indexed': len(records), + 'content_hash': content_hash, + 'status': 'success' + }) + total_records += len(records) + logger.info(f"Indexed {len(records)} records from {csv_file.name} (hash: {content_hash[:8]}...)") + else: + indexed_files.append({ + 'file_name': csv_file.name, + 'records_indexed': 0, + 'status': 'empty' + }) + + except Exception as 
e: + logger.error(f"Error indexing {csv_file.name}: {e}") + indexed_files.append({ + 'file_name': csv_file.name, + 'records_indexed': 0, + 'status': 'error', + 'error': str(e) + }) + + await engine.dispose() + + success_count = sum(1 for f in indexed_files if f['status'] == 'success') + return { + 'status': 'success', + 'message': f'Indexed {success_count}/{len(csv_files)} files successfully', + 'indexed_files': indexed_files, + 'total_files': len(csv_files), + 'total_records': total_records + } + + except Exception as e: + logger.error(f"Label indexing failed: {e}") + return {"status": "error", "message": str(e)} \ No newline at end of file diff --git a/ui/src/components/ArtifactPTable/index.jsx b/ui/src/components/ArtifactPTable/index.jsx index 00d098ce4..45d19da14 100644 --- a/ui/src/components/ArtifactPTable/index.jsx +++ b/ui/src/components/ArtifactPTable/index.jsx @@ -145,6 +145,9 @@ const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, {artifactType === "Dataset" && ( LABEL )} + {artifactType === "Label" && ( + LABEL + )} URI URL GIT REPO @@ -193,7 +196,7 @@ const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, {(getPropertyValue(artifact.artifact_properties, "labels_uri") || "") .split(",") .map((label_name) => label_name.trim()) - .filter((label_name) => label_name.length > 0) // Optional: skip empty strings + .filter((label_name) => label_name.length > 0) .map((label_name) => (
)} + {artifactType === "Label" && ( + +
+ { + e.preventDefault(); + getLabelData(artifact.name.split(":")[1] || artifact.name); + setShowPopup(true); + }} + > + {artifact.name} + + {showPopup && ( + + )} +
+ + )} From 68b1c1888421b15797654d024c6c85dd6687a843 Mon Sep 17 00:00:00 2001 From: Jaychaware Date: Thu, 31 Jul 2025 12:49:47 +0530 Subject: [PATCH 02/11] Changes for label content search with highlight --- db_init.sql | 7 + server/app/main.py | 149 +++++--- ui/src/client.js | 23 ++ ui/src/components/ArtifactPTable/index.jsx | 42 +-- ui/src/pages/artifacts_postgres/index.css | 142 +++++++ ui/src/pages/artifacts_postgres/index.jsx | 409 +++++++++++++++++++-- 6 files changed, 681 insertions(+), 91 deletions(-) diff --git a/db_init.sql b/db_init.sql index 63e8b9118..34d7a1db3 100644 --- a/db_init.sql +++ b/db_init.sql @@ -1,3 +1,10 @@ +CREATE TABLE IF NOT EXISTS registered_servers( + id SERIAL, + server_name VARCHAR(255) NOT NULL, + host_info VARCHAR(255) NOT NULL PRIMARY KEY, + last_sync_time BIGINT DEFAULT NULL +); + -- Label indexing table for full-text search of CSV label content CREATE TABLE IF NOT EXISTS label_index ( id SERIAL PRIMARY KEY, diff --git a/server/app/main.py b/server/app/main.py index 298cc1dbb..e5efdf3f1 100644 --- a/server/app/main.py +++ b/server/app/main.py @@ -228,7 +228,6 @@ async def get_artifacts( query_params: ArtifactRequest = Depends(), db: AsyncSession = Depends(get_db) ): - start_time = time.time() filter_value = query_params.filter_value active_page = query_params.active_page @@ -239,35 +238,121 @@ async def get_artifacts( """Retrieve paginated artifacts with filtering, sorting, and full-text search including label content.""" # Auto-reindex labels if needed (only on first page to avoid performance issues) - reindex_time = 0 if active_page == 1: try: logger = logging.getLogger(__name__) - reindex_start = time.time() reindex_result = await auto_reindex_if_needed(DATABASE_URL) - reindex_time = time.time() - reindex_start if reindex_result['status'] == 'reindexed': logger.info(f"{reindex_result['message']}") except Exception as e: logger = logging.getLogger(__name__) logger.warning(f"Auto-reindex failed: {e}") - query_start = time.time() result = await fetch_artifacts_with_label_search(db, pipeline_name, artifact_type, filter_value, active_page, record_per_page, sort_field, sort_order) - query_time = time.time() - query_start - total_time = time.time() - start_time + return result - # Add timing information to the response - result["timing"] = { - "total_time_ms": round(total_time * 1000, 2), - "query_time_ms": round(query_time * 1000, 2), - "reindex_time_ms": round(reindex_time * 1000, 2) if reindex_time > 0 else 0, - "filter_value": filter_value, - "has_label_search": bool(filter_value and filter_value.strip()) - } - return result +@app.get("/artifacts/{pipeline_name}/Label/search") +async def search_label_artifacts( + pipeline_name: str, + content_filter: str = Query(..., description="Search term to find in label content"), + sort_order: str = Query("asc", description="Sort order (asc or desc)"), + active_page: int = Query(1, gt=0, description="Page number"), + record_per_page: int = Query(5, gt=0, description="Number of records per page"), + db: AsyncSession = Depends(get_db) +): + """ + Search for label artifacts that contain the specified content in their CSV files. + Returns clean label artifacts with proper structure, not content search results. 
+ """ + + try: + # Step 1: Search for labels containing the content + label_search_results = await search_labels_in_artifacts(db, content_filter, pipeline_name, 100) + + if not label_search_results: + return { + "total_items": 0, + "items": [], + "search_metadata": { + "search_term": content_filter, + "search_type": "content_filter" + } + } + + # Step 2: Extract unique label names + unique_labels = set() + label_metadata = {} + + for result in label_search_results: + label_name = result['label_file'] + unique_labels.add(label_name) + + # Store metadata for each label + if label_name not in label_metadata: + label_metadata[label_name] = { + 'matching_rows': 0, + 'total_rows': result.get('total_rows', 0), + 'relevance_score': result.get('relevance_score', 0.0) + } + label_metadata[label_name]['matching_rows'] += 1 + + # Step 3: Fetch actual label artifacts + all_artifacts_result = await fetch_artifacts( + db, pipeline_name, "Label", "", 1, 1000, "name", sort_order + ) + + # Step 4: Filter to only labels that contain the search term + filtered_artifacts = [] + for artifact in all_artifacts_result['items']: + # Extract clean label name from artifact name + clean_name = artifact['name'] + if ':' in clean_name: + clean_name = clean_name.split(':', 1)[1] + + # Remove any (Row X) suffix if present + import re + clean_name = re.sub(r'\s*\(Row\s+\d+\)$', '', clean_name).strip() + + if clean_name in unique_labels: + # Create enhanced artifact with search context + enhanced_artifact = { + 'artifact_id': artifact['artifact_id'], + 'name': clean_name, # Clean name without prefixes/suffixes + 'uri': f"artifacts/labels.csv:{clean_name}", + 'type_id': 'Label', + 'execution': artifact.get('execution', 'N/A'), + 'create_time_since_epoch': artifact.get('create_time_since_epoch', 'N/A'), + 'last_update_time_since_epoch': artifact.get('last_update_time_since_epoch', 'N/A'), + 'artifact_properties': artifact.get('artifact_properties', []), + 'search_context': { + 'search_term': content_filter, + 'matching_rows': label_metadata[clean_name]['matching_rows'], + 'total_rows': label_metadata[clean_name]['total_rows'], + 'relevance_score': label_metadata[clean_name]['relevance_score'] + } + } + filtered_artifacts.append(enhanced_artifact) + + # Step 5: Apply pagination + total_items = len(filtered_artifacts) + start_idx = (active_page - 1) * record_per_page + end_idx = start_idx + record_per_page + paginated_items = filtered_artifacts[start_idx:end_idx] + + return { + "total_items": total_items, + "items": paginated_items, + "search_metadata": { + "search_term": content_filter, + "search_type": "content_filter", + "unique_labels_found": len(unique_labels) + } + } + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error searching label artifacts: {str(e)}") # api to display executions available in mlmd file[from postgres] @@ -972,27 +1057,15 @@ async def test_label_search(): @app.get("/api/labels/search") async def search_label_content(query: str = Query(..., description="Search query"), limit: int = Query(10, description="Maximum results")): """Search label content using PostgreSQL full-text search""" - start_time = time.time() try: - - search_start = time.time() results = await search_labels(DATABASE_URL, query, limit) - search_time = time.time() - search_start - - total_time = time.time() - start_time return { "status": "success", "query": query, "results": results, - "total_results": len(results), - "timing": { - "total_time_ms": round(total_time * 1000, 2), - "search_time_ms": 
round(search_time * 1000, 2), - "query": query, - "limit": limit - } + "total_results": len(results) } except Exception as e: @@ -1001,32 +1074,16 @@ async def search_label_content(query: str = Query(..., description="Search query @app.get("/api/labels/search-direct") async def search_labels_direct(query: str = Query(..., description="Search query"), limit: int = Query(10, description="Maximum results")): """Direct label search that returns label matches as pseudo-artifacts""" - start_time = time.time() try: - - db_start = time.time() async with async_session() as db: - search_start = time.time() results = await search_labels_in_artifacts(db, query, None, limit) - search_time = time.time() - search_start - db_time = time.time() - db_start - - total_time = time.time() - start_time return { "status": "success", "query": query, "results": results, - "total_results": len(results), - "timing": { - "total_time_ms": round(total_time * 1000, 2), - "db_time_ms": round(db_time * 1000, 2), - "search_time_ms": round(search_time * 1000, 2), - "query": query, - "limit": limit, - "uses_sqlalchemy_core": True - } + "total_results": len(results) } except Exception as e: diff --git a/ui/src/client.js b/ui/src/client.js index 9c52de55b..0fc19404f 100644 --- a/ui/src/client.js +++ b/ui/src/client.js @@ -54,6 +54,21 @@ class FastAPIClient { }); } + async searchLabelArtifacts(pipeline_name, content_filter, sort_order = "asc", active_page = 1, record_per_page = 5) { + return this.apiClient + .get(`/artifacts/${pipeline_name}/Label/search`, { + params: { + content_filter: content_filter, + sort_order: sort_order, + active_page: active_page, + record_per_page: record_per_page, + }, + }) + .then(({ data }) => { + return data; + }); + } + async getArtifactTypes() { return this.apiClient.get(`/artifact_types`).then(({ data }) => { return data; @@ -168,6 +183,14 @@ class FastAPIClient { }); } + async getLabelsList() { + return this.apiClient + .get(`/api/labels/status`) + .then(({ data }) => { + return data.files || []; + }); + } + async getServerRegistration(server_name, host_info){ return this.apiClient .post(`/register-server`, { diff --git a/ui/src/components/ArtifactPTable/index.jsx b/ui/src/components/ArtifactPTable/index.jsx index 45d19da14..d385f26b5 100644 --- a/ui/src/components/ArtifactPTable/index.jsx +++ b/ui/src/components/ArtifactPTable/index.jsx @@ -25,7 +25,7 @@ import LabelCardPopup from "../LabelCardPopup"; const client = new FastAPIClient(config); -const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, filterValue}) => { +const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, filterValue, onLabelClick}) => { const [data, setData] = useState([]); const [sortOrder, setSortOrder] = useState("asc"); const [sortTimeOrder, setSortTimeOrder] = useState("asc"); @@ -73,7 +73,6 @@ const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, // Ensure properties is now an array if (!Array.isArray(properties)) { - console.warn("Expected an array for properties, got:", properties); return "N/A"; } @@ -109,9 +108,7 @@ const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, }; const getLabelData = (label_name) => { - console.log(label_name) client.getLabelData(label_name).then((data) => { - console.log(data); setLabelData(data); }); } @@ -166,7 +163,22 @@ const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, { /* Convert artifact ID to string and render it with highlighted search term if it 
matches the filter value */} - + + {artifactType === "Label" && onLabelClick ? ( + + ) : ( + + )} + {artifactType === "Model" && ( @@ -196,7 +208,7 @@ const ArtifactPTable = ({artifacts, artifactType, onsortOrder, onsortTimeOrder, {(getPropertyValue(artifact.artifact_properties, "labels_uri") || "") .split(",") .map((label_name) => label_name.trim()) - .filter((label_name) => label_name.length > 0) + .filter((label_name) => label_name.length > 0) // Optional: skip empty strings .map((label_name) => (
)} diff --git a/ui/src/pages/artifacts_postgres/index.css b/ui/src/pages/artifacts_postgres/index.css index 87db4144d..f82b80eaa 100644 --- a/ui/src/pages/artifacts_postgres/index.css +++ b/ui/src/pages/artifacts_postgres/index.css @@ -71,3 +71,145 @@ body { background: rgb(88, 147, 241); } */ + +/* Loading spinner animation */ +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +.animate-spin { + animation: spin 1s linear infinite; +} + +/* Standard DataTable styling */ +.rdt_Table { + font-size: 14px; +} + +.rdt_TableHead { + background-color: #f8f9fa; + border-bottom: 1px solid #dee2e6; +} + +.rdt_TableHeadRow { + background-color: #f8f9fa; +} + +.rdt_TableRow { + border-bottom: 1px solid #dee2e6; +} + +.rdt_TableRow:hover { + background-color: #f8f9fa; +} + +.rdt_TableCell { + padding: 12px 16px; +} + +/* Horizontal scrolling for label properties table */ +.overflow-x-auto { + overflow-x: auto; + -webkit-overflow-scrolling: touch; +} + +.overflow-x-auto::-webkit-scrollbar { + height: 8px; +} + +.overflow-x-auto::-webkit-scrollbar-track { + background: #f1f1f1; + border-radius: 4px; +} + +.overflow-x-auto::-webkit-scrollbar-thumb { + background: #c1c1c1; + border-radius: 4px; +} + +.overflow-x-auto::-webkit-scrollbar-thumb:hover { + background: #a8a8a8; +} + +/* Ensure table cells don't wrap */ +.min-w-full td { + white-space: nowrap; +} + +/* Make expanded properties table scrollable too */ +.expanded-table { + width: 100%; + overflow-x: auto; +} + +/* Split pane styles */ +.split-pane-container { + display: flex; + height: 100%; + width: 100%; +} + +.split-pane-left { + overflow: auto; + border-right: 1px solid #e5e7eb; +} + +.split-pane-right { + overflow: auto; + background-color: #f9fafb; +} + +.split-pane-resizer { + width: 4px; + background-color: #e5e7eb; + cursor: col-resize; + transition: background-color 0.2s ease; + position: relative; + flex-shrink: 0; +} + +.split-pane-resizer:hover { + background-color: #3b82f6; +} + +.split-pane-resizer:active { + background-color: #2563eb; +} + +.split-pane-resizer::after { + content: ''; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + width: 2px; + height: 20px; + background-color: #9ca3af; + border-radius: 1px; +} + +.split-pane-resizer:hover::after { + background-color: white; +} + +/* Label table clickable name styling */ +.label-name-clickable { + color: #2563eb; + cursor: pointer; + transition: color 0.2s ease; +} + +.label-name-clickable:hover { + color: #1d4ed8; + text-decoration: underline; +} + +/* Selected label highlighting */ +.selected-label-row { + background-color: #eff6ff !important; +} + +.selected-label-row td { + border-color: #3b82f6; +} diff --git a/ui/src/pages/artifacts_postgres/index.jsx b/ui/src/pages/artifacts_postgres/index.jsx index 270c3c32d..bee81a064 100644 --- a/ui/src/pages/artifacts_postgres/index.jsx +++ b/ui/src/pages/artifacts_postgres/index.jsx @@ -14,7 +14,7 @@ * limitations under the License. 
***/ -import React, { useEffect, useState } from "react"; +import React, { useEffect, useState, useCallback, useRef } from "react"; import FastAPIClient from "../../client"; import config from "../../config"; import DashboardHeader from "../../components/DashboardHeader"; @@ -23,6 +23,8 @@ import Footer from "../../components/Footer"; import "./index.css"; import Sidebar from "../../components/Sidebar"; import ArtifactTypeSidebar from "../../components/ArtifactTypeSidebar"; +import Papa from "papaparse"; +import Highlight from "../../components/Highlight"; const client = new FastAPIClient(config); @@ -38,9 +40,29 @@ const ArtifactsPostgres = () => { const [sortOrder, setSortOrder] = useState("asc"); const [totalItems, setTotalItems] = useState(0); const [activePage, setActivePage] = useState(1); - const [clickedButton, setClickedButton] = useState("page"); + const [clickedButton, setClickedButton] = useState("page"); const [selectedCol, setSelectedCol] = useState("name"); - + + // Label-specific state + const [selectedTableLabel, setSelectedTableLabel] = useState(null); + const [labelData, setLabelData] = useState(""); + const [parsedLabelData, setParsedLabelData] = useState([]); + const [labelColumns, setLabelColumns] = useState([]); + const [labelContentLoading, setLabelContentLoading] = useState(false); + const [currentPage, setCurrentPage] = useState(0); + const [rowsPerPage, setRowsPerPage] = useState(10); + + const clearLabelData = () => { + setLabelData(""); + setParsedLabelData([]); + setLabelColumns([]); + setLabelContentLoading(false); + setCurrentPage(0); + }; + + // Flag to prevent re-fetching artifacts when just loading label content + const [isLoadingLabelContent, setIsLoadingLabelContent] = useState(false); + useEffect(() => { fetchPipelines(); // Fetch pipelines and artifact types when the component mounts },[]); @@ -70,36 +92,71 @@ const ArtifactsPostgres = () => { }; useEffect(() => { - if ( selectedPipeline && selectedArtifactType ){ + if ( selectedPipeline && selectedArtifactType && !isLoadingLabelContent ){ fetchArtifacts(selectedPipeline, selectedArtifactType, sortOrder, activePage, filter, selectedCol); } - }, [selectedArtifactType, sortOrder, activePage, selectedCol, filter]); - - const fetchArtifacts = (pipelineName, artifactType, sortOrder, activePage, filter="", selectedCol) => { - client.getArtifacts(pipelineName, artifactType, sortOrder, activePage, filter, selectedCol) - .then((data) => { - setArtifacts(data.items); - setTotalItems(data.total_items); - }); - }; + }, [selectedArtifactType, sortOrder, activePage, selectedCol, filter, isLoadingLabelContent]); + + const fetchArtifacts = async (pipelineName, artifactType, sortOrder, activePage, filter="", selectedCol) => { + try { + // Handle Label search case + if (artifactType === "Label" && filter && filter.trim() !== "") { + try { + const searchData = await client.searchLabelArtifacts(pipelineName, filter, sortOrder, activePage, 5); + + // Add search context to artifacts + const processedItems = searchData.items.map(item => ({ + ...item, + isSearchResult: true, + searchFilter: filter + })); + + setArtifacts(processedItems); + setTotalItems(searchData.total_items); + return; // Early return + } catch (searchError) { + console.warn('Label search failed, falling back to regular fetch:', searchError); + // Fall through to regular fetch + } + } + + // Regular artifact fetching + const regularData = await client.getArtifacts(pipelineName, artifactType, sortOrder, activePage, filter, selectedCol); + 
setArtifacts(regularData.items); + setTotalItems(regularData.total_items); + + } catch (error) { + console.error('Failed to fetch artifacts:', error); + setArtifacts([]); + setTotalItems(0); + } + }; const handleArtifactTypeClick = (artifactType) => { if (selectedArtifactType !== artifactType) { // if same artifact type is not clicked, sets page as null until it retrieves data for that type. setArtifacts(null); - } + } setSelectedArtifactType(artifactType); setActivePage(1); - }; + + // Clear label-related state when switching artifact types + setSelectedTableLabel(null); + clearLabelData(); + }; const handlePipelineClick = (pipeline) => { if (selectedPipeline !== pipeline) { // this condition sets page as null. setArtifacts(null); - } + } setSelectedPipeline(pipeline); setActivePage(1); - }; + + // Clear label-related state when switching pipelines + setSelectedTableLabel(null); + clearLabelData(); + }; const handleFilter = (value) => { setFilter(value); // Update the filter string @@ -135,7 +192,285 @@ const ArtifactsPostgres = () => { setClickedButton("next"); handlePageClick(activePage + 1); } - }; + }; + + // Simple label content display component + const LabelContentPanel = () => { + return ( +
+ {selectedTableLabel ? ( +
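+          {/*
+            Data flow (sketch of the assumed client contract, not verified against client.js):
+              const csvText = await client.getLabelData(artifact.uri);        // raw CSV text for the label
+              const { data, meta } = Papa.parse(csvText, { header: true });   // rows + column names
+            handleTableLabelClick below stores that parse result in parsedLabelData / labelColumns,
+            and this panel paginates it purely client-side via currentPage / rowsPerPage.
+          */}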
+ {labelContentLoading ? ( +
+
+

Loading content...

+
+ ) : labelData ? ( +
+ {/* Header aligned with left table */} +
+

+ {(selectedTableLabel.name.split(":")[1] || selectedTableLabel.name).trim()} +

+
+ + {/* Fixed size table container */} +
+
+ + + + {labelColumns.map((column, index) => ( + + ))} + + + + {parsedLabelData.slice(currentPage * rowsPerPage, (currentPage + 1) * rowsPerPage).map((row, rowIndex) => ( + + {labelColumns.map((column, colIndex) => ( + + ))} + + ))} + +
+ {column.name} +
+ +
+
+ + {/* Pagination controls */} +
+
+ Rows per page: + +
+ +
+ + {currentPage * rowsPerPage + 1}-{Math.min((currentPage + 1) * rowsPerPage, parsedLabelData.length)} of {parsedLabelData.length} + + + +
+
+
+
+ ) : ( +
+

No content available

+
+ )} +
+ ) : ( +
+
+

+ Select a Label +

+

+ Click on a label name in the table to view its content +

+
+
+ )} +
+ ); + }; + + // Handle label click from table + const handleTableLabelClick = async (labelName, artifact) => { + // Prevent useEffect from triggering fetchArtifacts while loading label content + setIsLoadingLabelContent(true); + setSelectedTableLabel(artifact); + setLabelContentLoading(true); + setCurrentPage(0); // Reset pagination when new label is selected + + // Use the URI from the artifact for getLabelData, not just the label name + const fileNameForAPI = artifact.uri || `artifacts/labels.csv:${labelName}`; + + try { + // Clear old data first + setParsedLabelData([]); + setLabelColumns([]); + + // Helper function to try different URI formats + const tryGetLabelData = async (labelName, fileNameForAPI) => { + const uriFormatsToTry = [ + fileNameForAPI, // Original: artifacts/labels.csv:93951bf... + labelName, // Just the label name: 93951bf... + `artifacts/labels.csv/${labelName}`, // Alternative format: artifacts/labels.csv/93951bf... + `labels.csv:${labelName}`, // Without artifacts prefix: labels.csv:93951bf... + `${labelName}.csv` // As CSV file: 93951bf....csv + ]; + + for (const uriToTry of uriFormatsToTry) { + try { + const data = await client.getLabelData(uriToTry); + return data; // Success - return immediately + } catch (uriError) { + continue; // Try next URI format + } + } + + throw new Error(`All URI formats failed. Tried: ${uriFormatsToTry.join(', ')}`); + }; + + const labelData = await tryGetLabelData(labelName, fileNameForAPI); + + setLabelData(labelData); + + const parsed = Papa.parse(labelData, { header: true }); + + // Check if this is a search result - if so, filter to matching rows only + if (artifact.isSearchResult && artifact.searchFilter) { + const searchFilter = artifact.searchFilter; + + // Filter to show only rows that contain the search term + const matchingRows = parsed.data.filter((row) => { + const rowValues = Object.values(row); + + const hasMatch = rowValues.some(value => { + if (value && value.toString().toLowerCase().includes(searchFilter.toLowerCase())) { + return true; + } + return false; + }); + + return hasMatch; + }); + + setParsedLabelData(matchingRows); + } else { + // Normal label - show all data + setParsedLabelData(parsed.data); + } + + if (parsed.meta.fields) { + setLabelColumns( + parsed.meta.fields.map(field => ({ + name: field, + selector: row => row[field], + sortable: true, + })) + ); + } + } catch (error) { + // Clear data on error to prevent showing old content + setParsedLabelData([]); + setLabelColumns([]); + + // Show error message to user + setParsedLabelData([{ + error: "Failed to load label content", + message: error.message, + uri: fileNameForAPI + }]); + + } finally { + setLabelContentLoading(false); + setIsLoadingLabelContent(false); // Reset flag to allow normal useEffect behavior + } + }; + + // Resizable Split Pane Component + const ResizableSplitPane = ({ leftContent, rightContent, initialSplitPercentage = 50 }) => { + const [splitPercentage, setSplitPercentage] = useState(initialSplitPercentage); + const [isDragging, setIsDragging] = useState(false); + const containerRef = useRef(null); + + const handleMouseDown = (e) => { + setIsDragging(true); + e.preventDefault(); + }; + + const handleMouseMove = useCallback((e) => { + if (!isDragging || !containerRef.current) return; + + const containerRect = containerRef.current.getBoundingClientRect(); + const newPercentage = ((e.clientX - containerRect.left) / containerRect.width) * 100; + + // Limit between 20% and 80% + const clampedPercentage = Math.max(20, Math.min(80, 
newPercentage)); + setSplitPercentage(clampedPercentage); + }, [isDragging]); + + const handleMouseUp = useCallback(() => { + setIsDragging(false); + }, []); + + useEffect(() => { + if (isDragging) { + document.addEventListener('mousemove', handleMouseMove); + document.addEventListener('mouseup', handleMouseUp); + return () => { + document.removeEventListener('mousemove', handleMouseMove); + document.removeEventListener('mouseup', handleMouseUp); + }; + } + }, [isDragging, handleMouseMove, handleMouseUp]); + + return ( +
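+        {/*
+          Drag handling: handleMouseDown (wired to the resizer) sets isDragging; while dragging,
+          document-level mousemove updates splitPercentage, clamped between 20% and 80%, and mouseup
+          clears the flag, with the listeners removed again in the effect cleanup. The .split-pane-*
+          rules added to index.css above presumably provide the container, pane, and resizer styling.
+        */}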
+ {/* Left Pane */} +
+ {leftContent} +
+ + {/* Resizer */} +
+
+
+
+
+ + {/* Right Pane */} +
+ {rightContent} +
+
+ ); + }; + + return ( <> @@ -152,7 +487,8 @@ const ArtifactsPostgres = () => { className="flex-grow" />
-
+ +
{selectedPipeline !== null && ( { /> )}
-
+ {selectedArtifactType === "Label" ? ( +
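+            {/*
+              Label view: a resizable split pane with the label artifact table on the left and the
+              LabelContentPanel (the selected label's CSV rows) on the right; handleTableLabelClick
+              is what populates the right-hand panel when a label name is clicked in the table.
+            */}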
+ + {artifacts !== null && artifacts.length > 0 ? ( + + ) : ( +
+

No label artifacts available

+
+ )} +
+              }
+              rightContent={<LabelContentPanel />}
+              initialSplitPercentage={50}
+            />
+
+ ) : ( +
{artifacts !== null && artifacts.length > 0 ? ( - - + ) : (
No data available
// Display message when there are no artifacts )} @@ -251,7 +613,8 @@ const ArtifactsPostgres = () => { )} -
+
+ )}