Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
fcfd340
Initial changes
Jaychaware Jul 28, 2025
68b1c18
Changes for label content search with highlight
Jaychaware Jul 31, 2025
347ab20
fixed UI for comprehensive searching on label metadata and csv content
Jaychaware Jul 31, 2025
1066453
refactored backend server code
Jaychaware Aug 4, 2025
fef3aa0
UI refactoring code changes
Jaychaware Aug 5, 2025
883107d
Merge branch 'HewlettPackard:master' into label_postgresql_full_text_…
Jaychaware Aug 5, 2025
a114cec
Removed migrations file
Jaychaware Aug 5, 2025
66a400e
Merge branch 'label_postgresql_full_text_search' of github.com:varkha…
Jaychaware Aug 5, 2025
b9a06a7
fixed loader
Jaychaware Aug 5, 2025
bead0b4
refactored UI code
Jaychaware Aug 6, 2025
fe1543d
intermediate advanced search
Jaychaware Aug 7, 2025
e97501c
optimized advanced label search
Jaychaware Aug 8, 2025
d3b8dc8
resolved conflicts
Jaychaware Aug 28, 2025
878904c
Merge branch 'HewlettPackard:master' into label_postgresql_full_text_…
AyeshaSanadi Sep 5, 2025
e04201d
updated backend code with detailed comments
Jaychaware Sep 5, 2025
e4bdaf2
Merge branch 'label_postgresql_full_text_search' of github.com:varkha…
Jaychaware Sep 5, 2025
1f49ab6
Merge branch 'HewlettPackard:master' into label_postgresql_full_text_…
varkha-d-sharma Sep 26, 2025
9db6348
Resolved discard commit issue
AyeshaSanadi Nov 5, 2025
dc81d50
Merge branch 'HewlettPackard:master' into label_postgresql_full_text_…
AyeshaSanadi Nov 7, 2025
cea26a5
Merge branch 'HewlettPackard:master' into label_postgresql_full_text_…
varkha-d-sharma Nov 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions db_init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,49 @@ CREATE TABLE IF NOT EXISTS registered_servers(
last_sync_time BIGINT DEFAULT NULL
);

-- Label indexing table for full-text search of CSV label content.
-- One row per CSV data row; (file_name, row_index) identifies a row uniquely.
CREATE TABLE IF NOT EXISTS label_index (
    id SERIAL PRIMARY KEY,
    file_name VARCHAR(255) NOT NULL,   -- label CSV file name
    file_path TEXT NOT NULL,           -- full path to the label file
    row_index INTEGER NOT NULL,        -- row position within the CSV
    content TEXT NOT NULL,             -- raw row text; source of search_vector
    -- PostgreSQL automatically uses EXTENDED strategy:
    -- - Allows compression AND out-of-line storage
    -- - Compresses first, then moves to TOAST if still large
    metadata JSONB,                    -- optional per-row metadata
    search_vector TSVECTOR,            -- maintained by trigger (see update_label_search_vector)
    created_at BIGINT NOT NULL,        -- epoch milliseconds
    updated_at BIGINT NOT NULL,        -- epoch milliseconds; refreshed by trigger

    -- Unique constraint to prevent duplicate entries
    CONSTRAINT unique_label_file_row UNIQUE (file_name, row_index)
);

-- Create indexes for performance.
-- B-tree indexes support the common lookups (by file, by recency).
CREATE INDEX IF NOT EXISTS idx_label_index_file_name ON label_index(file_name);
CREATE INDEX IF NOT EXISTS idx_label_index_created_at ON label_index(created_at);

-- Create GIN index for full-text search (most important for performance):
-- required for fast `search_vector @@ tsquery` matching.
CREATE INDEX IF NOT EXISTS idx_label_index_search_vector ON label_index USING gin(search_vector);

-- Trigger function: keeps search_vector and updated_at in sync with content.
-- Runs BEFORE INSERT OR UPDATE (see trigger below), so application code never
-- needs to compute the tsvector itself.
-- NOTE(review): this recomputes the tsvector on every UPDATE, even when
-- content is unchanged — acceptable for now; a WHEN clause could skip it.
CREATE OR REPLACE FUNCTION update_label_search_vector() RETURNS trigger AS $$
BEGIN
    -- COALESCE guards against NULL content (column is NOT NULL, but be safe).
    NEW.search_vector := to_tsvector('english', COALESCE(NEW.content, ''));
    -- Epoch milliseconds, matching the BIGINT created_at/updated_at columns.
    NEW.updated_at := EXTRACT(EPOCH FROM NOW()) * 1000;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Create the trigger (only if table exists).
-- DROP-then-CREATE makes this script idempotent when re-run; the EXISTS
-- guard is belt-and-braces since the table is created above in this file.
DO $$
BEGIN
    IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'label_index') THEN
        DROP TRIGGER IF EXISTS trigger_update_label_search_vector ON label_index;
        CREATE TRIGGER trigger_update_label_search_vector
        BEFORE INSERT OR UPDATE ON label_index
        FOR EACH ROW EXECUTE FUNCTION update_label_search_vector();
    END IF;
END $$;

27 changes: 26 additions & 1 deletion server/app/db/dbmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
Index,
UniqueConstraint,
MetaData,
SmallInteger
SmallInteger,
JSON
)
from sqlalchemy.dialects.postgresql import TSVECTOR

metadata = MetaData()

Expand Down Expand Up @@ -185,4 +187,27 @@

# Unique Constraint
UniqueConstraint("artifact_id", "execution_id", "type", name="uniqueevent")
)


# Label indexing table for PostgreSQL full-text search.
# SQLAlchemy Core mirror of the label_index table created in db_init.sql;
# keep the two definitions in sync.
label_index = Table(
    "label_index", metadata,
    Column("id", Integer, primary_key=True, nullable=False),
    Column("file_name", String(255), nullable=False),   # label CSV file name
    Column("file_path", Text, nullable=False),          # full path to the label file
    Column("row_index", Integer, nullable=False),       # row position within the CSV
    Column("content", Text, nullable=False),            # raw row text; source of search_vector
    # NOTE(review): db_init.sql declares this column as JSONB, but JSON is
    # used here — SQLAlchemy's JSON works against a JSONB column, yet the
    # postgresql JSONB dialect type would be a closer match; confirm.
    Column("metadata", JSON),
    Column("search_vector", TSVECTOR),                  # maintained by DB trigger (see db_init.sql)
    Column("created_at", BigInteger, nullable=False),   # epoch milliseconds
    Column("updated_at", BigInteger, nullable=False),   # epoch milliseconds; refreshed by trigger

    # Indexes for performance
    Index("idx_label_index_file_name", "file_name"),
    Index("idx_label_index_search_vector", "search_vector", postgresql_using="gin"),
    Index("idx_label_index_created_at", "created_at"),

    # Unique constraint to prevent duplicate entries
    UniqueConstraint("file_name", "row_index", name="unique_label_file_row")
)
117 changes: 116 additions & 1 deletion server/app/db/dbqueries.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from sqlalchemy.ext.asyncio import AsyncSession
from fastapi import Depends
from server.app.db.dbconfig import get_db
from sqlalchemy import select, func, text, String, bindparam, case, distinct
from sqlalchemy import select, func, String, distinct, text
from server.app.db.dbmodels import (
artifact,
artifactproperty,
Expand All @@ -13,6 +13,7 @@
execution,
executionproperty,
event,
label_index,
)

async def register_server_details(db: AsyncSession, server_name: str, host_info: str):
Expand Down Expand Up @@ -312,3 +313,117 @@ async def fetch_executions(
"total_items": total_record,
"items": [dict(row) for row in rows]
}


async def search_labels_in_artifacts(db: AsyncSession, filter_value: str, pipeline_name: str = None, limit: int = 50):
    """
    Search label CSV content for rows matching ``filter_value`` using
    PostgreSQL full-text search on ``label_index.search_vector``.

    Works with or without explicit labels_uri artifact properties: hits are
    returned as synthesized "pseudo-artifact" dicts (``artifact_id`` is None)
    describing the matching label file/row, ranked by ts_rank relevance.

    Args:
        db: Async SQLAlchemy session bound to the PostgreSQL database.
        filter_value: Free-text query, fed to ``plainto_tsquery('english', ...)``.
        pipeline_name: Accepted for interface symmetry with the other fetch
            helpers; currently unused (label rows carry no pipeline link).
        limit: Maximum number of matching label rows to return.

    Returns:
        A list of dicts shaped like artifact rows plus label-specific keys
        (``label_file``, ``row_index``, ``matching_content``,
        ``label_metadata``, ``relevance_score``). Returns ``[]`` on error:
        label search is best-effort and must never break the caller's query.
    """
    # An empty/whitespace query can never match (plainto_tsquery('') produces
    # an empty tsquery), so skip the database round trip entirely.
    if not filter_value or not filter_value.strip():
        return []

    try:
        # Rank matches with ts_rank so the most relevant CSV rows come first;
        # bind parameters keep the user-supplied text out of the SQL string.
        base_query = """
            SELECT DISTINCT
                li.file_name as label_file,
                li.content as matching_content,
                li.metadata as label_metadata,
                li.row_index,
                ts_rank(li.search_vector, plainto_tsquery('english', :filter_value)) as relevance_score
            FROM label_index li
            WHERE li.search_vector @@ plainto_tsquery('english', :filter_value)
            ORDER BY relevance_score DESC
            LIMIT :limit
        """

        result = await db.execute(text(base_query), {"filter_value": filter_value, "limit": limit})
        label_results = result.mappings().all()

        # Shape each label hit like an artifact row so callers can merge the
        # two result sets without special-casing. row_index is carried through
        # explicitly (fix: it was previously dropped, which made downstream
        # consumers see every hit as row 0 and build colliding pseudo-ids).
        return [
            {
                'artifact_id': None,  # label rows are not tied to a specific artifact
                'name': f"Label Match: {row['label_file']}",
                'uri': f"label://{row['label_file']}#{row['row_index']}",
                'type_id': None,
                'create_time_since_epoch': None,
                'last_update_time_since_epoch': None,
                'label_file': row['label_file'],
                'row_index': row['row_index'],
                'matching_content': row['matching_content'],
                'label_metadata': row['label_metadata'],
                'relevance_score': float(row['relevance_score']),
            }
            for row in label_results
        ]

    except Exception as e:
        # Best-effort: surface the failure but fall back to "no label matches"
        # so the caller's primary artifact query still succeeds.
        print(f"Label search error: {e}")
        return []


async def fetch_artifacts_with_label_search(
    db: AsyncSession,
    pipeline_name: str,
    artifact_type: str,
    filter_value: str,
    active_page: int = 1,
    page_size: int = 5,
    sort_column: str = "name",
    sort_order: str = "ASC"
):
    """
    Enhanced artifact search that also surfaces label-content matches.

    Runs the regular ``fetch_artifacts`` query first, then — when a filter
    value is present — appends pseudo-artifact items built from PostgreSQL
    full-text matches on label CSV content (first page only).

    Args:
        db: Async SQLAlchemy session.
        pipeline_name: Pipeline to scope the artifact query to.
        artifact_type: Artifact type, passed through to ``fetch_artifacts``.
        filter_value: Free-text filter; also drives the label search.
        active_page: 1-based page number; label hits are merged into page 1 only.
        page_size: Page size; label hits only fill slots artifacts left free.
        sort_column: Sort column, passed through to ``fetch_artifacts``.
        sort_order: "ASC"/"DESC", passed through to ``fetch_artifacts``.

    Returns:
        The ``fetch_artifacts`` result dict (``total_items`` + ``items``),
        possibly extended with label pseudo-artifact items on page 1.
    """
    # Regular artifact search always runs; label search is purely additive.
    artifact_results = await fetch_artifacts(
        db, pipeline_name, artifact_type, filter_value,
        active_page, page_size, sort_column, sort_order
    )

    if filter_value and filter_value.strip():
        try:
            label_results = await search_labels_in_artifacts(db, filter_value, pipeline_name, 50)

            # Label hits don't correspond to existing artifacts, so they are
            # appended only on the first page, into whatever slots the
            # regular results left free.
            if active_page == 1 and label_results:
                free_slots = max(0, page_size - len(artifact_results['items']))
                added_count = 0

                # Fix: slice to the free slots instead of scanning the whole
                # result list with a counter — once the page is full, the
                # remaining hits could never be added anyway.
                for label_result in label_results[:free_slots]:
                    # Build a pseudo-artifact item; every field the frontend
                    # expects gets a non-null placeholder value.
                    enhanced_item = {
                        'artifact_id': f"label_{label_result['label_file']}_{label_result.get('row_index', 0)}",
                        'name': f"{label_result['label_file']} (Row {label_result.get('row_index', 0) + 1})",
                        'uri': label_result.get('uri', f"label://{label_result['label_file']}"),
                        'type_id': 'Label',
                        'create_time_since_epoch': 0,  # 0 instead of None
                        'last_update_time_since_epoch': 0,  # 0 instead of None
                        'artifact_properties': [],  # empty array instead of None
                        'execution': '',  # empty string instead of None
                        'label_match': True,
                        'matching_label_content': label_result['matching_content'],
                        'label_file': label_result['label_file'],
                        'label_metadata': label_result.get('label_metadata', '{}'),
                        'relevance_score': float(label_result['relevance_score'])
                    }
                    artifact_results['items'].append(enhanced_item)
                    added_count += 1

                # NOTE(review): inflating total_items changes the client's
                # pagination math — appears intentional so label hits show up
                # in the count; confirm against the UI pager logic.
                if added_count > 0:
                    artifact_results['total_items'] += added_count
                    print(f"Added {added_count} label search results to artifacts")

        except Exception as e:
            # Best-effort: a label-search failure must not break the regular
            # artifact listing.
            print(f"Error in label search integration: {e}")

    return artifact_results
Loading