diff --git a/.gitignore b/.gitignore index db9aba9..b9b5315 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,9 @@ coverage.xml .ruff_cache htmlcov +# Ignore Claude Code +.claude + # Keep folders indicated by .gitkeep !/**/.gitkeep !/**/.gitignore diff --git a/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_protein_symbol.py b/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_protein_symbol.py index 234a5c1..4d84f50 100644 --- a/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_protein_symbol.py +++ b/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_protein_symbol.py @@ -14,24 +14,16 @@ @core_mcp.tool() def get_alphafold_info_by_protein_symbol( - protein_symbol: Annotated[str, Field(description="The name of the gene to search for (e.g., 'SYNPO')")], + protein_symbol: Annotated[str, Field(description="Gene/protein name (e.g., 'SYNPO')")], species: Annotated[ str, - Field(description="The organism ID (e.g., '9606' for human)"), + Field(description="Taxonomy ID (e.g., '9606' for human)"), ] = "9606", ) -> dict: - """Query the AlphaFold database for the protein structure information using the protein name. - - This function constructs a query URL to fetch data from the AlphaFold database based on the provided protein name. - The response contains links to the PDB and CIF files for the protein structure, as well as general information about - the protein. - - Args: - protein_symbol (Annotated[str, Field, optional): The name of the protein to search for (e.g., "SYNPO"). - species (str): The organism ID (e.g., "9606" for human). Default is "9606". + """Query AlphaFold database using protein name. First converts protein symbol to UniProt ID, then fetches structure predictions. Returns: - dict: Protein structure information or an error message. + dict: AlphaFold prediction data including PDB/CIF file URLs, confidence scores, and metadata or error message. """ # Get the UniProt Id from the protein_symbol try: diff --git a/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_uniprot_id.py b/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_uniprot_id.py index 14a01e5..c346d3e 100644 --- a/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_uniprot_id.py +++ b/src/biocontext_kb/core/alphafold/_get_alphafold_info_by_uniprot_id.py @@ -6,15 +6,12 @@ def get_alphafold_info_by_uniprot_id( - uniprot_id: Annotated[str, Field(description="The UniProt ID of the protein (e.g., 'P62258')")], + uniprot_id: Annotated[str, Field(description="UniProt protein ID (e.g., 'P62258')")], ) -> dict: - """Query the AlphaFold database for the protein structure information using the UniProt ID. - - Args: - uniprot_id (str): The UniProt ID of the protein (e.g., "P62258"). + """Query AlphaFold database for protein structure data. Returns: - dict: Protein structure information or an error message. + dict: AlphaFold prediction data including PDB/CIF file URLs, confidence scores, and metadata or error message. """ # Ensure the UniProt ID is in uppercase uniprot_id = uniprot_id.upper() diff --git a/src/biocontext_kb/core/antibodyregistry/_get_antibody_information.py b/src/biocontext_kb/core/antibodyregistry/_get_antibody_information.py index 1a02b7a..c03794a 100644 --- a/src/biocontext_kb/core/antibodyregistry/_get_antibody_information.py +++ b/src/biocontext_kb/core/antibodyregistry/_get_antibody_information.py @@ -8,27 +8,12 @@ @core_mcp.tool() def get_antibody_information( - ab_id: Annotated[str, Field(description="Antibody ID from the Antibody Registry (e.g., '3643095')")], + ab_id: Annotated[str, Field(description="Antibody Registry ID (e.g., '3643095')")], ) -> dict: - """Get detailed information for a specific antibody by its ID. - - This function retrieves comprehensive information about a single antibody from the Antibody Registry - using its unique antibody ID (abId). The antibody ID is typically obtained from the results of - get_antibody_list() function, where each antibody entry contains an 'abId' field that can be used - with this function to get detailed information. - - Note: Some information provided by the Antibody Registry is for non-commercial use only. - Users should refer to antibodyregistry.org for complete terms of use and licensing details. - - Args: - ab_id (str): The unique antibody ID from the Antibody Registry. This is typically obtained - from the 'abId' field in the results of get_antibody_list(), unless the ID - is directly provided by the user. + """Get detailed antibody information by ID. Retrieves catalog number, vendor, clonality, epitope, applications, and more. Returns: - dict: Detailed antibody information including catalog number, vendor, clonality, epitope, - applications, target species, isotype, source organism, citations, and other metadata, - or error message if the request fails. + dict: Antibody details including abId, catalog numbers, vendor, clonality, epitope, applications, target species, isotype, citations or error message. """ ab_id = ab_id.strip() if not ab_id: diff --git a/src/biocontext_kb/core/antibodyregistry/_get_antibody_list.py b/src/biocontext_kb/core/antibodyregistry/_get_antibody_list.py index 00ace83..b4fb68c 100644 --- a/src/biocontext_kb/core/antibodyregistry/_get_antibody_list.py +++ b/src/biocontext_kb/core/antibodyregistry/_get_antibody_list.py @@ -8,25 +8,12 @@ @core_mcp.tool() def get_antibody_list( - search: Annotated[ - str, Field(description="Search term for antibodies (e.g., gene symbol, protein name, UniProt ID)") - ], + search: Annotated[str, Field(description="Gene symbol, protein name, or UniProt ID (e.g., 'TRPC6')")], ) -> dict: - """Query the Antibody Registry for available antibodies. - - This function searches the Antibody Registry database for antibodies matching the search term. - Common search parameters include gene symbols (e.g., 'TRPC6'), protein names, UniProt IDs, - or other relevant identifiers. - - Note: Some information provided by the Antibody Registry is for non-commercial use only. - Users should refer to antibodyregistry.org for complete terms of use and licensing details. - - Args: - search (str): Search term for antibodies. Can be a gene symbol, protein name, UniProt ID, or similar identifier. + """Search Antibody Registry for antibodies. Returns catalog numbers, vendors, clonality, applications, and metadata. Returns: - dict: Antibody search results including catalog numbers, vendor information, clonality, - applications, and other antibody metadata, or error message if the request fails. + dict: Search results containing list of antibodies with catalog numbers, vendors, clonality, applications, metadata or error message. """ search = search.strip() if not search: diff --git a/src/biocontext_kb/core/biorxiv/_get_preprint_details.py b/src/biocontext_kb/core/biorxiv/_get_preprint_details.py index 128badb..1006a28 100644 --- a/src/biocontext_kb/core/biorxiv/_get_preprint_details.py +++ b/src/biocontext_kb/core/biorxiv/_get_preprint_details.py @@ -11,20 +11,13 @@ @core_mcp.tool() def get_biorxiv_preprint_details( - doi: Annotated[str, Field(description="DOI of the preprint (e.g., '10.1101/2020.09.09.20191205')")], - server: Annotated[str, Field(description="Server to search: 'biorxiv' or 'medrxiv'")] = "biorxiv", + doi: Annotated[str, Field(description="Preprint DOI (e.g., '10.1101/2020.09.09.20191205')")], + server: Annotated[str, Field(description="'biorxiv' or 'medrxiv'")] = "biorxiv", ) -> Dict[str, Any]: - """Get detailed information about a specific preprint by DOI. - - This tool retrieves detailed metadata for a single preprint from bioRxiv or medRxiv - using its DOI identifier. - - Args: - doi (str): DOI of the preprint (e.g., '10.1101/2020.09.09.20191205'). - server (str): Server to search - 'biorxiv' or 'medrxiv' (default: 'biorxiv'). + """Get detailed preprint metadata by DOI. Retrieves title, authors, abstract, date, version, category, license, and publication status. Returns: - dict: Detailed preprint information or error message + dict: Preprint metadata including doi, title, authors, abstract, date, version, category, license, publication status or error message. """ # Validate server if server.lower() not in ["biorxiv", "medrxiv"]: diff --git a/src/biocontext_kb/core/biorxiv/_get_recent_biorxiv_preprints.py b/src/biocontext_kb/core/biorxiv/_get_recent_biorxiv_preprints.py index 29badab..54d739e 100644 --- a/src/biocontext_kb/core/biorxiv/_get_recent_biorxiv_preprints.py +++ b/src/biocontext_kb/core/biorxiv/_get_recent_biorxiv_preprints.py @@ -12,39 +12,25 @@ @core_mcp.tool() def get_recent_biorxiv_preprints( - server: Annotated[str, Field(description="Server to search: 'biorxiv' or 'medrxiv'")] = "biorxiv", - start_date: Annotated[Optional[str], Field(description="Start date in YYYY-MM-DD format")] = None, - end_date: Annotated[Optional[str], Field(description="End date in YYYY-MM-DD format")] = None, + server: Annotated[str, Field(description="'biorxiv' or 'medrxiv'")] = "biorxiv", + start_date: Annotated[Optional[str], Field(description="Start date (YYYY-MM-DD)")] = None, + end_date: Annotated[Optional[str], Field(description="End date (YYYY-MM-DD)")] = None, days: Annotated[ - Optional[int], Field(description="Number of recent days to search (alternative to date range)", ge=1, le=365) + Optional[int], Field(description="Search last N days (1-365, alternative to date range)", ge=1, le=365) ] = None, recent_count: Annotated[ - Optional[int], Field(description="Number of most recent preprints (alternative to date range)", ge=1, le=1000) + Optional[int], Field(description="Most recent N preprints (1-1000, alternative to date range)", ge=1, le=1000) ] = None, category: Annotated[ - Optional[str], Field(description="Subject category filter (e.g., 'cell biology', 'neuroscience')") + Optional[str], Field(description="Filter by subject (e.g., 'cell biology', 'neuroscience')") ] = None, - cursor: Annotated[int, Field(description="Starting position for pagination", ge=0)] = 0, - max_results: Annotated[int, Field(description="Maximum number of results to return", ge=1, le=500)] = 100, + cursor: Annotated[int, Field(description="Pagination: starting position", ge=0)] = 0, + max_results: Annotated[int, Field(description="Max results per page (1-500)", ge=1, le=500)] = 100, ) -> Dict[str, Any]: - """Get recent preprints from bioRxiv or medRxiv. - - This tool searches the bioRxiv and medRxiv preprint servers for research papers. - You can search by date range, recent posts, or most recent papers. - Results are paginated with up to 100 papers per API call. - - Args: - server (str): Server to search - 'biorxiv' or 'medrxiv' (default: 'biorxiv'). - start_date (str, optional): Start date in YYYY-MM-DD format. - end_date (str, optional): End date in YYYY-MM-DD format. - days (int, optional): Number of recent days to search (1-365). - recent_count (int, optional): Number of most recent preprints (1-1000). - category (str, optional): Subject category filter (e.g., 'cell biology', 'neuroscience'). - cursor (int): Starting position for pagination (default: 0). - max_results (int): Maximum number of results to return (default: 100, max: 500). + """Search bioRxiv/medRxiv preprints by date range or recent count. Specify one search method: date range, days, or recent_count. Returns: - dict: Preprint search results or error message + dict: Search results with server, search_params, total_returned, papers list (each with title, authors, abstract, metadata), pagination info or error message. """ # Validate server if server.lower() not in ["biorxiv", "medrxiv"]: diff --git a/src/biocontext_kb/core/clinicaltrials/_get_recruiting_studies_by_location.py b/src/biocontext_kb/core/clinicaltrials/_get_recruiting_studies_by_location.py index 59c71f9..b09e2f5 100644 --- a/src/biocontext_kb/core/clinicaltrials/_get_recruiting_studies_by_location.py +++ b/src/biocontext_kb/core/clinicaltrials/_get_recruiting_studies_by_location.py @@ -8,38 +8,18 @@ @core_mcp.tool() def get_recruiting_studies_by_location( - location_country: Annotated[ - str, Field(description="Country name (e.g., 'United States', 'Germany', 'United Kingdom')") - ], - location_state: Annotated[ - Optional[str], Field(description="State or province (e.g., 'California', 'New York')") - ] = None, - location_city: Annotated[Optional[str], Field(description="City name (e.g., 'Los Angeles', 'Boston')")] = None, - condition: Annotated[ - Optional[str], Field(description="Medical condition to filter by (e.g., 'cancer', 'diabetes')") - ] = None, - study_type: Annotated[ - Optional[str], Field(description="Type of study: 'INTERVENTIONAL', 'OBSERVATIONAL', 'ALL'") - ] = "ALL", - age_range: Annotated[Optional[str], Field(description="Age group: 'CHILD', 'ADULT', 'OLDER_ADULT', 'ALL'")] = "ALL", - page_size: Annotated[int, Field(description="Number of results to return", ge=1, le=1000)] = 50, + location_country: Annotated[str, Field(description="Country name (e.g., 'United States', 'Germany')")], + location_state: Annotated[Optional[str], Field(description="State/province (e.g., 'California')")] = None, + location_city: Annotated[Optional[str], Field(description="City name")] = None, + condition: Annotated[Optional[str], Field(description="Medical condition filter (e.g., 'cancer')")] = None, + study_type: Annotated[Optional[str], Field(description="'INTERVENTIONAL', 'OBSERVATIONAL', or 'ALL'")] = "ALL", + age_range: Annotated[Optional[str], Field(description="'CHILD', 'ADULT', 'OLDER_ADULT', or 'ALL'")] = "ALL", + page_size: Annotated[int, Field(description="Results per page (1-1000)", ge=1, le=1000)] = 50, ) -> Union[Dict[str, Any], dict]: - """Find recruiting clinical trials in a specific geographic location. - - This function helps patients and healthcare providers find clinical trials - that are currently recruiting participants in their area. - - Args: - location_country (str): Country name where studies are conducted. - location_state (str, optional): State or province name. - location_city (str, optional): City name. - condition (str, optional): Medical condition to filter by. - study_type (str, optional): Type of study filter (default: "ALL"). - age_range (str, optional): Age group filter (default: "ALL"). - page_size (int): Number of results to return (default: 50, max: 1000). + """Find recruiting clinical trials by geographic location. Returns paginated results with summary breakdowns. Returns: - dict: Recruiting studies in the specified location or error message + dict: Studies list with summary containing search location, total studies, study type/phase/condition breakdowns, recruiting locations or error message. """ if not location_country: return {"error": "Location country must be provided"} diff --git a/src/biocontext_kb/core/clinicaltrials/_get_studies_by_condition.py b/src/biocontext_kb/core/clinicaltrials/_get_studies_by_condition.py index e946593..1996dc6 100644 --- a/src/biocontext_kb/core/clinicaltrials/_get_studies_by_condition.py +++ b/src/biocontext_kb/core/clinicaltrials/_get_studies_by_condition.py @@ -8,40 +8,23 @@ @core_mcp.tool() def get_studies_by_condition( - condition: Annotated[ - str, Field(description="Medical condition or disease name (e.g., 'breast cancer', 'diabetes', 'alzheimer')") - ], + condition: Annotated[str, Field(description="Medical condition/disease (e.g., 'cancer', 'diabetes')")], status: Annotated[ Optional[str], - Field(description="Study status filter: 'RECRUITING', 'ACTIVE_NOT_RECRUITING', 'COMPLETED', 'ALL'"), + Field(description="'RECRUITING', 'ACTIVE_NOT_RECRUITING', 'COMPLETED', or 'ALL'"), ] = "ALL", - study_type: Annotated[ - Optional[str], Field(description="Type of study: 'INTERVENTIONAL', 'OBSERVATIONAL', 'ALL'") - ] = "ALL", - location_country: Annotated[ - Optional[str], Field(description="Country filter (e.g., 'United States', 'Germany')") - ] = None, - page_size: Annotated[int, Field(description="Number of results to return", ge=1, le=1000)] = 50, + study_type: Annotated[Optional[str], Field(description="'INTERVENTIONAL', 'OBSERVATIONAL', or 'ALL'")] = "ALL", + location_country: Annotated[Optional[str], Field(description="Country filter (e.g., 'United States')")] = None, + page_size: Annotated[int, Field(description="Results per page (1-1000)", ge=1, le=1000)] = 50, sort: Annotated[ str, - Field(description="Sort order: 'LastUpdatePostDate:desc', 'StudyFirstPostDate:desc', 'EnrollmentCount:desc'"), + Field(description="'LastUpdatePostDate:desc', 'StudyFirstPostDate:desc', or 'EnrollmentCount:desc'"), ] = "LastUpdatePostDate:desc", ) -> Union[Dict[str, Any], dict]: - """Search for clinical trials by medical condition with simplified parameters. - - This function provides a focused search for clinical trials related to a specific - medical condition, with common filters that biomedical researchers typically use. - - Args: - condition (str): Medical condition or disease name to search for. - status (str, optional): Study status filter (default: "ALL"). - study_type (str, optional): Type of study filter (default: "ALL"). - location_country (str, optional): Country where studies are conducted. - page_size (int): Number of results to return (default: 50, max: 1000). - sort (str): Sort order for results (default: most recently updated). + """Search trials by condition with summary statistics. Returns paginated results with breakdowns by status, study type, and phase. Returns: - dict: Study search results with summary statistics or error message + dict: Studies list with summary containing condition searched, total studies, status/study type/phase breakdowns or error message. """ if not condition: return {"error": "Medical condition must be provided"} diff --git a/src/biocontext_kb/core/clinicaltrials/_get_studies_by_intervention.py b/src/biocontext_kb/core/clinicaltrials/_get_studies_by_intervention.py index 0bd6490..82dd752 100644 --- a/src/biocontext_kb/core/clinicaltrials/_get_studies_by_intervention.py +++ b/src/biocontext_kb/core/clinicaltrials/_get_studies_by_intervention.py @@ -10,45 +10,29 @@ def get_studies_by_intervention( intervention: Annotated[ str, - Field(description="Drug, therapy, or treatment name (e.g., 'aspirin', 'pembrolizumab', 'radiation therapy')"), + Field(description="Drug/therapy name (e.g., 'aspirin', 'pembrolizumab', 'radiation')"), ], - condition: Annotated[ - Optional[str], Field(description="Medical condition to filter by (e.g., 'cancer', 'diabetes')") - ] = None, + condition: Annotated[Optional[str], Field(description="Medical condition filter (e.g., 'cancer')")] = None, phase: Annotated[ - Optional[str], Field(description="Clinical trial phase: 'PHASE1', 'PHASE2', 'PHASE3', 'PHASE4', 'EARLY_PHASE1'") + Optional[str], Field(description="'PHASE1', 'PHASE2', 'PHASE3', 'PHASE4', or 'EARLY_PHASE1'") ] = None, status: Annotated[ - Optional[str], Field(description="Study status: 'RECRUITING', 'ACTIVE_NOT_RECRUITING', 'COMPLETED', 'ALL'") + Optional[str], Field(description="'RECRUITING', 'ACTIVE_NOT_RECRUITING', 'COMPLETED', or 'ALL'") ] = "ALL", intervention_type: Annotated[ Optional[str], - Field( - description="Type of intervention: 'DRUG', 'BIOLOGICAL', 'DEVICE', 'PROCEDURE', 'RADIATION', 'BEHAVIORAL', 'ALL'" - ), + Field(description="'DRUG', 'BIOLOGICAL', 'DEVICE', 'PROCEDURE', 'RADIATION', 'BEHAVIORAL', or 'ALL'"), ] = "ALL", - page_size: Annotated[int, Field(description="Number of results to return", ge=1, le=1000)] = 50, + page_size: Annotated[int, Field(description="Results per page (1-1000)", ge=1, le=1000)] = 50, sort: Annotated[ str, - Field(description="Sort order: 'LastUpdatePostDate:desc', 'StudyFirstPostDate:desc', 'EnrollmentCount:desc'"), + Field(description="'LastUpdatePostDate:desc', 'StudyFirstPostDate:desc', or 'EnrollmentCount:desc'"), ] = "LastUpdatePostDate:desc", ) -> Union[Dict[str, Any], dict]: - """Search for clinical trials by drug or intervention name. - - This function helps biomedical researchers find clinical trials testing specific - drugs, therapies, or treatments, with optional filters for condition and phase. - - Args: - intervention (str): Drug, therapy, or treatment name to search for. - condition (str, optional): Medical condition to filter by. - phase (str, optional): Clinical trial phase to filter by. - status (str, optional): Study status filter (default: "ALL"). - intervention_type (str, optional): Type of intervention filter (default: "ALL"). - page_size (int): Number of results to return (default: 50, max: 1000). - sort (str): Sort order for results (default: most recently updated). + """Search trials by intervention with condition and phase filters. Returns paginated results with breakdowns. Returns: - dict: Study search results with summary statistics or error message + dict: Studies list with summary containing intervention searched, total studies, status/phase breakdowns, top conditions/sponsors or error message. """ if not intervention: return {"error": "Intervention name must be provided"} diff --git a/src/biocontext_kb/core/clinicaltrials/_get_study_details.py b/src/biocontext_kb/core/clinicaltrials/_get_study_details.py index 2d38b73..59815f1 100644 --- a/src/biocontext_kb/core/clinicaltrials/_get_study_details.py +++ b/src/biocontext_kb/core/clinicaltrials/_get_study_details.py @@ -8,23 +8,15 @@ @core_mcp.tool() def get_study_details( - nct_id: Annotated[str, Field(description="NCT ID of the clinical trial (e.g., 'NCT01234567')")], + nct_id: Annotated[str, Field(description="NCT ID (e.g., 'NCT01234567')")], fields: Annotated[ - str, Field(description="Comma-separated list of fields to return or 'all' for complete study data") + str, Field(description="Comma-separated fields or 'all' for complete data. Default includes key modules.") ] = "IdentificationModule,StatusModule,SponsorCollaboratorsModule,DescriptionModule,ConditionsModule,DesignModule,ArmsInterventionsModule,OutcomesModule,EligibilityModule,ContactsLocationsModule", ) -> Union[Dict[str, Any], dict]: - """Get detailed information about a specific clinical trial by its NCT ID. - - This function retrieves comprehensive data about a single clinical trial, including - study design, eligibility criteria, outcomes, locations, and contact information. - - Args: - nct_id (str): NCT ID of the clinical trial (e.g., "NCT01234567"). - fields (str): Comma-separated list of fields to return, or "all" for complete data. - Default includes key modules for biomedical researchers. + """Get complete trial details by NCT ID. Retrieves study design, eligibility, outcomes, locations, contacts, and metadata. Returns: - dict: Detailed study information or error message + dict: Study details with protocol sections including identification, status, sponsors, description, conditions, design, interventions, outcomes, eligibility, locations or error message. """ if not nct_id: return {"error": "NCT ID must be provided"} diff --git a/src/biocontext_kb/core/clinicaltrials/_search_studies.py b/src/biocontext_kb/core/clinicaltrials/_search_studies.py index 378b4d5..4f1a198 100644 --- a/src/biocontext_kb/core/clinicaltrials/_search_studies.py +++ b/src/biocontext_kb/core/clinicaltrials/_search_studies.py @@ -9,58 +9,35 @@ @core_mcp.tool() def search_studies( - condition: Annotated[ - Optional[str], Field(description="Medical condition or disease (e.g., 'cancer', 'diabetes')") - ] = None, - intervention: Annotated[ - Optional[str], Field(description="Drug, therapy, or treatment name (e.g., 'aspirin', 'chemotherapy')") - ] = None, - sponsor: Annotated[Optional[str], Field(description="Study sponsor organization (e.g., 'Pfizer', 'NIH')")] = None, + condition: Annotated[Optional[str], Field(description="Medical condition (e.g., 'cancer')")] = None, + intervention: Annotated[Optional[str], Field(description="Drug/therapy name (e.g., 'aspirin')")] = None, + sponsor: Annotated[Optional[str], Field(description="Sponsor org (e.g., 'Pfizer')")] = None, status: Annotated[ Optional[str], Field( - description="Study status: 'RECRUITING', 'ACTIVE_NOT_RECRUITING', 'COMPLETED', 'TERMINATED', 'SUSPENDED', 'WITHDRAWN', 'NOT_YET_RECRUITING'" + description="'RECRUITING', 'ACTIVE_NOT_RECRUITING', 'COMPLETED', 'TERMINATED', 'SUSPENDED', 'WITHDRAWN', or 'NOT_YET_RECRUITING'" ), ] = None, phase: Annotated[ - Optional[str], Field(description="Study phase: 'PHASE1', 'PHASE2', 'PHASE3', 'PHASE4', 'EARLY_PHASE1', 'NA'") + Optional[str], Field(description="'PHASE1', 'PHASE2', 'PHASE3', 'PHASE4', 'EARLY_PHASE1', or 'NA'") ] = None, study_type: Annotated[ - Optional[str], Field(description="Type of study: 'INTERVENTIONAL', 'OBSERVATIONAL', 'EXPANDED_ACCESS'") - ] = None, - location_country: Annotated[ - Optional[str], Field(description="Country where study is conducted (e.g., 'United States', 'Germany')") + Optional[str], Field(description="'INTERVENTIONAL', 'OBSERVATIONAL', or 'EXPANDED_ACCESS'") ] = None, - min_age: Annotated[Optional[int], Field(description="Minimum age of participants in years", ge=0)] = None, - max_age: Annotated[Optional[int], Field(description="Maximum age of participants in years", ge=0)] = None, - sex: Annotated[Optional[str], Field(description="Sex of participants: 'ALL', 'FEMALE', 'MALE'")] = None, - page_size: Annotated[int, Field(description="Number of results to return", ge=1, le=1000)] = 25, + location_country: Annotated[Optional[str], Field(description="Country (e.g., 'United States')")] = None, + min_age: Annotated[Optional[int], Field(description="Min participant age (years)", ge=0)] = None, + max_age: Annotated[Optional[int], Field(description="Max participant age (years)", ge=0)] = None, + sex: Annotated[Optional[str], Field(description="'ALL', 'FEMALE', or 'MALE'")] = None, + page_size: Annotated[int, Field(description="Results per page (1-1000)", ge=1, le=1000)] = 25, sort: Annotated[ str, - Field(description="Sort order: 'LastUpdatePostDate:desc', 'StudyFirstPostDate:desc', 'EnrollmentCount:desc'"), + Field(description="'LastUpdatePostDate:desc', 'StudyFirstPostDate:desc', or 'EnrollmentCount:desc'"), ] = "LastUpdatePostDate:desc", ) -> Union[Dict[str, Any], dict]: - """Search for clinical trials studies based on various criteria. - - This function allows biomedical researchers to find relevant clinical trials by searching - across conditions, interventions, sponsors, and other study characteristics. - - Args: - condition (str, optional): Medical condition or disease to search for. - intervention (str, optional): Drug, therapy, or treatment name to search for. - sponsor (str, optional): Study sponsor organization. - status (str, optional): Current status of the study. - phase (str, optional): Clinical trial phase. - study_type (str, optional): Type of study (interventional, observational, etc.). - location_country (str, optional): Country where study is conducted. - min_age (int, optional): Minimum age of participants in years. - max_age (int, optional): Maximum age of participants in years. - sex (str, optional): Sex of participants. - page_size (int): Number of results to return (default: 25, max: 1000). - sort (str): Sort order for results (default: most recently updated). + """Advanced search for trials with flexible multi-field filtering. Specify at least one search parameter. Returns: - dict: Study search results or error message + dict: Paginated search results containing studies list with trial metadata or error message. """ # Ensure at least one search parameter was provided if not any([condition, intervention, sponsor, status, phase, study_type, location_country, min_age, max_age, sex]): diff --git a/src/biocontext_kb/core/ensembl/_get_ensembl_id_from_gene_symbol.py b/src/biocontext_kb/core/ensembl/_get_ensembl_id_from_gene_symbol.py index 1210d31..70eca7d 100644 --- a/src/biocontext_kb/core/ensembl/_get_ensembl_id_from_gene_symbol.py +++ b/src/biocontext_kb/core/ensembl/_get_ensembl_id_from_gene_symbol.py @@ -9,22 +9,16 @@ @core_mcp.tool() def get_ensembl_id_from_gene_symbol( - gene_symbol: Annotated[str, Field(description="The name of the gene to search for (e.g., 'TP53')")], + gene_symbol: Annotated[str, Field(description="Gene name (e.g., 'TP53')")], species: Annotated[ str, - Field(description="Taxonomy ID (e.g., 10090) or species name as string"), + Field(description="Taxonomy ID (e.g., 9606 for human, 10090 for mouse)"), ] = "9606", ) -> dict: - """Query the Ensembl database for the Ensembl ID of a given gene name. - - Always provide the species parameter to ensure the correct protein is returned. - - Args: - gene_symbol (str): The name of the gene to search for (e.g., "TP53"). - species (str): Taxonomy ID (e.g., 10090) as string (default: "9606"). + """Get Ensembl gene ID from gene symbol. Returns the stable Ensembl ID (ENSG*) for the given gene symbol and species. Returns: - dict: Gene data or error message + dict: Ensembl gene ID in format {'ensembl_id': 'ENSG...'} or error message. """ # Ensure at least one search parameter was provided if not gene_symbol: diff --git a/src/biocontext_kb/core/europepmc/_get_europepmc_articles.py b/src/biocontext_kb/core/europepmc/_get_europepmc_articles.py index 1e08080..e22778b 100644 --- a/src/biocontext_kb/core/europepmc/_get_europepmc_articles.py +++ b/src/biocontext_kb/core/europepmc/_get_europepmc_articles.py @@ -9,42 +9,21 @@ @core_mcp.tool() def get_europepmc_articles( - query: Annotated[Optional[str], Field(description="Search query string, not specific to any field")] = None, - title: Annotated[Optional[str], Field(description="Search term for article titles")] = None, - abstract: Annotated[Optional[str], Field(description="Search term for article abstracts")] = None, - author: Annotated[Optional[str], Field(description="Author name (e.g., 'kuehl,malte')")] = None, - search_type: Annotated[str, Field(description="Search type: 'and' or 'or'")] = "or", + query: Annotated[Optional[str], Field(description="General search query")] = None, + title: Annotated[Optional[str], Field(description="Search in article titles")] = None, + abstract: Annotated[Optional[str], Field(description="Search in abstracts")] = None, + author: Annotated[Optional[str], Field(description="Author name (e.g., 'lastname,firstname')")] = None, + search_type: Annotated[str, Field(description="'and' or 'or' (default: 'or')")] = "or", sort_by: Annotated[ Optional[str], - Field(description="Sort by: 'recent' for most recent, 'cited' for most cited or None for no specific sorting"), + Field(description="'recent' or 'cited' (default: none)"), ] = None, - page_size: Annotated[int, Field(description="Number of results to return", ge=1, le=1000)] = 25, + page_size: Annotated[int, Field(description="Results per page (1-1000)", ge=1, le=1000)] = 25, ) -> dict: - """Query the Europe PMC database for scientific articles. - - Use 'recent' sort for current research queries and 'cited' sort for comprehensive career overviews - or well-established topics (e.g., "what has author X published on in their career"). - - Provide at least one of the following search parameters: - - query: General search query string - - title: Search term for article titles - - abstract: Search term for article abstracts. - - author: Author name (e.g., "last_name,first_name"). Should not contain spaces. - These will be combined with the specified search type ("and" or "or"). - For a broad search, prefer the "query" parameter and "or" search type. - Only use the "and" search type if you want to ensure all terms must match. - - Args: - query (str, optional): General search query string. - title (str, optional): Search term for article titles. - abstract (str, optional): Search term for article abstracts. - author (str, optional): Author name (e.g., "last_name,first_name"). Should not contain spaces. - search_type (str): Search type - "and" or "or" (default: "or"). - sort_by (str): Sort by - "recent" for most recent, "cited" for most cited or None for no specific sorting (default: None). - page_size (int): Number of results to return (default: 25, max: 1000). + """Search Europe PMC articles by query, title, abstract, or author. Combine search terms with 'and'/'or' logic. Returns: - dict: Article search results or error message + dict: Search results with resultList containing articles (title, authors, abstract, journal, PMC/DOI IDs) or error message. """ # Ensure at least one search parameter was provided if not any([query, title, abstract, author]): diff --git a/src/biocontext_kb/core/europepmc/_get_europepmc_fulltext.py b/src/biocontext_kb/core/europepmc/_get_europepmc_fulltext.py index 9950910..d595b31 100644 --- a/src/biocontext_kb/core/europepmc/_get_europepmc_fulltext.py +++ b/src/biocontext_kb/core/europepmc/_get_europepmc_fulltext.py @@ -8,15 +8,12 @@ @core_mcp.tool() def get_europepmc_fulltext( - pmc_id: Annotated[str, Field(description="PMC ID starting with 'PMC' (e.g., 'PMC11629965')")], + pmc_id: Annotated[str, Field(description="PMC ID (e.g., 'PMC11629965')")], ) -> dict: - """Get the full text XML for a given PMC ID from Europe PMC. - - Args: - pmc_id (str): PMC ID starting with "PMC" (e.g., "PMC11629965"). + """Get full-text XML for a PMC ID. Returns the complete article XML for processing and analysis. Returns: - dict: Full text XML content or error message + dict: Full-text XML content in format {'fulltext_xml': '...'} or error message. """ # Validate PMC ID format pmc_id = pmc_id.strip().upper() diff --git a/src/biocontext_kb/core/grants/_search_grants_gov.py b/src/biocontext_kb/core/grants/_search_grants_gov.py index 495f10a..5c80ef8 100644 --- a/src/biocontext_kb/core/grants/_search_grants_gov.py +++ b/src/biocontext_kb/core/grants/_search_grants_gov.py @@ -8,31 +8,21 @@ @core_mcp.tool() def search_grants_gov( - keyword: Annotated[Optional[str], Field(description="Keyword to search for")] = None, + keyword: Annotated[Optional[str], Field(description="Search keyword")] = None, opp_num: Annotated[Optional[str], Field(description="Opportunity number")] = None, - eligibilities: Annotated[Optional[str], Field(description="Eligibility criteria (comma-separated)")] = None, + eligibilities: Annotated[Optional[str], Field(description="Eligibilities (comma-separated)")] = None, agencies: Annotated[Optional[str], Field(description="Agency codes (comma-separated)")] = None, - rows: Annotated[int, Field(description="Number of results to return")] = 10, + rows: Annotated[int, Field(description="Results to return")] = 10, opp_statuses: Annotated[ - Optional[str], Field(description="Opportunity statuses (pipe-separated, e.g. 'forecasted|posted')") + Optional[str], Field(description="'forecasted|posted' (pipe-separated, default: 'forecasted|posted')") ] = "forecasted|posted", aln: Annotated[Optional[str], Field(description="Assistance Listing Number")] = None, - funding_categories: Annotated[Optional[str], Field(description="Funding categories (comma-separated)")] = None, + funding_categories: Annotated[Optional[str], Field(description="Categories (comma-separated)")] = None, ) -> dict: - """Search for grants from grants.gov using the Search2 API. - - Args: - keyword: Keyword to search for - opp_num: Opportunity number - eligibilities: Eligibility criteria (comma-separated) - agencies: Agency codes (comma-separated) - rows: Number of results to return - opp_statuses: Opportunity statuses (pipe-separated, e.g. 'forecasted|posted') - aln: Assistance Listing Number - funding_categories: Funding categories (comma-separated) + """Search grants.gov by keyword, agency, or other criteria. Returns opportunity listings with deadlines and eligibility. Returns: - dict: Search results from grants.gov or error message + dict: Grant opportunities list with titles, agencies, deadlines, funding amounts, eligibility criteria or error message. """ url = "https://api.grants.gov/v1/api/search2" diff --git a/src/biocontext_kb/core/interpro/_get_interpro_entry.py b/src/biocontext_kb/core/interpro/_get_interpro_entry.py index c77b157..33d3773 100644 --- a/src/biocontext_kb/core/interpro/_get_interpro_entry.py +++ b/src/biocontext_kb/core/interpro/_get_interpro_entry.py @@ -10,34 +10,25 @@ def get_interpro_entry( interpro_id: Annotated[ str, - Field(description="The InterPro entry identifier (e.g., 'IPR000001')"), + Field(description="InterPro ID (e.g., 'IPR000001')"), ], include_interactions: Annotated[ bool, - Field(description="Whether to include protein-protein interactions data"), + Field(description="Include protein-protein interactions data"), ] = False, include_pathways: Annotated[ bool, - Field(description="Whether to include pathway information"), + Field(description="Include pathway information"), ] = False, include_cross_references: Annotated[ bool, - Field(description="Whether to include cross-references to other databases"), + Field(description="Include cross-references to other databases"), ] = False, ) -> dict: - """Get detailed information about a specific InterPro entry. - - InterPro entries represent protein families, domains, and functional sites. - Each entry integrates information from multiple member databases. - - Args: - interpro_id (str): The InterPro entry identifier (e.g., "IPR000001"). - include_interactions (bool, optional): Whether to include protein-protein interactions data. Defaults to False. - include_pathways (bool, optional): Whether to include pathway information. Defaults to False. - include_cross_references (bool, optional): Whether to include cross-references to other databases. Defaults to False. + """Get InterPro entry details (family, domain, or functional site). Returns metadata from member databases like PFAM, PROSITE. Returns: - dict: InterPro entry data including description, type, member databases, and optional additional data + dict: Entry metadata including name, type, description, member databases, optionally interactions/pathways/cross-references or error message. """ # Validate InterPro ID format interpro_id = interpro_id.upper().strip() diff --git a/src/biocontext_kb/core/interpro/_get_protein_domains.py b/src/biocontext_kb/core/interpro/_get_protein_domains.py index fc975de..daf8ae7 100644 --- a/src/biocontext_kb/core/interpro/_get_protein_domains.py +++ b/src/biocontext_kb/core/interpro/_get_protein_domains.py @@ -10,36 +10,25 @@ def get_protein_domains( protein_id: Annotated[ str, - Field(description="The protein identifier/accession (e.g., 'P04637' or 'CYC_HUMAN')"), + Field(description="UniProt ID/accession (e.g., 'P04637' or 'CYC_HUMAN')"), ], source_db: Annotated[ str, - Field(description="The protein database source ('uniprot', 'reviewed', or 'unreviewed')"), + Field(description="Database source ('uniprot', 'reviewed', or 'unreviewed')"), ] = "uniprot", include_structure_info: Annotated[ bool, - Field(description="Whether to include structural information"), + Field(description="Include structural information"), ] = False, species_filter: Annotated[ Optional[str], - Field(description="Taxonomy ID to filter results (e.g., '9606' for human)"), + Field(description="Taxonomy ID filter (e.g., '9606' for human)"), ] = None, ) -> dict: - """Get domain architecture and InterPro matches for a specific protein. - - This function retrieves all InterPro domain matches for a given protein, - providing insight into the protein's functional domains and architecture. - - To get the protein's UniProt ID, use the `get_uniprot_id_by_protein_symbol` tool first. - - Args: - protein_id (str): The protein identifier or accession (e.g., "P04637" or "CYC_HUMAN"). - source_db (str, optional): The protein database source. Defaults to "uniprot". - include_structure_info (bool, optional): Whether to include structural information. Defaults to False. - species_filter (str, optional): Taxonomy ID to filter results (e.g., "9606" for human). Defaults to None. + """Get protein domain architecture and InterPro matches. Returns all InterPro domains, functional sites, and domain architecture. Returns: - dict: Protein domain information including InterPro matches, domain architecture, and optional structural data + dict: Protein metadata with interpro_matches array, interpro_match_count, domain_architecture, optionally structure data or error message. """ base_url = f"https://www.ebi.ac.uk/interpro/api/protein/{source_db}/{protein_id}" diff --git a/src/biocontext_kb/core/interpro/_search_interpro_entries.py b/src/biocontext_kb/core/interpro/_search_interpro_entries.py index fbb5766..7758024 100644 --- a/src/biocontext_kb/core/interpro/_search_interpro_entries.py +++ b/src/biocontext_kb/core/interpro/_search_interpro_entries.py @@ -10,46 +10,35 @@ def search_interpro_entries( query: Annotated[ Optional[str], - Field(description="Search term for InterPro entry names or descriptions"), + Field(description="Search term for entry names/descriptions"), ] = None, entry_type: Annotated[ Optional[str], Field( - description="Filter by entry type: family, domain, homologous_superfamily, repeat, conserved_site, binding_site, active_site, ptm" + description="family, domain, homologous_superfamily, repeat, conserved_site, binding_site, active_site, or ptm" ), ] = None, source_database: Annotated[ Optional[str], - Field(description="Filter by member database: pfam, prosite, panther, smart, etc."), + Field(description="pfam, prosite, panther, smart, cdd, hamap, pirsf, prints, etc."), ] = None, go_term: Annotated[ Optional[str], - Field(description="Filter by GO term (e.g., 'GO:0006122')"), + Field(description="GO term filter (e.g., 'GO:0006122')"), ] = None, species_filter: Annotated[ Optional[str], - Field(description="Filter by taxonomy ID (e.g., '9606' for human)"), + Field(description="Taxonomy ID filter (e.g., '9606')"), ] = None, page_size: Annotated[ int, - Field(description="Number of results to return (max 200)"), + Field(description="Results per page (max 200)"), ] = 20, ) -> dict: - """Search InterPro entries by various criteria. - - This function allows searching the InterPro database using different filters - such as entry type, source database, GO terms, and species. - - Args: - query (str, optional): Search term for InterPro entry names or descriptions. - entry_type (str, optional): Filter by entry type (family, domain, etc.). - source_database (str, optional): Filter by member database (pfam, prosite, etc.). - go_term (str, optional): Filter by GO term (e.g., "GO:0006122"). - species_filter (str, optional): Filter by taxonomy ID (e.g., "9606" for human). - page_size (int, optional): Number of results to return (max 200). Defaults to 20. + """Search InterPro entries by name, type, database, GO term, or species. Returns matching entries with metadata. Returns: - dict: Search results with InterPro entries matching the criteria + dict: Search results with results array (InterPro entries), count, total_available, search_criteria or error message. """ base_url = "https://www.ebi.ac.uk/interpro/api/entry/interpro" diff --git a/src/biocontext_kb/core/kegg/_get_kegg_id_by_gene_symbol.py b/src/biocontext_kb/core/kegg/_get_kegg_id_by_gene_symbol.py index 5daab95..be6b5e6 100644 --- a/src/biocontext_kb/core/kegg/_get_kegg_id_by_gene_symbol.py +++ b/src/biocontext_kb/core/kegg/_get_kegg_id_by_gene_symbol.py @@ -9,40 +9,15 @@ @core_mcp.tool() def get_kegg_id_by_gene_symbol( - gene_symbol: Annotated[ - str, Field(description="Gene symbol to convert to KEGG ID (e.g., 'TP53' for human, 'Trp53' for mouse)") - ], + gene_symbol: Annotated[str, Field(description="Gene symbol (e.g., 'TP53' for human, 'Trp53' for mouse)")], organism_code: Annotated[ - str, Field(description="KEGG organism code or taxonomy ID (e.g., '9606' for human, '10090' for mouse)") + str, Field(description="Taxonomy ID: 9606 (human), 10090 (mouse), 10116 (rat), 562 (E. coli), 4932 (yeast)") ], ) -> str | dict: - """Get KEGG ID by gene symbol. - - This function converts a gene symbol (like TP53) to a KEGG gene ID (like hsa:7157) for use in the KEGG API. - The KEGG API typically requires KEGG IDs rather than gene symbols for most operations. - - This is often the first step in a workflow - get the KEGG ID, then use it in subsequent API calls. - - Common organism codes: - - Human: 9606 (KEGG code: hsa) - - Mouse: 10090 (KEGG code: mmu) - - Rat: 10116 (KEGG code: rno) - - E. coli: 562 (KEGG code: eco) - - Yeast: 4932 (KEGG code: sce) - - Args: - gene_symbol (str): The gene symbol to search for (e.g., "TP53" for human, "Trp53" for mouse). - organism_code (str): The organism code as taxonomy ID (e.g., "9606" for human, "10090" for mouse). + """Convert gene symbol to KEGG ID for use in subsequent API calls. Returns KEGG gene ID required for query_kegg(). Returns: - str | dict: The KEGG ID (e.g., "hsa:7157") or an error message. - - Examples: - >>> get_kegg_id_by_gene_symbol(gene_symbol="TP53", organism_code="9606") - "hsa:7157" - - >>> get_kegg_id_by_gene_symbol(gene_symbol="Trp53", organism_code="10090") - "mmu:22059" + str or dict: KEGG gene ID string (e.g., 'hsa:7157') or error dict. """ if not gene_symbol or not organism_code: return "Gene symbol and organism code are required." diff --git a/src/biocontext_kb/core/kegg/_query_kegg.py b/src/biocontext_kb/core/kegg/_query_kegg.py index 9641abc..00e3a06 100644 --- a/src/biocontext_kb/core/kegg/_query_kegg.py +++ b/src/biocontext_kb/core/kegg/_query_kegg.py @@ -222,94 +222,32 @@ def execute(self) -> str: @core_mcp.tool() def query_kegg( - operation: Annotated[ - KeggOperation, Field(description="The KEGG API operation to perform (info, list, find, get, conv, link, ddi)") - ], + operation: Annotated[KeggOperation, Field(description="info, list, find, get, conv, link, or ddi")], database: Annotated[ Optional[Union[KeggDatabase, KeggOutsideDb, str]], - Field(description="The KEGG database to query (e.g., pathway, genes, compound) or organism code (e.g., hsa)"), + Field(description="pathway, compound, genes, organism code (hsa, mmu, etc.), or other DB"), ] = None, target_db: Annotated[ Optional[Union[KeggDatabase, KeggOutsideDb, str]], - Field(description="Target database for conversion or linking operations"), + Field(description="Target DB for conversion/linking operations"), ] = None, source_db: Annotated[ Optional[Union[KeggDatabase, KeggOutsideDb, str]], - Field(description="Source database for conversion or linking operations"), - ] = None, - query: Annotated[ - Optional[str], Field(description="Query string for operations like FIND, or organism code for LIST") + Field(description="Source DB for conversion/linking operations"), ] = None, + query: Annotated[Optional[str], Field(description="Query string for FIND/LIST, or organism code for LIST")] = None, option: Annotated[ Optional[Union[KeggOption, KeggFindOption, KeggRdfFormat]], - Field(description="Additional options like sequence formats, chemical formula search, etc."), + Field(description="aaseq, ntseq, mol, formula, exact_mass, mol_weight, etc."), ] = None, entries: Annotated[ - Optional[List[str]], Field(description="List of KEGG entry IDs (e.g., ['hsa:7157', 'hsa:00010'])") + Optional[List[str]], Field(description="KEGG entry IDs (e.g., ['hsa:7157', 'hsa00010'])") ] = None, ) -> str | dict: - """Execute a KEGG API query. - - This function provides access to the KEGG API, allowing you to query biological data across - pathways, genes, compounds, diseases, and more. The function can perform all KEGG API operations - and accepts various parameters depending on the operation. - - When searching for genes in KEGG, you typically need KEGG IDs rather than gene symbols. - Use the get_kegg_id_by_gene_symbol function first to convert gene symbols to KEGG IDs. - - Common operations: - - info: Get database metadata (e.g., operation=info, database=PATHWAY) - - list: List entries in a database (e.g., operation=list, database=PATHWAY, query="hsa") - - get: Retrieve specific entries (e.g., operation=get, entries=["hsa:7157"]) - - find: Search for entries by keyword (e.g., operation=find, database=COMPOUND, query="glucose") - - link: Find related entries (e.g., operation=link, target_db=PATHWAY, entries=["hsa:7157"]) - - conv: Convert between identifiers (e.g., operation=conv, target_db=NCBI_GENEID, entries=["hsa:7157"]) - - Args: - operation (KeggOperation): The KEGG operation to perform. - database (KeggDatabase | KeggOutsideDb | str, optional): The database to query. - target_db (KeggDatabase | KeggOutsideDb | str, optional): The target database for conversion. - source_db (KeggDatabase | KeggOutsideDb | str, optional): The source database for conversion. - query (str, optional): The query string for FIND or LIST operations. - option (KeggOption | KeggFindOption | KeggRdfFormat, optional): Additional options for the operation. - entries (List[str], optional): List of entries for GET or LINK operations. + """Execute flexible KEGG API queries across pathways, genes, compounds, diseases, drugs. Use get_kegg_id_by_gene_symbol() first. Returns: - str | dict: The result of the KEGG query or an error message. - - Examples: - # List human pathways - >>> query_kegg(operation=KeggOperation.LIST, database=KeggDatabase.PATHWAY, query="hsa") - - # Get data for the glycolysis pathway - >>> query_kegg(operation=KeggOperation.GET, entries=["hsa00010"]) - - # Get data for the TP53 gene - >>> query_kegg(operation=KeggOperation.GET, entries=["hsa:7157"]) - - # Get amino acid sequence for TP53 - >>> query_kegg(operation=KeggOperation.GET, entries=["hsa:7157"], option=KeggOption.AASEQ) - - # Find compounds with formula C7H10O5 - >>> query_kegg(operation=KeggOperation.FIND, database=KeggDatabase.COMPOUND, query="C7H10O5", option="formula") - - # Find pathways related to TP53 - >>> query_kegg(operation=KeggOperation.LINK, target_db=KeggDatabase.PATHWAY, entries=["hsa:7157"]) - - # Convert KEGG ID to NCBI Gene ID - >>> query_kegg(operation=KeggOperation.CONV, target_db="ncbi-geneid", source_db="hsa:7157") - - # Get information about a specific pathway - >>> query_kegg(operation=KeggOperation.GET, entries=["hsa00010"]) - - # Get the compound ID for caffeine - >>> query_kegg(operation=KeggOperation.FIND, database=KeggDatabase.COMPOUND, query="caffeine") - - # Get the drug ID for acetaminophen - >>> query_kegg(operation=KeggOperation.FIND, database=KeggDatabase.DRUG, query="acetaminophen") - - # Check if two drugs interact (ibuprofen and aspirin) - >>> query_kegg(operation=KeggOperation.DDI, entries=["dr:D00126", "dr:D00109"]) + str or dict: Raw text response from KEGG API with requested data (pathways, genes, compounds, etc.) or error dict. """ config = KeggConfig( operation=operation, diff --git a/src/biocontext_kb/core/ols/_get_available_ontologies.py b/src/biocontext_kb/core/ols/_get_available_ontologies.py index 258db24..8cc16a9 100644 --- a/src/biocontext_kb/core/ols/_get_available_ontologies.py +++ b/src/biocontext_kb/core/ols/_get_available_ontologies.py @@ -7,14 +7,10 @@ @core_mcp.tool() def get_available_ontologies() -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for all available ontologies. - - This function retrieves a list of all ontologies available in OLS, including - their names, descriptions, and metadata. Use this function first to discover - which ontologies are available before using other search functions. + """Query OLS for all available ontologies with their metadata. Use this first to discover available ontologies. Returns: - dict: Dictionary containing available ontologies and their information or error message + dict: Ontologies list with id, name, description, prefix, homepage, number of terms, status or error message. """ url = "https://www.ebi.ac.uk/ols4/api/v2/ontologies" diff --git a/src/biocontext_kb/core/ols/_get_cell_ontology_terms.py b/src/biocontext_kb/core/ols/_get_cell_ontology_terms.py index e9ffd7b..49f49e8 100644 --- a/src/biocontext_kb/core/ols/_get_cell_ontology_terms.py +++ b/src/biocontext_kb/core/ols/_get_cell_ontology_terms.py @@ -8,30 +8,20 @@ @core_mcp.tool() def get_cell_ontology_terms( - cell_type: Annotated[ - str, Field(description="The cell type to search for (e.g., 'T cell', 'neuron', 'hepatocyte')") - ], + cell_type: Annotated[str, Field(description="Cell type to search for (e.g., 'T cell', 'neuron')")], size: Annotated[ int, - Field(description="The maximum number of results to return"), + Field(description="Maximum number of results to return"), ] = 10, exact_match: Annotated[ bool, - Field(description="Whether to perform an exact match search"), + Field(description="Whether to perform exact match search"), ] = False, ) -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for Cell Ontology (CL) terms. - - This function searches for Cell Ontology terms associated with cell types - using the OLS API. The Cell Ontology provides a controlled vocabulary for cell types. - - Args: - cell_type (str): The cell type to search for (e.g., "T cell"). - size (int): Maximum number of results to return (default: 10). - exact_match (bool): Whether to perform an exact match search (default: False). + """Search OLS for Cell Ontology (CL) terms using a controlled vocabulary for cell types. Returns: - dict: Dictionary containing Cell Ontology terms and information or error message + dict: Cell ontology terms with cl_terms array containing id, label, definition, synonyms or error message. """ if not cell_type: return {"error": "cell_type must be provided"} diff --git a/src/biocontext_kb/core/ols/_get_chebi_terms_by_chemical.py b/src/biocontext_kb/core/ols/_get_chebi_terms_by_chemical.py index 2a32de1..8fd5345 100644 --- a/src/biocontext_kb/core/ols/_get_chebi_terms_by_chemical.py +++ b/src/biocontext_kb/core/ols/_get_chebi_terms_by_chemical.py @@ -9,29 +9,21 @@ @core_mcp.tool() def get_chebi_terms_by_chemical( chemical_name: Annotated[ - str, Field(description="The chemical or drug name to search for (e.g., 'aspirin', 'glucose')") + str, Field(description="Chemical or drug name to search for (e.g., 'aspirin', 'glucose')") ], size: Annotated[ int, - Field(description="The maximum number of results to return"), + Field(description="Maximum number of results to return"), ] = 10, exact_match: Annotated[ bool, - Field(description="Whether to perform an exact match search"), + Field(description="Whether to perform exact match search"), ] = False, ) -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for ChEBI terms related to a chemical name. - - This function searches for ChEBI (Chemical Entities of Biological Interest) terms - associated with a given chemical name using the OLS API. - - Args: - chemical_name (str): The chemical or drug name to search for (e.g., "aspirin"). - size (int): Maximum number of results to return (default: 10). - exact_match (bool): Whether to perform an exact match search (default: False). + """Search OLS for ChEBI (Chemical Entities of Biological Interest) terms for a chemical or drug name. Returns: - dict: Dictionary containing ChEBI terms and information or error message + dict: ChEBI terms with chebi_terms array containing id, label, description, synonyms or error message. """ if not chemical_name: return {"error": "chemical_name must be provided"} diff --git a/src/biocontext_kb/core/ols/_get_efo_id_by_disease_name.py b/src/biocontext_kb/core/ols/_get_efo_id_by_disease_name.py index ced82f3..8b10732 100644 --- a/src/biocontext_kb/core/ols/_get_efo_id_by_disease_name.py +++ b/src/biocontext_kb/core/ols/_get_efo_id_by_disease_name.py @@ -8,30 +8,20 @@ @core_mcp.tool() def get_efo_id_by_disease_name( - disease_name: Annotated[ - str, Field(description="The name of the disease to search for (e.g., 'choledocholithiasis')") - ], + disease_name: Annotated[str, Field(description="Disease name to search for (e.g., 'choledocholithiasis')")], size: Annotated[ int, - Field(description="The maximum number of results to return"), + Field(description="Maximum number of results to return"), ] = 5, exact_match: Annotated[ bool, - Field(description="Whether to perform an exact match search"), + Field(description="Whether to perform exact match search"), ] = False, ) -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for EFO/Mondo/HP IDs related to a disease name. - - This function searches for EFO IDs associated with a given disease name using the OLS API. - Always use this function if you need EFO IDs, e.g., for use in the Open Targets API. - - Args: - disease_name (str): The name of the disease to search for (e.g., "SIDS"). - size (int): Maximum number of results to return (default: 5). - exact_match (bool): Whether to perform an exact match search (default: False). + """Search OLS for EFO/Mondo/HP IDs related to a disease name. Use this to get EFO IDs for Open Targets queries. Returns: - dict: Dictionary containing EFO IDs and information or error message + dict: EFO IDs with efo_ids array containing id, label, description or error message. """ if not disease_name: return {"error": "disease_name must be provided"} diff --git a/src/biocontext_kb/core/ols/_get_go_terms_by_gene.py b/src/biocontext_kb/core/ols/_get_go_terms_by_gene.py index 5f7b274..73a508e 100644 --- a/src/biocontext_kb/core/ols/_get_go_terms_by_gene.py +++ b/src/biocontext_kb/core/ols/_get_go_terms_by_gene.py @@ -8,28 +8,20 @@ @core_mcp.tool() def get_go_terms_by_gene( - gene_name: Annotated[str, Field(description="The gene name or symbol to search for (e.g., 'TP53', 'BRCA1')")], + gene_name: Annotated[str, Field(description="Gene name or symbol to search for (e.g., 'TP53', 'BRCA1')")], size: Annotated[ int, - Field(description="The maximum number of results to return"), + Field(description="Maximum number of results to return"), ] = 10, exact_match: Annotated[ bool, - Field(description="Whether to perform an exact match search"), + Field(description="Whether to perform exact match search"), ] = False, ) -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for Gene Ontology (GO) terms related to a gene name. - - This function searches for GO terms associated with a given gene name using the OLS API. - Gene Ontology provides structured vocabularies for gene and gene product attributes. - - Args: - gene_name (str): The gene name or symbol to search for (e.g., "TP53"). - size (int): Maximum number of results to return (default: 10). - exact_match (bool): Whether to perform an exact match search (default: False). + """Search OLS for Gene Ontology (GO) terms related to a gene name using structured vocabularies. Returns: - dict: Dictionary containing GO terms and information or error message + dict: GO terms with go_terms array containing id, label, description, type or error message. """ if not gene_name: return {"error": "gene_name must be provided"} diff --git a/src/biocontext_kb/core/ols/_get_term_details.py b/src/biocontext_kb/core/ols/_get_term_details.py index 1043176..69beb49 100644 --- a/src/biocontext_kb/core/ols/_get_term_details.py +++ b/src/biocontext_kb/core/ols/_get_term_details.py @@ -8,24 +8,15 @@ @core_mcp.tool() def get_term_details( - term_id: Annotated[ - str, Field(description="The term ID (CURIE) to get details for (e.g., 'EFO:0000001', 'GO:0008150')") - ], + term_id: Annotated[str, Field(description="Term ID in CURIE format (e.g., 'EFO:0000001', 'GO:0008150')")], ontology_id: Annotated[ - str, Field(description="The ontology ID where the term is defined (e.g., 'efo', 'go', 'chebi')") + str, Field(description="Ontology ID where the term is defined (e.g., 'efo', 'go', 'chebi')") ], ) -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for detailed information about a specific term. - - This function retrieves comprehensive information about a specific ontology term, - including its definition, synonyms, hierarchical relationships, and cross-references. - - Args: - term_id (str): The term ID in CURIE format (e.g., "EFO:0000001"). - ontology_id (str): The ontology ID (e.g., "efo"). + """Get comprehensive details about a specific ontology term including definition, synonyms, hierarchical relationships. Returns: - dict: Dictionary containing detailed term information or error message + dict: Term details with id, label, definition, synonyms, hierarchical info, num_descendants or error message. """ if not term_id: return {"error": "term_id must be provided"} diff --git a/src/biocontext_kb/core/ols/_get_term_hierarchical_children.py b/src/biocontext_kb/core/ols/_get_term_hierarchical_children.py index 3414954..a31b6c5 100644 --- a/src/biocontext_kb/core/ols/_get_term_hierarchical_children.py +++ b/src/biocontext_kb/core/ols/_get_term_hierarchical_children.py @@ -8,29 +8,17 @@ @core_mcp.tool() def get_term_hierarchical_children( - term_id: Annotated[ - str, Field(description="The term ID (CURIE) to get children for (e.g., 'EFO:0000001', 'GO:0008150')") - ], - ontology_id: Annotated[ - str, Field(description="The ontology ID where the term is defined (e.g., 'efo', 'go', 'chebi')") - ], + term_id: Annotated[str, Field(description="Term ID in CURIE format (e.g., 'EFO:0000001', 'GO:0008150')")], + ontology_id: Annotated[str, Field(description="Ontology ID (e.g., 'efo', 'go', 'chebi')")], size: Annotated[ int, - Field(description="The maximum number of children to return"), + Field(description="Maximum number of children to return"), ] = 20, ) -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for hierarchical children of a term. - - This function retrieves the hierarchical children of a specific ontology term, - including subclasses and terms related via hierarchical properties like 'part of'. - - Args: - term_id (str): The term ID in CURIE format (e.g., "EFO:0000001"). - ontology_id (str): The ontology ID (e.g., "efo"). - size (int): Maximum number of children to return (default: 20). + """Get hierarchical children of an ontology term from OLS. Includes subclasses and hierarchical properties. Returns: - dict: Dictionary containing hierarchical children or error message + dict: Parent term, hierarchical_children array with id/label/definition, total_children, page_info or error message. """ if not term_id: return {"error": "term_id must be provided"} diff --git a/src/biocontext_kb/core/ols/_search_ontology_terms.py b/src/biocontext_kb/core/ols/_search_ontology_terms.py index b76efc8..30b9cf6 100644 --- a/src/biocontext_kb/core/ols/_search_ontology_terms.py +++ b/src/biocontext_kb/core/ols/_search_ontology_terms.py @@ -8,39 +8,26 @@ @core_mcp.tool() def search_ontology_terms( - search_term: Annotated[str, Field(description="The term to search for across all ontologies")], + search_term: Annotated[str, Field(description="Term to search for")], ontologies: Annotated[ str, Field( - description="Comma-separated list of ontology IDs to search in (e.g., 'efo,go,chebi'). Leave empty to search all ontologies. Use get_available_ontologies() to see all available ontology IDs." + description="Comma-separated ontology IDs (e.g., 'efo,go,chebi'). Leave empty for all. Use get_available_ontologies() to see options" ), ] = "", size: Annotated[ int, - Field(description="The maximum number of results to return"), + Field(description="Maximum number of results to return"), ] = 20, exact_match: Annotated[ bool, - Field(description="Whether to perform an exact match search"), + Field(description="Whether to perform exact match search"), ] = False, ) -> Dict[str, Any]: - """Query the Ontology Lookup Service (OLS) for terms across multiple ontologies. - - This function provides a general search across ontologies in OLS, allowing you to - find terms from multiple ontologies or search all ontologies at once. - - TIP: Use get_available_ontologies() first to discover which ontologies are available - and their IDs before searching. - - Args: - search_term (str): The term to search for. - ontologies (str): Comma-separated ontology IDs (e.g., "efo,go,chebi"). Empty for all. - Use get_available_ontologies() to see available options. - size (int): Maximum number of results to return (default: 20). - exact_match (bool): Whether to perform an exact match search (default: False). + """Search for terms across multiple ontologies in OLS. Use get_available_ontologies() first to discover ontologies. Returns: - dict: Dictionary containing terms from various ontologies or error message + dict: Terms array, terms_by_ontology grouped results, total_results, ontologies_found list or error message. """ if not search_term: return {"error": "search_term must be provided"} diff --git a/src/biocontext_kb/core/openfda/_advanced_search.py b/src/biocontext_kb/core/openfda/_advanced_search.py index 30122f4..7e24a58 100644 --- a/src/biocontext_kb/core/openfda/_advanced_search.py +++ b/src/biocontext_kb/core/openfda/_advanced_search.py @@ -11,23 +11,15 @@ def get_available_pharmacologic_classes( class_type: Annotated[ str, Field( - description="Type of pharmacologic class: 'epc' (Established Pharmacologic Class), 'moa' (Mechanism of Action), 'pe' (Physiologic Effect), or 'cs' (Chemical Structure)" + description="Class type: 'epc' (Established Pharmacologic Class), 'moa' (Mechanism of Action), 'pe' (Physiologic Effect), or 'cs' (Chemical Structure)" ), ] = "epc", limit: Annotated[int, Field(description="Number of unique classes to return", ge=1, le=1000)] = 100, ) -> dict: - """Get available pharmacologic classes from the FDA database. - - This function retrieves the actual pharmacologic class values available in the - FDA database, which can then be used with search_drugs_by_therapeutic_class. - Always call this function first to see available options before searching. - - Args: - class_type (str): Type of classification - epc, moa, pe, or cs. - limit (int): Maximum number of unique classes to return. + """Get available pharmacologic classes from FDA database. Call this first to see available options. Returns: - dict: Available pharmacologic class values in the FDA database. + dict: Class type, field, available_classes array with term/count, total_found or error message. """ # Map class type to the appropriate OpenFDA field class_field_mapping = { @@ -66,30 +58,21 @@ def search_drugs_by_therapeutic_class( therapeutic_class: Annotated[ str, Field( - description="Exact therapeutic/pharmacologic class term from FDA database (use get_available_pharmacologic_classes first to see options)" + description="Exact therapeutic/pharmacologic class term from FDA (use get_available_pharmacologic_classes first)" ), ], class_type: Annotated[ str, Field( - description="Type of pharmacologic class: 'epc' (Established Pharmacologic Class), 'moa' (Mechanism of Action), 'pe' (Physiologic Effect), or 'cs' (Chemical Structure)" + description="Class type: 'epc' (Established Pharmacologic Class), 'moa' (Mechanism of Action), 'pe' (Physiologic Effect), or 'cs' (Chemical Structure)" ), ] = "epc", limit: Annotated[int, Field(description="Number of results to return", ge=1, le=1000)] = 25, ) -> dict: - """Search for drugs by their therapeutic or pharmacologic class. - - IMPORTANT: Use get_available_pharmacologic_classes() first to see the exact - class terms available in the FDA database. This function requires exact matches - of the pharmacologic class terms as they appear in the FDA data. - - Args: - therapeutic_class (str): The exact therapeutic class term from FDA database. - class_type (str): Type of classification - epc, moa, pe, or cs. - limit (int): Maximum number of results to return. + """Search for drugs by therapeutic or pharmacologic class. Use get_available_pharmacologic_classes() first for exact terms. Returns: - dict: Search results for drugs in the specified therapeutic class. + dict: FDA drug results array with application info, products, sponsor names or error message. """ # Map class type to the appropriate OpenFDA field class_field_mapping = { @@ -120,18 +103,12 @@ class terms available in the FDA database. This function requires exact matches @core_mcp.tool() def get_generic_equivalents( - brand_name: Annotated[str, Field(description="Brand name drug to find generic equivalents for")], + brand_name: Annotated[str, Field(description="Brand name drug to find generics for")], ) -> dict: - """Find generic equivalents for a brand name drug. - - This function searches for ANDA (Abbreviated New Drug Application) entries - that are generic equivalents of a specified brand name drug. - - Args: - brand_name (str): The brand name drug to find generics for. + """Find generic equivalents for a brand name drug. Searches ANDA entries with matching active ingredients. Returns: - dict: Generic drug equivalents and their manufacturers. + dict: Brand drug info, generic_equivalents array, total_generics_found count or error message. """ # First, search for the brand name drug to get its active ingredient brand_query = f"(openfda.brand_name:{brand_name} OR products.brand_name:{brand_name})" diff --git a/src/biocontext_kb/core/openfda/_count_drugs.py b/src/biocontext_kb/core/openfda/_count_drugs.py index 9798481..6116a88 100644 --- a/src/biocontext_kb/core/openfda/_count_drugs.py +++ b/src/biocontext_kb/core/openfda/_count_drugs.py @@ -11,7 +11,7 @@ def count_drugs_by_field( field: Annotated[ str, Field( - description="Field to count by (e.g., 'sponsor_name', 'products.dosage_form', 'products.route', 'products.marketing_status', 'openfda.pharm_class_epc')" + description="Field to count (e.g., 'sponsor_name', 'products.dosage_form', 'products.route', 'openfda.pharm_class_epc')" ), ], search_filter: Annotated[ @@ -19,23 +19,10 @@ def count_drugs_by_field( ] = None, limit: Annotated[int, Field(description="Maximum number of count results to return", ge=1, le=1000)] = 100, ) -> dict: - """Count unique values in a specific field across FDA-approved drugs. - - This function is useful for statistical analysis and getting overviews of the drug database. - Common fields to count include: - - sponsor_name: Count drugs by pharmaceutical company - - products.dosage_form: Count by dosage forms (tablet, injection, etc.) - - products.route: Count by administration routes (oral, injection, etc.) - - products.marketing_status: Count by marketing status - - openfda.pharm_class_epc: Count by pharmacologic class - - Args: - field (str): The field to count unique values for. - search_filter (str, optional): Search filter to apply before counting. - limit (int): Maximum number of count results to return. + """Count unique values in a field across FDA-approved drugs. Useful for statistical analysis. Returns: - dict: Count results showing terms and their frequencies. + dict: Results array with term and count for each unique value or error message. """ # If field is an array, use .exact for correct counting array_fields = [ @@ -68,16 +55,10 @@ def count_drugs_by_field( @core_mcp.tool() def get_drug_statistics() -> dict: - """Get general statistics about the FDA Drugs@FDA database. - - This function provides an overview of the database including: - - Top pharmaceutical sponsors by number of approved drugs - - Most common dosage forms - - Most common routes of administration - - Distribution of marketing statuses + """Get general statistics about the FDA Drugs@FDA database. Includes top sponsors, dosage forms, routes, marketing status. Returns: - dict: Statistical overview of the FDA drugs database. + dict: Top sponsors, dosage_forms, administration_routes, marketing_statuses with counts or error message. """ statistics = {} diff --git a/src/biocontext_kb/core/openfda/_get_drug_info.py b/src/biocontext_kb/core/openfda/_get_drug_info.py index dccb7cb..02ef140 100644 --- a/src/biocontext_kb/core/openfda/_get_drug_info.py +++ b/src/biocontext_kb/core/openfda/_get_drug_info.py @@ -12,18 +12,10 @@ def get_drug_by_application_number( str, Field(description="FDA application number (e.g., 'NDA021436', 'ANDA123456', 'BLA761234')") ], ) -> dict: - """Get detailed information about a specific FDA-approved drug by its application number. - - Application numbers follow the format: NDA, ANDA, or BLA followed by 6 digits. - - NDA: New Drug Application (brand name drugs) - - ANDA: Abbreviated New Drug Application (generic drugs) - - BLA: Biologics License Application (biological products) - - Args: - application_number (str): The FDA application number. + """Get detailed information about an FDA-approved drug by application number. Format: NDA/ANDA/BLA followed by 6 digits. Returns: - dict: Detailed drug information from the FDA Drugs@FDA API. + dict: FDA drug results with application details, products, sponsor information or error message. """ # Validate application number format if not application_number or len(application_number) < 9: @@ -46,21 +38,12 @@ def get_drug_by_application_number( def get_drug_label_info( brand_name: Annotated[Optional[str], Field(description="Brand name of the drug")] = None, generic_name: Annotated[Optional[str], Field(description="Generic name of the drug")] = None, - ndc: Annotated[Optional[str], Field(description="National Drug Code (NDC) number")] = None, + ndc: Annotated[Optional[str], Field(description="National Drug Code (NDC)")] = None, ) -> dict: - """Get drug labeling information including active ingredients, dosage, and usage instructions. - - This function retrieves comprehensive drug label information from the FDA's drug labeling - database, which includes detailed product information, active ingredients, dosage forms, - and administration routes. - - Args: - brand_name (str, optional): Brand name of the drug. - generic_name (str, optional): Generic name of the drug. - ndc (str, optional): National Drug Code number. + """Get comprehensive drug labeling information from FDA. Includes active ingredients, dosage forms, administration routes. Returns: - dict: Drug labeling information from the FDA API. + dict: Drug label results with indications, warnings, dosage, active ingredients or error message. """ if not any([brand_name, generic_name, ndc]): return {"error": "At least one of brand_name, generic_name, or ndc must be provided"} diff --git a/src/biocontext_kb/core/openfda/_search_drugs.py b/src/biocontext_kb/core/openfda/_search_drugs.py index a955cf6..57a33a0 100644 --- a/src/biocontext_kb/core/openfda/_search_drugs.py +++ b/src/biocontext_kb/core/openfda/_search_drugs.py @@ -8,10 +8,8 @@ @core_mcp.tool() def search_drugs_fda( - brand_name: Annotated[Optional[str], Field(description="Brand or trade name of the drug (e.g., 'Tylenol')")] = None, - generic_name: Annotated[ - Optional[str], Field(description="Generic name of the drug (e.g., 'acetaminophen')") - ] = None, + brand_name: Annotated[Optional[str], Field(description="Brand or trade name (e.g., 'Tylenol')")] = None, + generic_name: Annotated[Optional[str], Field(description="Generic name (e.g., 'acetaminophen')")] = None, active_ingredient: Annotated[Optional[str], Field(description="Active ingredient name")] = None, sponsor_name: Annotated[Optional[str], Field(description="Company/sponsor name")] = None, application_number: Annotated[ @@ -29,36 +27,17 @@ def search_drugs_fda( route: Annotated[ Optional[str], Field(description="Route of administration (e.g., 'ORAL', 'INJECTION', 'TOPICAL')") ] = None, - search_type: Annotated[ - str, Field(description="Search type: 'and' for all terms must match, 'or' for any term matches") - ] = "or", + search_type: Annotated[str, Field(description="'and' for all terms must match, 'or' for any term matches")] = "or", sort_by: Annotated[ - Optional[str], Field(description="Sort by field (e.g., 'sponsor_name', 'application_number')") + Optional[str], Field(description="Field to sort by (e.g., 'sponsor_name', 'application_number')") ] = None, limit: Annotated[int, Field(description="Number of results to return", ge=1, le=1000)] = 25, skip: Annotated[int, Field(description="Number of results to skip for pagination", ge=0, le=25000)] = 0, ) -> dict: - """Search the FDA Drugs@FDA database for approved drug products. - - This function searches for FDA-approved drugs based on various criteria including - brand names, generic names, active ingredients, sponsors, and regulatory information. - - Args: - brand_name (str, optional): Brand or trade name of the drug. - generic_name (str, optional): Generic name of the drug. - active_ingredient (str, optional): Active ingredient name. - sponsor_name (str, optional): Company or sponsor name. - application_number (str, optional): FDA application number (NDA, ANDA, or BLA). - marketing_status (str, optional): Marketing status of the drug. - dosage_form (str, optional): Dosage form of the drug. - route (str, optional): Route of administration. - search_type (str): How to combine search terms - "and" or "or". - sort_by (str, optional): Field to sort results by. - limit (int): Maximum number of results to return (1-1000). - skip (int): Number of results to skip for pagination (0-25000). + """Search FDA Drugs@FDA database for approved drug products. Supports multiple search criteria. Returns: - dict: Search results from the FDA Drugs@FDA API. + dict: Results array with drug products including application numbers, sponsors, products array or error message. """ # Ensure at least one search parameter is provided search_params = [ diff --git a/src/biocontext_kb/core/opentargets/_get_open_targets_graphql_schema.py b/src/biocontext_kb/core/opentargets/_get_open_targets_graphql_schema.py index fe229d6..e7c531d 100644 --- a/src/biocontext_kb/core/opentargets/_get_open_targets_graphql_schema.py +++ b/src/biocontext_kb/core/opentargets/_get_open_targets_graphql_schema.py @@ -6,7 +6,11 @@ @core_mcp.tool() def get_open_targets_graphql_schema() -> dict: - """Fetch the Open Targets GraphQL schema.""" + """Retrieve the Open Targets GraphQL schema for query construction. + + Returns: + dict: Schema string in format {'schema': '...'} containing GraphQL type definitions or error message. + """ base_url = "https://api.platform.opentargets.org/api/v4/graphql" try: schema = fetch_graphql_schema(base_url) diff --git a/src/biocontext_kb/core/opentargets/_get_open_targets_query_examples.py b/src/biocontext_kb/core/opentargets/_get_open_targets_query_examples.py index 7603bdc..5a82265 100644 --- a/src/biocontext_kb/core/opentargets/_get_open_targets_query_examples.py +++ b/src/biocontext_kb/core/opentargets/_get_open_targets_query_examples.py @@ -221,10 +221,9 @@ @core_mcp.tool() def get_open_targets_query_examples() -> dict: - """Get example GraphQL queries for the Open Targets API. + """Retrieve example GraphQL queries for the Open Targets API. Examples demonstrate common use cases. - Returns a dictionary of named example queries that can be used with the - query_open_targets_graphql tool. These examples demonstrate common use cases - for retrieving data about targets, diseases, drugs, and their associations. + Returns: + dict: Example queries mapped by category (informationForTarget, drugsForTarget, associatedDiseases, etc.) with GraphQL query strings. """ return EXAMPLE_QUERIES diff --git a/src/biocontext_kb/core/opentargets/_query_open_targets_graphql.py b/src/biocontext_kb/core/opentargets/_query_open_targets_graphql.py index 6588761..00d6948 100644 --- a/src/biocontext_kb/core/opentargets/_query_open_targets_graphql.py +++ b/src/biocontext_kb/core/opentargets/_query_open_targets_graphql.py @@ -8,40 +8,13 @@ @core_mcp.tool() def query_open_targets_graphql( - query_string: Annotated[str, Field(description="The GraphQL query string")], - variables: Annotated[Optional[dict], Field(description="The variables for the GraphQL query")] = None, + query_string: Annotated[str, Field(description="GraphQL query string starting with 'query' keyword")], + variables: Annotated[Optional[dict], Field(description="Optional variables for the GraphQL query")] = None, ) -> dict: - """Execute a GraphQL query against the Open Targets API after fetching the schema. - - Important: Always first fetch examples using the schema using `get_open_targets_query_examples`. If the examples are - not sufficient, also get the schema using the `get_open_targets_graphql_schema` tool before executing a query. - Relying on either of these options provides the necessary context for the query and ensures that the query is valid. - - Queries should use the Ensembl gene ID (e.g., "ENSG00000141510"). - If necessary, first use `get_ensembl_id_from_gene_symbol` to convert gene symbols (e.g., "TP53") to Ensembl IDs. - - If a disease ID is needed, use the `get_efo_id_from_disease_name` tool to get the EFO ID (e.g., "EFO_0004705") for a - disease name (e.g., "Hypothyroidism"). - - Make sure to always start the query string with the keyword `query` followed by the query name. - The query string should be a valid GraphQL query, and the variables should be a dictionary of parameters - that the query requires. - - Open Targets provides data on: - - target: annotations, tractability, mouse models, expression, disease/phenotype associations, available drugs. - - disease: annotations, ontology, drugs, symptoms, target associations. - - drug: annotations, mechanisms, indications, pharmacovigilance. - - variant: annotations, frequencies, effects, consequences, credible sets. - - studies: annotations, traits, publications, cohorts, credible sets. - - credibleSet: annotations, variant sets, gene assignments, colocalization. - - search: index of all platform entities. - - Args: - query_string (str): The GraphQL query string. - variables (dict): The variables for the GraphQL query. + """Execute GraphQL queries against the Open Targets API. Use get_open_targets_query_examples() or get_open_targets_graphql_schema() first. Returns: - dict: The response data from the GraphQL API. + dict: GraphQL response with data field containing targets, diseases, drugs, variants, studies or error message. """ base_url = "https://api.platform.opentargets.org/api/v4/graphql" try: diff --git a/src/biocontext_kb/core/panglaodb/_get_panglaodb_marker_genes.py b/src/biocontext_kb/core/panglaodb/_get_panglaodb_marker_genes.py index 1539f3d..db45415 100644 --- a/src/biocontext_kb/core/panglaodb/_get_panglaodb_marker_genes.py +++ b/src/biocontext_kb/core/panglaodb/_get_panglaodb_marker_genes.py @@ -8,11 +8,11 @@ @core_mcp.tool() def get_panglaodb_marker_genes( - species: Annotated[str, Field(description="The species ('Hs' for Human or 'Mm' for Mouse)")], + species: Annotated[str, Field(description="Species: 'Hs' for Human or 'Mm' for Mouse")], min_sensitivity: Annotated[ Optional[float], Field( - description="Minimum sensitivity score (0-1). Applied to species-specific column.", + description="Minimum sensitivity score (0-1), applied to species-specific column", ge=0, le=1, ), @@ -20,36 +20,28 @@ def get_panglaodb_marker_genes( min_specificity: Annotated[ Optional[float], Field( - description="Minimum specificity score (0-1). Applied to species-specific column.", + description="Minimum specificity score (0-1), applied to species-specific column", ge=0, le=1, ), ] = None, organ: Annotated[ Optional[str], - Field(description="Filter by organ (e.g., 'Brain', 'Lung'). Case-insensitive."), + Field(description="Organ filter (e.g., 'Brain', 'Lung'), case-insensitive"), ] = None, cell_type: Annotated[ Optional[str], - Field(description="Filter by cell type (e.g., 'Smooth muscle cells', 'T cells'). Case-insensitive."), + Field(description="Cell type filter (e.g., 'Smooth muscle cells', 'T cells'), case-insensitive"), ] = None, gene_symbol: Annotated[ Optional[str], - Field(description="Filter by gene symbol (e.g., 'MAFB', 'SYNPO'). Case-insensitive."), + Field(description="Gene symbol filter (e.g., 'MAFB', 'SYNPO'), case-insensitive"), ] = None, ) -> Dict[str, Any]: - """Retrieves marker genes from the PanglaoDB dataset based on specified filters. - - Args: - species: The species ('Hs' for Human or 'Mm' for Mouse). - min_sensitivity: Minimum sensitivity score (0-1). - min_specificity: Minimum specificity score (0-1). - organ: Filter by organ name (case-insensitive). - cell_type: Filter by cell type name (case-insensitive). - gene_symbol: Filter by gene symbol (case-insensitive). + """Retrieve marker genes from PanglaoDB dataset with optional filters. Supports filtering by species, scores, organ, cell type, gene symbol. Returns: - A dictionary containing a list of matching marker gene records or an error message. + dict: Markers array with gene symbols, cell types, organs, sensitivity/specificity scores or error message. """ panglao_db_df = get_panglaodb_df() if panglao_db_df is None: diff --git a/src/biocontext_kb/core/panglaodb/_get_panglaodb_options.py b/src/biocontext_kb/core/panglaodb/_get_panglaodb_options.py index 93aec7b..6b04888 100644 --- a/src/biocontext_kb/core/panglaodb/_get_panglaodb_options.py +++ b/src/biocontext_kb/core/panglaodb/_get_panglaodb_options.py @@ -6,10 +6,10 @@ @core_mcp.tool() def get_panglaodb_options() -> Dict[str, List[str] | str]: - """Retrieves the available options for filtering marker genes in the PanglaoDB dataset. + """Retrieve available filter options for PanglaoDB marker genes. Returns unique values for organs and cell types. Returns: - A dictionary containing lists of unique values for species, organ, cell type, and gene symbols. + dict: Lists of unique organ and cell_type values available in PanglaoDB dataset or error message. """ panglao_db_df = get_panglaodb_df() if panglao_db_df is None: diff --git a/src/biocontext_kb/core/pride/_get_pride_project.py b/src/biocontext_kb/core/pride/_get_pride_project.py index b5a39eb..3200391 100644 --- a/src/biocontext_kb/core/pride/_get_pride_project.py +++ b/src/biocontext_kb/core/pride/_get_pride_project.py @@ -10,31 +10,21 @@ def get_pride_project( project_accession: Annotated[ str, - Field(description="The PRIDE project accession (e.g., 'PRD000001')"), + Field(description="PRIDE project accession (e.g., 'PRD000001')"), ], include_files: Annotated[ bool, - Field(description="Whether to include file information for the project"), + Field(description="Include file information (limited to first 20 files)"), ] = False, include_similar_projects: Annotated[ bool, - Field(description="Whether to include similar projects based on metadata"), + Field(description="Include similar projects based on metadata (limited to 10)"), ] = False, ) -> dict: - """Get detailed information about a specific PRIDE project. - - PRIDE (PRoteomics IDEntifications) is a public repository for mass spectrometry - proteomics data. This function retrieves comprehensive information about a - specific project including metadata, experimental details, and optionally - associated files and similar projects. - - Args: - project_accession (str): The PRIDE project accession (e.g., "PRD000001"). - include_files (bool, optional): Whether to include file information. Defaults to False. - include_similar_projects (bool, optional): Whether to include similar projects. Defaults to False. + """Retrieve detailed information about a specific PRIDE mass spectrometry proteomics project. Returns metadata and experimental details. Returns: - dict: Project information including metadata, experimental details, and optional file/similar project data + dict: Project details with accession, title, description, organisms, instruments, publications, optionally files/similar_projects or error message. """ base_url = "https://www.ebi.ac.uk/pride/ws/archive/v3" diff --git a/src/biocontext_kb/core/pride/_search_pride_projects.py b/src/biocontext_kb/core/pride/_search_pride_projects.py index 6f9cc0a..97f9541 100644 --- a/src/biocontext_kb/core/pride/_search_pride_projects.py +++ b/src/biocontext_kb/core/pride/_search_pride_projects.py @@ -14,15 +14,15 @@ def search_pride_projects( ] = None, organism_filter: Annotated[ Optional[str], - Field(description="Filter by organism (e.g., 'Homo sapiens', 'human')"), + Field(description="Organism filter (e.g., 'Homo sapiens', 'human')"), ] = None, instrument_filter: Annotated[ Optional[str], - Field(description="Filter by instrument type (e.g., 'Orbitrap', 'LTQ')"), + Field(description="Instrument type filter (e.g., 'Orbitrap', 'LTQ')"), ] = None, experiment_type_filter: Annotated[ Optional[str], - Field(description="Filter by experiment type (e.g., 'TMT', 'Label-free')"), + Field(description="Experiment type filter (e.g., 'TMT', 'Label-free')"), ] = None, page_size: Annotated[ int, @@ -30,30 +30,17 @@ def search_pride_projects( ] = 20, sort_field: Annotated[ str, - Field(description="Field to sort by: submissionDate, publicationDate"), + Field(description="Sort field: submissionDate or publicationDate"), ] = "submissionDate", sort_direction: Annotated[ str, Field(description="Sort direction: ASC or DESC"), ] = "DESC", ) -> dict: - """Search PRIDE Archive projects by various criteria. - - This function searches the PRIDE database for mass spectrometry proteomics - projects using keywords and filters. Useful for finding relevant datasets - for comparative analysis or method validation. - - Args: - keyword (str, optional): Search keywords for project titles/descriptions. - organism_filter (str, optional): Filter by organism name. - instrument_filter (str, optional): Filter by mass spectrometer instrument. - experiment_type_filter (str, optional): Filter by experimental approach. - page_size (int, optional): Number of results (max 100). Defaults to 20. - sort_field (str, optional): Sort field. Defaults to "submissionDate". - sort_direction (str, optional): Sort direction. Defaults to "DESC". + """Search PRIDE database for mass spectrometry proteomics projects using keywords and filters. Returns: - dict: Search results with matching PRIDE projects and metadata + dict: Results array with project accessions, titles, descriptions, organisms, instruments, experiment types, count, search_criteria or error message. """ base_url = "https://www.ebi.ac.uk/pride/ws/archive/v3/search/projects" diff --git a/src/biocontext_kb/core/pride/_search_pride_proteins.py b/src/biocontext_kb/core/pride/_search_pride_proteins.py index 349cb46..ae7bd1b 100644 --- a/src/biocontext_kb/core/pride/_search_pride_proteins.py +++ b/src/biocontext_kb/core/pride/_search_pride_proteins.py @@ -10,7 +10,7 @@ def search_pride_proteins( project_accession: Annotated[ str, - Field(description="The PRIDE project accession to search proteins in"), + Field(description="PRIDE project accession to search proteins in"), ], keyword: Annotated[ Optional[str], @@ -22,28 +22,17 @@ def search_pride_proteins( ] = 20, sort_field: Annotated[ str, - Field(description="Field to sort by: accession, proteinName, gene"), + Field(description="Sort field: accession, proteinName, or gene"), ] = "accession", sort_direction: Annotated[ str, Field(description="Sort direction: ASC or DESC"), ] = "ASC", ) -> dict: - """Search proteins identified in a specific PRIDE project. - - This function searches for proteins identified in a specific PRIDE mass - spectrometry project. Useful for finding specific proteins of interest - in proteomics datasets. - - Args: - project_accession (str): The PRIDE project accession to search in. - keyword (str, optional): Search keyword for protein names or accessions. - page_size (int, optional): Number of results (max 100). Defaults to 20. - sort_field (str, optional): Sort field. Defaults to "accession". - sort_direction (str, optional): Sort direction. Defaults to "ASC". + """Search for proteins identified in a specific PRIDE mass spectrometry project. Useful for finding specific proteins in proteomics datasets. Returns: - dict: Search results with proteins found in the specified project + dict: Proteins list with accessions, names, genes, sequences, modifications, associated projects or error message. """ base_url = "https://www.ebi.ac.uk/pride/ws/archive/v3/pride-ap/search/proteins" diff --git a/src/biocontext_kb/core/proteinatlas/_get_human_protein_atlas_info.py b/src/biocontext_kb/core/proteinatlas/_get_human_protein_atlas_info.py index 17358d8..c7ffa68 100644 --- a/src/biocontext_kb/core/proteinatlas/_get_human_protein_atlas_info.py +++ b/src/biocontext_kb/core/proteinatlas/_get_human_protein_atlas_info.py @@ -9,10 +9,14 @@ @core_mcp.tool() def get_human_protein_atlas_info( - gene_id: Annotated[Optional[str], Field(description="The Ensembl gene ID (e.g., 'ENSG00000141510')")], - gene_symbol: Annotated[Optional[str], Field(description="The gene name (e.g., 'TP53')")], + gene_id: Annotated[Optional[str], Field(description="Ensembl gene ID (e.g., 'ENSG00000141510')")], + gene_symbol: Annotated[Optional[str], Field(description="Gene symbol (e.g., 'TP53')")], ) -> dict: - """Query the Human Protein Atlas API for target general information, genetic constraint, and tractability.""" + """Retrieve Human Protein Atlas information including expression, localization, and pathology data. Provide either gene_id or gene_symbol. + + Returns: + dict: Protein atlas data with tissue_expression, subcellular_location, pathology, antibodies, RNA/protein levels or error message. + """ if gene_id is None and gene_symbol is None: return {"error": "At least one of gene_id or gene_symbol must be provided"} diff --git a/src/biocontext_kb/core/scholarly/_search_publications.py b/src/biocontext_kb/core/scholarly/_search_publications.py index d062e16..d375970 100644 --- a/src/biocontext_kb/core/scholarly/_search_publications.py +++ b/src/biocontext_kb/core/scholarly/_search_publications.py @@ -13,36 +13,15 @@ def search_google_scholar_publications( query: Annotated[ str, - Field( - description="Search query for publications (e.g., 'machine learning' or 'author:\"John Smith\" deep learning')" - ), + Field(description="Search query (e.g., 'machine learning' or 'author:\"John Smith\" deep learning')"), ], - max_results: Annotated[int, Field(description="Maximum number of publications to return", ge=1, le=50)] = 10, - use_proxy: Annotated[bool, Field(description="Whether to use free proxies to avoid rate limiting")] = True, + max_results: Annotated[int, Field(description="Maximum number of publications to return (1-50)", ge=1, le=50)] = 10, + use_proxy: Annotated[bool, Field(description="Use free proxies to avoid rate limiting")] = True, ) -> Dict[str, Any]: - """Search for publications on Google Scholar. - - Supports advanced search operators including author search using 'author:"Name"' syntax. - - Examples: - - 'machine learning' - General topic search - - 'author:"John Smith"' - Publications by specific author - - 'author:"John Smith" neural networks' - Author's work on specific topic - - WARNING: Google Scholar may block requests and IP addresses for excessive queries. - Publication searches are particularly prone to triggering anti-bot measures. - This tool automatically uses free proxies to mitigate blocking, but use responsibly. - - For academic research, consider using alternative databases like PubMed/EuropePMC - when possible to reduce load on Google Scholar. - - Args: - query (str): Search query for publications. Use 'author:"Name"' to search by author. - max_results (int): Maximum number of publications to return (default: 10, max: 50). - use_proxy (bool): Whether to use free proxies to avoid rate limiting (default: True). + """Search Google Scholar for publications with support for author search using 'author:"Name"' syntax. WARNING: Use responsibly, may block excessive queries. Returns: - dict: Publication search results or error message + dict: Publications list with title, authors, venue, year, citations, abstract, bib entry or error message. """ try: # Set up proxy if requested diff --git a/src/biocontext_kb/core/stringdb/_get_string_id.py b/src/biocontext_kb/core/stringdb/_get_string_id.py index e510ecf..31d28c8 100644 --- a/src/biocontext_kb/core/stringdb/_get_string_id.py +++ b/src/biocontext_kb/core/stringdb/_get_string_id.py @@ -8,27 +8,15 @@ @core_mcp.tool() def get_string_id( - protein_symbol: Annotated[str, Field(description="The name of the protein to search for (e.g., 'TP53')")], - species: Annotated[str, Field(description="The species taxonomy ID (e.g., '9606' for human)")] = "", - return_field: Annotated[ - str, Field(description="Which field to return. Either `stringId` (default) or `preferredName`.") - ] = "stringId", - limit: Annotated[int, Field(description="Limit the number of matches returned")] = 1, + protein_symbol: Annotated[str, Field(description="Protein name or identifier (e.g., 'TP53')")], + species: Annotated[str, Field(description="Species taxonomy ID (e.g., '9606' for human)")] = "", + return_field: Annotated[str, Field(description="Field to return: 'stringId' or 'preferredName'")] = "stringId", + limit: Annotated[int, Field(description="Maximum number of matches to return")] = 1, ) -> Union[dict, str]: - """Map a protein identifier to STRING database IDs. - - This function helps resolve common gene names, synonyms, or UniProt identifiers - to the STRING-specific identifiers. Using STRING IDs in subsequent API calls - improves reliability and performance. - - Args: - protein_symbol (str): The name of the protein to search for (e.g., "TP53"). - species (str): The species taxonomy ID (e.g., "9606" for human). Optional. - return_field (str): The field to return. Either `stringId` or `preferredName` (default: stringId). - limit (int): Limit the number of matches returned per query (default: 1). + """Map protein identifiers (gene names, synonyms, UniProt IDs) to STRING database IDs. Using STRING IDs improves reliability. Returns: - str: The STRING ID or preferred name if found, otherwise an error message. + str or dict: STRING ID string (e.g., '9606.ENSP00000269305') or dict with error message. """ url = f"https://string-db.org/api/json/get_string_ids?identifiers={protein_symbol}&echo_query=1&limit={limit}" diff --git a/src/biocontext_kb/core/stringdb/_get_string_interactions.py b/src/biocontext_kb/core/stringdb/_get_string_interactions.py index 1522312..8fa9716 100644 --- a/src/biocontext_kb/core/stringdb/_get_string_interactions.py +++ b/src/biocontext_kb/core/stringdb/_get_string_interactions.py @@ -9,21 +9,14 @@ @core_mcp.tool() def get_string_interactions( - protein_symbol: Annotated[str, Field(description="The name of the protein to search for (e.g., 'TP53')")], - species: Annotated[str, Field(description="The species taxonomy ID (e.g., '10090' for mouse)")], - min_score: Annotated[int, Field(description="Minimum combined score threshold", ge=0, le=1000)] = 700, + protein_symbol: Annotated[str, Field(description="Protein name to search for (e.g., 'TP53')")], + species: Annotated[str, Field(description="Species taxonomy ID (e.g., '10090' for mouse)")], + min_score: Annotated[int, Field(description="Minimum combined score threshold (0-1000)", ge=0, le=1000)] = 700, ) -> Union[List[Dict[str, Any]], dict]: - """Get all protein-protein interactions for a given protein with a combined score above the threshold. - - Always provide the species parameter to ensure the correct protein is returned. - - Args: - protein_symbol (str): The name of the protein to search for (e.g., "TP53"). - species (str): The species taxonomy ID (e.g., "10090" for mouse). - min_score (int): Minimum combined score threshold (default: 700). + """Retrieve protein-protein interactions for a given protein with scores above threshold. Always provide species parameter. Returns: - list: A list of dictionaries containing interacting proteins and their scores. + list or dict: Protein interactions array with stringId_A, stringId_B, preferredName_A/B, score, evidence channels or error message. """ # First resolve the protein name to a STRING ID try: diff --git a/src/biocontext_kb/core/stringdb/_get_string_network_image.py b/src/biocontext_kb/core/stringdb/_get_string_network_image.py index 7751d22..5383814 100644 --- a/src/biocontext_kb/core/stringdb/_get_string_network_image.py +++ b/src/biocontext_kb/core/stringdb/_get_string_network_image.py @@ -12,23 +12,17 @@ @core_mcp.tool() def get_string_network_image( - protein_symbol: Annotated[str, Field(description="The name of the protein to search for (e.g., 'TP53')")], - species: Annotated[str, Field(description="The species taxonomy ID (e.g., '10090' for mouse)")], - flavor: Annotated[str, Field(description="The network flavor to use")] = "confidence", - min_score: Annotated[int, Field(description="Minimum combined score threshold", ge=0, le=1000)] = 700, + protein_symbol: Annotated[str, Field(description="Protein name to search for (e.g., 'TP53')")], + species: Annotated[str, Field(description="Species taxonomy ID (e.g., '10090' for mouse)")], + flavor: Annotated[ + str, Field(description="Network flavor (e.g., 'confidence', 'evidence', 'actions')") + ] = "confidence", + min_score: Annotated[int, Field(description="Minimum combined score threshold (0-1000)", ge=0, le=1000)] = 700, ) -> Image | dict: - """Get a network image for a given protein from the STRING database. - - Always provide the species parameter to ensure the correct protein is returned. - - Args: - protein_symbol (str): The name of the protein to search for (e.g., "TP53"). - species (str): The species taxonomy ID (e.g., "10090" for mouse). - flavor (str): The network flavor to use (default: "confidence"). - min_score (int): Minimum combined score threshold (default: 700). + """Generate protein-protein interaction network image from STRING database. Always provide species parameter. Returns: - Image: The network image for the protein. + Image or dict: Network visualization as PNG image object or error message. """ # First resolve the protein name to a STRING ID try: diff --git a/src/biocontext_kb/core/stringdb/_get_string_similarity_scores.py b/src/biocontext_kb/core/stringdb/_get_string_similarity_scores.py index e0eb91d..d01f1ac 100644 --- a/src/biocontext_kb/core/stringdb/_get_string_similarity_scores.py +++ b/src/biocontext_kb/core/stringdb/_get_string_similarity_scores.py @@ -9,25 +9,14 @@ @core_mcp.tool() def get_string_similarity_scores( - protein_symbol: Annotated[str, Field(description="The protein symbol of the first protein (e.g., 'TP53')")], - protein_symbol_comparison: Annotated[ - str, Field(description="The protein symbol of the second protein (e.g., 'MKI67')") - ], - species: Annotated[str, Field(description="The species taxonomy ID (e.g., '9606' for human)")] = "", + protein_symbol: Annotated[str, Field(description="First protein symbol (e.g., 'TP53')")], + protein_symbol_comparison: Annotated[str, Field(description="Second protein symbol (e.g., 'MKI67')")], + species: Annotated[str, Field(description="Species taxonomy ID (e.g., '9606' for human)")] = "", ) -> Union[List[Dict[str, Any]], dict]: - """Get similarity scores between proteins from the STRING database. - - The scores represent protein homology based on Smith-Waterman bit scores. - Only scores above 50 are reported, and only half of the similarity matrix - (since it's symmetric) plus self-hits are returned. - - Args: - protein_symbol (str): The protein symbol of the first protein (e.g., "TP53"). - protein_symbol_comparison (str): The protein symbol of the second protein (e.g., "MKI67"). - species (str): The species taxonomy ID (e.g., "9606" for human). Optional. + """Retrieve protein homology similarity scores from STRING database based on Smith-Waterman bit scores. Only scores above 50 reported. Returns: - list: A list of dictionaries containing protein pairs and their bit scores. + list or dict: Similarity scores array with stringId_A, stringId_B, bitscore or error message. """ # Resolve both protein symbols to STRING IDs try: diff --git a/src/biocontext_kb/core/uniprot/_get_uniprot_id_by_protein_symbol.py b/src/biocontext_kb/core/uniprot/_get_uniprot_id_by_protein_symbol.py index 603514e..2fb347d 100644 --- a/src/biocontext_kb/core/uniprot/_get_uniprot_id_by_protein_symbol.py +++ b/src/biocontext_kb/core/uniprot/_get_uniprot_id_by_protein_symbol.py @@ -8,23 +8,16 @@ @core_mcp.tool() def get_uniprot_id_by_protein_symbol( - protein_symbol: Annotated[str, Field(description="The name of the gene to search for (e.g., 'SYNPO')")], + protein_symbol: Annotated[str, Field(description="Gene or protein name to search for (e.g., 'SYNPO')")], species: Annotated[ str, - Field(description="The organism ID (e.g., '9606' for human)"), + Field(description="Organism taxonomy ID (e.g., '9606' for human)"), ] = "9606", ) -> str | None: - """Query the UniProt database for the UniProt ID using the protein name. - - Args: - protein_symbol (str): The name of the protein to search for (e.g., "SYNPO"). - species (str): The organism ID (e.g., "9606" for human). Default is "9606". + """Retrieve UniProt accession ID from protein name and species. Returns the primary accession or None if not found. Returns: - str: The UniProt ID of the protein. - - Raises: - ValueError: If no results are found for the given protein name. + str or None: UniProt accession ID string (e.g., 'P04637') or None if not found. """ url = f"https://rest.uniprot.org/uniprotkb/search?query=protein_name:{protein_symbol}+AND+organism_id:{species}&format=json" diff --git a/src/biocontext_kb/core/uniprot/_get_uniprot_protein_info.py b/src/biocontext_kb/core/uniprot/_get_uniprot_protein_info.py index c477d50..141c8f9 100644 --- a/src/biocontext_kb/core/uniprot/_get_uniprot_protein_info.py +++ b/src/biocontext_kb/core/uniprot/_get_uniprot_protein_info.py @@ -10,39 +10,29 @@ def get_uniprot_protein_info( protein_id: Annotated[ Optional[str], - Field(description="The protein identifier or accession number (e.g., 'P04637')"), + Field(description="Protein accession number (e.g., 'P04637')"), ] = None, protein_name: Annotated[ Optional[str], - Field(description="The name of the protein to search for (e.g., 'P53')"), + Field(description="Protein name to search for (e.g., 'P53')"), ] = None, gene_symbol: Annotated[ Optional[str], - Field(description="The gene symbol to search for (e.g., 'TP53')"), + Field(description="Gene symbol to search for (e.g., 'TP53')"), ] = None, species: Annotated[ Optional[str], - Field(description="Taxonomy ID (e.g., 10090) or species name as string"), + Field(description="Taxonomy ID (e.g., '10090') or species name"), ] = None, include_references: Annotated[ bool, - Field(description="Whether to include references and cross-references in the response"), + Field(description="Include references and cross-references in response"), ] = False, ) -> dict: - """Query the UniProt database for protein information. - - Provide either protein_id or protein_name to search for a specific protein. - Always provide the species parameter to ensure the correct protein is returned. - - Args: - protein_id (str, optional): The protein identifier or accession number (e.g., "P04637"). Only provide if protein_name is None. - protein_name (str, optional): The name of the protein to search for (e.g., "P53"). - gene_symbol (str, optional): The gene name to search for (e.g., "TP53"). - species (str, optional): Taxonomy ID (e.g., 10090) as string. - include_references (bool, optional): Whether to include references and cross-references in the response. Defaults to False. + """Retrieve protein information from UniProt database. Provide at least one of protein_id, protein_name, or gene_symbol. Returns: - dict: Protein data or error message + dict: Protein information with accession, proteinDescription, genes, organism, sequence, functions, keywords, references or error message. """ base_url = "https://rest.uniprot.org/uniprotkb/search" diff --git a/uv.lock b/uv.lock index c1aa103..dd587d2 100644 --- a/uv.lock +++ b/uv.lock @@ -167,7 +167,7 @@ sdist = { url = "https://files.pythonhosted.org/packages/92/8d/e296c7af03757debd [[package]] name = "biocontext-kb" -version = "0.1.6" +version = "0.1.7" source = { editable = "." } dependencies = [ { name = "asyncio" },