@@ -467,4 +467,158 @@ def update_data_analytics(collection_id: str, user: str, payload: dict, bot: str
467467 return {
468468 "message" : "Record updated!" ,
469469 "data" : {"_id" : collection_id }
470- }
470+ }
471+
@staticmethod
def extract_data(input_source: str,
                 prompt: str = None,
                 result_type: str = "markdown",
                 llm_type: str = "openrouter",
                 high_res_ocr: bool = False,
                 language: str = "en",
                 bot: str = None,
                 user: str = None):
    """
    Post a parse request to the LLM server and return the parsed content.

    The request is sent to ``<llm url>/<bot>/parse/<llm_type>`` with the
    llama-parse key taken from ``Utility.environment``.

    :param input_source: forwarded as ``input_source`` in the request body
    :param prompt: forwarded as ``parsing_instruction``
    :param result_type: forwarded as ``result_type``
    :param llm_type: used in the endpoint path and forwarded as ``llm_type``
    :param high_res_ocr: forwarded as ``high_res_ocr``
    :param language: forwarded as ``language``
    :param bot: used in the endpoint path
    :param user: forwarded as ``user``
    :return: dict with the ``full_text`` and ``extracted_data`` values of the reply's ``data`` object
    :raises Exception: if the server does not answer with HTTP 200, the reply is not
        flagged ``success``, or the reply carries no ``data`` object
    """
    import requests

    llm_server_url = Utility.environment['llm']['url']

    payload = {
        "input_source": input_source,
        "llama_parser_api_key": Utility.environment['llama_parse']['key'],
        "result_type": result_type,
        "high_res_ocr": high_res_ocr,
        "language": language,
        "parsing_instruction": prompt,
        "user": user,
        "llm_type": llm_type
    }

    http_response = requests.post(
        f"{llm_server_url}/{bot}/parse/{llm_type}",
        json=payload
    )

    if http_response.status_code != 200:
        raise Exception(http_response.text)

    body = http_response.json()

    if not body.get("success"):
        raise Exception(body)

    result = body.get("data")
    if not isinstance(result, dict):
        # A reply flagged successful but without a data object would otherwise
        # fail below with an opaque AttributeError; report the reply instead.
        raise Exception(body)

    return {
        "full_text": result.get("full_text"),
        "extracted_data": result.get("extracted_data")
    }
516+
517+
@staticmethod
def process_instruction(data_list, prompt, operation_type, model_id, llm_type: str = "openrouter",
                        bot: str = None, user: str = None):
    """
    Run an embedding or a completion request against the LLM server.

    When ``operation_type`` is ``"embedding"`` the whole ``data_list`` is posted to
    ``<llm url>/<bot>/aembedding/<llm_type>``. For any other value only the first
    element of ``data_list`` is used: it is substituted for ``{document}`` in
    ``prompt`` and posted to ``<llm url>/<bot>/completion/<llm_type>``.

    :param data_list: texts to embed, or a sequence whose first element is the document to process
    :param prompt: ``str.format`` template receiving ``document``; unused for embeddings
    :param operation_type: ``"embedding"`` selects the embedding branch, anything else the completion branch
    :param model_id: model identifier forwarded to the LLM server
    :param llm_type: used in the endpoint path
    :param bot: used in the endpoint path
    :param user: forwarded as ``user``
    :return: ``{"embeddings": <server reply>}`` for embeddings, otherwise the reply's ``formatted_response`` value
    :raises Exception: if an embedding is requested and no openrouter secret is stored
    :raises requests.HTTPError: if the LLM server answers with an error status
    """
    import requests

    llm_server_url = Utility.environment['llm']['url']

    if operation_type == "embedding":
        # The key is only sent with embedding requests, so it is looked up here
        # rather than up front: completion calls neither need the secret nor
        # should fail when it is absent.
        from kairon.shared.admin.data_objects import LLMSecret

        # NOTE(review): the lookup is pinned to "openrouter" irrespective of llm_type,
        # exactly as before - confirm this is intended.
        doc = LLMSecret.objects(llm_type="openrouter").first()
        if doc is None:
            raise Exception("LLM secret for openrouter is not configured")
        api_key = Utility.decrypt_message(doc.api_key)

        payload = {
            "text": data_list,
            "user": user,
            "kwargs": {
                "model": model_id,
                "api_key": api_key
            }
        }

        http_response = requests.request(method="POST",
                                         url=f"{llm_server_url}/{bot}/aembedding/{llm_type}",
                                         json=payload)
        http_response.raise_for_status()
        embeddings = http_response.json()
        logger.info(embeddings)

        return {
            "embeddings": embeddings
        }

    text_input = data_list[0]
    final_prompt = prompt.format(document=text_input)
    payload = {
        "user": user,
        "hyperparameters": {"temperature": 0, "model": model_id},
        "messages": [{"role": "user", "content": final_prompt}]
    }
    http_response = requests.request(method="POST",
                                     url=f"{llm_server_url}/{bot}/completion/{llm_type}",
                                     json=payload)

    http_response.raise_for_status()
    completion = http_response.json()
    extracted_data = completion['formatted_response']

    logger.info(completion)
    logger.info(extracted_data)

    return extracted_data
571+
572+
@staticmethod
def create_vector_collection(collection_name, model_id: str, user: str, emb_size: int = 3072,
                             overwrite: bool = False, metadata: list = None, bot: str = None):
    """
    Create a Qdrant collection for a knowledge vault and record its schema and embedding metadata.

    The Qdrant collection is named ``<bot>_<collection_name>_faq_embd`` and uses cosine
    distance. If it already exists it is left untouched unless ``overwrite`` is set, in
    which case the collection and the stored ``CognitionSchema`` for the vault are
    deleted and recreated.

    :param collection_name: knowledge vault name
    :param model_id: embedding model identifier stored in the embedding metadata
    :param user: user recorded on the documents that are saved
    :param emb_size: vector size of the collection
    :param overwrite: recreate the collection when it already exists
    :param metadata: list of keyword-argument dicts for ``ColumnMetadata``; ``None`` means no columns
    :param bot: bot id used in the collection name and on the saved documents
    :return: dict with a ``message`` saying whether the collection was created or already existed
    """
    from kairon.shared.cognition.data_objects import CognitionSchema, EmbeddingMetadata, ColumnMetadata
    from qdrant_client.models import VectorParams, Distance
    from qdrant_client import QdrantClient

    vault_name = collection_name
    qdrant_name = f"{bot}_{collection_name}_faq_embd"

    qdrant = QdrantClient(url=Utility.environment['vector']['db'])
    existing_names = [item.name for item in qdrant.get_collections().collections]
    already_present = qdrant_name in existing_names

    # Built before the early return so an invalid vector configuration is rejected
    # even when nothing ends up being created.
    embed_config = {
        "size": emb_size,
        "distance": Distance.COSINE
    }
    vector_params = VectorParams(**embed_config)

    if already_present and not overwrite:
        return {
            "message": "collection already exists"
        }

    if already_present:
        # Overwrite requested: drop the old collection together with its stored schema.
        qdrant.delete_collection(collection_name=qdrant_name)
        stale_schema = CognitionSchema.objects(bot=bot, collection_name=vault_name).first()
        if stale_schema:
            stale_schema.delete()

    qdrant.create_collection(
        collection_name=qdrant_name,
        vectors_config=vector_params
    )
    schema_doc = CognitionSchema(bot=bot, user=user)
    schema_doc.metadata = [ColumnMetadata(**column) for column in (metadata or [])]
    schema_doc.collection_name = vault_name
    schema_doc.save()

    # Record the embedding metadata only once per bot / collection / model / vault.
    lookup = {
        "bot": bot,
        "collection_name": qdrant_name,
        "model_id": model_id,
        "knowledge_vault_name": vault_name
    }
    if not EmbeddingMetadata.objects(**lookup).first():
        EmbeddingMetadata(user=user, vector_config=embed_config, **lookup).save()

    return {
        "message": "collection created successfully"
    }
0 commit comments