2121 Corpus Factory Class
2222"""
2323
24- import aixplain .utils .config as config
2524import aixplain .processes .data_onboarding .onboard_functions as onboard_functions
2625import json
2726import logging
@@ -86,12 +85,12 @@ def __from_response(cls, response: Dict) -> Corpus:
8685
8786 try :
8887 license = License (response ["license" ]["typeId" ])
89- except :
88+ except Exception :
9089 license = None
9190
9291 try :
9392 length = int (response ["segmentsCount" ])
94- except :
93+ except Exception :
9594 length = None
9695
9796 corpus = Corpus (
@@ -116,17 +115,27 @@ def get(cls, corpus_id: Text) -> Corpus:
116115 Returns:
117116 Corpus: Created 'Corpus' object
118117 """
119- url = urljoin (cls .backend_url , f"sdk/corpora/{ corpus_id } /overview" )
120- if cls .aixplain_key != "" :
121- headers = {"x-aixplain-key" : f"{ cls .aixplain_key } " , "Content-Type" : "application/json" }
118+ try :
119+ url = urljoin (cls .backend_url , f"sdk/corpora/{ corpus_id } /overview" )
120+ if cls .aixplain_key != "" :
121+ headers = {"x-aixplain-key" : f"{ cls .aixplain_key } " , "Content-Type" : "application/json" }
122+ else :
123+ headers = {"Authorization" : f"Token { config .TEAM_API_KEY } " , "Content-Type" : "application/json" }
124+ logging .info (f"Start service for GET Corpus - { url } - { headers } " )
125+ r = _request_with_retry ("get" , url , headers = headers )
126+ resp = r .json ()
127+
128+ except Exception as e :
129+ error_message = f"Error retrieving Corpus { corpus_id } : { str (e )} "
130+ logging .error (error_message , exc_info = True )
131+ raise Exception (error_message )
132+ if 200 <= r .status_code < 300 :
133+ logging .info (f"Corpus { corpus_id } retrieved successfully." )
134+ return cls .__from_response (resp )
122135 else :
123- headers = {"Authorization" : f"Token { config .TEAM_API_KEY } " , "Content-Type" : "application/json" }
124- logging .info (f"Start service for GET Corpus - { url } - { headers } " )
125- r = _request_with_retry ("get" , url , headers = headers )
126- resp = r .json ()
127- if "statusCode" in resp and resp ["statusCode" ] == 404 :
128- raise Exception (f"Corpus GET Error: Dataset { corpus_id } not found." )
129- return cls .__from_response (resp )
136+ error_message = f"Corpus GET Error: Status { r .status_code } - { resp } "
137+ logging .error (error_message )
138+ raise Exception (error_message )
130139
131140 @classmethod
132141 def create_asset_from_id (cls , corpus_id : Text ) -> Corpus :
@@ -168,7 +177,7 @@ def list(
168177 else :
169178 headers = {"Authorization" : f"Token { config .TEAM_API_KEY } " , "Content-Type" : "application/json" }
170179
171- assert 0 < page_size <= 100 , f "Corpus List Error: Page size must be greater than 0 and not exceed 100."
180+ assert 0 < page_size <= 100 , "Corpus List Error: Page size must be greater than 0 and not exceed 100."
172181 payload = {"pageSize" : page_size , "pageNumber" : page_number , "sort" : [{"field" : "createdAt" , "dir" : - 1 }]}
173182
174183 if query is not None :
@@ -188,26 +197,38 @@ def list(
188197 language = [language ]
189198 payload ["language" ] = [lng .value ["language" ] for lng in language ]
190199
191- logging .info (f"Start service for POST List Corpus - { url } - { headers } - { json .dumps (payload )} " )
192- r = _request_with_retry ("post" , url , headers = headers , json = payload )
193- resp = r .json ()
194- corpora , page_total , total = [], 0 , 0
195- if "results" in resp :
196- results = resp ["results" ]
197- page_total = resp ["pageTotal" ]
198- total = resp ["total" ]
199- logging .info (f"Response for POST List Corpus - Page Total: { page_total } / Total: { total } " )
200- for corpus in results :
201- corpus_ = cls .__from_response (corpus )
202- # add languages
203- languages = []
204- for lng in corpus ["languages" ]:
205- if "dialect" not in lng :
206- lng ["dialect" ] = ""
207- languages .append (Language (lng ))
208- corpus_ .kwargs ["languages" ] = languages
209- corpora .append (corpus_ )
210- return {"results" : corpora , "page_total" : page_total , "page_number" : page_number , "total" : total }
200+ try :
201+ logging .info (f"Start service for POST List Corpus - { url } - { headers } - { json .dumps (payload )} " )
202+ r = _request_with_retry ("post" , url , headers = headers , json = payload )
203+ resp = r .json ()
204+
205+ except Exception as e :
206+ error_message = f"Error listing corpora: { str (e )} "
207+ logging .error (error_message , exc_info = True )
208+ raise Exception (error_message )
209+
210+ if 200 <= r .status_code < 300 :
211+ corpora , page_total , total = [], 0 , 0
212+ if "results" in resp :
213+ results = resp ["results" ]
214+ page_total = resp ["pageTotal" ]
215+ total = resp ["total" ]
216+ logging .info (f"Response for POST List Corpus - Page Total: { page_total } / Total: { total } " )
217+ for corpus in results :
218+ corpus_ = cls .__from_response (corpus )
219+ # add languages
220+ languages = []
221+ for lng in corpus ["languages" ]:
222+ if "dialect" not in lng :
223+ lng ["dialect" ] = ""
224+ languages .append (Language (lng ))
225+ corpus_ .kwargs ["languages" ] = languages
226+ corpora .append (corpus_ )
227+ return {"results" : corpora , "page_total" : page_total , "page_number" : page_number , "total" : total }
228+ else :
229+ error_message = f"Corpus List Error: Status { r .status_code } - { resp } "
230+ logging .error (error_message )
231+ raise Exception (error_message )
211232
212233 @classmethod
213234 def get_assets_from_page (
@@ -245,7 +266,7 @@ def create(
245266 functions : List [Function ] = [],
246267 privacy : Privacy = Privacy .PRIVATE ,
247268 error_handler : ErrorHandler = ErrorHandler .SKIP ,
248- api_key : Optional [Text ] = None
269+ api_key : Optional [Text ] = None ,
249270 ) -> Dict :
250271 """Asynchronous call to Upload a corpus to the user's dashboard.
251272
0 commit comments