@@ -176,14 +176,32 @@ def initialize_file_connection(self, file_type, file_path):
176176 File object.
177177 """
178178 file_connections = {"cell_metadata" : CellMetadata , "cluster" : Clusters }
179-
180- return file_connections .get (file_type )(
181- file_path ,
182- self .study_id ,
183- self .study_file_id ,
184- tracer = self .tracer ,
185- ** self .kwargs ,
186- )
179+ try :
180+ return file_connections .get (file_type )(
181+ file_path ,
182+ self .study_id ,
183+ self .study_file_id ,
184+ tracer = self .tracer ,
185+ ** self .kwargs ,
186+ )
187+ except ValueError as v :
188+ # Caution: recording errorTypes in this manner can clobber other collected errors.
189+ # ValueErrors during file connection indicate the file cannot be processed, so
190+ # this logging approach should not lose collected file validation information.
191+ if str (v ).startswith ("could not convert" ):
192+ config .get_metric_properties ().update (
193+ {"errorTypes" : ["content:type:not-numeric" ]}
194+ )
195+ elif str (v ).startswith ("Unable to parse" ):
196+ config .get_metric_properties ().update (
197+ {"errorTypes" : ["format:cap:unique" ]}
198+ )
199+ else :
200+ config .get_metric_properties ().update (
201+ {"errorTypes" : ["parse:unhandled" ]}
202+ )
203+ self .report_validation ("failure" )
204+ raise ValueError (v )
187205
188206 def insert_many (self , collection_name , documents ):
189207 if not config .bypass_mongo_writes ():
@@ -427,6 +445,13 @@ def subsample(self):
427445 if load_status != 0 :
428446 return load_status
429447 else :
448+ # Caution: recording errorTypes in this manner can clobber other collected errors.
449+ # In subsampling, the known failure modes are ValueErrors, which stop processing, so
450+ # this logging approach should not lose file validation information.
451+ config .get_metric_properties ().update (
452+ {"errorTypes" : ["content:missing:values-across-files" ]}
453+ )
454+ self .report_validation ("failure" )
430455 raise ValueError (
431456 "Cluster file has cell names that are not present in cell metadata file."
432457 )
0 commit comments