@@ -287,54 +287,59 @@ def get_count(input_file):
287287 count += 1
288288 return count
289289
def kv_generator(self, input_feature_count, fp, job_id, part_of_data):
    """Yield (key, value) pairs parsed from the open file object ``fp``.

    Reads ``fp`` in blocks of at most ``JobDefaultConfig.upload_block_max_bytes``
    bytes, splits each line on the configured ``id_delimiter`` and converts it
    to a (k, v) pair via ``self.get_line()``.

    Side effects:
      - appends the first 100 yielded pairs to ``part_of_data`` (sample kept
        for the table metadata, see the caller);
      - after each block, reports upload progress for ``job_id`` to
        ``ControllerClient.update_job``.

    :param input_feature_count: total number of data lines expected in the
        file (header already excluded by the caller); used as the progress
        denominator. Assumed > 0 — TODO confirm upstream validation.
    :param fp: readable text file object positioned past any header line.
    :param job_id: job identifier included in each progress report.
    :param part_of_data: list mutated in place with up to 100 sample pairs.
    """
    fate_uuid = uuid.uuid1().hex
    get_line = self.get_line()
    line_index = 0
    LOGGER.info(input_feature_count)
    while True:
        lines = fp.readlines(JobDefaultConfig.upload_block_max_bytes)
        LOGGER.info(JobDefaultConfig.upload_block_max_bytes)
        if not lines:
            return
        for line in lines:
            values = line.rstrip().split(self.parameters["id_delimiter"])
            k, v = get_line(
                values=values,
                line_index=line_index,
                extend_sid=self.parameters["extend_sid"],
                auto_increasing_sid=self.parameters["auto_increasing_sid"],
                id_delimiter=self.parameters["id_delimiter"],
                fate_uuid=fate_uuid,
            )
            yield k, v
            line_index += 1
            if line_index <= 100:
                part_of_data.append((k, v))
        # Report progress once per block, NOT once per line:
        # update_job is a remote call, and issuing it per row would make
        # large uploads dominated by RPC round-trips (the pre-refactor
        # code also reported per block).
        save_progress = line_index / input_feature_count * 100 // 1
        job_info = {
            "progress": save_progress,
            "job_id": job_id,
            "role": self.parameters["local"]["role"],
            "party_id": self.parameters["local"]["party_id"],
        }
        ControllerClient.update_job(job_info=job_info)
def update_schema(self, head, fp):
    """Initialise the table schema, consuming the header line if present.

    :param head: True when the first line of ``fp`` is a header row.
    :param fp: readable file object positioned at the start of the data.
    :return: True if a header line was read from ``fp`` (so the caller must
        not count it as a data line), False otherwise.
    """
    if head is not True:
        # No header row in the file: derive the schema without one.
        self.update_table_schema()
        return False
    # Consume the header line and build the schema from it.
    self.update_table_schema(fp.readline())
    return True
def upload_file(self, input_file, head, job_id=None, input_feature_count=None, table=None):
    """Stream the contents of ``input_file`` into a storage table.

    Opens the file, consumes the header (if ``head``), then bulk-inserts all
    (key, value) pairs produced by ``self.kv_generator`` and records the
    first 100 pairs as sample data in the table metadata.

    :param input_file: path of the local file to upload.
    :param head: True when the file's first line is a header row.
    :param job_id: job identifier forwarded to progress reporting.
    :param input_feature_count: total line count of the file (including the
        header when ``head`` is True); decremented here so the progress
        denominator counts data lines only.
    :param table: destination storage table; defaults to ``self.table``.
    """
    if not table:
        table = self.table
    part_of_data = []
    with open(input_file, "r") as fp:
        if self.update_schema(head, fp):
            # A header line was consumed; it is not a data line.
            input_feature_count -= 1
        # BUGFIX: write to the resolved ``table``, not ``self.table`` —
        # otherwise a caller-supplied table received the metadata update
        # below while the data went to self.table.
        table.put_all(self.kv_generator(input_feature_count, fp, job_id, part_of_data))
        table.meta.update_metas(part_of_data=part_of_data)
338343
339344 def get_computing_table (self , name , namespace , schema = None ):
340345 storage_table_meta = storage .StorageTableMeta (name = name , namespace = namespace )
0 commit comments