@@ -75,6 +75,53 @@ geocode <- function(enderecos,
7575 cache = TRUE ,
7676 n_cores = 1 ){
7777
78+
79+ # # ---- tiny timing toolkit (self-contained) ------------------------------
# Build a small closure-based stopwatch for instrumenting sequential steps.
#
# All readings come from the "elapsed" entry of proc.time(), so every value is
# expressed in seconds relative to the moment the timer was created.
#
# @param verbose Logical scalar. When TRUE, every mark is reported through
#   message() and summary() prints its table by default.
# @return A list with three functions that share one private state:
#   * mark(label): records the time since the previous mark (step) and since
#     timer creation (total); invisibly returns the current elapsed reading.
#   * summary(print_summary = verbose): returns a data.frame with columns
#     `step` (the labels), `step_sec` and `total_sec`, one row per mark.
#   * time_it(label, expr): evaluates `expr` in the caller's frame, records a
#     mark named `label`, and invisibly returns the value of `expr`.
.make_timer <- function(verbose = TRUE) {
  .marks <- list()
  # Element names of proc.time() must match exactly; a padded name such as
  # " elapsed" does not exist and makes `[[` fail with "subscript out of bounds".
  .t0_rt <- proc.time()[["elapsed"]]
  .t_prev <- .t0_rt

  fmt <- function(secs) sprintf(" %.3f s", secs)

  mark <- function(label) {
    now <- proc.time()[["elapsed"]]
    step <- now - .t_prev
    total <- now - .t0_rt
    # `<<-` updates the state owned by the enclosing .make_timer() call.
    .marks <<- append(
      .marks,
      list(list(label = label, step = step, total = total))
    )
    .t_prev <<- now
    if (verbose) {
      message(sprintf(" [%s] +%s (total %s)", label, fmt(step), fmt(total)))
    }
    invisible(now)
  }

  summary <- function(print_summary = verbose) {
    if (length(.marks) == 0) {
      # Keep the same columns as the populated result so callers can rely on
      # one schema regardless of whether any mark was recorded.
      return(invisible(data.frame(
        step = character(0),
        step_sec = numeric(0),
        total_sec = numeric(0),
        stringsAsFactors = FALSE
      )))
    }
    df <- data.frame(
      step = vapply(.marks, `[[`, character(1), "label"),
      step_sec = vapply(.marks, `[[`, numeric(1), "step"),
      total_sec = vapply(.marks, `[[`, numeric(1), "total"),
      stringsAsFactors = FALSE
    )
    if (print_summary) {
      message(" — Timing summary —")
      print(df, row.names = FALSE)
    }
    df
  }

  time_it <- function(label, expr) {
    force(label)
    # Evaluate the unevaluated expression where time_it() was called, so it
    # sees the caller's variables rather than this closure's.
    res <- eval.parent(substitute(expr))
    mark(label)
    invisible(res)
  }

  list(mark = mark, summary = summary, time_it = time_it)
}
121+ timer <- .make_timer(verbose = isTRUE(verboso ))
122+ on.exit(timer $ summary(), add = TRUE )
123+ # # -----------------------------------------------------------------------
124+
78125 # check input
79126 checkmate :: assert_data_frame(enderecos )
80127 checkmate :: assert_logical(resultado_completo , any.missing = FALSE , len = 1 )
@@ -96,6 +143,10 @@ geocode <- function(enderecos,
96143
97144 if (verboso ) message_standardizing_addresses()
98145
146+ # systime start 66666 ----------------
147+ timer $ mark(" Start" )
148+
149+
99150 input_padrao <- enderecobr :: padronizar_enderecos(
100151 enderecos ,
101152 campos_do_endereco = enderecobr :: correspondencia_campos(
@@ -124,6 +175,9 @@ geocode <- function(enderecos,
124175 )
125176 }
126177
178+ # systime padronizacao 66666 ----------------
179+ timer $ mark(" Padronizacao" )
180+
127181 # create temp id
128182 data.table :: setDT(enderecos )[, tempidgeocodebr : = 1 : nrow(input_padrao ) ]
129183 input_padrao [, tempidgeocodebr : = 1 : nrow(input_padrao ) ]
@@ -151,13 +205,17 @@ geocode <- function(enderecos,
151205 overwrite = TRUE , temporary = TRUE )
152206
153207
208+ # systime register standardized 66666 ----------------
209+ timer $ mark(" Register standardized input" )
210+
154211
155212 # cria coluna "log_causa_confusao" identificando logradouros que geram confusao
156213 # issue https://github.com/ipeaGIT/geocodebr/issues/67
157214 cria_col_logradouro_confusao(con )
158215
159- # b <- DBI::dbReadTable(con, name = 'input_padrao_db') |>
160- # filter(log_causa_confusao==0)
216+ # systime cria coluna "log_causa_confusao" 66666 ----------------
217+ timer $ mark(" Cria coluna log_causa_confusao" )
218+
161219
162220
163221 # create an empty output table that will be populated -----------------------------------------------
@@ -249,19 +307,31 @@ geocode <- function(enderecos,
249307
250308 if (verboso ) finish_progress_bar(matched_rows )
251309
310+ # systime matching 66666 ----------------
311+ timer $ mark(" Matching" )
312+
313+
252314 if (verboso ) message_preparando_output()
253315
254316 # add precision column
255317 add_precision_col(con , update_tb = ' output_db' )
256318
257319
320+ # systime add precision 66666 ----------------
321+ timer $ mark(" Add precision" )
322+
323+
258324 # output with all original columns
259325 duckdb :: dbWriteTable(con , " input_db" , enderecos ,
260326 temporary = TRUE , overwrite = TRUE )
261327 # enderecos_arrw <- arrow::as_arrow_table(enderecos)
262328 # DBI::dbWriteTableArrow(con, name = "input_db", enderecos_arrw,
263329 # overwrite = TRUE, temporary = TRUE)
264330
331+ # systime write original input back 66666 ----------------
332+ timer $ mark(" Write original input back" )
333+
334+
265335 x_columns <- names(enderecos )
266336
267337 output_df <- merge_results(
@@ -273,6 +343,10 @@ geocode <- function(enderecos,
273343 resultado_completo = resultado_completo
274344 )
275345
346+ # systime merge results 66666 ----------------
347+ timer $ mark(" Merge results" )
348+
349+
276350 data.table :: setDT(output_df )
277351
278352 # Disconnect from DuckDB when done
@@ -284,6 +358,10 @@ geocode <- function(enderecos,
284358 output_df <- trata_empates_geocode(output_df , resolver_empates , verboso )
285359 }
286360
361+ # systime resolve empates 66666 ----------------
362+ timer $ mark(" Resolve empates" )
363+
364+
287365 # drop geocodebr temp id column
288366 output_df [, tempidgeocodebr : = NULL ]
289367
@@ -302,6 +380,10 @@ geocode <- function(enderecos,
302380 lng = lon ,
303381 resolution = h3_res )
304382 ]
383+
384+ # systime add h3 66666 ----------------
385+ timer $ mark(" Add H3" )
386+
305387 }
306388
307389 # convert df to simple feature
@@ -314,6 +396,10 @@ geocode <- function(enderecos,
314396 )
315397
316398 sf :: st_crs(output_sf ) <- 4674
399+
400+ # systime convert to sf 66666 ----------------
401+ timer $ mark(" Convert to sf" )
402+
317403 return (output_sf )
318404 }
319405
0 commit comments