Skip to content

Commit 01408d7

Browse files
committed
timing performance
1 parent c5238fb commit 01408d7

File tree

3 files changed

+95
-8
lines changed

3 files changed

+95
-8
lines changed

R/geocode.R

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,53 @@ geocode <- function(enderecos,
7575
cache = TRUE,
7676
n_cores = 1 ){
7777

78+
79+
## ---- tiny timing toolkit (self-contained) ------------------------------
80+
.make_timer <- function(verbose = TRUE) {
  # Build a small stopwatch backed by closure state.
  #
  # Args:
  #   verbose: if TRUE, every mark() emits a progress message, and summary()
  #     reports the collected timings by default.
  #
  # Returns:
  #   A list with three functions:
  #     mark(label)       record a checkpoint; invisibly returns the reading.
  #     summary(print_summary = verbose)
  #                       data.frame with columns step, step_sec, total_sec
  #                       (zero rows when nothing has been recorded).
  #     time_it(label, expr)
  #                       evaluate expr, record a checkpoint, and invisibly
  #                       return the value of expr.
  .marks <- list()
  # "elapsed" is wall-clock seconds since the R process started.
  .t0_rt <- proc.time()[["elapsed"]]
  .t_prev <- .t0_rt

  fmt <- function(secs) sprintf("%.3f s", secs)

  mark <- function(label) {
    now <- proc.time()[["elapsed"]]
    step <- now - .t_prev   # time since the previous checkpoint
    total <- now - .t0_rt   # time since the timer was created
    .marks[[length(.marks) + 1L]] <<- list(
      label = label, step = step, total = total
    )
    .t_prev <<- now
    if (verbose) {
      message(sprintf("[%s] +%s (total %s)", label, fmt(step), fmt(total)))
    }
    invisible(now)
  }

  summary <- function(print_summary = verbose) {
    # vapply() on an empty list yields zero-length vectors, so the result has
    # the same three columns whether or not any checkpoint was recorded.
    df <- data.frame(
      step = vapply(.marks, function(m) m[["label"]], character(1)),
      step_sec = vapply(.marks, function(m) m[["step"]], numeric(1)),
      total_sec = vapply(.marks, function(m) m[["total"]], numeric(1)),
      stringsAsFactors = FALSE
    )
    if (length(.marks) == 0L) {
      return(invisible(df))
    }
    if (print_summary) {
      # Send header and table through message() so the whole report goes to
      # one stream and can be silenced with suppressMessages().
      message("— Timing summary —")
      table_lines <- utils::capture.output(print(df, row.names = FALSE))
      message(paste(table_lines, collapse = "\n"))
    }
    df
  }

  time_it <- function(label, expr) {
    force(label)
    # Forcing the promise evaluates expr in the environment where it was
    # written, which also works when expr is forwarded through a wrapper.
    res <- expr
    # The recorded step spans from the previous checkpoint, not only expr.
    mark(label)
    invisible(res)
  }

  list(mark = mark, summary = summary, time_it = time_it)
}
121+
timer <- .make_timer(verbose = isTRUE(verboso))
122+
on.exit(timer$summary(), add = TRUE)
123+
## -----------------------------------------------------------------------
124+
78125
# check input
79126
checkmate::assert_data_frame(enderecos)
80127
checkmate::assert_logical(resultado_completo, any.missing = FALSE, len = 1)
@@ -96,6 +143,10 @@ geocode <- function(enderecos,
96143

97144
if (verboso) message_standardizing_addresses()
98145

146+
# systime start 66666 ----------------
147+
timer$mark("Start")
148+
149+
99150
input_padrao <- enderecobr::padronizar_enderecos(
100151
enderecos,
101152
campos_do_endereco = enderecobr::correspondencia_campos(
@@ -124,6 +175,9 @@ geocode <- function(enderecos,
124175
)
125176
}
126177

178+
# systime padronizacao 66666 ----------------
179+
timer$mark("Padronizacao")
180+
127181
# create temp id
128182
data.table::setDT(enderecos)[, tempidgeocodebr := 1:nrow(input_padrao) ]
129183
input_padrao[, tempidgeocodebr := 1:nrow(input_padrao) ]
@@ -151,13 +205,17 @@ geocode <- function(enderecos,
151205
overwrite = TRUE, temporary = TRUE)
152206

153207

208+
# systime register standardized 66666 ----------------
209+
timer$mark("Register standardized input")
210+
154211

155212
# cria coluna "log_causa_confusao" identificando logradouros que geram confusao
156213
# issue https://github.com/ipeaGIT/geocodebr/issues/67
157214
cria_col_logradouro_confusao(con)
158215

159-
# b <- DBI::dbReadTable(con, name = 'input_padrao_db') |>
160-
# filter(log_causa_confusao==0)
216+
# systime cria coluna "log_causa_confusao 66666 ----------------
217+
timer$mark("Cria coluna log_causa_confusao")
218+
161219

162220

163221
# create an empty output table that will be populated -----------------------------------------------
@@ -249,19 +307,31 @@ geocode <- function(enderecos,
249307

250308
if (verboso) finish_progress_bar(matched_rows)
251309

310+
# systime matching 66666 ----------------
311+
timer$mark("Matching")
312+
313+
252314
if (verboso) message_preparando_output()
253315

254316
# add precision column
255317
add_precision_col(con, update_tb = 'output_db')
256318

257319

320+
# systime add precision 66666 ----------------
321+
timer$mark("Add precision")
322+
323+
258324
# output with all original columns
259325
duckdb::dbWriteTable(con, "input_db", enderecos,
260326
temporary = TRUE, overwrite=TRUE)
261327
# enderecos_arrw <- arrow::as_arrow_table(enderecos)
262328
# DBI::dbWriteTableArrow(con, name = "input_db", enderecos_arrw,
263329
# overwrite = TRUE, temporary = TRUE)
264330

331+
# systime write original input back 66666 ----------------
332+
timer$mark("Write original input back")
333+
334+
265335
x_columns <- names(enderecos)
266336

267337
output_df <- merge_results(
@@ -273,6 +343,10 @@ geocode <- function(enderecos,
273343
resultado_completo = resultado_completo
274344
)
275345

346+
# systime merge results 66666 ----------------
347+
timer$mark("Merge results")
348+
349+
276350
data.table::setDT(output_df)
277351

278352
# Disconnect from DuckDB when done
@@ -284,6 +358,10 @@ geocode <- function(enderecos,
284358
output_df <- trata_empates_geocode(output_df, resolver_empates, verboso)
285359
}
286360

361+
# systime resolve empates 66666 ----------------
362+
timer$mark("Resolve empates")
363+
364+
287365
# drop geocodebr temp id column
288366
output_df[, tempidgeocodebr := NULL]
289367

@@ -302,6 +380,10 @@ geocode <- function(enderecos,
302380
lng = lon,
303381
resolution = h3_res)
304382
]
383+
384+
# systime add h3 66666 ----------------
385+
timer$mark("Add H3")
386+
305387
}
306388

307389
# convert df to simple feature
@@ -314,6 +396,10 @@ geocode <- function(enderecos,
314396
)
315397

316398
sf::st_crs(output_sf) <- 4674
399+
400+
# systime convert to sf 66666 ----------------
401+
timer$mark("Convert to sf")
402+
317403
return(output_sf)
318404
}
319405

tests/tests_rafa/benchmark_LIKE.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ bench::mark( iterations = 1,
134134
n_cores = ncores,
135135
resultado_completo = T,
136136
verboso = T,
137-
resultado_sf = F,
138-
resolver_empates = F
137+
resultado_sf = T,
138+
resolver_empates = T
139139
)
140140
)
141141

tests/tests_rafa/benchmark_reg_adm.R

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,15 @@ fields_cad <- geocodebr::definir_campos(
118118

119119
# bench::mark( iterations = 1,
120120
bench::system_time(
121-
cadgeo <- geocodebr::geocode(
121+
cadgeo <- geocode(
122122
enderecos = cad,
123123
campos_endereco = fields_cad,
124124
resultado_completo = T,
125-
n_cores = 25, # 7
125+
n_cores = 7, # 7
126126
verboso = T,
127-
resultado_sf = F,
128-
resolver_empates = F
127+
resultado_sf = T,
128+
resolver_empates = F,
129+
h3_res = 9
129130
)
130131
)
131132

0 commit comments

Comments
 (0)