Skip to content

Commit 2bcb80d

Browse files
committed
Merge branch 'main' of https://github.com/ipeaGIT/geocodebr
2 parents 01408d7 + 92ad64a commit 2bcb80d

File tree

6 files changed

+43
-46
lines changed

6 files changed

+43
-46
lines changed

R/match_cases.R

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,22 @@
11
match_cases <- function( # nocov start
22
con = con,
33
x = "input_padrao_db",
4-
y = "filtered_cnefe",
54
output_tb = "output_db",
65
key_cols = key_cols,
76
match_type = match_type,
87
resultado_completo){
98

109
# match_type = "dn01"
1110

12-
# read corresponding parquet file
13-
table_name <- paste(key_cols, collapse = "_")
14-
table_name <- gsub('estado_municipio', 'municipio', table_name)
15-
1611
# get corresponding parquet table
17-
table_name <- get_reference_table(match_type)
12+
cnefe_table_name <- get_reference_table(match_type)
13+
y <- cnefe_table_name
1814

1915
# build path to local file
2016
path_to_parquet <- fs::path(
2117
listar_pasta_cache(),
2218
glue::glue("geocodebr_data_release_{data_release}"),
23-
paste0(table_name,".parquet")
19+
paste0(cnefe_table_name,".parquet")
2420
)
2521

2622
# determine geographical scope of the search
@@ -39,11 +35,11 @@ match_cases <- function( # nocov start
3935
# summary(c$desvio_metros)
4036

4137
# register filtered_cnefe to db
42-
duckdb::duckdb_register_arrow(con, "filtered_cnefe", filtered_cnefe)
38+
duckdb::duckdb_register_arrow(con, cnefe_table_name, filtered_cnefe)
4339

4440
# Create the JOIN condition by concatenating the key columns
4541
join_condition <- paste(
46-
glue::glue("filtered_cnefe.{key_cols} = {x}.{key_cols}"),
42+
glue::glue("{y}.{key_cols} = {x}.{key_cols}"),
4743
collapse = ' AND '
4844
)
4945

@@ -68,7 +64,7 @@ match_cases <- function( # nocov start
6864
colunas_encontradas <- paste0(", ", colunas_encontradas)
6965

7066
additional_cols <- paste0(
71-
glue::glue("filtered_cnefe.{key_cols} AS {key_cols}_encontrado"),
67+
glue::glue("{y}.{key_cols} AS {key_cols}_encontrado"),
7268
collapse = ', ')
7369

7470
additional_cols <- gsub('localidade_encontrado', 'localidade_encontrada', additional_cols)
@@ -80,24 +76,25 @@ match_cases <- function( # nocov start
8076
query_match <- glue::glue(
8177
"INSERT INTO output_db (tempidgeocodebr, lat, lon, endereco_encontrado, tipo_resultado, desvio_metros, contagem_cnefe {colunas_encontradas})
8278
SELECT {x}.tempidgeocodebr,
83-
filtered_cnefe.lat,
84-
filtered_cnefe.lon,
85-
filtered_cnefe.endereco_completo AS endereco_encontrado,
79+
{y}.lat,
80+
{y}.lon,
81+
{y}.endereco_completo AS endereco_encontrado,
8682
'{match_type}' AS tipo_resultado,
87-
filtered_cnefe.desvio_metros,
88-
filtered_cnefe.n_casos AS contagem_cnefe {additional_cols}
83+
{y}.desvio_metros,
84+
{y}.n_casos AS contagem_cnefe {additional_cols}
8985
FROM {x}
90-
LEFT JOIN filtered_cnefe
86+
LEFT JOIN {y}
9187
ON {join_condition}
92-
WHERE {cols_not_null} AND filtered_cnefe.lon IS NOT NULL;"
88+
WHERE {cols_not_null} AND {y}.lon IS NOT NULL;"
9389
)
9490

9591
DBI::dbSendQueryArrow(con, query_match)
9692
# a <- DBI::dbReadTable(con, 'output_db')
9793
# summary(a$desvio_metros)
9894
# summary(a$lat)
9995

100-
duckdb::duckdb_unregister_arrow(con, "filtered_cnefe")
96+
#### 66666 TODO: remove ("remover") - presumably refers to the unregister call below; confirm with author
97+
duckdb::duckdb_unregister_arrow(con, cnefe_table_name)
10198

10299
# UPDATE input_padrao_db: Remove observations found in previous step
103100
temp_n <- update_input_db(

R/match_cases_probabilistic.R

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
match_cases_probabilistic <- function(
99
con = con,
1010
x = 'input_padrao_db',
11-
y = 'filtered_cnefe',
1211
output_tb = "output_db",
1312
key_cols = key_cols,
1413
match_type = match_type,
@@ -17,14 +16,15 @@ match_cases_probabilistic <- function(
1716
# match_type = "pn01"
1817

1918
# get corresponding parquet table and key columns
20-
table_name <- get_reference_table(match_type)
19+
cnefe_table_name <- get_reference_table(match_type)
20+
y <- cnefe_table_name
2121
key_cols <- get_key_cols(match_type)
2222

2323
# build path to local file
2424
path_to_parquet <- fs::path(
2525
listar_pasta_cache(),
2626
glue::glue("geocodebr_data_release_{data_release}"),
27-
paste0(table_name,".parquet")
27+
paste0(cnefe_table_name,".parquet")
2828
)
2929

3030
# determine geographical scope of the search
@@ -40,7 +40,7 @@ match_cases_probabilistic <- function(
4040
dplyr::compute()
4141

4242
# register filtered_cnefe to db
43-
duckdb::duckdb_register_arrow(con, "filtered_cnefe", filtered_cnefe)
43+
duckdb::duckdb_register_arrow(con, cnefe_table_name, filtered_cnefe)
4444

4545

4646

@@ -168,7 +168,7 @@ match_cases_probabilistic <- function(
168168
colunas_encontradas <- paste0(colunas_encontradas, ", similaridade_logradouro")
169169

170170
additional_cols <- paste0(
171-
glue::glue("filtered_cnefe.{key_cols} AS {key_cols}_encontrado"),
171+
glue::glue("{y}.{key_cols} AS {key_cols}_encontrado"),
172172
collapse = ', ')
173173

174174
additional_cols <- gsub('localidade_encontrado', 'localidade_encontrada', additional_cols)
@@ -180,16 +180,16 @@ match_cases_probabilistic <- function(
180180
query_update_db <- glue::glue(
181181
"INSERT INTO output_db (tempidgeocodebr, lat, lon, endereco_encontrado, tipo_resultado, desvio_metros, contagem_cnefe {colunas_encontradas})
182182
SELECT {x}.tempidgeocodebr,
183-
filtered_cnefe.lat,
184-
filtered_cnefe.lon,
185-
filtered_cnefe.endereco_completo AS endereco_encontrado,
183+
{y}.lat,
184+
{y}.lon,
185+
{y}.endereco_completo AS endereco_encontrado,
186186
'{match_type}' AS tipo_resultado,
187-
filtered_cnefe.desvio_metros,
188-
filtered_cnefe.n_casos AS contagem_cnefe {additional_cols}
187+
{y}.desvio_metros,
188+
{y}.n_casos AS contagem_cnefe {additional_cols}
189189
FROM {x}
190-
LEFT JOIN filtered_cnefe
190+
LEFT JOIN {y}
191191
ON {join_condition_match}
192-
WHERE {cols_not_null} AND filtered_cnefe.lon IS NOT NULL;"
192+
WHERE {cols_not_null} AND {y}.lon IS NOT NULL;"
193193
)
194194

195195

@@ -201,7 +201,7 @@ match_cases_probabilistic <- function(
201201

202202
# remove arrow tables from db
203203
duckdb::duckdb_unregister_arrow(con, "unique_logradouros")
204-
duckdb::duckdb_unregister_arrow(con, "filtered_cnefe")
204+
duckdb::duckdb_unregister_arrow(con, cnefe_table_name) # 6666666666666666666
205205

206206

207207

R/match_weighted_cases.R

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
match_weighted_cases <- function( # nocov start
22
con = con,
33
x = 'input_padrao_db',
4-
y = 'filtered_cnefe',
54
output_tb = "output_db",
65
key_cols = key_cols,
76
match_type = match_type,
@@ -10,13 +9,14 @@ match_weighted_cases <- function( # nocov start
109
# match_type = "da01"
1110

1211
# get corresponding parquet table
13-
table_name <- get_reference_table(match_type)
12+
cnefe_table_name <- get_reference_table(match_type)
13+
y <- cnefe_table_name
1414

1515
# build path to local file
1616
path_to_parquet <- fs::path(
1717
listar_pasta_cache(),
1818
glue::glue("geocodebr_data_release_{data_release}"),
19-
paste0(table_name,".parquet")
19+
paste0(cnefe_table_name,".parquet")
2020
)
2121

2222
# determine geographical scope of the search
@@ -32,7 +32,7 @@ match_weighted_cases <- function( # nocov start
3232
dplyr::compute()
3333

3434
# register filtered_cnefe to db
35-
duckdb::duckdb_register_arrow(con, "filtered_cnefe", filtered_cnefe)
35+
duckdb::duckdb_register_arrow(con, cnefe_table_name, filtered_cnefe)
3636

3737
# cols that cannot be null
3838
cols_not_null <- paste(
@@ -64,7 +64,7 @@ match_weighted_cases <- function( # nocov start
6464
colunas_encontradas <- paste0(", ", colunas_encontradas)
6565

6666
additional_cols <- paste0(
67-
glue::glue("filtered_cnefe.{key_cols} AS {key_cols}_encontrado"),
67+
glue::glue("{y}.{key_cols} AS {key_cols}_encontrado"),
6868
collapse = ', ')
6969

7070
additional_cols <- gsub('localidade_encontrado', 'localidade_encontrada', additional_cols)
@@ -144,7 +144,7 @@ match_weighted_cases <- function( # nocov start
144144
# b <- DBI::dbGetQuery(con, query_aggregate)
145145

146146

147-
duckdb::duckdb_unregister_arrow(con, "filtered_cnefe")
147+
duckdb::duckdb_unregister_arrow(con, cnefe_table_name) # 66666
148148

149149
# UPDATE input_padrao_db: Remove observations found in previous step
150150
temp_n <- update_input_db(

R/match_weighted_cases_probabilistic.R

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
match_weighted_cases_probabilistic <- function( # nocov start
77
con = con,
88
x = 'input_padrao_db',
9-
y = 'filtered_cnefe',
109
output_tb = "output_db",
1110
key_cols = key_cols,
1211
match_type = match_type,
@@ -15,14 +14,15 @@ match_weighted_cases_probabilistic <- function( # nocov start
1514
# match_type = "pa01"
1615

1716
# get corresponding parquet table
18-
table_name <- get_reference_table(match_type)
17+
cnefe_table_name <- get_reference_table(match_type)
18+
y <- cnefe_table_name
1919
key_cols <- get_key_cols(match_type)
2020

2121
# build path to local file
2222
path_to_parquet <- fs::path(
2323
listar_pasta_cache(),
2424
glue::glue("geocodebr_data_release_{data_release}"),
25-
paste0(table_name,".parquet")
25+
paste0(cnefe_table_name,".parquet")
2626
)
2727

2828
# determine geographical scope of the search
@@ -38,7 +38,7 @@ match_weighted_cases_probabilistic <- function( # nocov start
3838
dplyr::compute()
3939

4040
# register filtered_cnefe to db
41-
duckdb::duckdb_register_arrow(con, "filtered_cnefe", filtered_cnefe)
41+
duckdb::duckdb_register_arrow(con, cnefe_table_name, filtered_cnefe)
4242

4343

4444

@@ -169,7 +169,7 @@ match_weighted_cases_probabilistic <- function( # nocov start
169169
colunas_encontradas <- paste0(", ", colunas_encontradas)
170170

171171
additional_cols <- paste0(
172-
glue::glue("filtered_cnefe.{key_cols} AS {key_cols}_encontrado"),
172+
glue::glue("{y}.{key_cols} AS {key_cols}_encontrado"),
173173
collapse = ', ')
174174

175175
additional_cols <- gsub('localidade_encontrado', 'localidade_encontrada', additional_cols)
@@ -250,7 +250,7 @@ match_weighted_cases_probabilistic <- function( # nocov start
250250
# d <- DBI::dbReadTable(con, 'aaa')
251251

252252
# remove arrow tables from db
253-
duckdb::duckdb_unregister_arrow(con, "filtered_cnefe")
253+
duckdb::duckdb_unregister_arrow(con, cnefe_table_name) # 6666666
254254

255255
# if (match_type %like% "01") {
256256
duckdb::duckdb_unregister_arrow(con, "unique_logradouros")

geocodebr.Rproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Version: 1.0
2+
ProjectId: f1d517dd-ef51-4413-9401-f05762abb322
23

34
RestoreWorkspace: Default
45
SaveWorkspace: Default

tests/tests_rafa/benchmark_LIKE.R

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,8 @@ bench::mark( iterations = 1,
139139
)
140140
)
141141

142-
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
143-
# v2_F 25.6s 25.6s 0.0391 64.6MB 0.352 1 9 25.6s <dt>
144-
# v3_F 26.2s 26.2s 0.0381 65MB 0.343 1 9 26.2s <dt>
142+
# expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory
143+
# v2 <- geocodeb… 21.9s 21.9s 0.0456 70.7MB 0.456 1 10 21.9s <dt> <Rprofmem>
145144

146145

147146
# v2: 729 empates

0 commit comments

Comments
 (0)