From a0a9faf0feb385681df4482eca1329c2ffb16454 Mon Sep 17 00:00:00 2001 From: igornoberto Date: Sat, 7 Oct 2017 20:33:22 -0300 Subject: [PATCH 01/11] =?UTF-8?q?Adiciona=20fun=C3=A7=C3=A3o=20de=20remove?= =?UTF-8?q?r=20pronomes=20de=20tratamento?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/text_functions.R | 36 ++++++++++++++++++ data/list_pronomes.rda | Bin 0 -> 767 bytes .../testthat/test_remove_pronome_tratamento.R | 14 +++++++ 3 files changed, 50 insertions(+) create mode 100644 data/list_pronomes.rda create mode 100644 tests/testthat/test_remove_pronome_tratamento.R diff --git a/R/text_functions.R b/R/text_functions.R index f06b92b..c805559 100644 --- a/R/text_functions.R +++ b/R/text_functions.R @@ -85,3 +85,39 @@ abrevia_nome_meio <- function(base, ..., suffixo = "_abrev"){ return(funcao_generica(base, ..., suffixo = suffixo, FUN = abrevia_nomes_meio_coluna)) } + +#' Remove commom treatment pronouns used in Brazil. +#' +#' \code{remove_pronome_tratamento} return names without treatment pronouns (Sra, Sr, Dr, etc). +#' +#' +#' @param base A data table, data frame or character vector. +#' @param ... columns for apply the function +#' +#' @import data.table, stringr, dplyr +#' @return the base param with a new column. +#' +#' @examples +#' remove_pronome_tratamento("Dr. Fulano") +#' remove_pronome_tratamento("Exmo. Sr. Cicrano de Tal") +#' +#' base <- data.table(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) +#' base <- remove_pronome_tratamento(base, "nome", suffixo = "_new_names") +#' +#' @export +remove_pronome_tratamento <- function(base, ..., suffixo = "_sem_pron"){ + return(funcao_generica(base, ..., suffixo = suffixo, FUN = remove_pronome_tratamento_coluna)) +} + + +remove_pronome_tratamento_coluna <- function(nomes){ + data("list_pronomes",envir = environment()) + novos_nomes <- sapply(nomes, USE.NAMES = F, function(nome){ + if(is.na(nome)){ return(nome) } + nome <- str_replace_all(nome, "\\s+"," ") + nome <- str_replace_all(toupper(nome),lista,"") + return(nome) + }) + return(novos_nomes) +} + diff --git a/data/list_pronomes.rda b/data/list_pronomes.rda new file mode 100644 index 0000000000000000000000000000000000000000..e5197a8c4f02896ebbf6bc0329cd07f0279f0805 GIT binary patch literal 767 zcmV)mL)iB=Gb zE0O4q3Q>UQka!v%A@7Bl%_iJM{J+y&lSJP?`;R^LjvXz#*O%R8Q52(MJRTLJvHiRl z*~gROqIhQCPd|OU`+RFZp6_At&ESjQS2u57y?2v$liSIc57$?&ia}l7%Juf{>;D?2 z+bQDeWfsz^VH)GmZRY&asZ<%sA&m@R#wbp%Vc|*%3Li3&U})yuDzpwc?UmgRB4J8W zXekcWs#gfcPK`D-th#RqB|g#BCD~UdV#=(EyiH^3m1<)sR0{3!cvg;PxVaRF#qYY=0!l5$yIC(W;3Zo^4TJWA0v_k}Mkba#* zNA`hjsMwtfZH>q%HVSpHs-t34@8O zv;~T&*GERNO;abSmw1^>BN%$ET&XDc3PbFH7|EuMM^#+yzULHTg|JErB{@?6iD(xL zA{Fp_dWmuJMB~>=PU?FR53unBvs80QiDjA-objSUK0levoN{QVvJtf#I9henFlA26 zh4j~BZrUV{0lZIG1m`emTDzCSb;UY6(zjE=r}$prp2O{cm>vUmKQn1NF3_|mHDswz z>qxTnCCf>82=+(F-h8wKy+km!V$C;Um@$FVU^L?sU`tC%G4yJM{TXTDy{#qpwoQ{# ze(&B#(0#UH+k3;e_h5S{33)bAj=R_ro(W5GAS}026dx06-A3`6dUUBUN}{(@cnJ}^ x@1Vt;!NF8`i11=U?j^P`1e5C!gv=X88^%Qm@LdeU%UBe}pTA#irf}~R003IQanJw& literal 0 HcmV?d00001 diff --git a/tests/testthat/test_remove_pronome_tratamento.R b/tests/testthat/test_remove_pronome_tratamento.R new file mode 100644 index 0000000..daf5fd7 --- /dev/null +++ b/tests/testthat/test_remove_pronome_tratamento.R @@ -0,0 +1,14 @@ +test_that("remove_pronome_tratamento", { + nomes <- c("Dr. Joao das Neves", "Exmo. Sr. Pedro dos Anjos", "Maria das Gracas") + nomes_sem_pronomes <- c("JOAO DAS NEVES", "PEDRO DOS ANJOS", "MARIA DAS GRACAS") + base <- data.table(nome = nomes) + base <- remove_pronome_tratamento(base, "nome") + expect_equal(c("nome", "nome_sem_pron"), names(base)) + expect_equal(nomes_sem_pronomes, base$nome_sem_pron) +}) + +test_that("remove_pronome_tratamento de um vetor de caracteres", { + nomes <- c("Dr. Joao das Neves", "Exmo. Sr. Pedro dos Anjos", "Maria das Gracas") + nomes_sem_pronomes <- c("JOAO DAS NEVES", "PEDRO DOS ANJOS", "MARIA DAS GRACAS") + expect_equal(nomes_sem_pronomes, remove_pronome_tratamento(nomes)) +}) From 9a86db2856b5625648bd39f95c7bb323b4eef910 Mon Sep 17 00:00:00 2001 From: igornoberto Date: Mon, 16 Oct 2017 17:24:00 -0200 Subject: [PATCH 02/11] =?UTF-8?q?atualiza=20documenta=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- NAMESPACE | 1 + R/data.R | 7 +++++++ man/lista.Rd | 14 ++++++++++++++ man/remove_pronome_tratamento.Rd | 27 +++++++++++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 man/lista.Rd create mode 100644 man/remove_pronome_tratamento.Rd diff --git a/NAMESPACE b/NAMESPACE index e91775a..c6d6245 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(extrai_NomeProprio) export(ident_erros_munic_galileo) export(nome_de_solteira) export(remove_preposicao_nomes) +export(remove_pronome_tratamento) import(RCurl) import(data.table) importFrom(dplyr,"%>%") diff --git a/R/data.R b/R/data.R index 90e32e8..b2ac730 100644 --- a/R/data.R +++ b/R/data.R @@ -11,3 +11,10 @@ #' \item{Nome_Municipio}{City names} #' } "geocod_base" + +#' Brazilian treatment pronouns +#' +#' A regex character with the most common Brazilian treatment pronouns used in Brazil +#' +#' @format Character +"lista" diff --git a/man/lista.Rd b/man/lista.Rd new file mode 100644 index 0000000..2108b75 --- /dev/null +++ b/man/lista.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{lista} +\alias{lista} +\title{Brazilian treatment pronouns} +\format{Character} +\usage{ +lista +} +\description{ +A regex character with the most common Brazilian treatment pronouns used in Brazil +} +\keyword{datasets} diff --git a/man/remove_pronome_tratamento.Rd b/man/remove_pronome_tratamento.Rd new file mode 100644 index 0000000..591e04d --- /dev/null +++ b/man/remove_pronome_tratamento.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/text_functions.R +\name{remove_pronome_tratamento} +\alias{remove_pronome_tratamento} +\title{Remove commom treatment pronouns used in Brazil.} +\usage{ +remove_pronome_tratamento(base, ..., suffixo = "_sem_pron") +} +\arguments{ +\item{base}{A data table, data frame or character vector.} + +\item{...}{columns for apply the function} +} +\value{ +the base param with a new column. +} +\description{ +\code{remove_pronome_tratamento} return names without treatment pronouns (Sra, Sr, Dr, etc). +} +\examples{ + remove_pronome_tratamento("Dr. Fulano") + remove_pronome_tratamento("Exmo. Sr. Cicrano de Tal") + + base <- data.table(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) + base <- remove_pronome_tratamento(base, "nome", suffixo = "_new_names") + +} From b5a84d5bd7f90083f31b89847e2f05ca709d35a0 Mon Sep 17 00:00:00 2001 From: igornoberto Date: Mon, 23 Oct 2017 16:26:01 -0200 Subject: [PATCH 03/11] Corrige dependencias --- R/text_functions.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/text_functions.R b/R/text_functions.R index c805559..07ddcef 100644 --- a/R/text_functions.R +++ b/R/text_functions.R @@ -94,14 +94,15 @@ abrevia_nome_meio <- function(base, ..., suffixo = "_abrev"){ #' @param base A data table, data frame or character vector. #' @param ... columns for apply the function #' -#' @import data.table, stringr, dplyr +#' @import data.table +#' @importFrom stringr str_replace_all #' @return the base param with a new column. #' #' @examples #' remove_pronome_tratamento("Dr. Fulano") #' remove_pronome_tratamento("Exmo. Sr. Cicrano de Tal") #' -#' base <- data.table(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) +#' base <- data.frame(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) #' base <- remove_pronome_tratamento(base, "nome", suffixo = "_new_names") #' #' @export From 18af59cf7200783a4cfd8cf604e4a8bf0bdf2c7a Mon Sep 17 00:00:00 2001 From: igornoberto Date: Tue, 24 Oct 2017 11:37:38 -0200 Subject: [PATCH 04/11] Atualiza Dependencias --- man/remove_pronome_tratamento.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/remove_pronome_tratamento.Rd b/man/remove_pronome_tratamento.Rd index 591e04d..71e0afe 100644 --- a/man/remove_pronome_tratamento.Rd +++ b/man/remove_pronome_tratamento.Rd @@ -21,7 +21,7 @@ the base param with a new column. remove_pronome_tratamento("Dr. Fulano") remove_pronome_tratamento("Exmo. Sr. Cicrano de Tal") - base <- data.table(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) + base <- data.frame(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) base <- remove_pronome_tratamento(base, "nome", suffixo = "_new_names") } From 6f2533fe20d72d2c10b998be4a71c81833789de0 Mon Sep 17 00:00:00 2001 From: igornoberto Date: Tue, 24 Oct 2017 11:58:12 -0200 Subject: [PATCH 05/11] =?UTF-8?q?Adiciona=20defini=C3=A7=C3=A3o=20de=20par?= =?UTF-8?q?ametro=20suffixo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/text_functions.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/text_functions.R b/R/text_functions.R index 07ddcef..691275b 100644 --- a/R/text_functions.R +++ b/R/text_functions.R @@ -92,6 +92,7 @@ abrevia_nome_meio <- function(base, ..., suffixo = "_abrev"){ #' #' #' @param base A data table, data frame or character vector. +#' @param suffixo Name of the new column to be created. #' @param ... columns for apply the function #' #' @import data.table From e9b27d2848ea96a1749f05acfee72e6eae1efaa7 Mon Sep 17 00:00:00 2001 From: igornoberto Date: Tue, 24 Oct 2017 12:06:49 -0200 Subject: [PATCH 06/11] =?UTF-8?q?Adiciona=20documenta=C3=A7=C3=A3o=20do=20?= =?UTF-8?q?parametro=20suffixo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- man/remove_pronome_tratamento.Rd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/man/remove_pronome_tratamento.Rd b/man/remove_pronome_tratamento.Rd index 71e0afe..f6f31d6 100644 --- a/man/remove_pronome_tratamento.Rd +++ b/man/remove_pronome_tratamento.Rd @@ -10,6 +10,8 @@ remove_pronome_tratamento(base, ..., suffixo = "_sem_pron") \item{base}{A data table, data frame or character vector.} \item{...}{columns for apply the function} + +\item{suffixo}{Name of the new column to be created.} } \value{ the base param with a new column. From fdaca17b4e3e1274a1db5474d49a523962416f1e Mon Sep 17 00:00:00 2001 From: igornoberto Date: Sat, 7 Oct 2017 20:33:22 -0300 Subject: [PATCH 07/11] =?UTF-8?q?Adiciona=20fun=C3=A7=C3=A3o=20de=20remove?= =?UTF-8?q?r=20pronomes=20de=20tratamento?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- R/text_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/text_functions.R b/R/text_functions.R index 691275b..29826e3 100644 --- a/R/text_functions.R +++ b/R/text_functions.R @@ -103,7 +103,7 @@ abrevia_nome_meio <- function(base, ..., suffixo = "_abrev"){ #' remove_pronome_tratamento("Dr. Fulano") #' remove_pronome_tratamento("Exmo. Sr. Cicrano de Tal") #' -#' base <- data.frame(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) +#' base <- data.frame(nome = c("Ph.D Pedro Anjos", "Prof Maria Gracas", "Pe. João")) #' base <- remove_pronome_tratamento(base, "nome", suffixo = "_new_names") #' #' @export From ad5af53bd701cb847b3876c1591ce30e6fd675f1 Mon Sep 17 00:00:00 2001 From: igornoberto Date: Mon, 16 Oct 2017 17:24:00 -0200 Subject: [PATCH 08/11] =?UTF-8?q?atualiza=20documenta=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- man/remove_pronome_tratamento.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/remove_pronome_tratamento.Rd b/man/remove_pronome_tratamento.Rd index f6f31d6..3ab3a7d 100644 --- a/man/remove_pronome_tratamento.Rd +++ b/man/remove_pronome_tratamento.Rd @@ -23,7 +23,7 @@ the base param with a new column. remove_pronome_tratamento("Dr. Fulano") remove_pronome_tratamento("Exmo. Sr. Cicrano de Tal") - base <- data.frame(nome = c("Ph.D Pedro dos Anjos", "Prof Maria das Gracas", "Pe. João das Neves")) + base <- data.frame(nome = c("Ph.D Pedro Anjos", "Prof Maria Gracas", "Pe. João")) base <- remove_pronome_tratamento(base, "nome", suffixo = "_new_names") } From 0e8dce2cf1ec9cc0da59d7b2907f7271c94a6a6a Mon Sep 17 00:00:00 2001 From: igornoberto Date: Wed, 25 Oct 2017 10:45:50 -0200 Subject: [PATCH 09/11] melhora testes --- tests/testthat/test_remove_pronome_tratamento.R | 16 ++++++++++++++-- tests/testthat/test_text_functions.R | 11 +++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_remove_pronome_tratamento.R b/tests/testthat/test_remove_pronome_tratamento.R index daf5fd7..faee6ae 100644 --- a/tests/testthat/test_remove_pronome_tratamento.R +++ b/tests/testthat/test_remove_pronome_tratamento.R @@ -7,8 +7,20 @@ test_that("remove_pronome_tratamento", { expect_equal(nomes_sem_pronomes, base$nome_sem_pron) }) + +test_that("remove_pronome_tratamento data.frame e NA", { + nomes <- c("Dr. Joao das Neves", "Exmo. Sr. Pedro dos Anjos", "Maria das Gracas", NA) + nomes_sem_pronomes <- c("JOAO DAS NEVES", "PEDRO DOS ANJOS", "MARIA DAS GRACAS", NA) + base <- data.frame(nome = nomes) + base <- remove_pronome_tratamento(base, "nome") + expect_equal(c("nome", "nome_sem_pron"), names(base)) + expect_equal(nomes_sem_pronomes, base$nome_sem_pron) +}) + test_that("remove_pronome_tratamento de um vetor de caracteres", { - nomes <- c("Dr. Joao das Neves", "Exmo. Sr. Pedro dos Anjos", "Maria das Gracas") - nomes_sem_pronomes <- c("JOAO DAS NEVES", "PEDRO DOS ANJOS", "MARIA DAS GRACAS") + nomes <- c("Dr. Joao das Neves", "Exmo. Sr. Pedro dos Anjos", "Maria das Gracas", NA) + nomes_sem_pronomes <- c("JOAO DAS NEVES", "PEDRO DOS ANJOS", "MARIA DAS GRACAS", NA) expect_equal(nomes_sem_pronomes, remove_pronome_tratamento(nomes)) }) + + diff --git a/tests/testthat/test_text_functions.R b/tests/testthat/test_text_functions.R index 2d0d2d2..7e2841c 100644 --- a/tests/testthat/test_text_functions.R +++ b/tests/testthat/test_text_functions.R @@ -7,6 +7,17 @@ test_that("remove_preposicao_nomes", { expect_equal(nomes_sem_preposicao, base$nome_semD) }) + +test_that("remove_preposicao_nomes", { + nomes <- c("João das Neves", "Pedro dos Anjos", "Maria das Gracas") + nomes_sem_preposicao <- c("João Neves", "Pedro Anjos", "Maria Gracas") + base <- data.frame(nome = nomes) + base <- remove_preposicao_nomes(base, "nome") + expect_equal(c("nome", "nome_semD"), names(base)) + expect_equal(nomes_sem_preposicao, base$nome_semD) +}) + + test_that("remove_preposicao_nomes de um vetor de caracteres", { nomes <- c("João das Neves", "Pedro dos Anjos", "Maria das Gracas") nomes_sem_preposicao <- c("João Neves", "Pedro Anjos", "Maria Gracas") From 26d64438fa70383c7197622b0d9be95b65a9bd45 Mon Sep 17 00:00:00 2001 From: Gustavo Rodrigues Coelho Date: Thu, 26 Oct 2017 15:01:14 -0200 Subject: [PATCH 10/11] Removing notes Signed-off-by: Gustavo Rodrigues Coelho --- R/text_functions.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/text_functions.R b/R/text_functions.R index 29826e3..e82286f 100644 --- a/R/text_functions.R +++ b/R/text_functions.R @@ -113,12 +113,13 @@ remove_pronome_tratamento <- function(base, ..., suffixo = "_sem_pron"){ remove_pronome_tratamento_coluna <- function(nomes){ + lista <- NULL data("list_pronomes",envir = environment()) novos_nomes <- sapply(nomes, USE.NAMES = F, function(nome){ if(is.na(nome)){ return(nome) } - nome <- str_replace_all(nome, "\\s+"," ") - nome <- str_replace_all(toupper(nome),lista,"") - return(nome) + nome <- str_replace_all(nome, "\\s+"," ") + nome <- str_replace_all(toupper(nome),lista,"") + return(nome) }) return(novos_nomes) } From f4ace6653482532d0e3f32d6a0b1bfd4d100b36e Mon Sep 17 00:00:00 2001 From: Igor Noberto Camelo Date: Fri, 17 Nov 2017 19:13:25 -0200 Subject: [PATCH 11/11] Spark: adiciona compatibilidade remove pronome para Spark --- R/text_functions.R | 37 +++++++++++++++++++++++++++-------- data/list_pronomes_spark.rda | Bin 0 -> 5072 bytes 2 files changed, 29 insertions(+), 8 deletions(-) create mode 100644 data/list_pronomes_spark.rda diff --git a/R/text_functions.R b/R/text_functions.R index e82286f..64acf63 100644 --- a/R/text_functions.R +++ b/R/text_functions.R @@ -44,20 +44,22 @@ abrevia_nomes_meio_coluna<- function(nomes){ } -funcao_generica <- function(base, ..., suffixo, FUN){ +funcao_generica <- function(base, ..., suffixo, FUN, spark_conn){ FUN <- match.fun(FUN) if(is.character(base)){ + if(!is.null(spark_conn)){ return(FUN(base,spark_conn)) } return(FUN(base)) } other_columns <- unlist(eval(substitute(alist(...)))) stopifnot(length(other_columns) > 0) if(!is.data.table(base)){ setDT(base) } new_columns <- sapply(other_columns, function(x) paste0(x, suffixo)) - mapply( function(x, y){ set(base, j = x, value = FUN(base[[y]])) }, - new_columns, other_columns) - + if(!is.null(spark_conn)){ + mapply( function(x, y){ set(base, j = x, value = FUN(base[[y]],spark_conn)) }, new_columns, other_columns) + } else { + mapply( function(x, y){ set(base, j = x, value = FUN(base[[y]])) }, new_columns, other_columns) + } return(base) - } @@ -94,8 +96,9 @@ abrevia_nome_meio <- function(base, ..., suffixo = "_abrev"){ #' @param base A data table, data frame or character vector. #' @param suffixo Name of the new column to be created. #' @param ... columns for apply the function +#' @param spark_conn A character with the spark's connection name. For NULL, it runs locally. #' -#' @import data.table +#' @import data.table sparklyr dplyr #' @importFrom stringr str_replace_all #' @return the base param with a new column. #' @@ -107,8 +110,13 @@ abrevia_nome_meio <- function(base, ..., suffixo = "_abrev"){ #' base <- remove_pronome_tratamento(base, "nome", suffixo = "_new_names") #' #' @export -remove_pronome_tratamento <- function(base, ..., suffixo = "_sem_pron"){ - return(funcao_generica(base, ..., suffixo = suffixo, FUN = remove_pronome_tratamento_coluna)) +remove_pronome_tratamento <- function(base, ..., suffixo = "_sem_pron", spark_conn = NULL){ + if(is.null(spark_conn)){ + return(funcao_generica(base, ..., suffixo = suffixo, FUN = remove_pronome_tratamento_coluna, spark_conn = spark_conn)) + } else{ + return(funcao_generica(base, ..., suffixo = suffixo, FUN = remove_pronome_tratamento_coluna_spark, spark_conn = spark_conn)) + } + } @@ -124,3 +132,16 @@ remove_pronome_tratamento_coluna <- function(nomes){ return(novos_nomes) } + +remove_pronome_tratamento_coluna_spark <- function(nomes,spark_conn){ + lista_spark <- NULL + data("list_pronomes_spark",envir = environment()) + nomes <- data.table(nome = nomes) + nomes_tbl <- dplyr::copy_to(spark_conn,nomes,"nomes",overwrite = TRUE) + if(!("nomes" %in% src_tbls(spark_conn))){ stop("Unable to copy 'base' to Spark") } + nomes_tbl <- nomes_tbl %>% dplyr::summarise(nome = toupper(nome)) %>% dplyr::mutate(regex = regexp_replace(nome,lista_spark,"")) + novos_nomes <- nomes_tbl %>% dplyr::select(regex) %>% dplyr::collect() %>% as.data.table() + dplyr::db_drop_table(spark_conn,"nomes") + return(novos_nomes) +} + diff --git a/data/list_pronomes_spark.rda b/data/list_pronomes_spark.rda new file mode 100644 index 0000000000000000000000000000000000000000..db9732c7450bbfdde9137f608c4f811eca920b0f GIT binary patch literal 5072 zcmbuDy>8S%6opNahR`7K0-;NaxS&Qtvoo{FAdknI*{ObJEtFu$n#E?z9@+W7X=M;i9pxOFGC|y!0 z1qP*sv6m+MX8y#EF+j2F1;ryExy|9iI3F@(un-P>3JOBjbYW#cWhA!Wn3>CqIjU#p z4HU_{X#Qb{?rRAQS*IyI;fP3w4YY>E&|iY((r#-)r;2&dsJMsqCTtUQ=I3R6Hb zCCnEe7beTxQ1tFD4Kpje_S_y^6*3fITCXrPD7MQaP?KeirlT(kFEjX`?sAzAhRX$1 zeGIL~1?OQ*J+*)6bLhFx!p$QQ5F^>iKuHE4C`6sT$tjc)B5af!diz7=`Kd^&@vbk7 za4hfS3;C#^RH5GbMw+E2cEjtL^_ZEU|MEsb{^bn~FtWw&?q1T{6_`~(>Mk} zCYtB8T?L*;CmV-0=;gjnf4#B~#E=!<}$SpvOJVR)hBo4H@V0rY(?mH+?% literal 0 HcmV?d00001