📦 rlibpostal
Rcpp bindings for libpostal — parsing and normalizing international postal addresses directly in R.
✨ Features
- 🌍 International address normalization (expand_address)
- 🧩 Address parsing into structured components (parse_address)
- ⚡ High performance via Rcpp + libpostal (C)
- 🔁 Persistent initialization (models are not reloaded on every call)

📚 About libpostal
rlibpostal is a thin R wrapper around the C library libpostal, which provides:
- Language-agnostic address normalization
- Machine-learning-based parsing
- Global address support
👉 https://github.com/openvenues/libpostal
Here is an example of how you can install libpostal from R:
# Run an external command through system2() and fail loudly on a non-zero
# exit status.
#
# Arguments:
#   cmd        Command to execute.
#   args       Character vector of arguments, passed to system2() as-is.
#   error_msg  Message used when the command fails; when NULL a generic
#              "Command failed: <cmd>" message is raised instead.
#   echo       When TRUE, the command line is echoed with message() first.
#
# Returns invisible(TRUE) on success; signals an error otherwise.
run_cmd <- function(cmd, args = character(), error_msg = NULL, echo = TRUE) {
  if (echo) {
    message(">> ", paste(c(cmd, args), collapse = " "))
  }

  exit_code <- system2(cmd, args = args)

  # Success path first: only an exact integer zero counts as success.
  if (identical(exit_code, 0L)) {
    return(invisible(TRUE))
  }

  failure <- if (is.null(error_msg)) paste("Command failed:", cmd) else error_msg
  stop(failure, call. = FALSE)
}
# Report whether an executable can be located via Sys.which().
# Returns a logical value per element of `cmd`: TRUE when a path was found.
check_cmd <- function(cmd) {
  located <- Sys.which(cmd)
  nzchar(located)
}
# Export the libpostal locations as LIBPOSTAL_* environment variables and,
# optionally, persist them in ~/.Renviron.
#
# Arguments:
#   include_dir, lib_dir, bin_dir, data_dir
#       Directory paths, or NULL to leave the corresponding variable alone.
#   scope
#       "session"  : only Sys.setenv() is called.
#       "renviron" : additionally rewrites ~/.Renviron.
#
# Only the variables that are actually supplied are replaced in ~/.Renviron;
# entries for arguments left as NULL are preserved rather than silently
# removed.
#
# Returns invisible(TRUE). The current value of all four variables is reported
# with message().
write_libpostal_env <- function(include_dir = NULL,
                                lib_dir = NULL,
                                bin_dir = NULL,
                                data_dir = NULL,
                                scope = c("session", "renviron")) {
  scope <- match.arg(scope)

  all_vars <- c(
    "LIBPOSTAL_INCLUDE", "LIBPOSTAL_LIB", "LIBPOSTAL_BIN", "LIBPOSTAL_DATA_DIR"
  )
  supplied <- list(
    LIBPOSTAL_INCLUDE = include_dir,
    LIBPOSTAL_LIB = lib_dir,
    LIBPOSTAL_BIN = bin_dir,
    LIBPOSTAL_DATA_DIR = data_dir
  )
  supplied <- Filter(Negate(is.null), supplied)

  # Session-level export, one variable at a time (value used as given).
  for (var in names(supplied)) {
    do.call(Sys.setenv, supplied[var])
  }

  if (scope == "renviron") {
    renv <- path.expand("~/.Renviron")
    kept <- if (file.exists(renv)) readLines(renv, warn = FALSE) else character()

    # Drop only the entries that are about to be rewritten.
    for (var in names(supplied)) {
      kept <- kept[!startsWith(kept, paste0(var, "="))]
    }

    # Persisted values are normalized with forward slashes and double-quoted.
    added <- unlist(
      lapply(names(supplied), function(var) {
        sprintf(
          '%s="%s"',
          var,
          normalizePath(supplied[[var]], winslash = "/", mustWork = FALSE)
        )
      }),
      use.names = FALSE
    )

    writeLines(c(kept, added), renv)
    message("Variables added to ~/.Renviron")
  }

  for (var in all_vars) {
    message(var, " = ", Sys.getenv(var))
  }
  invisible(TRUE)
}
# Locate a plausible libpostal data directory below `prefix`.
#
# Candidates are tried in order:
#   <prefix>/share/libpostal, <prefix>/share/libpostal_data,
#   <prefix>/libpostal, <prefix>
# A candidate is accepted when it is an existing directory that either
# contains one of the marker entries ("address_parser.dat", "parser",
# "transliteration") or contains at least one file anywhere below it.
#
# The non-empty test is evaluated per candidate, so an empty directory is
# never accepted just because a sibling candidate has files in it.
#
# Returns the first accepted candidate, or NA_character_ when none qualifies.
find_libpostal_data_dir <- function(prefix) {
  candidates <- c(
    file.path(prefix, "share", "libpostal"),
    file.path(prefix, "share", "libpostal_data"),
    file.path(prefix, "libpostal"),
    prefix
  )
  markers <- c("address_parser.dat", "parser", "transliteration")

  looks_like_data_dir <- function(dir) {
    if (!dir.exists(dir)) {
      return(FALSE)
    }
    if (any(file.exists(file.path(dir, markers)))) {
      return(TRUE)
    }
    # Fallback: any file at all below this specific directory.
    length(
      list.files(dir, recursive = TRUE, all.files = TRUE, no.. = TRUE)
    ) > 0
  }

  # Stop at the first hit so later (larger) candidates are not scanned.
  for (dir in candidates) {
    if (looks_like_data_dir(dir)) {
      return(dir)
    }
  }
  NA_character_
}
# Build and install libpostal from source on Debian/Ubuntu, then export the
# LIBPOSTAL_* environment variables.
#
# Arguments:
#   use_sudo  Run apt-get, `make install` and ldconfig through sudo.
#   persist   TRUE writes the variables to ~/.Renviron, FALSE only sets them
#             for the current session.
#   prefix    Installation prefix handed to ./configure.
#   data_dir  Data directory handed to ./configure (--datadir).
#   src_dir   Directory holding the libpostal git checkout.
#   ref       Git ref (branch, tag or commit) to check out.
#
# Returns (invisibly) a list with elements prefix, include, lib, bin, data and
# source. Any failing external command raises an error via run_cmd().
install_libpostal_linux <- function(use_sudo = TRUE,
                                    persist = TRUE,
                                    prefix = "/usr/local",
                                    data_dir = file.path(prefix, "share", "libpostal"),
                                    src_dir = tempfile("libpostal_src_"),
                                    ref = "master") {
  if (!check_cmd("apt-get")) {
    stop("This Linux function is intended for Debian/Ubuntu (apt-get).", call. = FALSE)
  }

  # Run a command directly, or through sudo when requested.
  run_privileged <- function(cmd, args, error_msg) {
    if (use_sudo) {
      run_cmd("sudo", c(cmd, args), error_msg)
    } else {
      run_cmd(cmd, args, error_msg)
    }
  }

  build_deps <- c("curl", "autoconf", "automake", "libtool", "pkg-config", "git", "make")
  run_privileged("apt-get", "update", "apt-get update failed.")
  run_privileged("apt-get", c("install", "-y", build_deps),
                 "APT installation of libpostal build dependencies failed.")

  dir.create(src_dir, recursive = TRUE, showWarnings = FALSE)
  # Absolute path: the working directory changes below.
  src_dir <- normalizePath(src_dir, winslash = "/", mustWork = FALSE)

  if (!dir.exists(file.path(src_dir, ".git"))) {
    run_cmd("git", c("clone", "https://github.com/openvenues/libpostal.git", src_dir),
            "Git clone of libpostal failed.")
  }
  run_cmd("git", c("-C", src_dir, "fetch", "--all", "--tags"),
          "Git fetch for libpostal failed.")
  run_cmd("git", c("-C", src_dir, "checkout", ref),
          "Git checkout for libpostal failed.")

  # Resolve the configure arguments before leaving the caller's directory so
  # relative prefix/data_dir values keep their meaning.
  configure_args <- c(
    sprintf("--datadir=%s", shQuote(normalizePath(data_dir, winslash = "/", mustWork = FALSE))),
    sprintf("--prefix=%s", shQuote(normalizePath(prefix, winslash = "/", mustWork = FALSE)))
  )

  # bootstrap.sh and configure act on the current working directory, so they
  # must run from inside the source tree; otherwise the Makefile is generated
  # elsewhere and `make -C src_dir` has nothing to build.
  old_wd <- setwd(src_dir)
  on.exit(setwd(old_wd), add = TRUE)

  run_cmd("bash", c(file.path(src_dir, "bootstrap.sh")),
          "libpostal bootstrap failed.")
  run_cmd(file.path(src_dir, "configure"), configure_args,
          "libpostal configure failed.")
  run_cmd("make", c("-C", src_dir),
          "libpostal compilation failed.")

  run_privileged("make", c("-C", src_dir, "install"),
                 "libpostal installation failed.")
  if (check_cmd("ldconfig")) {
    run_privileged("ldconfig", character(), "ldconfig failed.")
  }

  include_candidates <- c(
    file.path(prefix, "include"),
    "/usr/local/include",
    "/usr/include"
  )
  lib_candidates <- c(
    file.path(prefix, "lib"),
    "/usr/local/lib",
    "/usr/lib/x86_64-linux-gnu",
    "/usr/lib"
  )
  bin_candidates <- c(
    file.path(prefix, "bin"),
    "/usr/local/bin",
    "/usr/bin"
  )

  # First candidate containing the expected file(s); NA when none does.
  include_dir <- include_candidates[
    file.exists(file.path(include_candidates, "libpostal", "libpostal.h"))
  ][1]
  lib_dir <- lib_candidates[
    file.exists(file.path(lib_candidates, "libpostal.so")) |
      file.exists(file.path(lib_candidates, "libpostal.a")) |
      file.exists(file.path(lib_candidates, "libpostal.so.1"))
  ][1]
  bin_dir <- bin_candidates[
    file.exists(file.path(bin_candidates, "libpostal")) |
      file.exists(file.path(bin_candidates, "address_parser")) |
      file.exists(file.path(bin_candidates, "address_expand"))
  ][1]

  # The directory passed to --datadir wins when it exists; only fall back to
  # searching under the prefix when it does not.
  detected_data_dir <- if (dir.exists(data_dir)) {
    data_dir
  } else {
    find_libpostal_data_dir(prefix)
  }

  if (is.na(include_dir)) {
    warning("libpostal headers were not found automatically.")
    include_dir <- file.path(prefix, "include")
  }
  if (is.na(lib_dir)) {
    warning("libpostal libraries were not found automatically.")
    lib_dir <- file.path(prefix, "lib")
  }
  if (is.na(bin_dir)) {
    warning("libpostal executables were not found automatically.")
    bin_dir <- file.path(prefix, "bin")
  }
  if (is.na(detected_data_dir)) {
    warning("libpostal data directory was not found automatically.")
    detected_data_dir <- data_dir
  }

  write_libpostal_env(
    include_dir = include_dir,
    lib_dir = lib_dir,
    bin_dir = bin_dir,
    data_dir = detected_data_dir,
    scope = if (persist) "renviron" else "session"
  )

  invisible(list(
    prefix = prefix,
    include = include_dir,
    lib = lib_dir,
    bin = bin_dir,
    data = detected_data_dir,
    source = src_dir
  ))
}
# Install libpostal on macOS, either from Homebrew or by building from source,
# then export the LIBPOSTAL_* environment variables.
#
# Arguments:
#   persist          TRUE writes the variables to ~/.Renviron, FALSE only sets
#                    them for the current session.
#   use_brew_binary  TRUE runs `brew install libpostal`; otherwise the sources
#                    are cloned and built.
#   prefix           Installation prefix for the source build
#                    (defaults to "/usr/local").
#   data_dir         Data directory; detected (brew) or derived from `prefix`
#                    (source build) when NULL.
#   disable_sse2     Pass --disable-sse2 to ./configure in the source build.
#
# Returns (invisibly) a list describing the installation; `method` is "brew"
# or "source". Any failing external command raises an error via run_cmd().
install_libpostal_macos <- function(persist = TRUE,
                                    use_brew_binary = TRUE,
                                    prefix = NULL,
                                    data_dir = NULL,
                                    disable_sse2 = FALSE) {
  if (!check_cmd("brew")) {
    stop("Homebrew is not available.", call. = FALSE)
  }

  if (isTRUE(use_brew_binary)) {
    run_cmd("brew", c("install", "libpostal"),
            "Homebrew installation of libpostal failed.")

    brew_prefix <- trimws(system2("brew", "--prefix", stdout = TRUE))
    lp_prefix <- trimws(system2("brew", c("--prefix", "libpostal"), stdout = TRUE))

    include_candidates <- c(file.path(lp_prefix, "include"), file.path(brew_prefix, "include"))
    lib_candidates <- c(file.path(lp_prefix, "lib"), file.path(brew_prefix, "lib"))
    bin_candidates <- c(file.path(lp_prefix, "bin"), file.path(brew_prefix, "bin"))

    # First candidate containing the expected file(s); NA when none does.
    include_dir <- include_candidates[
      file.exists(file.path(include_candidates, "libpostal", "libpostal.h"))
    ][1]
    lib_dir <- lib_candidates[
      file.exists(file.path(lib_candidates, "libpostal.dylib")) |
        file.exists(file.path(lib_candidates, "libpostal.a"))
    ][1]
    bin_dir <- bin_candidates[
      file.exists(file.path(bin_candidates, "libpostal")) |
        file.exists(file.path(bin_candidates, "address_parser")) |
        file.exists(file.path(bin_candidates, "address_expand"))
    ][1]

    detected_data_dir <- find_libpostal_data_dir(lp_prefix)
    if (is.null(data_dir) || is.na(data_dir)) {
      data_dir <- detected_data_dir
    }

    if (is.na(include_dir)) {
      warning("libpostal headers were not found automatically.")
      include_dir <- file.path(lp_prefix, "include")
    }
    if (is.na(lib_dir)) {
      warning("libpostal libraries were not found automatically.")
      lib_dir <- file.path(lp_prefix, "lib")
    }
    if (is.na(bin_dir)) {
      warning("libpostal executables were not found automatically.")
      bin_dir <- file.path(lp_prefix, "bin")
    }
    if (is.null(data_dir) || is.na(data_dir)) {
      warning("libpostal data directory was not found automatically.")
      data_dir <- file.path(lp_prefix, "share", "libpostal")
    }

    write_libpostal_env(
      include_dir = include_dir,
      lib_dir = lib_dir,
      bin_dir = bin_dir,
      data_dir = data_dir,
      scope = if (persist) "renviron" else "session"
    )

    return(invisible(list(
      prefix = lp_prefix,
      include = include_dir,
      lib = lib_dir,
      bin = bin_dir,
      data = data_dir,
      method = "brew"
    )))
  }

  # ---- Source build ----
  if (is.null(prefix)) {
    prefix <- "/usr/local"
  }
  if (is.null(data_dir)) {
    data_dir <- file.path(prefix, "share", "libpostal")
  }

  run_cmd("brew", c("install", "curl", "autoconf", "automake", "libtool", "pkg-config", "git"),
          "Homebrew installation of libpostal build dependencies failed.")

  src_dir <- tempfile("libpostal_src_")
  run_cmd("git", c("clone", "https://github.com/openvenues/libpostal.git", src_dir),
          "Git clone of libpostal failed.")

  # Resolve the configure arguments before leaving the caller's directory so
  # relative prefix/data_dir values keep their meaning.
  cfg <- c(
    sprintf("--datadir=%s", shQuote(normalizePath(data_dir, winslash = "/", mustWork = FALSE))),
    sprintf("--prefix=%s", shQuote(normalizePath(prefix, winslash = "/", mustWork = FALSE)))
  )
  if (isTRUE(disable_sse2)) {
    cfg <- c(cfg, "--disable-sse2")
  }

  # bootstrap.sh and configure act on the current working directory, so they
  # must run from inside the source tree; otherwise the Makefile is generated
  # elsewhere and `make -C src_dir` has nothing to build.
  old_wd <- setwd(src_dir)
  on.exit(setwd(old_wd), add = TRUE)

  run_cmd("bash", c(file.path(src_dir, "bootstrap.sh")),
          "libpostal bootstrap failed.")
  run_cmd(file.path(src_dir, "configure"), cfg,
          "libpostal configure failed.")
  run_cmd("make", c("-C", src_dir),
          "libpostal compilation failed.")
  run_cmd("make", c("-C", src_dir, "install"),
          "libpostal installation failed.")

  include_dir <- file.path(prefix, "include")
  lib_dir <- file.path(prefix, "lib")
  bin_dir <- file.path(prefix, "bin")

  write_libpostal_env(
    include_dir = include_dir,
    lib_dir = lib_dir,
    bin_dir = bin_dir,
    data_dir = data_dir,
    scope = if (persist) "renviron" else "session"
  )

  invisible(list(
    prefix = prefix,
    include = include_dir,
    lib = lib_dir,
    bin = bin_dir,
    data = data_dir,
    source = src_dir,
    method = "source"
  ))
}
# Default-value operator: returns `y` when `x` is NULL, zero-length, or a
# single NA; otherwise returns `x` unchanged.
#
# The NA test is limited to length-one input so that longer vectors do not
# reach `||` with a non-scalar condition (an error since R 4.3).
# Note: this definition masks the NULL-only `%||%` that base R ships from 4.4.
`%||%` <- function(x, y) {
  is_missing <- is.null(x) ||
    length(x) == 0 ||
    (length(x) == 1L && isTRUE(is.na(x)))
  if (is_missing) y else x
}
# Convert a path to MSYS2 notation: after normalizing with forward slashes, a
# leading drive specifier such as "C:" becomes "/c"; paths without a drive
# specifier are returned as normalized.
to_msys_path <- function(path) {
  normalized <- normalizePath(path, winslash = "/", mustWork = FALSE)

  has_drive <- grepl("^[A-Za-z]:", normalized)
  if (!has_drive) {
    return(normalized)
  }

  drive_letter <- tolower(substring(normalized, 1, 1))
  remainder <- substring(normalized, 3)  # everything after the "X:" prefix
  paste0("/", drive_letter, remainder)
}
# Run shell code through an Rtools/MSYS2 bash with the chosen MSYS2
# environment active.
#
# Arguments:
#   bash         Path to the bash executable.
#   cmd          Character vector of shell lines; written to a temporary
#                script that starts with `set -e`.
#   repo         MSYS2 environment: "ucrt64", "clang64" or "mingw64".
#   rtools_root  Root directory of the Rtools installation.
#   error_msg    Message used when the script fails; a generic message is used
#                when NULL.
#   echo         When TRUE, the bash invocation is echoed with message().
#
# MSYSTEM, CHERE_INVOKING and PATH are modified for the duration of the call
# and restored on exit. Returns invisible(TRUE); signals an error when bash
# exits with a non-zero status.
run_bash_msys2 <- function(bash, cmd, repo = "ucrt64", rtools_root = "C:/rtools45",
                           error_msg = NULL, echo = TRUE) {
  repo <- match.arg(repo, c("ucrt64", "clang64", "mingw64"))
  repo_bin <- file.path(rtools_root, repo, "bin")
  usr_bin <- file.path(rtools_root, "usr", "bin")

  old_msystem <- Sys.getenv("MSYSTEM", unset = NA)
  old_chere <- Sys.getenv("CHERE_INVOKING", unset = NA)
  old_path <- Sys.getenv("PATH", unset = NA)
  on.exit({
    if (is.na(old_msystem)) Sys.unsetenv("MSYSTEM") else Sys.setenv(MSYSTEM = old_msystem)
    if (is.na(old_chere)) Sys.unsetenv("CHERE_INVOKING") else Sys.setenv(CHERE_INVOKING = old_chere)
    if (is.na(old_path)) Sys.unsetenv("PATH") else Sys.setenv(PATH = old_path)
  }, add = TRUE)

  # Put the toolchain directories first; skip the previous PATH when it was
  # unset so that a literal "NA" entry is never injected.
  path_entries <- c(
    normalizePath(repo_bin, winslash = "\\", mustWork = FALSE),
    normalizePath(usr_bin, winslash = "\\", mustWork = FALSE),
    if (!is.na(old_path)) old_path
  )
  Sys.setenv(
    MSYSTEM = toupper(repo),
    CHERE_INVOKING = "1",
    PATH = paste(path_entries, collapse = .Platform$path.sep)
  )

  script <- tempfile(fileext = ".sh")
  # Remove the script even when running it raises an error.
  on.exit(unlink(script), add = TRUE)

  # Binary mode keeps LF line endings on Windows; text mode would emit CRLF.
  con <- file(script, open = "wb")
  tryCatch(
    writeLines(
      c("#!/usr/bin/env bash", "set -e", cmd),
      con,
      sep = "\n",
      useBytes = TRUE
    ),
    finally = close(con)
  )

  script_path <- normalizePath(script, winslash = "/", mustWork = FALSE)
  if (echo) {
    message(">> ", bash, " ", script_path)
  }

  # system2() does not quote its arguments; quote the path so temporary
  # directories containing spaces still work.
  status <- system2(bash, args = shQuote(script_path))

  if (!identical(status, 0L)) {
    stop(if (is.null(error_msg)) "Bash command failed." else error_msg, call. = FALSE)
  }
  invisible(TRUE)
}
# Build and install libpostal on Windows inside an Rtools/MSYS2 environment,
# then export the LIBPOSTAL_* environment variables.
#
# Arguments:
#   rtools_root  Root directory of the Rtools installation.
#   repo         MSYS2 environment: "ucrt64", "clang64" or "mingw64".
#   persist      TRUE writes the variables to ~/.Renviron, FALSE only sets
#                them for the current session.
#   prefix       Installation prefix; defaults to <rtools_root>/<repo>.
#   data_dir     Data directory; defaults to <prefix>/share/libpostal.
#   src_dir      Directory holding the libpostal git checkout.
#
# Returns (invisibly) a list with elements prefix, include, lib, bin, data and
# source. Any failing shell step raises an error via run_bash_msys2().
install_libpostal_windows_msys2 <- function(rtools_root = "C:/rtools45",
                                            repo = "ucrt64",
                                            persist = TRUE,
                                            prefix = NULL,
                                            data_dir = NULL,
                                            src_dir = "C:/libpostal-src") {
  repo <- match.arg(repo, c("ucrt64", "clang64", "mingw64"))

  bash <- file.path(rtools_root, "usr", "bin", "bash.exe")
  if (!file.exists(bash)) {
    stop("bash.exe was not found in Rtools/MSYS2: ", bash, call. = FALSE)
  }

  # `repo` is already validated, so it doubles as the sub-directory name.
  prefix <- prefix %||% file.path(rtools_root, repo)
  if (is.null(data_dir)) {
    data_dir <- file.path(prefix, "share", "libpostal")
  }

  # Shell-side (MSYS2) spellings of the Windows paths, quoted once.
  q_src <- shQuote(to_msys_path(src_dir))
  q_prefix <- shQuote(to_msys_path(prefix))
  q_data <- shQuote(to_msys_path(data_dir))

  # Every shell step shares the same bash / environment settings.
  in_msys <- function(script, failure) {
    run_bash_msys2(
      bash = bash,
      cmd = script,
      repo = repo,
      rtools_root = rtools_root,
      error_msg = failure
    )
  }

  # NOTE(review): confirm that "mingw-w64-clang-x86_64-gcc" is a real package
  # in the clang64 repository; it is kept here exactly as before.
  compiler_pkg <- switch(
    repo,
    ucrt64 = "mingw-w64-ucrt-x86_64-gcc",
    clang64 = "mingw-w64-clang-x86_64-gcc",
    mingw64 = "mingw-w64-x86_64-gcc"
  )
  pkgs <- c(
    "autoconf", "automake", "curl", "git", "make", "libtool", "pkgconf",
    compiler_pkg
  )

  in_msys(
    sprintf("pacman -S --needed --noconfirm %s", paste(pkgs, collapse = " ")),
    "MSYS2 installation of libpostal build dependencies failed."
  )
  in_msys(
    "echo MSYSTEM=$MSYSTEM && which gcc && gcc --version",
    "MSYS2 compiler test failed."
  )

  build_steps <- c(
    sprintf("mkdir -p %s", q_src),
    sprintf(
      "if [ ! -d %s/.git ]; then git clone https://github.com/openvenues/libpostal.git %s; fi",
      q_src, q_src
    ),
    sprintf("cd %s", q_src),
    "git fetch --all --tags",
    "git checkout master",
    "cp -rf windows/* ./",
    "./bootstrap.sh",
    sprintf("./configure --datadir=%s --prefix=%s", q_data, q_prefix),
    "make",
    "make install"
  )
  in_msys(
    paste(build_steps, collapse = "\n"),
    "MSYS2 build/install of libpostal failed."
  )

  result <- list(
    prefix = prefix,
    include = file.path(prefix, "include"),
    lib = file.path(prefix, "lib"),
    bin = file.path(prefix, "bin"),
    data = data_dir,
    source = src_dir
  )

  write_libpostal_env(
    include_dir = result$include,
    lib_dir = result$lib,
    bin_dir = result$bin,
    data_dir = result$data,
    scope = if (persist) "renviron" else "session"
  )

  invisible(result)
}
# Example: Windows / Rtools45 / UCRT64
# Uses the Windows installer defined above; no plain `install_libpostal()`
# is defined in this script.
install_libpostal_windows_msys2(rtools_root = "C:/rtools45", repo = "ucrt64")
> library(rlibpostal)
>
> libpostal_init()
[1] TRUE
>
> libpostal_parse_address(
+ "123 rue Saint-Paul Ouest, Montréal, QC H2Y 1Z5"
+ )
label value
1 house_number 123
2 road rue saint-paul ouest
3 city montréal
4 state qc
5 postcode h2y 1z5
> libpostal_expand_address(
+ "123 rue Saint-Paul Ouest, Montréal, QC H2Y 1Z5"
+ )
[1] "123 rue saint-paul ouest montreal qc h2y 1z5"
[2] "123 rue saint-paul ouest montreal quebec h2y 1z5"
[3] "123 rue saint-paul ouest montreal qc h2y 1 z5"
[4] "123 rue saint-paul ouest montreal quebec h2y 1 z5"
[5] "123 rue saint-paul ouest montreal qc h 2y 1z5"
[6] "123 rue saint-paul ouest montreal quebec h 2y 1z5"
[7] "123 rue saint-paul ouest montreal qc h 2y 1 z5"
[8] "123 rue saint-paul ouest montreal quebec h 2y 1 z5"
[9] "123 rue saint paul ouest montreal qc h2y 1z5"
[10] "123 rue saint paul ouest montreal quebec h2y 1z5"
[11] "123 rue saint paul ouest montreal qc h2y 1 z5"
[12] "123 rue saint paul ouest montreal quebec h2y 1 z5"
[13] "123 rue saint paul ouest montreal qc h 2y 1z5"
[14] "123 rue saint paul ouest montreal quebec h 2y 1z5"
[15] "123 rue saint paul ouest montreal qc h 2y 1 z5"
[16] "123 rue saint paul ouest montreal quebec h 2y 1 z5"
> libpostal_teardown_all()
[1] TRUE
📄 License
MIT (package) libpostal: BSD-style license
🙌 Acknowledgments libpostal by OpenVenues Rcpp ecosystem