Commit 2739469e authored by Carlos Cámara

Refactored geocoding.

Geocoding was failing because the input street names were written in Spanish, whereas the official addresses are in Catalan. This commit introduces a preliminary step that "translates" addresses from Spanish to Catalan.
parent 52da3f93
.Rproj.user
.Rhistory
.RData
.Ruserdata
This source diff could not be displayed because it is too large. You can view the blob instead.
# require(devtools)
# devtools::install_github(repo = 'rCarto/photon')
library(photon)
library(stringdist)
library(tidyverse)
# Addresses' cleanup ------------------------------------------------------
# Load Valencia's official street names from:
# http://gobiernoabierto.valencia.es/va/dataset/?id=listado-de-calles
# Read the official street list and derive the street-type columns plus the
# Spanish lookup name (`nombre_es`) used later for record linkage.
calles_valencia <- read.csv("data/original/vias-valencia.csv") %>%
  mutate(
    # Spanish street type: the code without its slash ("C/" -> "C"), with
    # "PG" rewritten as "PA".
    tipovia_es = sub("PG", "PA", sub("C/", "C", codtipovia)),
    # Catalan street type. This must be derived from the *raw* code: once
    # "C/" has been normalised to "C" (next line) the "C/" pattern can no
    # longer match, and the column would silently keep the bare code
    # instead of "Carrer de".
    tipovia_ca = sub("C/", "Carrer de", codtipovia),
    # Normalised street-type code ("C/" -> "C"), overwriting the raw column.
    codtipovia = sub("C/", "C", codtipovia),
    # Spanish street name as "<type> <name>", stored as a factor.
    nombre_es = as.factor(paste(tipovia_es, traducnooficial, sep = " "))
  )
# Load dataset and manipulate addresses.
# Read the tourist-housing registry and split each address into its parts.
df <- read.csv(
  "data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv"
) %>%
  # Everything before the first ", nº" becomes `nombre_es_raw`; the rest is
  # kept whole in `num` (extra = "merge"). `Address` itself is retained.
  separate(
    col = Address,
    into = c("nombre_es_raw", "num"),
    sep = ", nº",
    remove = FALSE,
    extra = "merge"
  ) %>%
  # Split `num` again at its first ", "; the remainder goes to `puerta`.
  separate(
    col = num,
    into = c("num", "puerta"),
    sep = ", ",
    remove = FALSE,
    extra = "merge"
  ) %>%
  # Upper-case the raw street name and store it as a factor.
  mutate(nombre_es_raw = as.factor(toupper(nombre_es_raw)))
# Basic Record linkage.
# Replace each street name with the most similar official one.
#
# Arguments:
#   names          Character vector or factor of street names to link.
#   officialnames  Character vector or factor of official names; defaults to
#                  the Spanish names in `calles_valencia`.
#
# Returns a vector of the same length and type (factor or character) as
# `names`. Entries with an approximate match (stringdist::amatch,
# maxDist = 5) against the upper-cased official names are replaced by the
# official name in its original case; entries without a match, and NA
# entries, are returned unchanged.
record.linkage.names <- function(names,
                                 officialnames = calles_valencia$nombre_es) {
  input.is.factor <- is.factor(names)

  # Work on character values. Assigning a new official name into a factor
  # whose levels do not contain it would produce NA (with a warning) rather
  # than the intended replacement.
  raw.names <- as.character(names)
  official <- as.character(officialnames)
  official.upper <- toupper(official)

  # Look each distinct name up once instead of once per row.
  candidates <- unique(raw.names[!is.na(raw.names)])
  hits <- amatch(candidates, official.upper, maxDist = 5)
  inferred <- official[hits]

  # Map the per-candidate result back onto every input position.
  replacement <- inferred[match(raw.names, candidates)]
  linked <- raw.names
  found <- !is.na(replacement)
  linked[found] <- replacement[found]

  if (input.is.factor) factor(linked) else linked
}
# df$nombre_es = record.linkage.names(df$nombre_es_raw)
# Add the linked official Spanish name to each row, then attach the matching
# official street record through a natural left join (no explicit `by`, so
# the join uses whatever columns the two tables share).
df <- left_join(
  mutate(df, nombre_es = record.linkage.names(nombre_es_raw)),
  calles_valencia
)
# Geocoding with Photon ---------------------------------------------------
# require(devtools)
# devtools::install_github(repo = 'rCarto/photon')
library(photon)
library(dplyr)
# Load the registry from the remote repository, expand every "C " in the
# address to "calle de ", and append the municipality to build `Address2`.
df <- read.csv(
  "https://code.montera34.com/airbnb/valencia/raw/master/data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv"
) %>%
  mutate(
    Address = gsub("C ", "calle de ", Address),
    # Uses the already-expanded `Address` from the line above.
    Address2 = paste(Address, Municipio, sep = ", ")
  )
geocoded.df = photon::geocode(head(df$Addresses2), limit = 1,
# Keep only the fields needed for geocoding and assemble the address string
# from the Catalan street type, official name, number and municipality.
df2 <- mutate(
  select(df, Signatura, Municipio, tipovia_ca, nomoficial, num),
  full_address_ca = paste(tipovia_ca, nomoficial, num, Municipio, sep = ", ")
)
# Geocode only the rows returned by head(), asking for one result each.
geocoded.df <- photon::geocode(
  head(df2$full_address_ca),
  limit = 1,
  # lang = "es",
  key = "highway",
  # NOTE(review): presumably a (lon, lat) point used to bias results —
  # confirm against the photon documentation.
  locbias = c(-0.3766, 39.4665)
)
# Combine geocoded dataframe with original one.
df.combined = geocoded.df %>%
select(location, lon, lat) %>%
# Keep the geocoder's `location`, `lon` and `lat` columns and right-join them
# onto `df`, so that every row of `df` is retained; `location` is matched
# against `Address2`.
# NOTE(review): `Address2` is only created by the URL-based load earlier in
# this section; the `df` produced by the record-linkage step is not shown to
# have that column, and the geocoded strings come from
# `df2$full_address_ca` — confirm the join key.
df.combined = geocoded.df %>%
select(location, lon, lat) %>%
right_join(df, by = c("location" = "Address2"))
write.csv(df.combined, file = "../data/output/filename.csv")
\ No newline at end of file
# Persist the combined table as CSV, relative to the project root.
utils::write.csv(
  x = df.combined,
  file = "data/output/filename.csv"
)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment