Commit a2ca6ac6 authored by Montera34's avatar Montera34
Browse files

Merge branch 'patch-3' into 'master'

Add geocoding script using Photon. (WIP)

See merge request !6
parents 71105c93 2739469e
.Rproj.user
.Rhistory
.RData
.Ruserdata
This diff is collapsed.
# require(devtools)
# devtools::install_github(repo = 'rCarto/photon')
library(photon)
library(stringdist)
library(tidyverse)
# Addresses' cleanup ------------------------------------------------------
# Load Valencia's official street names from:
# http://gobiernoabierto.valencia.es/va/dataset/?id=listado-de-calles
#
# Columns derived here:
#   tipovia_ca  street-type code with "C/" expanded to "Carrer de"; used when
#               building the address string sent to the geocoder
#   codtipovia  street-type code with "C/" normalised to "C"
#   tipovia_es  normalised code with "PG" replaced by "PA"
#   nombre_es   "<tipovia_es> <traducnooficial>" as a factor; this is the
#               default set of official names used by the record linkage
calles_valencia = read.csv("data/original/vias-valencia.csv") %>%
  mutate(
    # tipovia_ca has to be derived from the raw code, BEFORE "C/" is rewritten
    # to "C" below; otherwise the "C/" pattern can never match and the
    # "Carrer de" expansion is silently skipped.
    tipovia_ca = sub("C/", "Carrer de", codtipovia, fixed = TRUE),
    codtipovia = sub("C/", "C", codtipovia, fixed = TRUE),
    # From here on codtipovia is the normalised code.
    tipovia_es = sub("PG", "PA", codtipovia, fixed = TRUE),
    nombre_es = as.factor(paste(tipovia_es, traducnooficial, sep = " "))
  )
# Read the tourist-housing dataset and break `Address` into its parts.
# `Address` itself is kept (remove = FALSE). The pieces are:
#   nombre_es_raw  text before the ", nº" separator, upper-cased, as a factor
#   num            text after ", nº" up to the next ", "
#   puerta         whatever follows that ", " (extra = "merge" keeps it whole)
df = read.csv("data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv")
df = separate(df, Address, into = c("nombre_es_raw", "num"),
              sep = ", nº", extra = "merge", remove = FALSE)
df = separate(df, num, into = c("num", "puerta"),
              sep = ", ", extra = "merge", remove = FALSE)
df = mutate(df, nombre_es_raw = as.factor(toupper(nombre_es_raw)))
# Basic Record linkage.
record.linkage.names = function(names,
                                officialnames = calles_valencia$nombre_es) {
  # Replaces each street name with the most similar official one.
  #
  # Args:
  #   names: character vector or factor of street names; they are compared
  #     against the upper-cased official names, so they should be upper case.
  #   officialnames: character vector or factor of official street names.
  #
  # Returns:
  #   A vector of the same length as `names`. Entries for which amatch() finds
  #   an official name within maxDist = 5 are replaced by that official name
  #   (in its original casing); the rest are returned unchanged. The result is
  #   a factor when `names` is a factor, otherwise a character vector.

  # Work on plain character vectors: assigning a value that is not already a
  # level into a factor yields NA (with an "invalid factor level" warning),
  # which would discard every successful fuzzy match.
  raw = as.character(names)
  official = as.character(officialnames)

  # amatch() is vectorised over its first argument, so one call replaces the
  # per-element loop.
  idx = amatch(raw, toupper(official), maxDist = 5)

  linked = raw
  found = !is.na(idx)
  linked[found] = official[idx[found]]

  # Hand back the same kind of vector the caller passed in.
  if (is.factor(names)) {
    linked = factor(linked)
  }
  linked
}
# Attach the linked official name to every record, then pull in the matching
# street attributes from calles_valencia.
# No `by` is given, so left_join() joins on every column name the two tables
# share.
# df$nombre_es = record.linkage.names(df$nombre_es_raw)
df = left_join(
  mutate(df, nombre_es = record.linkage.names(nombre_es_raw)),
  calles_valencia
)
# Geocoding with Photon ---------------------------------------------------
# Build one comma-separated address string per record from the street type,
# official street name, number and municipality.
df2 = mutate(
  select(df, Signatura, Municipio, tipovia_ca, nomoficial, num),
  full_address_ca = paste(tipovia_ca, nomoficial, num, Municipio, sep = ", ")
)
# Only the first six addresses are sent to the geocoder (head() default).
geocoded.df = photon::geocode(
  head(df2$full_address_ca, n = 6L),
  limit = 1,
  # lang = "es",
  key = "highway",
  locbias = c(-0.3766, 39.4665)
)
# Merge the geocoding result back into the full dataset and save it.
# right_join() keeps every row of df; lon/lat are filled only where
# `location` equals `Address2`.
# NOTE(review): `Address2` is not created anywhere in this script - confirm it
# exists in the input CSV and holds the same string that was sent to the
# geocoder (presumably `full_address_ca`); otherwise no row will match.
df.combined = right_join(
  select(geocoded.df, location, lon, lat),
  df,
  by = c("location" = "Address2")
)
write.csv(df.combined, file = "data/output/filename.csv")
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment