Commit a2ca6ac6 authored by Montera34's avatar Montera34

Merge branch 'patch-3' into 'master'

Add geocoding script using Photon. (WIP)

See merge request !6
parents 71105c93 2739469e
.Rproj.user
.Rhistory
.RData
.Ruserdata
This source diff could not be displayed because it is too large. You can view the blob instead.
# require(devtools)
# devtools::install_github(repo = 'rCarto/photon')
library(photon)
library(stringdist)
library(tidyverse)
# Addresses' cleanup ------------------------------------------------------
# Load Valencia's official street names from:
# http://gobiernoabierto.valencia.es/va/dataset/?id=listado-de-calles
#
# Columns derived below:
#   tipovia_es - street-type code with "C/" shortened to "C" and "PG"
#                rewritten as "PA"
#   tipovia_ca - street-type code with "C/" expanded to "Carrer de"
#   codtipovia - street-type code with "C/" shortened to "C"
#   nombre_es  - tipovia_es and traducnooficial joined by a space, stored
#                as a factor
calles_valencia = read.csv("data/original/vias-valencia.csv") %>%
  # Both derived columns are computed from the raw code *before* codtipovia
  # itself is rewritten. Once "C/" has been shortened to "C" the "C/"
  # pattern can no longer match, and "Carrer de" would never be produced.
  mutate(tipovia_es = sub("PG", "PA", sub("C/", "C", codtipovia))) %>%
  mutate(tipovia_ca = sub("C/", "Carrer de", codtipovia)) %>%
  mutate(codtipovia = sub("C/", "C", codtipovia)) %>%
  mutate(nombre_es = paste(tipovia_es, traducnooficial, sep = " ")) %>%
  mutate(nombre_es = as.factor(nombre_es))
# Read the tourist-housing listings and break the free-text Address field
# into its parts:
#   nombre_es_raw - everything before ", nº", upper-cased and stored as a
#                   factor
#   num           - the text between ", nº" and the next ", "
#   puerta        - whatever follows that ", " (kept whole: extra = "merge")
# The original Address column is kept alongside the new ones.
df = read.csv("data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv")
df = separate(df, Address, into = c("nombre_es_raw", "num"),
              sep = ", nº", remove = FALSE, extra = "merge")
df = separate(df, num, into = c("num", "puerta"),
              sep = ", ", remove = FALSE, extra = "merge")
df = mutate(df, nombre_es_raw = as.factor(toupper(nombre_es_raw)))
# Basic Record linkage.
#
# Replace each street name with the most similar official street name.
#
# Arguments:
#   names          character vector or factor of street names to link. They
#                  are compared as given against the upper-cased official
#                  names, so callers should pass upper-case text.
#   officialnames  character vector or factor of official street names
#                  (defaults to calles_valencia$nombre_es).
#
# Returns a vector of the same length as `names`: the official spelling
# where amatch() finds a candidate within maxDist = 5, otherwise the input
# value unchanged. A factor is returned when `names` is a factor, a
# character vector otherwise.
record.linkage.names = function(names,
                                officialnames = calles_valencia$nombre_es) {
  # Work on plain character vectors. Assigning a value that is not already
  # a level into a factor yields NA, which used to wipe out every name that
  # was successfully matched.
  input.is.factor = is.factor(names)
  linked = as.character(names)
  official = as.character(officialnames)

  # amatch() is vectorised: one call matches every name, and the
  # upper-cased lookup table is built only once.
  idx = amatch(linked, toupper(official), maxDist = 5)

  # Only overwrite the entries for which a close enough official name exists.
  found = !is.na(idx)
  linked[found] = official[idx[found]]

  if (input.is.factor) {
    linked = factor(linked)
  }
  return(linked)
}
# Add nombre_es, the official street name inferred from each raw address.
df = mutate(df, nombre_es = record.linkage.names(nombre_es_raw))
# Attach the official street record. No `by` is given, so the join uses
# every column name the two tables have in common.
df = left_join(df, calles_valencia)
# Geocoding with Photon ---------------------------------------------------
# Build one query string per listing:
# "<street type>, <official name>, <number>, <municipality>".
df2 = df %>%
  select(Signatura, Municipio, tipovia_ca, nomoficial, num) %>%
  mutate(full_address_ca = paste(tipovia_ca, nomoficial, num, Municipio,
                                 sep = ", "))
# WIP: head() limits the request to the first six addresses.
# NOTE(review): locbias looks like a lon/lat pair near Valencia's centre and
# key = "highway" presumably restricts results to street features - confirm
# against the photon package documentation.
geocoded.df = photon::geocode(head(df2$full_address_ca), limit = 1,
                              # lang = "es",
                              key = "highway",
                              locbias = c(-0.3766, 39.4665))
# Combine geocoded dataframe with original one.
# The geocoder's `location` column carries the query string, i.e.
# full_address_ca. That column lives in df2 only, and no visible code
# creates the "Address2" column that was used as the join key before, so
# the query string is attached to df here. df2 is derived from df without
# reordering or dropping rows, so the two stay aligned row by row.
df.combined = geocoded.df %>%
  select(location, lon, lat) %>%
  # Several listings can share one address; keep a single geocoder row per
  # query string so the join does not multiply listings.
  distinct(location, .keep_all = TRUE) %>%
  right_join(mutate(df, full_address_ca = df2$full_address_ca),
             by = c("location" = "full_address_ca"))
write.csv(df.combined, file = "data/output/filename.csv")
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment