Commit da225d3a authored by numeroteca's avatar numeroteca

geocoded VUT Valencia with photon R script with streets without number of 'portal'

parent ccf048db
......@@ -9,13 +9,15 @@ library(tidyverse)
# Load Valencia's official street names from:
# http://gobiernoabierto.valencia.es/va/dataset/?id=listado-de-calles
# Creates variables to store type of highway in Spanish and Catalan
calles_valencia = read.csv("data/original/vias-valencia.csv") %>%
mutate(tipovia_es = codtipovia) %>%
mutate(tipovia_es = sub("AV", "Avenida", tipovia_es)) %>%
mutate(tipovia_es = sub("C/", "Calle", tipovia_es)) %>%
mutate(tipovia_es = sub("GV", "Gran Vía", tipovia_es)) %>%
mutate(tipovia_es = sub("PG", "Paseo", tipovia_es)) %>%
mutate(tipovia_ca = sub("PL", "Plaza", tipovia_ca)) %>%
mutate(tipovia_ca = sub("PTGE", "Pasaje", tipovia_ca)) %>%
mutate(tipovia_es = sub("PL", "Plaza", tipovia_es)) %>%
mutate(tipovia_es = sub("PTGE", "Pasaje", tipovia_es)) %>%
mutate(tipovia_ca = codtipovia) %>%
mutate(tipovia_ca = sub("AV", "Avinguda", tipovia_ca)) %>%
mutate(tipovia_ca = sub("C/", "Carrer", tipovia_ca)) %>%
......@@ -34,13 +36,18 @@ calles_valencia = read.csv("data/original/vias-valencia.csv") %>%
mutate(nombre_ca_full = as.factor(toupper(nombre_ca_full)))
# Load dataset and manipulate addresses.
df = read.csv("data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_por-geocodificar.csv") %>%
# We only use the street name, as there are not many street numbers in the Valencia OSM data
# df = read.csv("data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_por-geocodificar.csv") %>%
df = read.csv("data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv") %>%
# df = read.csv("data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_por-geocodificar_geocoded-photon2_por-geocodificar.csv") %>%
# select(Signatura,Municipio,Provincia,Address,Teléfono) %>%
separate(Address, c("nombre_es_raw", "num"), extra = "merge",
sep = ", nº", remove = FALSE) %>%
separate(num, c("num", "puerta"), extra = "merge", sep = ", ", remove = FALSE) %>%
mutate(nombre_es_raw = as.factor(toupper(nombre_es_raw)))
# Basic Record linkage.
# Basic Record linkage
# Create function
record.linkage.names = function(names,
officialnames = calles_valencia$nombre_es) {
# Matches a street's name with the most similar official one.
......@@ -75,11 +82,15 @@ df = df %>%
# Build a new dataframe with desired information.
df2 = df %>%
select(Signatura, Municipio, tipovia_ca, nomoficial, num) %>%
select(Address, Signatura, Municipio, tipovia_ca, nomoficial, num) %>%
mutate(full_address_ca = paste(tipovia_ca, nomoficial, Municipio, "Spain",
sep = " ")) %>%
filter(!is.na(nomoficial))
# Add a full_address_ca column to the main data frame: street type (Catalan),
# official street name, municipality and the literal "Spain", joined by single
# spaces. No street number is included in this string.
df <- df %>%
mutate(full_address_ca = paste(tipovia_ca, nomoficial, Municipio, "Spain",
sep = " "))
geocoded.df = photon::geocode(unique(df2$full_address_ca), limit = 1,
# lang = "es",
key = "highway",
......@@ -109,12 +120,17 @@ ggplot() +
aes(x=lon, y=lat, label=name),alpha=1,size = 0.1)
# Combine geocoded dataframe with original one.---------------
geocoded.df$row.id <- rownames(geocoded.df)
df$row.id <- rownames(df)
# geocoded.df$row.id <- rownames(geocoded.df)
# df$row.id <- rownames(df)
# df.combined <- full_join(df, geocoded.df, by = "row.id")
# df.combined <- df2 %>%
# left_join(geocoded.df, by = c("full_address_ca" = "location"))
# add coordinates of streets to VUT
df.combined <- left_join(df,geocoded.df, by = c("full_address_ca" = "location"))
df.combined <- full_join(df, geocoded.df, by = "row.id")
# Check locations ----------
# Tabulate the combined rows by their country and city columns.
table(df.combined$country)
table(df.combined$city)
# Count rows whose city is "Valencia" and rows whose city is anything else.
# filter() drops rows where the condition evaluates to NA, so rows with a
# missing city are included in neither count.
nrow(df.combined %>% filter(city == "Valencia"))
nrow(df.combined %>% filter(!city == "Valencia"))
write.csv(df.combined, file = "data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_por-geocodificar_geocoded-photon2.csv", row.names = FALSE)
\ No newline at end of file
write.csv(df.combined, file = "data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_geocoded-photon3.csv", row.names = FALSE)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment