Commit 4b9e0dac authored by numeroteca

add full info to exported geocoded file + map and table to check accuracy

parent 905f17e4
......@@ -19,7 +19,7 @@ calles_valencia = read.csv("data/original/vias-valencia.csv") %>%
mutate(nombre_es = as.factor(nombre_es))
# Load dataset and manipulate addresses.
df = read.csv("data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv") %>%
df = read.csv("data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_por-geocodificar.csv") %>%
separate(Address, c("nombre_es_raw", "num"), extra = "merge",
sep = ", nº", remove = FALSE) %>%
separate(num, c("num", "puerta"), extra = "merge", sep = ", ", remove = FALSE) %>%
......@@ -40,7 +40,6 @@ record.linkage.names = function(names,
return(names)
}
# Derive nombre_es from nombre_es_raw via record.linkage.names(), then
# left-join the street table. No `by` is supplied, so dplyr chooses the join
# columns from the names both tables share.
df <- left_join(
  mutate(df, nombre_es = record.linkage.names(nombre_es_raw)),
  calles_valencia
)
......@@ -53,14 +52,41 @@ df2 = df %>%
mutate(full_address_ca = paste(tipovia_ca, nomoficial, num, Municipio,
sep = ", "))
# Geocode the full_address_ca strings with photon::geocode(), passing
# limit = 1, key = "highway" and a location bias of c(-0.3766, 39.4665)
# (presumably lon/lat of Valencia city -- TODO confirm).
# NOTE(review): the next two lines both open the same call. They look like
# the before/after versions of a single diff line (head() sample vs. every
# address); only one of them can remain in a runnable script -- confirm.
geocoded.df = photon::geocode(head(df2$full_address_ca), limit = 1,
geocoded.df = photon::geocode(df2$full_address_ca, limit = 1,
# lang = "es",
key = "highway",
locbias = c(-0.3766, 39.4665))
# TODO: explore whether raising `limit` to 3 makes it possible to later pick the candidate that lies in Valencia city
# geocoded.df3 = photon::geocode(head(df2$full_address_ca), limit = 3,
# # lang = "es",
# key = "highway",
# locbias = c(-0.3766, 39.4665))
# Check locations ----------
# Frequency tables of the country and city columns of the geocoded results,
# as a quick check of where the returned matches fall.
table(geocoded.df$country)
table(geocoded.df$city)
# plot results in a map
# Layers, bottom to top: `barrios` polygons (grey outline, white fill), one
# point per geocoded row at (lon, lat), and a text label taken from the `name`
# column at the same position.
# NOTE(review): `barrios` is not defined in this part of the script; it is
# assumed to be a data frame with long/lat/group columns -- confirm.
ggplot() +
# Alternative background layer (municipios polygons), currently disabled.
# geom_polygon(data = municipios,
# aes(x = long, y = lat, group = group),
# color = "grey", fill="white", size = 0.1) +
geom_polygon(data = barrios,
aes(x = long, y = lat, group = group),
color = "grey", fill="white", size = 0.1) +
geom_point(data= geocoded.df,
aes(x=lon, y=lat),alpha=1,size = 0.1) +
geom_text(data= geocoded.df,
aes(x=lon, y=lat, label=name),alpha=1,size = 0.1)
# Combine geocoded dataframe with original one.---------------
# Copy the row names of both data frames into a row.id column so that they can
# be joined on it.
# NOTE(review): joining on row.id assumes geocoded.df has exactly one row per
# row of df, in the same order. geocoded.df is built from
# df2$full_address_ca, so verify that df2 keeps the same rows and order as df.
geocoded.df$row.id <- rownames(geocoded.df)
df$row.id <- rownames(df)
# Disabled alternative: join on the address string instead of the row id.
# df.combined <- df2 %>%
# left_join(geocoded.df, by = c("full_address_ca" = "location"))
# Combine geocoded dataframe with original one.
# NOTE(review): the result of this left_join is overwritten by the full_join
# right below; it looks like the pre-change version of the statement.
df.combined = df2 %>%
left_join(geocoded.df, by = c("full_address_ca" = "location"))
# Join the original data with the geocoding results on row.id.
df.combined <- full_join(df, geocoded.df, by = "row.id")
# NOTE(review): two exports follow. The first uses a placeholder file name and
# write.csv's default row-name handling; the second uses the descriptive file
# name with row.names = FALSE. The first looks like the pre-change version --
# confirm which one should remain.
write.csv(df.combined, file = "data/output/filename.csv")
write.csv(df.combined, file = "data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_por-geocodificar_geocoded-photon2.csv", row.names = FALSE)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment