From 2cf2dcdd700e83c0c71180a770f82520c1851212 Mon Sep 17 00:00:00 2001 From: numeroteca Date: Wed, 3 Apr 2019 16:16:21 +0200 Subject: [PATCH] =?UTF-8?q?a=C3=B1ade=20mapas=20en=20VUT=20e=20inicia=20co?= =?UTF-8?q?mparativa=20con=20datos=20de=20Airbnb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analysis/vut-analysis.R | 145 +++++++++++++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 16 deletions(-) mode change 100644 => 100755 analysis/vut-analysis.R diff --git a/analysis/vut-analysis.R b/analysis/vut-analysis.R old mode 100644 new mode 100755 index c94f363..e2bdb30 --- a/analysis/vut-analysis.R +++ b/analysis/vut-analysis.R @@ -1,13 +1,27 @@ # script para analizar las viviendas turísticas de la comunidad Valenciana + +# Load libraries library(tidyverse) +# for maps and theme nothing +library(ggmap) +# read geojson +library(rgdal) +library(gsubfn) + +# load data ------ +vut_valenciana <- read.csv("data/original/190302_viviendas-turisticas-comunidad-valenciana.csv",stringsAsFactors = FALSE) +vut_valencia <- read.csv("data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv",stringsAsFactors = FALSE) -# load data -vut_valencia <- read_csv("data/original/190302_viviendas-turisticas-comunidad-valenciana.csv") +vut <- read_csv("data/output/190302_viviendas-turisticas-comunidad-valenciana_valencia_geocoded_barrio-distrito.csv") -vut <- read_csv("data/original/190302_viviendas-turisticas-comunidad-valenciana_valencia.csv") +# shapes +barrios <- readOGR("data/original/shapes/barrios.valencia.wgs84.geojson") +distritos <- readOGR("data/original/shapes/distritos.valencia.wgs84.geojson") +# municipios <- distritos +municipios <- readOGR("data/original/shapes/municipios.provincia.valencia.geojson") # Analisis comunidad valenciana ------------- -vut.municipio <- group_by(vut_valencia,Municipio) %>% summarise( n= n() ) %>% arrange(desc(n)) +vut.municipio <- group_by(vut_valenciana,Municipio) %>% summarise( n= n() ) %>% arrange(desc(n)) vut.municipio %>% head(25) %>% ggplot(aes(x = reorder(Municipio,n), y = n)) + @@ -23,21 +37,16 @@ vut.municipio %>% head(25) %>% x = "tlf", caption = "Datos: Comunidad Valenciana. Gráfico: lab.montera34.com/airbnb") - - - # analisis Valencia ----------------- -table(vut$Signatura) - -names(vut) <- c("signatura","municipio","provincia","addres","tlf") +# names(vut) <- c("signatura","municipio","provincia","addres","tlf","lat","lon") ggplot(data=vut)+ geom_bar(stat='identity', aes(x = signatura, y = tlf)) -ntlf<- group_by(vut,tlf) %>% summarise( n= n() ) %>% arrange(desc(n)) +ntlf<- group_by(vut,Teléfono) %>% summarise( n= n() ) %>% arrange(desc(n)) -ntlf[!is.na(ntlf$tlf),] %>% head(25) %>% -ggplot(aes(x = reorder(tlf,n), y = n)) + +ntlf[!is.na(ntlf$Teléfono),] %>% head(25) %>% +ggplot(aes(x = reorder(Teléfono,n), y = n)) + geom_col() + coord_flip() + theme_minimal(base_family = "Roboto Condensed", base_size = 14) + theme( @@ -50,8 +59,112 @@ theme_minimal(base_family = "Roboto Condensed", base_size = 14) + x = "tlf", caption = "Datos: Comunidad Valenciana. Gráfico: lab.montera34.com/airbnb") -select(vut,tlf==963356793) - -table(local_activo$room_type) +# select(vut,Teléfono=="963356793") +# +# vut[vut$tlf=="963356793",] +# table(local_activo$room_type) +# +# tlf=="963356793" + +vut$Teléfono <- as.factor(vut$Teléfono) + +# extract VUT ID -------- +# vut_valencia$registro.number <- str_extract(vut_valencia$Signatura,"[:punctuation:]?[:blank:]?-\\d{5}") +vut_valencia$registro.num <-as.character( strapplyc( vut_valencia$Signatura, ".*(\\d{5}).*", simplify = TRUE)) + +vut_valencia.airbnb <- merge(vut_valencia, datos2, by = "registro.num", type = "left") + +# agrupa por host_id de airbnb y add tlf de vut +x <- vut_valencia.airbnb %>% group_by(host_id,host_name,Teléfono) %>% summarise(count=n()) + +# Puntos en mapa --------------- +ggplot() + + geom_polygon(data = municipios, + aes(x = long, y = lat, group = group), + color = "grey", fill="white", size = 0.1) + + geom_polygon(data = barrios, + aes(x = long, y = lat, group = group), + color = "grey", fill="white", size = 0.1) + + geom_point(data= vut, + aes(x=lon, y=lat),alpha=1,size = 0.1)+ + geom_jitter(data= vut, + aes(x=lon, y=lat),alpha=0.6,size = 0.6,color="red",width = 0.0007, height = 0.0007)+ + # geom_point(data= local_activo[host_id %in% n_alojamientos[1:5,]$host_id,], + # aes(x=longitude, y=latitude, color=host_name),alpha=0.6,size = 1) + #color="blue" + coord_fixed(ratio=1.3 ) + + # coord_fixed(xlim= c(-0.4, -0.3),ylim=c(39.45,39.5),ratio=1.3 ) + + theme_nothing(legend = TRUE) + + theme_minimal(base_family = "Roboto Condensed", base_size = 12) + + theme( + panel.grid = element_blank(), + axis.title = element_blank(), + axis.text = element_blank(), + panel.background = element_rect(fill="#EEEEFF",color = "grey",size = 0.25), + legend.position = "top" + ) + + labs(title=paste("Cada punto es una vivienda turística", sep = "")) + + guides(colour = guide_legend(override.aes = list(size=3))) + +ggplot() + + geom_polygon(data = municipios, + aes(x = long, y = lat, group = group), + color = "grey", fill="white", size = 0.1) + + geom_polygon(data = barrios, + aes(x = long, y = lat, group = group), + color = "grey", fill="white", size = 0.1) + + geom_point(data= vut, + aes(x=lon, y=lat),alpha=1,size = 0.1)+ + geom_jitter(data= vut, + aes(x=lon, y=lat),alpha=0.6,size = 0.6,color="red",width = 0.0007, height = 0.0007)+ + # geom_point(data= local_activo[host_id %in% n_alojamientos[1:5,]$host_id,], + # aes(x=longitude, y=latitude, color=host_name),alpha=0.6,size = 1) + #color="blue" + coord_fixed(xlim= c(-0.4, -0.3),ylim=c(39.45,39.5),ratio=1.3 ) + + theme_nothing(legend = TRUE) + + theme_minimal(base_family = "Roboto Condensed", base_size = 12) + + theme( + panel.grid = element_blank(), + axis.title = element_blank(), + axis.text = element_blank(), + panel.background = element_rect(fill="#EEEEFF",color = "grey",size = 0.25), + legend.position = "top" + ) + + labs(title=paste("Valencia zoom. Cada punto es una vivienda turística", sep = "")) + + guides(colour = guide_legend(override.aes = list(size=3))) + +# por distrito en Valencia------------ +vut.distrito <- group_by(vut,distrito) %>% summarise( n= n() ) %>% arrange(desc(n)) + +vut.distrito %>% filter(!is.na(distrito)) %>% +ggplot(aes(x = reorder(distrito,n), y = n)) + + geom_col()+ + geom_text(data = vut.distrito %>% filter(!is.na(distrito)), + aes(label = n,y = n+3), + hjust = 0, + size=3,color="#000000") + + coord_flip() + + theme_minimal(base_family = "Roboto Condensed", base_size = 10) + + theme( + panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank(), + legend.position = "bottom" + ) + + labs(title = "Número de viviendas turísticas por distrito", + subtitle = "Valencia. Marzo 2019.", + y = "nº anuncios", + x = "tlf", + caption = "Datos: Comunidad Valenciana. Gráfico: lab.montera34.com/airbnb") +# por barrio +vut.barrio <- group_by(vut,barrio) %>% summarise( n= n() ) %>% arrange(desc(n)) +vut.barrio %>% ggplot(aes(x = reorder(barrio,n), y = n)) + + geom_col() + coord_flip() + + theme_minimal(base_family = "Roboto Condensed", base_size = 10) + + theme( + panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank(), + legend.position = "bottom" + ) + + labs(title = "Número de viviendas turísticas por barrio", + subtitle = "Valencia. Marzo 2019.", + y = "nº anuncios", + x = "tlf", + caption = "Datos: Comunidad Valenciana. Gráfico: lab.montera34.com/airbnb") -- 2.24.1