# Scrape the "viviendas turisticas" (tourist housing) listing published at
# http://comunitatvalenciana.com/viaje/alojamiento/viviendas-turisticas
# (the original author's note labels this script as "scraping airbnb").
#
# Result: `my_table`, one data frame holding the rows of every page that
# could be downloaded and parsed; `failed_pages` lists the pages skipped.
library("rvest")
library("tidyverse")

# Base URL of the paginated listing; the page number is appended to it.
adress <- "http://comunitatvalenciana.com/viaje/alojamiento/viviendas-turisticas?page="

# The listing has 3642 pages.
n_pags <- 3642

# Download one listing page and return the first <table> inside its <body>.
#
# page_url:  full URL of the page to download.
# col_names: optional character vector of column names to impose on the
#            table; when NULL the names parsed from the page are kept.
# Returns the parsed table (numbers read with "," as decimal mark).
# Stops with an error when the page contains no table, or when the table's
# column count differs from length(col_names).
read_page_table <- function(page_url, col_names = NULL) {
  tables <- read_html(page_url) %>%
    html_nodes("body") %>%
    html_nodes("table") %>%
    html_table(dec = ",")

  if (length(tables) == 0) {
    stop("no <table> found in ", page_url, call. = FALSE)
  }

  page_table <- tables[[1]]

  if (!is.null(col_names)) {
    if (ncol(page_table) != length(col_names)) {
      stop(
        "expected ", length(col_names), " columns but found ",
        ncol(page_table), " in ", page_url,
        call. = FALSE
      )
    }
    # Some pages come back with different headers (the original author hit
    # "Error in match.names(clabs, names(xi)): names do not match previous
    # names" at page 76), so the headers of page 1 are imposed on every page.
    names(page_table) <- col_names
  }

  page_table
}

# Page 1 provides the reference column names for every later page.
my_table <- read_page_table(paste0(adress, 1))
my_names <- names(my_table)

# Collect each page in a preallocated list and bind once at the end, instead
# of growing the data frame with rbind() on every iteration.
pages <- vector("list", max(n_pags, 1))
pages[[1]] <- my_table
failed_pages <- integer(0)

# seq_len(n)[-1] is empty when n_pags < 2, whereas 2:n_pags would count down.
for (ii in seq_len(n_pags)[-1]) {
  page_table <- tryCatch(
    read_page_table(paste0(adress, ii), col_names = my_names),
    error = function(e) {
      # One bad page should not abort the whole run: report it and move on.
      warning("page ", ii, " skipped: ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )

  if (is.null(page_table)) {
    failed_pages <- c(failed_pages, ii)
  } else {
    pages[[ii]] <- page_table
  }
  print(ii)
}

# Drop the slots of skipped pages and bind everything in a single call.
pages <- pages[!vapply(pages, is.null, logical(1))]
my_table <- do.call(rbind, pages)