Commit 42da1ccc authored by numeroteca's avatar numeroteca

process downloaded raw areas data json with R. Plot areas in front page for 1 newspaper

parents
Pipeline #31 failed with stages
# load libraries
library(rjson)
library(tidyverse)
library(gsubfn)
# Load data ----
# raw.json is expected to contain two arrays of records, `images` and `areas`.
dataorigen <- fromJSON(file = "raw.json")

# Flatten one JSON record (a list of fields) into a named atomic vector.
#
# record: a list as produced by fromJSON() for one JSON object.
# Returns the result of unlist(): nested fields get dotted names such as
# "taxonomy_values.Framing". NULL fields are replaced by NA first, because
# unlist() would otherwise drop them and shorten the vector.
flatten_record <- function(record) {
  # vapply (not sapply) so the mask is always logical, even for an empty record
  record[vapply(record, is.null, logical(1))] <- NA
  unlist(record)
}

# Stack flattened records into a data frame, one row per record.
#
# rows: list of vectors returned by flatten_record().
# Returns a data frame built through rbind(); when every row carries unique
# field names, rows are first aligned on the union of those names so that a
# record lacking a field gets NA instead of having rbind() recycle its values
# into the wrong columns.
bind_records <- function(rows) {
  alignable <- vapply(
    rows,
    function(row) {
      fields <- names(row)
      !is.null(fields) && anyDuplicated(fields) == 0L
    },
    logical(1)
  )
  if (length(rows) > 0 && all(alignable)) {
    all_fields <- unique(unlist(lapply(rows, names)))
    rows <- lapply(rows, function(row) {
      aligned <- row[all_fields]
      names(aligned) <- all_fields
      aligned
    })
  }
  # stringsAsFactors = FALSE keeps text columns as character on R < 4.0 too
  # (it is already the default from R 4.0 on).
  as.data.frame(do.call("rbind", rows), stringsAsFactors = FALSE)
}

# process data
# One row per front-page image
images <- lapply(dataorigen$images, flatten_record)
pox_img <- bind_records(images)

# One row per coded area
areas <- lapply(dataorigen$areas, flatten_record)
pox <- bind_records(areas)
# images list
# Create width and height of images.
# image_size is a "<width>x<height>" string such as "750x1114". Values that do
# not contain that pattern (including NA) yield NA for w, h and img_area
# instead of aborting the script.
image_size <- as.character(pox_img$image_size)
size_ok <- grepl("[0-9]+x[0-9]+", image_size)
# Lazy prefix so the first "<digits>x<digits>" occurrence is the one captured
size_pattern <- "^.*?([0-9]+)x([0-9]+).*$"
pox_img$w <- as.numeric(
  ifelse(size_ok, sub(size_pattern, "\\1", image_size, perl = TRUE), NA)
)
pox_img$h <- as.numeric(
  ifelse(size_ok, sub(size_pattern, "\\2", image_size, perl = TRUE), NA)
)
# Image area in square pixels, used later as the denominator for area share
pox_img$img_area <- pox_img$w * pox_img$h
head(pox)
head(pox_img)
# Convert coordinate and size columns to integer.
# as.character() comes first so that factor columns (the default for text on
# R < 4.0) are converted by label rather than by internal level code.
int_cols <- c(
  "area_x1", "area_x2", "area_y1", "area_y2", "area_height", "area_width"
)
pox[int_cols] <- lapply(
  pox[int_cols],
  function(column) as.integer(as.character(column))
)

# Add size of front page image to df.
# Join on both date and newspaper: joining on publication_date alone would
# duplicate area rows (and attach the wrong img_area) as soon as the data
# holds more than one newspaper for the same day.
pox <- left_join(
  pox,
  select(pox_img, publication_date, media_name, img_area),
  by = c("publication_date", "media_name")
)

# Area of each coded region, from its stored width and height
pox$area <- pox$area_height * pox$area_width

# Share of the front page covered by the region, in percent (2 decimals)
pox$area_p <- round(pox$area / pox$img_area * 100, digits = 2)

# Recompute width and height from the corner coordinates, so they can be
# compared against area_width / area_height to validate the data
pox$width <- pox$area_x2 - pox$area_x1
pox$height <- pox$area_y2 - pox$area_y1
# Draw every coded area as a translucent rectangle filled by its code, with
# one panel per publication date laid out in a single row. The y axis is
# reversed (presumably so that y = 0 sits at the top, as in image coordinates).
#
# Optional layers kept for reference:
#   xlim(0, 1200) + ylim(0, 1200)
#   annotate("rect", xmin = 5, xmax = 500, ymin = 900, ymax = 100)
#
# TODO: area_y2 cannot be used because it holds wrong data, so the bottom edge
# is computed as area_y1 + area_height.
ggplot(
  data = pox,
  mapping = aes(
    xmin = area_x1,
    xmax = area_x2,
    ymin = area_y1,
    ymax = area_y1 + area_height,
    fill = code_text
  )
) +
  geom_rect(alpha = 0.5) +
  scale_y_reverse() +
  facet_wrap(~ publication_date, nrow = 1)
{"areas":[{"areas_id":77421,"user_name":"numeroteca","code_text":"Ciudadanos","publication_date":"2019-04-22","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":569,"area_x2":139,"area_y2":874,"area_width":139,"area_height":305,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77422,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-22","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":215,"area_x2":298,"area_y2":922,"area_width":298,"area_height":351,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77423,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-22","media_name":"El País","media_country":"Spain","area_x1":139,"area_y1":567,"area_x2":299,"area_y2":1056,"area_width":160,"area_height":303,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77424,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-23","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":216,"area_x2":599,"area_y2":875,"area_width":599,"area_height":659,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77425,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-23","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":875,"area_x2":152,"area_y2":1114,"area_width":152,"area_height":239,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77426,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-24","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":216,"area_x2":600,"area_y2":874,"area_width":600,"area_height":658,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77427,"user_name":"numeroteca","code_text":"Ciudadanos","publication_date":"2019-04-25","media_name":"El 
País","media_country":"Spain","area_x1":0,"area_y1":216,"area_x2":448,"area_y2":1532,"area_width":448,"area_height":658,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77428,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-26","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":216,"area_x2":299,"area_y2":570,"area_width":299,"area_height":354,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77429,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-26","media_name":"El País","media_country":"Spain","area_x1":138,"area_y1":570,"area_x2":301,"area_y2":872,"area_width":163,"area_height":302,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77430,"user_name":"numeroteca","code_text":"PSOE","publication_date":"2019-04-26","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":570,"area_x2":136,"area_y2":1149,"area_width":136,"area_height":302,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77431,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-27","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":217,"area_x2":599,"area_y2":396,"area_width":599,"area_height":179,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77432,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-27","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":396,"area_x2":147,"area_y2":794,"area_width":147,"area_height":398,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77433,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-28","media_name":"El País","media_country":"Spain","area_x1":0,"area_y1":215,"area_x2":598,"area_y2":612,"area_width":598,"area_height":397,"taxonomy_values":{"Framing":"Neutral"}},{"areas_id":77434,"user_name":"numeroteca","code_text":"Varios","publication_date":"2019-04-28","media_name":"El 
País","media_country":"Spain","area_x1":142,"area_y1":613,"area_x2":597,"area_y2":1133,"area_width":455,"area_height":258,"taxonomy_values":{"Framing":"Neutral"}}],"images":[{"publication_date":"2019-04-22","media_name":"El País","media_country":"Spain","image_size":"750x1114","source_url":"http://img.kiosko.net/2019/04/22/es/elpais.750.jpg","missing":false},{"publication_date":"2019-04-23","media_name":"El País","media_country":"Spain","image_size":"750x1114","source_url":"http://img.kiosko.net/2019/04/23/es/elpais.750.jpg","missing":false},{"publication_date":"2019-04-24","media_name":"El País","media_country":"Spain","image_size":"750x1114","source_url":"http://img.kiosko.net/2019/04/24/es/elpais.750.jpg","missing":false},{"publication_date":"2019-04-25","media_name":"El País","media_country":"Spain","image_size":"750x1114","source_url":"http://img.kiosko.net/2019/04/25/es/elpais.750.jpg","missing":false},{"publication_date":"2019-04-26","media_name":"El País","media_country":"Spain","image_size":"750x1114","source_url":"http://img.kiosko.net/2019/04/26/es/elpais.750.jpg","missing":false},{"publication_date":"2019-04-27","media_name":"El País","media_country":"Spain","image_size":"750x1114","source_url":"http://img.kiosko.net/2019/04/27/es/elpais.750.jpg","missing":false},{"publication_date":"2019-04-28","media_name":"El País","media_country":"Spain","image_size":"750x1114","source_url":"http://img.kiosko.net/2019/04/28/es/elpais.750.jpg","missing":false}]}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment