Commit 15e69e15 authored by numeroteca's avatar numeroteca
Browse files

Fine-tune Cifuentes charts

parent 2809a65f
......@@ -4,15 +4,42 @@
library(tidyverse)
# Settings ------
subtitle_text <- "Telediarios de TVE"
# subtitle_text <- "Telediarios de TVE"
subtitle_text <- "RTVE television news programs"
# el_caso <- "caso Lezo y caso Púnica"
# caso_path <- "lezo-punica"
# el_caso <- "caso Máster (Cristina Cifuentes)"
el_caso <- "Master Cifuentes scandal"
caso_path <- "cifuentes"
# el_caso <- "Messi"
caption_text <- "Datos: verba.civio.es (Civio). Gráfico: numeroteca.org"
# caption_text <- "Datos: verba.civio.es (Civio). Gráfico: numeroteca.org"
caption_text <- "Data: verba.civio.es (Civio) and RTVE"
time_text <- "March 20 - April 30, 2018"
date_limits <- c(as.Date("2018-03-19"),as.Date("2018-04-30"))
# functions -------
# function for double axis
# via https://dmitrijskass.netlify.app/2019/06/30/multi-level-labels-with-ggplot2/
# Set locale to english with
# Sys.setlocale("LC_TIME", "C")
# Build two-level axis labels for a vector of dates: month number, plus the
# year on a second line wherever the year changes.
#
# x: vector of dates (e.g. the breaks ggplot2 hands to a `labels =` function).
# Returns a character vector the same length as `x`. Each element is the
# zero-padded month number ("%m"); the first element, and every element whose
# year differs from the previous element's, is "<month>\n<year>".
#
# Only base R is used, so the result does not depend on which `lag()` is on the
# search path (stats::lag() does not shift the values of a plain vector).
format_dates <- function(x) {
  months <- strftime(x, format = "%m") # Month as a zero-padded number (01-12).
  years <- strftime(x, format = "%Y")  # Year as a 4-digit number.

  # Year of the previous element; NA for the first element.
  prev_years <- c(NA_character_, years)[seq_along(years)]

  # Positions that start a new year. which() drops NA comparisons, which only
  # arise where x itself is NA, so those labels stay NA.
  starts_year <- which(is.na(prev_years) | prev_years != years)

  labels <- months
  labels[starts_year] <- paste(months[starts_year], years[starts_year], sep = "\n")
  labels
}
# Build two-level axis labels for a vector of dates: day of month, plus the
# abbreviated month name on a second line wherever the month changes.
#
# x: vector of dates (e.g. the breaks ggplot2 hands to a `labels =` function).
# Returns a character vector the same length as `x`. Each element is the
# zero-padded day ("%d"); the first element, and every element whose month
# differs from the previous element's, is "<day>\n<month abbreviation>".
#
# The month abbreviation ("%b") follows the LC_TIME locale; see the
# Sys.setlocale() hint above to force English names.
# Only base R is used, so the result does not depend on which `lag()` is on the
# search path (stats::lag() does not shift the values of a plain vector).
format_dates_days <- function(x) {
  days <- strftime(x, format = "%d")   # Day of the month, zero-padded (01-31).
  months <- strftime(x, format = "%b") # Abbreviated month name (locale-dependent).

  # Month of the previous element; NA for the first element.
  prev_months <- c(NA_character_, months)[seq_along(months)]

  # Positions that start a new month. which() drops NA comparisons, which only
  # arise where x itself is NA, so those labels stay NA.
  starts_month <- which(is.na(prev_months) | prev_months != months)

  labels <- days
  labels[starts_month] <- paste(days[starts_month], months[starts_month], sep = "\n")
  labels
}
# Load data via API
# De normal la app llama a la API que hace el trabajo y recibe JSON de vuelta:
......@@ -39,9 +66,12 @@ if (file.exists( paste0("img/",el_caso) )){
}
# Load data ---------------
data <- read.delim( paste0("https://verba.civio.es/api/search.csv?q=", search,"&size=20000"),sep = ",")
# load online
data <- read.delim( paste0("https://verba.civio.es/api/search.csv?q=", search,"&size=10000"),sep = ",")
# data <- read.delim("data/verba/180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv",sep = ",")
data <- read.delim("data/verba/180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv",sep = ",")
# data <- read.delim("data/verba/180320_180430_cifuentes-y-palabras-relacionadas.csv",sep = ",")
# data <- read.delim("data/verba/180320_180430_cifuentes.csv",sep = ",")
# data <- read.delim("data/verba/140104_140228_barcenas-palabras-relacionadas_01.csv",sep = ",")
# data <- read.delim("data/verba/140104_140228_messi_01.csv",sep = ",")
# data <- read.delim("data/verba/170301_170630_lezo-punica.csv",sep = ",")
......@@ -67,41 +97,59 @@ data <- data %>% mutate(
telediario = ifelse( telediario == "T21", "Telediario 21h",telediario),
) %>% filter ( telediario != "T08" & telediario != "T07")
# Threshold, in minutes, that defines the opening stretch of a broadcast.
set_minutes <- 3
# Flag each row "yes"/"no" depending on whether start_time / 60 falls below
# the threshold (start_time is presumably in seconds -- confirm against the data).
data <- mutate(
  data,
  first_minutes = ifelse(start_time / 60 < set_minutes, "yes", "no")
)
# Plots ----------------------
# Número de frases por telediario
# 1. Número de frases por telediario ------
png(filename = paste0("img/",caso_path ,"/apariciones-n-telediarios-",caso_path,"_01.png"), width = 1200,height = 700)
ggplot(data = data ) +
geom_bar(aes( x=date2 )) + # si hay clasificación por caso: fill=caso
data %>%
ggplot() +
geom_bar(aes( x=date, fill= first_minutes )) + # si hay clasificación por caso: fill=caso
scale_fill_manual( values= c("#888888","#000000") ) +
theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
# scale_x_datetime(date_breaks = "1 day", date_labels = "%d") +
scale_x_datetime(date_breaks = "1 week",
date_labels = "%d/%m",
#secondary axis to add months
sec.axis = sec_axis(~ .,
labels = scales::time_format("%b/%y"))
) +
labs(title = paste("Número de frases sobre ",el_caso,sep = ""),
scale_x_date(
date_breaks = "2 day",
minor_breaks = "1 day",
expand = c(0,0),
limits = date_limits,
labels = format_dates_days
#secondary axis to add months
# sec.axis = sec_axis(~ .,
# labels = scales::time_format("%b/%y"))
) +
# labs(title = paste("Número de frases sobre ",el_caso,sep = ""),
labs(title = paste("Number of sentences about ",el_caso,sep = ""),
subtitle = subtitle_text,
x = NULL,
y = "nº de frases",
y = "number of sentences",
fill ="in first minutes?",
caption = caption_text) +
theme(
legend.position = "top",
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
axis.ticks.x = element_line()
# panel.grid.major.y = element_blank(),
axis.ticks = element_line(color="#888888")
) +
facet_wrap( ~telediario, ncol=1)
dev.off()
# Cuando hablan del caso dentro del Telediario
png(filename=paste0("img/",caso_path ,"/apariciones-cuando-telediarios_",caso_path,"_01.png"),width = 1200,height = 900)
# 2. Cuando hablan del caso dentro del Telediario ----------
png(filename=paste0("img/",caso_path ,"/apariciones-cuando-telediarios_",caso_path,"_01.png"),width = 1200,height = 800)
ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
# primer minuto y medio (90 segundos)
geom_rect(aes( xmin=min(data$date2+ 41000) , xmax=max(data$date2+ 82800+ 40000), ymin=0,ymax=120/60), alpha = 0.02, fill = "lightgrey" ) +
geom_text( data = data %>% top_n(1,date), aes (min(data$date2+500000), 2, label="primeros 2 minutos"),base_family = "Roboto Condensed") +
geom_segment( aes(x = date2 + 82800, xend = date2 + 82800, y = start_time/60, yend = (start_time+30)/60), alpha = 0.8, size=12) + #, color=caso
geom_rect(aes( xmin=min(data$date), xmax=max(data$date),
ymin=0,ymax=set_minutes*60/60), alpha = 0.2, fill = "lightgrey"
) +
annotate("text", x= min(data$date)+5, y= set_minutes/2, label=paste("First minutes",set_minutes), family = "Roboto Condensed") +
geom_segment( aes(x = date, xend = date, y = start_time/60, yend = (start_time+30)/60), alpha = 0.8, size=12) + #, color=caso
geom_hline(aes(yintercept=0), size=0.1) +
# anotate
# geom_curve(aes(x = as.POSIXct("2018-03-24"), y = 0.5, xend = as.POSIXct("2018-03-26"), yend = 7),
......@@ -109,65 +157,96 @@ ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
# annotate(geom = "text", x = as.POSIXct("2018-03-26"), y = 7, label = "Portada (1:30 minutos)",
# family = "Roboto Condensed", hjust = 0,size=6,size=0.6) +
theme_minimal(base_family = "Roboto Condensed", base_size = 18) +
scale_x_datetime(date_breaks = "1 day", date_labels = "%d", expand= c(0.01,0.05),
#secondary axis to add months
sec.axis = sec_axis(~ .,
# date_breaks = "1 month",
labels = scales::time_format("%b/%y"))) +
labs(title = paste("Cuándo hablan del ",el_caso, " en los telediarios",sep = ""),
scale_x_date(
date_breaks = "2 day",
minor_breaks = "1 day",
expand = c(0,0),
limits = date_limits,
labels = format_dates_days
#secondary axis to add months
# sec.axis = sec_axis(~ .,
# labels = scales::time_format("%b/%y"))
) +
labs(title = paste("When they speak about ",el_caso, " in the news cast ",sep = ""),
subtitle = paste(subtitle_text),
x = NULL,
y = "minutos desde el inicio",
y = "minutos since the start",
caption = caption_text) +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()
# panel.grid.major.y = element_blank(),
axis.ticks = element_line(color="#888888")
) +
facet_wrap( ~telediario, ncol=1)
dev.off()
# si existe clasificación manual de las noticias -------------------
# Número de frases por telediario
png(filename=paste0("img/",caso_path ,"/apariciones-n-telediarios-",caso_path,"_clasificado.png"),width = 1200,height = 700)
ggplot(data = data ) +
geom_bar(aes( x=date2, fill=caso )) + # si hay clasificación por caso: fill=caso
# Translate the manual classification labels in `caso` from Spanish to English,
# one label per ifelse() step; each step rewrites only the rows equal to the
# Spanish label and passes every other row through unchanged.
# NOTE(review): assumes `caso` is a character column -- on a factor, the
# pass-through branch of ifelse() would yield integer codes; confirm.
data <- data %>% mutate(
caso = ifelse( caso == "dimision", "resignation",caso),
caso = ifelse( caso == "duda", "doubt",caso),
caso = ifelse( caso == "otro", "other",caso),
caso = ifelse( caso == "robo", "robbery video scandal",caso),
caso = ifelse( caso == "master", "master scandal",caso)
)
# Manual palette used by scale_fill_manual() / scale_color_manual() in the
# classified plots below (five colours, one per translated label above).
mycolors <- c( "#999999","#991122","#cc9999","#559933","#666699" )
# 3. Número de frases por telediario: clasificadas ---------
png(filename=paste0("img/",caso_path ,"/apariciones-n-telediarios-",caso_path,"_clasificado_02.png"),width = 1200,height = 700)
data %>% filter(
# caso == "other"
) %>% group_by(date, telediario, caso) %>% summarise(
count = n()
) %>%
ggplot( ) +
geom_col(aes( x=date, y=count, fill=caso )) + # si hay clasificación por caso: fill=caso
theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
# scale_x_datetime(date_breaks = "1 day", date_labels = "%d") +
scale_x_datetime(date_breaks = "1 week",
date_labels = "%d/%m",
#secondary axis to add months
sec.axis = sec_axis(~ .,
labels = scales::time_format("%b/%y"))
scale_x_date(
date_breaks = "2 day",
minor_breaks = "1 day",
expand = c(0,0),
limits = date_limits,
labels = format_dates_days
#secondary axis to add months
# sec.axis = sec_axis(~ .,
# labels = scales::time_format("%b/%y"))
) +
labs(title = paste("Número de frases sobre ",el_caso,sep = ""),
scale_fill_manual(values=mycolors) +
# labs(title = paste("Número de frases sobre ",el_caso,sep = ""),
labs(title = paste("Number of sentences about ",el_caso,sep = ""),
subtitle = subtitle_text,
x = NULL,
y = "nº de frases",
# y = "nº de frases",
y = "number of sentences",
fill = "",
caption = caption_text) +
theme(
legend.position = "top",
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
axis.ticks.x = element_line(),
legend.position = "top"
# panel.grid.major.y = element_blank(),
axis.ticks = element_line(color="#888888")
) +
facet_wrap( ~telediario, ncol=1)
dev.off()
# Solamente para Cifuentes, porque tiene clasificadas las noticias --------------------
png(filename=paste("img/apariciones-cuando-telediarios-caso-master_01.png", sep = ""),width = 1200,height = 900)
png(filename=paste("img/",caso_path,"/apariciones-cuando-telediarios-caso-master_02.png", sep = ""),width = 1200,height = 800)
ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
geom_rect(aes( xmin=min(data$date2+ 41000) , xmax=max(data$date2+ 82800+ 40000), ymin=0,ymax=120/60), alpha = 0.02, fill = "lightgrey" ) +
geom_segment( aes(x = date2 + 82800, xend = date2 + 82800, y = start_time/60, yend = (start_time+30)/60, color=caso), alpha = 0.8, size=10) +
geom_rect(aes( xmin=min(data$date2+ 41000) , xmax=max(data$date2+ 82800+ 40000), ymin=0,ymax=180/60), alpha = 0.02, fill = "lightgrey" ) +
geom_segment( aes(x = date2 + 82800, xend = date2 + 82800, y = start_time/60, yend = (start_time+30)/60, color=caso),
alpha = 0.8, size=10) +
scale_color_manual(values=mycolors) +
geom_hline(aes(yintercept=0), size=0.1) +
# anotate
geom_curve(aes(x = as.POSIXct("2018-03-24"), y = 0.5, xend = as.POSIXct("2018-03-26"), yend = 7),
color="#999999", data =data, curvature = -0.2, size = 0.1) +
annotate(geom = "text", x = as.POSIXct("2018-03-26"), y = 7, label = "Portada (2 minutos)",
annotate(geom = "text", x = as.POSIXct("2018-03-26"), y = 7, label = "Summary (3 minutes)",
family = "Roboto Condensed", hjust = 0,size=6,size=0.6) +
theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
scale_x_datetime(date_breaks = "1 day", date_labels = "%d", expand= c(0.01,0.05),
......@@ -175,21 +254,22 @@ ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
sec.axis = sec_axis(~ .,
# date_breaks = "1 month",
labels = scales::time_format("%b/%y"))) +
labs(title = paste("Cuándo hablan del caso Máster (Cristina Cifuentes) en los telediarios",sep = ""),
labs(title = paste("When they speak about ",el_caso,sep = ""),
subtitle = paste(subtitle_text),
x = NULL,
y = "minutos",
x = time_text,
y = "minutes",
caption = caption_text) +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()
panel.grid.major.y = element_blank(),
legend.position = "top"
) +
facet_wrap( ~telediario, ncol=1)
dev.off()
png(filename=paste("img/apariciones-cuando-telediarios-caso-master_01_v.png", sep = ""),width = 900,height = 1200)
png(filename=paste("img/",caso_path,"/apariciones-cuando-telediarios-caso-master_01_v.png", sep = ""),width = 900,height = 1200)
ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
geom_rect(aes( xmin=min(data$date2+ 41000) , xmax=max(data$date2+ 82800+ 40000), ymin=0,ymax=120/60), alpha = 0.02, fill = "lightgrey" ) +
geom_segment( aes(x = date2 + 82800, xend = date2 + 82800, y = start_time/60, yend = (start_time+30)/60, color=caso), alpha = 0.8, size=10) +
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment