Commit 6c19033e authored by numeroteca's avatar numeroteca
Browse files

process raw verba data: detect telediario time, create string variables for plots

parent 2dd3a0f7
......@@ -5,27 +5,52 @@ library(tidyverse)
# Load data
# Every call below assigns to the same `data` object, so when these lines are
# run top to bottom only the last file read is kept.
data <- read.delim("data/verba/180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv",sep = ",")
data <- read.delim("data/verba/140104_140228_barcenas-palabras-relacionadas_01.csv",sep = ",")
data <- read.delim("data/verba/140104_140228_messi_01.csv",sep = ",")
data <- read.delim("data/verba/170301_170630_lezo-punica.csv",sep = ",")
# Preprocess data -------
# Convert the `date` column to Date class.
# NOTE(review): `date` is assigned again below from `programme_date`, which
# replaces this value; confirm which variant is meant to be active.
data$date <- as.Date(data$date)
# Variant for already preprocessed data (Cifuentes file), currently disabled:
# data$date <- as.Date(data$date)
# data$date2 <- as.POSIXct(data$date)
# Variant for raw Verba data: the date is taken from `programme_date`.
data$date <- as.Date(data$programme_date)
# POSIXct copy of the date, so second offsets can be added to it later.
data$date2 <- as.POSIXct(data$date)
# Replace the level labels of `telediario` with the two edition names.
# NOTE(review): assumes `telediario` exists and is a factor with exactly two
# levels; the two labels use different separators ("15:00h" vs "21.00h") - confirm.
levels(data$telediario) <- c("15:00h","21.00h")
# Create the `telediario_fino` variable from `programme_date`: characters 11-13
# of the timestamp string (values such as "T14", "T15", "T20", "T21", "T22";
# presumably "T" followed by the two-digit start hour - confirm on the raw data).
data$telediario_fino <- substr(data$programme_date, 11, 13)
# Frequency of each detected slot (shown when auto-printed).
table(data$telediario_fino)
# Simplify times: fold neighbouring start hours into the two main editions.
# The column vector is indexed directly because `data[rows, ]$col <- value`
# stops with "replacement has 1 row, data has 0" when no row matches, and
# `%in%` (unlike `==`) never puts NA into the index.
data$telediario_fino[data$telediario_fino %in% c("T20", "T22")] <- "T21"
data$telediario_fino[data$telediario_fino %in% "T14"] <- "T15"
# Scratch test (safe to delete)
# data$id <- as.character(data$id)
# data[data$id == "GxCr-W0BCKkHyZ2v_0lo",]$id <- "wer"
# levels(data$telediario_fino) <- c("15:00h","21.00h")
# Adds 3600 to the first `date2` value (POSIXct arithmetic is in seconds, so
# this is one hour). The result is not assigned; it is only displayed when the
# expression is auto-printed.
data$date2[1] + 3600
# Settings
# Text pieces reused by the plots. `subtitle_text` and `el_caso` are assigned
# several times; when run top to bottom only the last assignment of each is kept.
# NOTE(review): the second subtitle says 2017, while the data files covering
# 4 January - 28 February are prefixed 140104_140228 - confirm the year.
subtitle_text <- "Telediarios de TVE. 20 marzo - 29 abril 2018"
subtitle_text <- "Telediarios de TVE. 4 enero - 28 febrero 2017"
# Name of the case, pasted into the plot titles.
el_caso <- "caso Máster (Cristina Cifuentes)"
el_caso <- "caso Lezo y caso Púnica"
el_caso <- "Messi"
# Credit line for the plot caption.
caption_text <- "Datos: Verba. Gráfico: numeroteca.org"
# Plots ----------------------
# Número de frases por telediario
png(filename=paste("img/apariciones-n-telediarios-caso-master_01.png", sep = ""),width = 1200,height = 900)
ggplot(data = data ) +
geom_bar(aes( x=date2,fill=caso )) +
geom_bar(aes( x=date2 )) + # si hay clasificación por caso: fill=caso
theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
scale_x_datetime(date_breaks = "1 day", date_labels = "%d") +
labs(title = paste("Número de frases sobre caso Máster (Cristina Cifuentes)",sep = ""),
labs(title = paste("Número de frases sobre ",el_caso,sep = ""),
subtitle = subtitle_text,
x = NULL,
y = "nº de frases",
......@@ -36,11 +61,38 @@ ggplot(data = data ) +
# panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()
) +
facet_wrap( ~telediario, ncol=1)
facet_wrap( ~telediario_fino, ncol=1)
dev.off()
# When the case is mentioned within each newscast.
# Writes a 1200x900 PNG with one panel per `telediario_fino` value. Each row of
# `data` is drawn as a short vertical segment at its day (x) and at its
# `start_time` inside the programme (y, divided by 60 and labelled as minutes;
# presumably `start_time` is in seconds - confirm).
png(
  filename = "img/apariciones-cuando-telediarios-caso-master_01.png",
  width = 1200,
  height = 900
)
# The plot is printed explicitly: a ggplot object is only rendered when it is
# printed, and top-level auto-printing does not happen when this script is run
# through source(), which would leave an empty PNG.
print(
  ggplot(data = data) + # %>% filter( date> "2018-04-01" & date < "2018-04-05" )
    # Light grey band across the whole date range, from 0 to 90/60 = 1.5 on the
    # y axis (the disabled annotation below labels it "Portada (1:30 minutos)").
    # NOTE(review): the constants sit inside aes(), so the rectangle is
    # presumably drawn once per row and the very low alpha accumulates - confirm
    # before moving them out of aes().
    geom_rect(
      aes(
        xmin = min(data$date2 + 41000),
        xmax = max(data$date2 + 82800 + 40000),
        ymin = 0,
        ymax = 90 / 60
      ),
      alpha = 0.02,
      fill = "lightgrey"
    ) +
    # One segment per row, 30 units of `start_time` long. x is shifted by
    # 82800 s (23 h); the reason for this offset is not visible here.
    geom_segment(
      aes(
        x = date2 + 82800,
        xend = date2 + 82800,
        y = start_time / 60,
        yend = (start_time + 30) / 60
      ),
      alpha = 0.8,
      size = 4
    ) + # , color=caso
    geom_hline(aes(yintercept = 0), size = 0.1) +
    # Annotation (disabled):
    # geom_curve(aes(x = as.POSIXct("2018-03-24"), y = 0.5, xend = as.POSIXct("2018-03-26"), yend = 7),
    # color="#999999", data =data, curvature = -0.2, size = 0.1) +
    # annotate(geom = "text", x = as.POSIXct("2018-03-26"), y = 7, label = "Portada (1:30 minutos)",
    # family = "Roboto Condensed", hjust = 0,size=6,size=0.6) +
    theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
    scale_x_datetime(
      date_breaks = "3 day",
      date_labels = "%d",
      expand = c(0.01, 0.05)
    ) +
    labs(
      title = paste0("Cuándo hablan del ", el_caso, " en los telediarios"),
      subtitle = subtitle_text,
      x = NULL,
      y = "minutos",
      caption = caption_text
    ) +
    theme(
      panel.grid.minor.x = element_blank(),
      panel.grid.minor.y = element_blank(),
      # panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_blank()
    ) +
    facet_wrap(~telediario_fino, ncol = 1)
)
# Close the PNG device so the file is flushed to disk.
dev.off()
png(filename=paste("img/apariciones-cuando-telediarios-caso-master_01_v.png", sep = ""),width = 900,height = 1200)
ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
geom_rect(aes( xmin=min(data$date2+ 41000) , xmax=max(data$date2+ 82800+ 40000), ymin=0,ymax=90/60), alpha = 0.02, fill = "lightgrey" ) +
geom_segment( aes(x = date2 + 82800, xend = date2 + 82800, y = start_time/60, yend = (start_time+30)/60, color=caso), alpha = 0.8, size=9) +
......@@ -63,6 +115,5 @@ ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
# panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()
) +
facet_wrap( ~telediario, ncol=1)
facet_wrap( ~telediario, ncol=2) + coord_flip()
dev.off()
s
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment