Commit fd7f06f7 authored by numeroteca's avatar numeroteca
Browse files

calculate histogram of sport related news in all telediarios

parent 60e4ecde
......@@ -5,6 +5,13 @@
library(jsonlite)
library(tidyverse)
subtitle_text <- "RTVE television newscasts"
el_caso <- "Master Cifuentes scandal"
caso_path <- "cifuentes"
caption_text <- "Data: verba.civio.es (Civio) and RTVE"
time_text <- "March 20 - April 30, 2018"
date_limits <- c(as.Date("2018-03-19"),as.Date("2018-04-30"))
# functions -------
# function for double axis
......@@ -261,6 +268,8 @@ ggsave("img/telediarios/all-telediarios-duration_cifuentes.png",
# When they speak plot Sports--------------
sports_query <- "deportes|Real Madrid|champions|tenis|Camp Nou|Bernabeu|Messi|Cristiano|Simeone|Atlético|fútbol|liga|baloncesto"
telediarios %>%
filter(
date > "2018-02-10"
......@@ -278,12 +287,20 @@ ggplot( ) +
geom_hline(aes(yintercept=4), size=0.8, color="#bbbbbb") +
geom_segment( aes(x = date, xend = date, y = seconds/60, yend = (seconds+4)/60), alpha = 0.2, size=3, color= "#999999") + #, color=caso
geom_segment( data = . %>% filter( grepl("deportes|Real Madrid|champions|tenis|Camp Nou|Bernabeu|Messi|Cristiano|Simeone|Atlético|fútbol|liga",text) ),
aes(x = date, xend = date, y = seconds/60, yend = (seconds+20)/60), alpha = 0.7, size=3, color= "red") + #, color=caso
geom_segment(
data = . %>%
filter( grepl(sports_query,text) ),
aes(x = date, xend = date, y = seconds/60, yend = (seconds+20)/60), alpha = 0.7, size=3, color= "darkred") + #, color=caso
# geom_text( aes(x = date,y = seconds/60, label=text), alpha = 0.7, size=3, color= "#000000") + #, color=caso
# geom_text(data = . %>% filter( grepl("deportes",text) ) , aes(x = date,y = seconds/60, label=text), alpha = 0.7, size=3, color= "red") + #, color=caso
facet_wrap( ~telediario, ncol=1) +
# anotate
geom_curve(aes(x = as.Date("2018-03-24"), y = 4, xend = as.Date("2018-03-26"), yend = 12),
color="#000000", curvature = -0.2, size = 0.35) +
annotate(geom = "text", x = as.Date("2018-03-26"), y = 12, label = "Sport news at the end of summary (4 minutes)",
family = "Roboto Condensed", hjust = 0, size=4, size=0.6) +
geom_hline(aes(yintercept=0), size=0.1) +
# anotate
......@@ -310,7 +327,7 @@ ggplot( ) +
subtitle = paste("🟥 Sports related words",subtitle_text),
x = time_text,
y = "minutos since the start",
caption = paste(caption_text, "Highlighed in red are words related to sports" ) ) +
caption = paste(caption_text, "From: 2018-02-10 to 2018-06-01. Highlighed in red are words related to sports:",sports_query ) ) +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
......@@ -318,16 +335,110 @@ ggplot( ) +
# panel.grid.major.y = element_blank(),
plot.background = element_rect(fill="white",color="white"),
axis.ticks = element_line(color="#888888")
) +
facet_wrap( ~telediario, ncol=1) +
# anotate
geom_curve(aes(x = as.Date("2018-03-24"), y = 4, xend = as.Date("2018-03-26"), yend = 12),
color="#000000", data =data, curvature = -0.2, size = 0.35) +
annotate(geom = "text", x = as.Date("2018-03-26"), y = 12, label = "Sport news at the end of summary (4 minutes)",
family = "Roboto Condensed", hjust = 0, size=4, size=0.6)
)
ggsave("img/telediarios/all-telediarios-when-they-speak-sports.png",
width=12,
height=7,
dpi=260)
# histogram ----------
telediarios %>%
filter(
date > "2018-02-10"
& date < "2018-06-01"
# date == "2018-04-01"
# & telediario =="Telediario 15h"
) %>%
filter( grepl(sports_query,text) ) %>%
mutate(
minutes= round( as.numeric(seconds)/60, 0)
) %>% group_by(telediario,minutes) %>%
summarise( count=n()) %>%
ggplot( ) +
# geom_histogram( aes()+0.5,binwidth = 1 )+
geom_col( aes(minutes-0.5,count),width = 1 )+
geom_vline( xintercept = 4, color="darkred", linetype="dashed") +
annotate(geom = "text", x = 4.5, y = 150, label = "Sport news are mendioned at the end of summary\n(3-4 minutes since beginning)",
family = "Roboto Condensed", hjust = 0, size=3, size=0.6) +
theme_minimal(base_family = "Roboto Condensed", base_size = 9) +
facet_wrap( ~telediario, ncol=1, scales="free_x") +
scale_y_continuous(
expand = c(0,0)
) +
scale_x_continuous(
expand = c(0,0),
limits = c(0,60),
breaks = c(0,3,4,10,20,30,40,50,60)
) +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_blank(),
plot.background = element_rect(fill="white",color="white"),
axis.ticks = element_line(color="#888888")
) +
labs(title = paste("When they speak about Sports... to find duration of news summary",sep = ""),
subtitle = paste("Histogram of sentences in ",subtitle_text),
x = "minutes",
y = "number of sentences",
caption = paste0("From: 2018-02-10 to 2018-06-01. Query: ",sports_query,". ",caption_text )
)
ggsave("img/telediarios/all-telediarios-when-they-speak-sports_histogram.png",
width=8,
height=4,
dpi=260)
telediarios %>%
filter(
date > "2018-02-10"
& date < "2018-06-01"
# date == "2018-04-01"
# & telediario =="Telediario 15h"
) %>%
filter( grepl(sports_query,text) ) %>%
mutate(
minutes= round( as.numeric(seconds)/60, 1)
) %>% group_by(telediario,minutes) %>%
summarise( count=n()) %>%
ggplot( ) +
# geom_histogram( aes()+0.5,binwidth = 1 )+
geom_col( aes(minutes-0.05,count),width = 0.1 )+
geom_vline( xintercept = 4, color="darkred", linetype="dashed") +
annotate(geom = "text", x = 4.9, y = 30, label = "Sport news are mendioned at the end of summary\n(3-4 minutes since beginning)",
family = "Roboto Condensed", hjust = 0, size=3, size=0.6) +
theme_minimal(base_family = "Roboto Condensed", base_size = 9) +
facet_wrap( ~telediario, ncol=1, scales="free_x") +
scale_y_continuous(
expand = c(0,0)
) +
scale_x_continuous(
expand = c(0,0),
limits = c(0,20),
breaks = c(0,1,2,3,4,5,10,15,20)
) +
theme(
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_blank(),
plot.background = element_rect(fill="white",color="white"),
axis.ticks = element_line(color="#888888")
) +
labs(title = paste("When they speak about Sports... to find duration of news summary",sep = ""),
subtitle = paste("Histogram of sentences in ",subtitle_text),
x = "minutes",
y = "number of sentences",
caption = paste0("From: 2018-02-10 to 2018-06-01. Query: ",sports_query,". ",caption_text )
)
ggsave("img/telediarios/all-telediarios-when-they-speak-sports_histogram_detail.png",
width=8,
height=4,
dpi=260)
......@@ -5,7 +5,7 @@ library(tidyverse)
# Settings ------
# subtitle_text <- "Telediarios de TVE"
subtitle_text <- "RTVE television news programs"
subtitle_text <- "RTVE television newscasts"
# el_caso <- "caso Lezo y caso Púnica"
# caso_path <- "lezo-punica"
# el_caso <- "caso Máster (Cristina Cifuentes)"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment