Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
numeroteca
VerbaR
Commits
fd7f06f7
Commit
fd7f06f7
authored
Jun 01, 2022
by
numeroteca
Browse files
calculate histogram of sport related news in all telediarios
parent
60e4ecde
Changes
5
Hide whitespace changes
Inline
Side-by-side
analysis/all_telediarios.R
View file @
fd7f06f7
...
...
@@ -5,6 +5,13 @@
library
(
jsonlite
)
library
(
tidyverse
)
subtitle_text
<-
"RTVE television newscasts"
el_caso
<-
"Master Cifuentes scandal"
caso_path
<-
"cifuentes"
caption_text
<-
"Data: verba.civio.es (Civio) and RTVE"
time_text
<-
"March 20 - April 30, 2018"
date_limits
<-
c
(
as.Date
(
"2018-03-19"
),
as.Date
(
"2018-04-30"
))
# functions -------
# function for double axis
...
...
@@ -261,6 +268,8 @@ ggsave("img/telediarios/all-telediarios-duration_cifuentes.png",
# When they speak plot Sports--------------
sports_query
<-
"deportes|Real Madrid|champions|tenis|Camp Nou|Bernabeu|Messi|Cristiano|Simeone|Atlético|fútbol|liga|baloncesto"
telediarios
%>%
filter
(
date
>
"2018-02-10"
...
...
@@ -278,12 +287,20 @@ ggplot( ) +
geom_hline
(
aes
(
yintercept
=
4
),
size
=
0.8
,
color
=
"#bbbbbb"
)
+
geom_segment
(
aes
(
x
=
date
,
xend
=
date
,
y
=
seconds
/
60
,
yend
=
(
seconds
+4
)
/
60
),
alpha
=
0.2
,
size
=
3
,
color
=
"#999999"
)
+
#, color=caso
geom_segment
(
data
=
.
%>%
filter
(
grepl
(
"deportes|Real Madrid|champions|tenis|Camp Nou|Bernabeu|Messi|Cristiano|Simeone|Atlético|fútbol|liga"
,
text
)
),
aes
(
x
=
date
,
xend
=
date
,
y
=
seconds
/
60
,
yend
=
(
seconds
+20
)
/
60
),
alpha
=
0.7
,
size
=
3
,
color
=
"red"
)
+
#, color=caso
geom_segment
(
data
=
.
%>%
filter
(
grepl
(
sports_query
,
text
)
),
aes
(
x
=
date
,
xend
=
date
,
y
=
seconds
/
60
,
yend
=
(
seconds
+20
)
/
60
),
alpha
=
0.7
,
size
=
3
,
color
=
"darkred"
)
+
#, color=caso
# geom_text( aes(x = date,y = seconds/60, label=text), alpha = 0.7, size=3, color= "#000000") + #, color=caso
# geom_text(data = . %>% filter( grepl("deportes",text) ) , aes(x = date,y = seconds/60, label=text), alpha = 0.7, size=3, color= "red") + #, color=caso
facet_wrap
(
~
telediario
,
ncol
=
1
)
+
# anotate
geom_curve
(
aes
(
x
=
as.Date
(
"2018-03-24"
),
y
=
4
,
xend
=
as.Date
(
"2018-03-26"
),
yend
=
12
),
color
=
"#000000"
,
curvature
=
-0.2
,
size
=
0.35
)
+
annotate
(
geom
=
"text"
,
x
=
as.Date
(
"2018-03-26"
),
y
=
12
,
label
=
"Sport news at the end of summary (4 minutes)"
,
family
=
"Roboto Condensed"
,
hjust
=
0
,
size
=
4
,
size
=
0.6
)
+
geom_hline
(
aes
(
yintercept
=
0
),
size
=
0.1
)
+
# anotate
...
...
@@ -310,7 +327,7 @@ ggplot( ) +
subtitle
=
paste
(
"🟥 Sports related words"
,
subtitle_text
),
x
=
time_text
,
y
=
"minutos since the start"
,
caption
=
paste
(
caption_text
,
"Highlighed in red are words related to sports
"
)
)
+
caption
=
paste
(
caption_text
,
"
From: 2018-02-10 to 2018-06-01.
Highlighed in red are words related to sports
:"
,
sports_query
)
)
+
theme
(
panel.grid.minor.x
=
element_blank
(),
panel.grid.minor.y
=
element_blank
(),
...
...
@@ -318,16 +335,110 @@ ggplot( ) +
# panel.grid.major.y = element_blank(),
plot.background
=
element_rect
(
fill
=
"white"
,
color
=
"white"
),
axis.ticks
=
element_line
(
color
=
"#888888"
)
)
+
facet_wrap
(
~
telediario
,
ncol
=
1
)
+
# anotate
geom_curve
(
aes
(
x
=
as.Date
(
"2018-03-24"
),
y
=
4
,
xend
=
as.Date
(
"2018-03-26"
),
yend
=
12
),
color
=
"#000000"
,
data
=
data
,
curvature
=
-0.2
,
size
=
0.35
)
+
annotate
(
geom
=
"text"
,
x
=
as.Date
(
"2018-03-26"
),
y
=
12
,
label
=
"Sport news at the end of summary (4 minutes)"
,
family
=
"Roboto Condensed"
,
hjust
=
0
,
size
=
4
,
size
=
0.6
)
)
ggsave
(
"img/telediarios/all-telediarios-when-they-speak-sports.png"
,
width
=
12
,
height
=
7
,
dpi
=
260
)
# histogram ----------
telediarios
%>%
filter
(
date
>
"2018-02-10"
&
date
<
"2018-06-01"
# date == "2018-04-01"
# & telediario =="Telediario 15h"
)
%>%
filter
(
grepl
(
sports_query
,
text
)
)
%>%
mutate
(
minutes
=
round
(
as.numeric
(
seconds
)
/
60
,
0
)
)
%>%
group_by
(
telediario
,
minutes
)
%>%
summarise
(
count
=
n
())
%>%
ggplot
(
)
+
# geom_histogram( aes()+0.5,binwidth = 1 )+
geom_col
(
aes
(
minutes
-0.5
,
count
),
width
=
1
)
+
geom_vline
(
xintercept
=
4
,
color
=
"darkred"
,
linetype
=
"dashed"
)
+
annotate
(
geom
=
"text"
,
x
=
4.5
,
y
=
150
,
label
=
"Sport news are mendioned at the end of summary\n(3-4 minutes since beginning)"
,
family
=
"Roboto Condensed"
,
hjust
=
0
,
size
=
3
,
size
=
0.6
)
+
theme_minimal
(
base_family
=
"Roboto Condensed"
,
base_size
=
9
)
+
facet_wrap
(
~
telediario
,
ncol
=
1
,
scales
=
"free_x"
)
+
scale_y_continuous
(
expand
=
c
(
0
,
0
)
)
+
scale_x_continuous
(
expand
=
c
(
0
,
0
),
limits
=
c
(
0
,
60
),
breaks
=
c
(
0
,
3
,
4
,
10
,
20
,
30
,
40
,
50
,
60
)
)
+
theme
(
panel.grid.minor.x
=
element_blank
(),
panel.grid.minor.y
=
element_blank
(),
panel.grid.major.x
=
element_blank
(),
# panel.grid.major.y = element_blank(),
plot.background
=
element_rect
(
fill
=
"white"
,
color
=
"white"
),
axis.ticks
=
element_line
(
color
=
"#888888"
)
)
+
labs
(
title
=
paste
(
"When they speak about Sports... to find duration of news summary"
,
sep
=
""
),
subtitle
=
paste
(
"Histogram of sentences in "
,
subtitle_text
),
x
=
"minutes"
,
y
=
"number of sentences"
,
caption
=
paste0
(
"From: 2018-02-10 to 2018-06-01. Query: "
,
sports_query
,
". "
,
caption_text
)
)
ggsave
(
"img/telediarios/all-telediarios-when-they-speak-sports_histogram.png"
,
width
=
8
,
height
=
4
,
dpi
=
260
)
telediarios
%>%
filter
(
date
>
"2018-02-10"
&
date
<
"2018-06-01"
# date == "2018-04-01"
# & telediario =="Telediario 15h"
)
%>%
filter
(
grepl
(
sports_query
,
text
)
)
%>%
mutate
(
minutes
=
round
(
as.numeric
(
seconds
)
/
60
,
1
)
)
%>%
group_by
(
telediario
,
minutes
)
%>%
summarise
(
count
=
n
())
%>%
ggplot
(
)
+
# geom_histogram( aes()+0.5,binwidth = 1 )+
geom_col
(
aes
(
minutes
-0.05
,
count
),
width
=
0.1
)
+
geom_vline
(
xintercept
=
4
,
color
=
"darkred"
,
linetype
=
"dashed"
)
+
annotate
(
geom
=
"text"
,
x
=
4.9
,
y
=
30
,
label
=
"Sport news are mendioned at the end of summary\n(3-4 minutes since beginning)"
,
family
=
"Roboto Condensed"
,
hjust
=
0
,
size
=
3
,
size
=
0.6
)
+
theme_minimal
(
base_family
=
"Roboto Condensed"
,
base_size
=
9
)
+
facet_wrap
(
~
telediario
,
ncol
=
1
,
scales
=
"free_x"
)
+
scale_y_continuous
(
expand
=
c
(
0
,
0
)
)
+
scale_x_continuous
(
expand
=
c
(
0
,
0
),
limits
=
c
(
0
,
20
),
breaks
=
c
(
0
,
1
,
2
,
3
,
4
,
5
,
10
,
15
,
20
)
)
+
theme
(
panel.grid.minor.x
=
element_blank
(),
panel.grid.minor.y
=
element_blank
(),
panel.grid.major.x
=
element_blank
(),
# panel.grid.major.y = element_blank(),
plot.background
=
element_rect
(
fill
=
"white"
,
color
=
"white"
),
axis.ticks
=
element_line
(
color
=
"#888888"
)
)
+
labs
(
title
=
paste
(
"When they speak about Sports... to find duration of news summary"
,
sep
=
""
),
subtitle
=
paste
(
"Histogram of sentences in "
,
subtitle_text
),
x
=
"minutes"
,
y
=
"number of sentences"
,
caption
=
paste0
(
"From: 2018-02-10 to 2018-06-01. Query: "
,
sports_query
,
". "
,
caption_text
)
)
ggsave
(
"img/telediarios/all-telediarios-when-they-speak-sports_histogram_detail.png"
,
width
=
8
,
height
=
4
,
dpi
=
260
)
analysis/verba-processing.R
View file @
fd7f06f7
...
...
@@ -5,7 +5,7 @@ library(tidyverse)
# Settings ------
# subtitle_text <- "Telediarios de TVE"
subtitle_text
<-
"RTVE television news
program
s"
subtitle_text
<-
"RTVE television news
cast
s"
# el_caso <- "caso Lezo y caso Púnica"
# caso_path <- "lezo-punica"
# el_caso <- "caso Máster (Cristina Cifuentes)"
...
...
img/telediarios/all-telediarios-when-they-speak-sports.png
View replaced file @
60e4ecde
View file @
fd7f06f7
743 KB
|
W:
|
H:
752 KB
|
W:
|
H:
2-up
Swipe
Onion skin
img/telediarios/all-telediarios-when-they-speak-sports_histogram.png
0 → 100644
View file @
fd7f06f7
90.3 KB
img/telediarios/all-telediarios-when-they-speak-sports_histogram_detail.png
0 → 100644
View file @
fd7f06f7
91.7 KB
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment