Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
numeroteca
VerbaR
Commits
6c19033e
Commit
6c19033e
authored
Jan 22, 2020
by
numeroteca
Browse files
process raw verba data: detect telediario time, create string variables for plots
parent
2dd3a0f7
Changes
1
Hide whitespace changes
Inline
Side-by-side
analysis/verba-processing.R
View file @
6c19033e
...
...
@@ -5,27 +5,52 @@ library(tidyverse)
# Load data
data
<-
read.delim
(
"data/verba/180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv"
,
sep
=
","
)
data
<-
read.delim
(
"data/verba/140104_140228_barcenas-palabras-relacionadas_01.csv"
,
sep
=
","
)
data
<-
read.delim
(
"data/verba/140104_140228_messi_01.csv"
,
sep
=
","
)
data
<-
read.delim
(
"data/verba/170301_170630_lezo-punica.csv"
,
sep
=
","
)
# Preprocess data -------
data
$
date
<-
as.Date
(
data
$
date
)
# preprocessed data (cifuentes)
# data$date <- as.Date(data$date)
# data$date2 <- as.POSIXct(data$date)
# raw verba data
data
$
date
<-
as.Date
(
data
$
programme_date
)
data
$
date2
<-
as.POSIXct
(
data
$
date
)
# Relabel the two levels of `telediario` with their broadcast times.
# The second label previously read "21.00h"; it now uses the same "HH:MMh"
# format as the first so both facet titles are consistent.
levels(data$telediario) <- c("15:00h", "21:00h")
# Create `telediario_fino` from characters 11-13 of `programme_date`
# (values such as "T15"; presumably the "T" separator plus the two-digit
# hour of an ISO-style timestamp -- TODO confirm against the raw data).
data$telediario_fino <- substr(data$programme_date, 11, 13)
table(data$telediario_fino)

# Simplify times: fold the neighbouring hours into two groups.
# The assignment indexes the column vector directly instead of using
# `data[cond, ]$telediario_fino <- value`: that form fails with
# "replacement has 1 row, data has 0" when no row matches, and with a
# missing-subscript error when the condition contains NA. `%in%` never
# returns NA, so rows with a missing `programme_date` are simply left as NA.
data$telediario_fino[data$telediario_fino %in% c("T20", "T22")] <- "T21"
data$telediario_fino[data$telediario_fino %in% "T14"] <- "T15"
# test borrable
# data$id <- as.character(data$id)
# data[data$id == "GxCr-W0BCKkHyZ2v_0lo",]$id <- "wer"
# levels(data$telediario_fino) <- c("15:00h","21.00h")
data
$
date2
[
1
]
+
3600
# Settings
subtitle_text
<-
"Telediarios de TVE. 20 marzo - 29 abril 2018"
subtitle_text
<-
"Telediarios de TVE. 4 enero - 28 febrero 2017"
el_caso
<-
"caso Máster (Cristina Cifuentes)"
el_caso
<-
"caso Lezo y caso Púnica"
el_caso
<-
"Messi"
caption_text
<-
"Datos: Verba. Gráfico: numeroteca.org"
# Plots ----------------------
# Número de frases por telediario
png
(
filename
=
paste
(
"img/apariciones-n-telediarios-caso-master_01.png"
,
sep
=
""
),
width
=
1200
,
height
=
900
)
ggplot
(
data
=
data
)
+
geom_bar
(
aes
(
x
=
date2
,
fill
=
caso
))
+
geom_bar
(
aes
(
x
=
date2
))
+
# if the data is classified by case, use: fill=caso
theme_minimal
(
base_family
=
"Roboto Condensed"
,
base_size
=
22
)
+
scale_x_datetime
(
date_breaks
=
"1 day"
,
date_labels
=
"%d"
)
+
labs
(
title
=
paste
(
"Número de frases sobre
caso Máster (Cristina Cifuentes)"
,
sep
=
""
),
labs
(
title
=
paste
(
"Número de frases sobre
"
,
el_caso
,
sep
=
""
),
subtitle
=
subtitle_text
,
x
=
NULL
,
y
=
"nº de frases"
,
...
...
@@ -36,11 +61,38 @@ ggplot(data = data ) +
# panel.grid.major.x = element_blank(),
panel.grid.major.y
=
element_blank
()
)
+
facet_wrap
(
~
telediario
,
ncol
=
1
)
facet_wrap
(
~
telediario
_fino
,
ncol
=
1
)
dev.off
()
# When the case is mentioned within each newscast.
# Writes a 1200x900 PNG with one facet per `telediario_fino` value; each
# record is drawn as a short vertical segment at its day (x) and at
# start_time / 60 (y, labelled "minutos"; presumably start_time is in
# seconds -- TODO confirm).
png(
  filename = "img/apariciones-cuando-telediarios-caso-master_01.png",
  width = 1200,
  height = 900
)
ggplot(data = data) + # optional: %>% filter( date> "2018-04-01" & date < "2018-04-05" )
  # Light grey band from y = 0 to y = 90 / 60 across the whole date range.
  # The limits are constants inside aes(), so the rectangle is drawn once
  # per row; the very low alpha compensates for that overplotting.
  geom_rect(
    aes(
      xmin = min(data$date2 + 41000),
      xmax = max(data$date2 + 82800 + 40000),
      ymin = 0,
      ymax = 90 / 60
    ),
    alpha = 0.02,
    fill = "lightgrey"
  ) +
  # One segment per record, 30 units of start_time long, with x shifted by
  # 82800 seconds (23 hours) from `date2`.
  geom_segment(
    aes(
      x = date2 + 82800,
      xend = date2 + 82800,
      y = start_time / 60,
      yend = (start_time + 30) / 60
    ),
    alpha = 0.8,
    size = 4
  ) + # optional: , color=caso
  # Thin baseline at y = 0.
  geom_hline(aes(yintercept = 0), size = 0.1) +
  # annotate (disabled)
  # geom_curve(aes(x = as.POSIXct("2018-03-24"), y = 0.5, xend = as.POSIXct("2018-03-26"), yend = 7),
  # color="#999999", data =data, curvature = -0.2, size = 0.1) +
  # annotate(geom = "text", x = as.POSIXct("2018-03-26"), y = 7, label = "Portada (1:30 minutos)",
  # family = "Roboto Condensed", hjust = 0,size=6,size=0.6) +
  theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
  scale_x_datetime(
    date_breaks = "3 day",
    date_labels = "%d",
    expand = c(0.01, 0.05)
  ) +
  labs(
    title = paste0("Cuándo hablan del ", el_caso, " en los telediarios"),
    subtitle = paste(subtitle_text),
    x = NULL,
    y = "minutos",
    caption = caption_text
  ) +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    # panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank()
  ) +
  facet_wrap(~telediario_fino, ncol = 1)
dev.off()
png
(
filename
=
paste
(
"img/apariciones-cuando-telediarios-caso-master_01_v.png"
,
sep
=
""
),
width
=
900
,
height
=
1200
)
ggplot
(
data
=
data
)
+
#%>% filter( date> "2018-04-01" & date < "2018-04-05" )
geom_rect
(
aes
(
xmin
=
min
(
data
$
date2
+
41000
)
,
xmax
=
max
(
data
$
date2
+
82800
+
40000
),
ymin
=
0
,
ymax
=
90
/
60
),
alpha
=
0.02
,
fill
=
"lightgrey"
)
+
geom_segment
(
aes
(
x
=
date2
+
82800
,
xend
=
date2
+
82800
,
y
=
start_time
/
60
,
yend
=
(
start_time
+30
)
/
60
,
color
=
caso
),
alpha
=
0.8
,
size
=
9
)
+
...
...
@@ -63,6 +115,5 @@ ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
# panel.grid.major.x = element_blank(),
panel.grid.major.y
=
element_blank
()
)
+
facet_wrap
(
~
telediario
,
ncol
=
1
)
facet_wrap
(
~
telediario
,
ncol
=
2
)
+
coord_flip
()
# Close the current graphics device (the png() device opened for this plot).
# A stray bare symbol `s` used to follow this call; evaluating it at the end
# of the script raised "object 's' not found", so it has been removed.
dev.off()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment