Commit 71c3bf19 authored by numeroteca
Browse files

process all the telediarios

parent 15e69e15
# processing all the Telediarios
# 0. load libraries -------------------------------------------------
library(jsonlite)
library(tidyverse)
# Directory with the JSON files. The trailing slash matters: file names are
# appended to it with paste0().
path <- "~/data/verba/data/original/cooked/"

# load 1 file -----
# One sample broadcast: `file` holds the broadcast data and `file2` the
# metadata record that goes with it.
file <- "2258715.output.json"
file2 <- "2258715.json"
telediario <- paste0(path, file) %>%
  file() %>%
  stream_in()
telediario_meta <- paste0(path, file2) %>%
  file() %>%
  stream_in()

# Stamp the broadcast data with the publication date found in the metadata.
the_datetime <- telediario_meta$publicationDate
telediario$datetime <- the_datetime
# load all the files ----
# Every broadcast is stored as a pair of files in `path`:
#   "<id>.json"         metadata record (provides publicationDate)
#   "<id>.output.json"  the broadcast data itself
files <- list.files(path = path)
output_files <- grep("\\.output\\.json$", files, value = TRUE)

# Read one "<id>.output.json" file and tag every row with the publication
# date of its "<id>.json" companion and with the name of the file it came
# from. Stops with an explicit error when the companion file is missing.
read_telediario <- function(output_file) {
  meta_file <- sub("\\.output\\.json$", ".json", output_file)
  if (!file.exists(paste0(path, meta_file))) {
    stop("Missing metadata file for ", output_file, call. = FALSE)
  }
  meta <- stream_in(file(paste0(path, meta_file)))
  broadcast <- stream_in(file(paste0(path, output_file)))
  broadcast$datetime <- meta$publicationDate
  broadcast$file <- output_file
  broadcast
}

# Files are paired by name rather than by odd/even position in the listing,
# and each broadcast is read and appended exactly once. Appending inside an
# index loop on every iteration (metadata iterations included) would add each
# broadcast twice and start from whatever `telediario` held beforehand.
# Binding once at the end also avoids growing a data frame inside a loop.
telediarios <- bind_rows(lapply(output_files, read_telediario))

saveRDS(telediarios, file = "~/data/verba/data/output/telediarios.rds")
# Derive time and date fields by character position:
#   start_time: hours at characters 1-2, minutes at 4-5, seconds at 7-8
#   datetime:   characters 1-10 parsed as day-month-year; characters 12-13
#               prefixed with "T" give the edition label (e.g. "T15")
telediarios <- mutate(
  telediarios,
  hour = as.numeric(substring(start_time, 1, 2)),
  min = as.numeric(substring(start_time, 4, 5)),
  sec = as.numeric(substring(start_time, 7, 8)),
  date = as.Date(substring(datetime, 1, 10), "%d-%m-%Y"),
  telediario = paste0("T", substring(datetime, 12, 13))
)

# Inspect the distribution of the hour field before cleaning it.
table(telediarios$hour)

# Any hour above 2 is reset to 0 (NOTE(review): presumably such values are
# bad timestamps -- confirm), then the offset is expressed in seconds.
telediarios <- mutate(
  telediarios,
  hour = ifelse(hour <= 2, hour, 0),
  seconds = hour * 3600 + min * 60 + sec
)
# One row per date and edition; `time` is the largest `seconds` value seen
# for that broadcast.
duration <- summarise(
  group_by(telediarios, date, telediario),
  time = max(seconds)
)

# Quick look at the resulting values.
hist(duration$time)
plot(duration$time)
# Bar chart of `time` per date, one panel per edition.
# Edition labels that are not the standard T15 / T21 are folded into them:
# T14 and T16 count as T15 (the 15:00h program), T20 and T22 count as T21
# (the 21:00h program). T07 and T08 are left out of the chart.
duration %>%
  mutate(
    telediario = case_when(
      telediario %in% c("T14", "T15", "T16") ~ "Telediario 15h",
      telediario %in% c("T20", "T21", "T22") ~ "Telediario 21h",
      TRUE ~ telediario
    )
  ) %>%
  filter(!(telediario == "T08" | telediario == "T07")) %>%
  ggplot() +
  geom_col(aes(x = date, y = time)) +
  # Reference line at 3600 seconds (one hour).
  geom_hline(yintercept = 3600) +
  facet_wrap(~telediario, ncol = 1) +
  theme_minimal(base_family = "Roboto condensed") +
  scale_x_date(
    limits = as.Date(c("2018-03-10", "2018-05-01"))
  )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment