Commit bbb3e214 authored by numeroteca's avatar numeroteca
Browse files

add search function to app

parent a5dce7f9
......@@ -4,56 +4,72 @@
library(tidyverse)
# Settings ------
# subtitle_text <- "Telediarios de TVE. 20 marzo - 29 abril 2018"
# subtitle_text <- "Telediarios de TVE. 4 enero - 28 febrero 2017"
# subtitle_text <- "Telediarios de TVE. 1 marzo - 30 junio 2017"
subtitle_text <- "Telediarios de TVE"
# el_caso <- "caso Lezo y caso Púnica"
# caso_path <- "lezo-punica"
el_caso <- "caso Máster (Cristina Cifuentes)"
# el_caso <- "caso Máster (Cristina Cifuentes)"
caso_path <- "cifuentes"
# el_caso <- "Messi"
caption_text <- "Datos: verba.civio.es (Civio). Gráfico: numeroteca.org"
# Load data via API
# De normal la app llama a la API que hace el trabajo y recibe JSON de vuelta:
# https://verba.civio.es/api/search?q=bilbao&page=0&aggregations=week
# pero poniendo "search.csv" en vez de "search" recibe de vuelta CSV
# https://verba.civio.es/api/search.csv?q=bilbao&page=0&aggregations=week
# Y si quieres todos los resultados, no solo la última página, puedes poner "size".
# Esto es básicamente lo que hace la app al descargar los resultados como CSV
# https://verba.civio.es/api/search.csv?q=bilbao&size=20000
# Word to search
search <- "corrupci%C3%B3n"
# search <- "bilbao"
# search <- "acorrupción"
el_caso <- search %>% str_replace_all("%C3%B3","o")
caso_path <- el_caso
# Create the output directory for this search term if it does not exist yet.
# dir.exists() only matches directories (file.exists() is also TRUE for a
# regular file with the same name), and recursive = TRUE creates the parent
# "img" folder as well, so the script works on a fresh checkout.
output_dir <- file.path("img", el_caso)
if (dir.exists(output_dir)) {
  # do nothing
  print("do nothing, relax")
} else {
  dir.create(output_dir, recursive = TRUE)
}
# Load data ---------------
data <- read.delim("data/verba/180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv",sep = ",")
data <- read.delim( paste0("https://verba.civio.es/api/search.csv?q=", search,"&size=20000"),sep = ",")
# data <- read.delim("data/verba/180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv",sep = ",")
# data <- read.delim("data/verba/140104_140228_barcenas-palabras-relacionadas_01.csv",sep = ",")
# data <- read.delim("data/verba/140104_140228_messi_01.csv",sep = ",")
# data <- read.delim("data/verba/170301_170630_lezo-punica.csv",sep = ",")
# Preprocess data -------
# preprocessed data (cifuentes)
# data$date <- as.Date(data$date)
# data$date2 <- as.POSIXct(data$date)
# Transform raw verba data into date format
data$date <- as.Date(data$programme_date)
# Transforms in to date-time format. TODO: loses hour!
data$date2 <- as.POSIXct(data$date)
# create variable Telediario based on programme_date
data$telediario_fino <- substr(data$programme_date, 11, 13)
table(data$telediario_fino)
# Collapse non-standard broadcast hours onto the two regular editions:
# T20 and T22 become T21 (21:00h programme), T14 becomes T15 (15:00h programme).
# The replacement is assigned into the column vector rather than into a row
# subset of the data frame: `data[cond, ]$col <- value` raises
# "replacement has 1 row, data has 0" whenever no row matches, whereas the
# vector form is simply a no-op. %in% also never yields NA in the index.
data$telediario_fino[data$telediario_fino %in% c("T20", "T22")] <- "T21"
data$telediario_fino[data$telediario_fino %in% "T14"] <- "T15"
table(data$telediario_fino)
# test borrable
# data$id <- as.character(data$id)
# data[data$id == "GxCr-W0BCKkHyZ2v_0lo",]$id <- "wer"
# levels(data$telediario_fino) <- c("15:00h","21.00h")
# data$date2[1] + 3600
# Preprocess data -------
# Derive the date columns and the newscast edition from `programme_date`.
data <- mutate(
  data,
  # calendar date of the programme
  date = as.Date(programme_date),
  # date-time version of `date`. TODO: loses hour!
  date2 = as.POSIXct(date),
  # characters 11-13 of programme_date hold the edition code (e.g. "T15")
  telediario = substr(programme_date, 11, 13),
  # Fold neighbouring hours into the two regular editions and give them their
  # display label: T14/T16 count as the 15:00h programme, T20/T22 as the
  # 21:00h programme. Any other code is left untouched.
  telediario = ifelse(
    telediario %in% c("T14", "T15", "T16"),
    "Telediario 15h",
    ifelse(
      telediario %in% c("T20", "T21", "T22"),
      "Telediario 21h",
      telediario
    )
  )
)
# Drop the morning editions (T07 and T08); rows with a missing edition are
# dropped too, because the comparison evaluates to NA for them.
data <- filter(data, !(telediario == "T08" | telediario == "T07"))
# Plots ----------------------
# Número de frases por telediario
png(filename=paste("img/",caso_path ,"/apariciones-n-telediarios-",caso_path,"_01.png", sep = ""),width = 1200,height = 700)
png(filename = paste0("img/",caso_path ,"/apariciones-n-telediarios-",caso_path,"_01.png"), width = 1200,height = 700)
ggplot(data = data ) +
geom_bar(aes( x=date2 )) + # si hay clasificación por caso: fill=caso
theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
......@@ -76,11 +92,11 @@ ggplot(data = data ) +
panel.grid.major.y = element_blank(),
axis.ticks.x = element_line()
) +
facet_wrap( ~telediario_fino, ncol=1)
facet_wrap( ~telediario, ncol=1)
dev.off()
# Cuando hablan del caso dentro del Telediario
png(filename=paste("img/",caso_path ,"/apariciones-cuando-telediarios_",caso_path,"_01.png", sep = ""),width = 1200,height = 900)
png(filename=paste0("img/",caso_path ,"/apariciones-cuando-telediarios_",caso_path,"_01.png"),width = 1200,height = 900)
ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
# primer minuto y medio (90 segundos)
geom_rect(aes( xmin=min(data$date2+ 41000) , xmax=max(data$date2+ 82800+ 40000), ymin=0,ymax=120/60), alpha = 0.02, fill = "lightgrey" ) +
......@@ -109,11 +125,40 @@ ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()
) +
facet_wrap( ~telediario_fino, ncol=1)
facet_wrap( ~telediario, ncol=1)
dev.off()
# If the news items have been manually classified -------------------
# Number of sentences per newscast, with bars filled by the `caso` column.
# NOTE(review): `caso` presumably exists only in the hand-classified CSV files
# and not in raw API downloads - confirm. The chart is therefore skipped when
# the column is absent instead of failing with "object 'caso' not found".
if ("caso" %in% names(data)) {
  plot_clasificado <- ggplot(data = data) +
    geom_bar(aes(x = date2, fill = caso)) +
    theme_minimal(base_family = "Roboto Condensed", base_size = 22) +
    # scale_x_datetime(date_breaks = "1 day", date_labels = "%d") +
    scale_x_datetime(
      date_breaks = "1 week",
      date_labels = "%d/%m",
      # secondary axis to add months
      sec.axis = sec_axis(~ ., labels = scales::time_format("%b/%y"))
    ) +
    labs(
      title = paste0("Número de frases sobre ", el_caso),
      subtitle = subtitle_text,
      x = NULL,
      y = "nº de frases",
      caption = caption_text
    ) +
    theme(
      panel.grid.minor.x = element_blank(),
      panel.grid.minor.y = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_blank(),
      axis.ticks.x = element_line(),
      legend.position = "top"
    ) +
    facet_wrap(~telediario, ncol = 1)

  png(
    filename = paste0(
      "img/", caso_path, "/apariciones-n-telediarios-", caso_path,
      "_clasificado.png"
    ),
    width = 1200,
    height = 700
  )
  # Explicit print(): a ggplot object is only drawn when printed, and
  # auto-printing does not happen inside a braced block or under source().
  print(plot_clasificado)
  dev.off()
}
# Solamente para Cifuentes, porque tiene clasificadas las noticias --------------------
png(filename=paste("img/apariciones-cuando-telediarios-caso-master_01.png", sep = ""),width = 1200,height = 900)
ggplot(data = data ) + #%>% filter( date> "2018-04-01" & date < "2018-04-05" )
geom_rect(aes( xmin=min(data$date2+ 41000) , xmax=max(data$date2+ 82800+ 40000), ymin=0,ymax=120/60), alpha = 0.02, fill = "lightgrey" ) +
......
# Load libraries
library(shiny)
library(tidyverse)
library(plotly)
# settings: captions
subtitle_text <- "Telediarios de TVE. "
caption_text <- "Datos: verba.civio.es (Civio). Gráfico: numeroteca.org"
# Define UI for miles per gallon app ----
# Define UI ----
ui <- fluidPage(
tags$head(
# Note the wrapping of the string in HTML()
......@@ -36,6 +39,13 @@ ui <- fluidPage(
# Sidebar panel for inputs ----
sidebarPanel(
actionButton("go", "Aplica filtros",style="color: #fff; background-color: #337ab7; border-color: #2e6da4"),
# Input search
textInput("search",
label="Búsqueda:",
value="coronavirus"
),
# TODO set limits to chart
dateInput("date1", "Fecha transcripción", value = "2018-04-04" ),
# dateInput("date2", "Date ends", value = "2012-02-29"),
......@@ -48,17 +58,15 @@ ui <- fluidPage(
# value=as.Date("2016-12-01"),
# timeFormat="%Y-%m-%d"),
actionButton("go", "Aplica filtros",style="color: #fff; background-color: #337ab7; border-color: #2e6da4"),
# Input: select data source among the previously downloaded CSV files at app/data/ ----
# selectInput("database", "Fuente de datos:",
# list.files("data/"),
# selected = "180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv"
# # c("Cylinders" = "cyl",
# # "Transmission" = "am",
# # "Gears" = "gear")
# ),
# Input: Selector for variable to plot against mpg ----
selectInput("variable", "Fuente de datos:",
list.files("data/"),
selected = "180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv"
# c("Cylinders" = "cyl",
# "Transmission" = "am",
# "Gears" = "gear")
),
sliderInput(inputId = "heading",
label = "Cabecera (segundos)",
min = 60, max = 360, value = 120
......@@ -76,7 +84,7 @@ ui <- fluidPage(
"Semana" = "week",
"Año" = "year"
),
selected = "day"
selected = "year"
),
sliderInput(inputId = "xlabelsize",
......@@ -91,12 +99,12 @@ ui <- fluidPage(
"Mes / año" = "%m/%Y",
"Año" = "%Y"
),
selected = "%d"
selected = "%m/%Y"
),
sliderInput(inputId = "lineas",
label = "Ancho línea",
min = 1, max = 20, value = 9
min = 1, max = 20, value = 1
),
# Explicaciones
......@@ -136,19 +144,49 @@ ui <- fluidPage(
# Define server logic: fetch the search results from the Verba API and render the charts ----
server <- function(input, output) {
# search input
# Builds the Verba CSV-download URL for the term typed in the "search" text
# input. Re-evaluated only when the "go" button is pressed.
# Returns a single string: the API endpoint with the percent-encoded query
# and size=20000.
search <- eventReactive(input$go, {
  # Percent-encode the raw user text with the standard encoder instead of a
  # hand-written replacement table, so that every character outside the
  # unreserved set is escaped (upper-case accented letters, "&", "#", "+",
  # "?", "ç", ...), not only spaces and a few lower-case accented vowels.
  # - enc2utf8(): guarantees the bytes being escaped are UTF-8 (e.g. "ó"
  #   becomes %C3%B3) regardless of the platform's native encoding.
  # - reserved = TRUE: also escapes URL-reserved characters, which would
  #   otherwise be interpreted as query-string syntax.
  # - repeated = TRUE: the input is plain text, never an already-encoded URL,
  #   so a literal "%" typed by the user must be escaped as well.
  search_clean <- utils::URLencode(
    enc2utf8(input$search),
    reserved = TRUE,
    repeated = TRUE
  )
  paste0("https://verba.civio.es/api/search.csv?q=", search_clean, "&size=20000")
})
# search input
# Raw (un-encoded) search term as typed by the user, refreshed on each press
# of the "go" button; the plots use it verbatim in their titles.
search_word <- eventReactive(input$go, input$search)
# Return the formula text for printing as a caption ----
# output$caption <- renderText({
# formulaText()
# })
# data source
formulaText <- eventReactive(input$go, {
if (is.null(input$go)) {
return( paste0("data/", "180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv") )
}
paste0("data/", input$variable)
})
# data source: use if local data bases are used
# database <- eventReactive(input$go, {
# if (is.null(input$go)) {
# return( paste0("data/", "180320_180430_cifuentes-y-palabras-relacionadas_clasificado.csv") )
# }
# paste0("data/", input$database)
# })
# select heading time
heading <- eventReactive(input$go, {
input$heading
......@@ -183,8 +221,9 @@ server <- function(input, output) {
})
# Load data ---------------
mydata <- reactive({
read.delim( formulaText() ,sep = ",") %>% mutate(
mydata <- eventReactive(input$go, {
read.delim( search() ,sep = ",") %>% mutate(
# Preprocess data -------
# Transform raw verba data into date format
date = as.Date(programme_date),
......@@ -264,7 +303,7 @@ server <- function(input, output) {
#secondary axis to add months
sec.axis = sec_axis(~ .,
labels = scales::time_format("%b/%y"))) +
labs(title = paste0("Cuándo hablan de ",input$variable, " en los telediarios"),
labs(title = paste0("Cuándo hablan de ´",search_word(), "´ en los telediarios"),
subtitle = paste0(subtitle_text,
"Periodo: ",
substr( min(mydata()$date2),1,10),
......@@ -284,6 +323,8 @@ server <- function(input, output) {
axis.text.x = element_text(size = xlabelsize() )
) +
facet_wrap( ~telediario, ncol=1)
})
# Plot columns ------------
......@@ -317,7 +358,7 @@ server <- function(input, output) {
sec.axis = sec_axis(~ .,
labels = scales::time_format("%b/%y"))
) +
labs(title = paste0("Cuánto hablan de ",input$variable, " en los telediarios"),
labs(title = paste0("Cuánto hablan de ´",search_word(), "´ en los telediarios"),
subtitle = paste0(subtitle_text,
"Periodo: ",
substr( min(mydata()$date2),1,10),
......@@ -346,11 +387,11 @@ server <- function(input, output) {
mydata() %>% select(content,start_time,date,telediario) %>%
mutate (start_time = start_time / 60,
date = as.character( as.Date(date, origin = lubridate::origin) ),
date_tmp = as.character( as.Date(date, origin = lubridate::origin) )
date = as.character( as.Date(date, origin = lubridate::origin) )
# date_tmp = as.character( as.Date(date, origin = lubridate::origin) )
) %>%
# filter by selected date
filter( date_tmp == date1())
filter( date == date1())
})
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment