Commit 908faf8c authored by numeroteca's avatar numeroteca
Browse files

Fix count of answers in CIS microdata when the first problem (p1) was empty

parent a915bb89
......@@ -358,7 +358,7 @@ barometros <- read.delim("data/output/barometros-microdatos-ccaa-3problems_clean
# Group by date and CCAA ----------------------
# Builds evol_count: one row per CCAA/date pair with count_total, the number
# of rows of `barometros` in that pair. count_total is later joined onto
# `evol` and used as the denominator when computing `perc`.
evol_count <- barometros %>% group_by(CCAA,date) %>% summarise(
# counts number of elements (answers) by barometro and CCAA
count_total = n()
) %>% ungroup()
......@@ -445,22 +445,39 @@ evol <- rbind(
# add number of answers per barometer and CCAA
evol <- left_join(
# adds number of respondents per survey and CCAA
evol %>% mutate(dunique = paste0(date,CCAA)),
evol_count %>% mutate(dunique = paste0(date,CCAA)) %>% select(-date,-CCAA),
by = "dunique"
) %>% mutate (
)
evol <- evol %>% mutate (
# aa <- evol %>% mutate (
# change NA with Ninguno, así puedo luego usar el valor si no da error
p1 = replace_na( p1, "Ninguno"),
# change NA with 0
count_p1 = replace_na(count_p1,0),
count_p2 = replace_na(count_p2,0),
count_p3 = replace_na(count_p3,0),
# This is not working for NS/NC as it counts the same answers multiple times
count_p = replace_na(count_p1,0) + replace_na(count_p2,0) + replace_na(count_p3,0),
# For NS/NC: if p1 is a NS/NC don't overcount and just use the p2 number of answers
count_p = ifelse( p1 == "NS/NC", count_p1, count_p),
perc = round( replace_na(count_p,0) / count_total * 100, digits = 1)
) %>% select ( date, CCAA, everything(), -dunique ) %>% mutate(
# TODO For NS/NC: if p1 is a NS/NC don't overcount and just use the p2 number of answers
# is it p2 the one to use?? or p1?
count_p = ifelse( p1 == "NS/NC", replace_na(count_p1,0), count_p),
perc = round( replace_na(count_p,0) / count_total * 100, digits = 1),
# rellena p: primero con p1; si está vacío, rellena con p2; y si sigue vacío, con p3
# p es la respusta como "El paro"
# p es la respuesta como "El paro"
# si p1 es ninguno, no pongas ninguno, si no p2
p = p1,
p = ifelse( is.na(p),p2,p),
p = ifelse( is.na(p),p3,p),
p = ifelse( p1 == "Ninguno" & !is.na(p2), p2, p),
p = ifelse( p1 == "Ninguno" & is.na(p2) & !is.na(p3), p3, p),
# p = ifelse( is.na(p) & p1 == "Ninguno", p2, p),
# p = ifelse( is.na(p) & p1 == "Ninguno", p3, p)
) %>% select ( date, CCAA, everything(), -dunique ) %>% filter(
# CCAA == "Madrid" &
# ( p1 == "La corrupción y el fraude" | p1 == "Ninguno")
) %>% mutate(
# si tiene datos NS/NC introduce el valor en p
p = ifelse( !is.na(count_nsnc), "NS/NC consolidated", p ),
# Ojo porque "NS/NC consolidated" corresponde cuando las 3 respuestas son NS/NC
......@@ -604,6 +621,47 @@ evol %>% mutate(
date_labels = "%Y")
# Madrid and Catalonia -----------------
# Line + point chart of `perc` over time for the answer
# "La corrupción y el fraude" in four CCAA, restricted to dates strictly
# between 2006-01-01 and 2011-01-01. The matching `opinion_3p` series is drawn
# on top as a thick grey line (labelled in the subtitle as the weighted
# average for Spain).
evol %>%
  filter(p == "La corrupción y el fraude") %>% # alternative problem: "El paro"
  ungroup() %>%
  filter(
    # regions to compare (drop "País Vasco" for the three-region version)
    CCAA %in% c("Madrid", "Cataluña", "Andalucía", "País Vasco"),
    date > as.Date("2006-01-01"),
    date < as.Date("2011-01-01")
  ) %>%
  ggplot() +
  # optional month labels above each point:
  # geom_text(aes(date, perc + 2, color = CCAA, label = format(date, "%m")), size = 2) +
  geom_point(mapping = aes(x = date, y = perc, color = CCAA)) +
  geom_line(mapping = aes(x = date, y = perc, group = CCAA, color = CCAA)) +
  # reference series from opinion_3p, same problem and same date window
  geom_line(
    data = filter(
      opinion_3p,
      problem == "La corrupción y el fraude",
      date > as.Date("2006-01-01"),
      date < as.Date("2011-01-01")
    ),
    mapping = aes(x = date, y = perc, group = problem),
    color = "#999999",
    size = 2
  ) +
  # optional: one panel per region with facet_wrap(~CCAA)
  scale_x_date(
    date_breaks = "1 years",
    date_labels = "%Y",
    expand = c(0, 0)
  ) +
  labs(
    title = "Percentage of respondents that included Corruption and fraud as one of the 3 main problems in Spain",
    subtitle = "C. de Madrid, Catalonia, Basque Country and Andalucía. Grey line: weighted average Spain",
    y = "percentage",
    x = ""
  ) +
  theme_minimal(base_family = "Roboto Condensed", base_size = 16) +
  theme(legend.position = "top")
evol %>% select( date, CCAA, count_total) %>% distinct() %>% filter ( date < as.Date( "2019-12-01")) %>% ggplot() +
geom_line( aes( date, count_total, group=CCAA, color=CCAA)) + #, color = "orange"
geom_text( data = evol %>% filter ( date < as.Date( "2019-12-01")) %>% group_by(CCAA) %>% top_n(1, date), aes(date,count_total, label=CCAA, color=CCAA), hjust =0) +
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment