Commit 2fc4c4f0 authored by numeroteca's avatar numeroteca

update cleanin OpenRefine, move 1 barometro analysis to other file, Cleans NS/NC and 0 answers

parent b20235db
This diff is collapsed.
# Analysis of CIS barometro data http://bilbaodatalab.wikitoki.org/wiki/barometro-del-cis/
# This is a script with the former way to analyse a barometro.
# unused code from analizando-CIS.R
# 1. Load libraries --------
# install.packages("foreign")
library(foreign)
library(tidyverse)
# Load data for single barometro------------------------
# 201801
# df <- read.spss("DA3203.sav", use.value.label=TRUE, to.data.frame=TRUE)
# 201905
# df <- read.spss("../../data/CIS/barometro/1905/3247.sav", use.value.label=TRUE, to.data.frame=TRUE)
# df <- read.spss("~/data/CIS/barometro/almacen/3203.sav", use.value.label=TRUE, to.data.frame=TRUE)
# df <- read.spss("~/data/CIS/barometro/por-email/3164.sav", use.value.label=TRUE, to.data.frame=TRUE)
# df <- read.spss("../../data/CIS/barometro/fid/FID_2293.sav", use.value.label=TRUE, to.data.frame=TRUE)
# Quick look at the loaded survey ----------------
# Column summaries, the first rows, and the first respondent's full record.
summary(df)
head(df)
df[1, ]
# Frequency tables and bar plots
# Respondents per PROV and per CCAA.
table(df$PROV)
table(df$CCAA)
plot(df$CCAA)
# Distribution of the answers stored in P701, P702 and P703.
table(df$P701)
plot(df$P701)
plot(df$P702)
plot(df$P703)
# P701 answers restricted to one Comunidad Autónoma at a time.
table(df$P701[df$CCAA == "País Vasco"])
table(df$P701[df$CCAA == "Madrid (Comunidad de)"])
# Number of rows of `data` whose column `var` equals `answer`.
# Uses sum(..., na.rm = TRUE) rather than nrow(data[cond, ]): logical row
# indexing turns every NA in `cond` into an extra all-NA row, so missing
# answers used to be counted as matches.
count_answer <- function(data, var, answer) {
  sum(data[[var]] == answer, na.rm = TRUE)
}
# Percentage of the rows of `data` that gave `answer` in any of `vars`
# (the per-column counts are added up and divided by nrow(data)).
pct_answer <- function(data, answer, vars = c("P701", "P702", "P703")) {
  hits <- vapply(vars, function(v) count_answer(data, v, answer), integer(1))
  sum(hits) / nrow(data) * 100
}
# Fraction (0-1) of respondents who gave the answer in each single column.
count_answer(df, "P701", "La corrupción y el fraude") / nrow(df)
count_answer(df, "P702", "La corrupción y el fraude") / nrow(df)
count_answer(df, "P703", "La corrupción y el fraude") / nrow(df)
# Percentage of respondents who answered that "____" was one of -------------------------
# the three main problems that currently exist in Spain
pct_answer(df, "La corrupción y el fraude")
pct_answer(df, "La educación")
pct_answer(df, "El paro")
# For one single Comunidad Autónoma ------
# which() keeps only rows where the comparison is TRUE; plain logical indexing
# would add an all-NA row for every respondent whose CCAA is missing.
euskadi <- df[which(df$CCAA == "País Vasco"), ]
nrow(euskadi)
# Percentage of these respondents who gave "La educación" in P701, P702 or
# P703. sum(..., na.rm = TRUE) counts real matches only, so missing answers
# no longer inflate the numerator.
(sum(euskadi$P701 == "La educación", na.rm = TRUE) +
  sum(euskadi$P702 == "La educación", na.rm = TRUE) +
  sum(euskadi$P703 == "La educación", na.rm = TRUE)
) / nrow(euskadi) * 100
# Calculate results for each CCAA ------
# For every level of df$CCAA, compute the percentage of its respondents who
# gave `problem_answer` in any of the `problem_vars` columns (results_t) and
# in the first of those columns only (results_1st).
problem_answer <- "La corrupción y el fraude"
# change to P701, P702, P703 for 201801
problem_vars <- c("P901", "P902", "P903")
ccaa <- as.data.frame(levels(df$CCAA))
names(ccaa) <- "nombre"
# ccaa_results <- data.frame(matrix(ncol = 1,nrow = nrow(ccaa) ))
ccaa$results_t <- 0
ccaa$results_1st <- 0
# calculates the answers by CCAA
# seq_len() keeps the loop from running when ccaa has no rows.
for (i in seq_len(nrow(ccaa))) {
  print(i)
  # Masks are summed with na.rm = TRUE instead of counting nrow(df[mask, ]):
  # row indexing with a mask that contains NA adds an all-NA row per NA, which
  # inflated both the numerator and the denominator.
  in_ccaa <- df$CCAA == ccaa[i, 1]
  n_ccaa <- sum(in_ccaa, na.rm = TRUE)
  mentions <- vapply(
    problem_vars,
    function(v) sum(in_ccaa & df[[v]] == problem_answer, na.rm = TRUE),
    integer(1)
  )
  ccaa$results_t[i] <- round(sum(mentions) / n_ccaa * 100, digits = 2)
  ccaa$results_1st[i] <- round(mentions[[1]] / n_ccaa * 100, digits = 2)
}
# `ccaa$results` partially matched both result columns and therefore returned
# NULL; name the column explicitly.
plot(ccaa$results_t)
plot(ccaa)
# Calculate results for each RECUERDO ----------------
# For every level of df$RECUERDO, compute the percentage of its respondents
# who gave "La corrupción y el fraude" in P901, P902 or P903.
recuerdo <- as.data.frame(levels(df$RECUERDO))
names(recuerdo) <- "nombre"
# recuerdo_results <- data.frame(matrix(ncol = 1,nrow = nrow(recuerdo) ))
recuerdo$recuerdo <- 0
# This mask does not depend on the group, so it is built once outside the loop.
mentions_problem <- df$P901 == "La corrupción y el fraude" |
  df$P902 == "La corrupción y el fraude" |
  df$P903 == "La corrupción y el fraude"
# calculates the answers by recuerdo
for (i in seq_len(nrow(recuerdo))) {
  print(i)
  in_group <- df$RECUERDO == recuerdo[i, 1]
  # sum(..., na.rm = TRUE) counts TRUE values only; nrow(df[mask, ]) also
  # counted one row per NA in the mask.
  recuerdo$recuerdo[i] <- round(
    sum(in_group & mentions_problem, na.rm = TRUE) /
      sum(in_group, na.rm = TRUE) * 100,
    digits = 2
  )
}
# Calculate results for each "votaría en generales" answer (P20R) ----------------
# For every level of df$P20R, compute the percentage of its respondents who
# gave "La corrupción y el fraude" in P901, P902 or P903.
votaria <- as.data.frame(levels(df$P20R))
names(votaria) <- "nombre"
# votaria_results <- data.frame(matrix(ncol = 1,nrow = nrow(votaria) ))
# Create the result column up front; without it the first loop assignment
# only worked because a length-one value was recycled over every row.
votaria$votaria <- 0
# This mask does not depend on the group, so it is built once outside the loop.
mentions_problem <- df$P901 == "La corrupción y el fraude" |
  df$P902 == "La corrupción y el fraude" |
  df$P903 == "La corrupción y el fraude"
# calculates the answers by votaria
for (i in seq_len(nrow(votaria))) {
  print(i)
  in_group <- df$P20R == votaria[i, 1]
  # sum(..., na.rm = TRUE) counts TRUE values only; nrow(df[mask, ]) also
  # counted one row per NA in the mask.
  votaria$votaria[i] <- round(
    sum(in_group & mentions_problem, na.rm = TRUE) /
      sum(in_group, na.rm = TRUE) * 100,
    digits = 2
  )
}
# Combine both breakdowns -----------
# Joins the `recuerdo` and `votaria` tables on their shared "nombre" column.
rec_vot <- merge(x = recuerdo, y = votaria, by = "nombre")
# network analysis ¿Meleiro? -------------------------------------------------------
# Builds a Sankey diagram of the flows between P26AR and VOTOSIMG in study 3210.
library(foreign)
library(tidyverse)
library(networkD3)
# install.packages("networkD3")
# NOTE(review): machine-specific working directory; point it to the folder
# that contains 3210.sav before running.
setwd("/Users/meleiro/Dropbox/MASTER/DATOS/R/CIS")
cis <- read.spss("3210.sav", use.value.labels = TRUE, to.data.frame = TRUE)
# Respondents per (P26AR, VOTOSIMG) pair. After summarise() the data is still
# grouped by P26AR, so pct is the share within each P26AR value.
p <- cis %>%
  group_by(P26AR, VOTOSIMG) %>%
  summarise(N = n()) %>%
  mutate(pct = N / sum(N))
p <- p[!is.na(p$P26AR), ]
# One node per distinct label on either side. The labels must be concatenated
# first: unique(a, b) treats `b` as the `incomparables` argument, so the
# VOTOSIMG labels were never added to the node list.
node_names <- unique(c(as.character(p$P26AR), as.character(p$VOTOSIMG)))
nodes <- data.frame(
  name = node_names[!is.na(node_names)],
  stringsAsFactors = FALSE
)
links <- data.frame(p)
names(links) <- c("source", "target", "value", "value_pct")
# Replace every label by the zero-based row position of its node in `nodes`,
# which is how sankeyNetwork() links Source/Target to Nodes. This covers all
# nodes automatically instead of hard-coding an index per party, and labels
# without a node (NA) simply become NA.
# NOTE(review): a label present on both sides shares a single node, so flows
# between identical labels become self-links - confirm this renders as intended.
links$source <- match(as.character(links$source), nodes$name) - 1L
links$target <- match(as.character(links$target), nodes$name) - 1L
# select available data
links_clean <- links[!is.na(links$target) & !is.na(links$source), ]
# plot
sankeyNetwork(Links = links_clean, Nodes = nodes,
              Source = "source", Target = "target",
              Value = "value", NodeID = "name",
              fontSize = 12, nodeWidth = 30)
# Sankey example built from the energy.json data downloaded from URL.
URL <- paste0('https://cdn.rawgit.com/christophergandrud/networkD3/',
              'master/JSONdata/energy.json')
energy <- jsonlite::fromJSON(URL)
# Extract links and nodes before plotting; they were previously assigned only
# after the sankeyNetwork() call that uses them.
linksx <- energy$links
nodesx <- energy$nodes
# Plot
sankeyNetwork(Links = linksx, Nodes = nodesx, Source = 'source',
              Target = 'target', Value = 'value', NodeID = 'name',
              units = 'TWh', fontSize = 12, nodeWidth = 30)
# Minimal simpleNetwork() example: one row per edge, from `src` to `target`.
src <- rep(c("A", "B", "C", "D"), times = c(4, 2, 2, 1))
target <- c(
  "B", "C", "D", "J",
  "E", "F",
  "G", "H",
  "I"
)
networkData <- data.frame(src, target)
# Plot
simpleNetwork(networkData)
\ No newline at end of file
This diff is collapsed.
......@@ -444,27 +444,24 @@ problema","","","","","","","","","","","","","","","",""
"310","15","3271.sav","3271","02-01-2020","","BARÓMETRO DE ENERO 2020","2020-01-02","A9_1","A9_2","A9_3","","","","","","","","","","","","","","","","","",""
"311","14","3273.sav","3273","01-02-2020","Feb20","BARÓMETRO DE FEBRERO 2020","2020-02-01","A9_1","A9_2","A9_3","","","","","","","","","","","","","","","","","",""
"312","13","3277.sav","3277","01-03-2020","Mar20","BARÓMETRO DE MARZO 2020","2020-03-01","A9_1","A9_2","A9_3","","","","","","","","","","","","","","","","","",""
"313","12","3279.sav","3279","01-04-2020","","BARÓMETRO DE ABRIL 2020","2020-04-01","","","","","Error in read.spss(data, use.value.label = TRUE, to.data.frame = TRUE) :
"313","12","3279.sav","3279","01-04-2020","","BARÓMETRO DE ABRIL 2020","2020-04-01","P23_1","P23_2","P23_3","","Error in read.spss(data, use.value.label = TRUE, to.data.frame = TRUE) :
error reading system-file header
In addition: There were 50 or more warnings (use warnings() to see the first 50)
P21_1 P21_2 P21_3","","","","","","","","","","","","","","","",""
"314","11","3281.sav","3281","04-05-2020","","BARÓMETRO DE MAYO 2020","2020-05-04","","","","","Error in read.spss(data, use.value.label = TRUE, to.data.frame = TRUE) :
"314","11","3281.sav","3281","04-05-2020","","BARÓMETRO DE MAYO 2020","2020-05-04","P23_1","P23_2","P23_3","","Error in read.spss(data, use.value.label = TRUE, to.data.frame = TRUE) :
error reading system-file header
In addition: There were 50 or more warnings (use warnings() to see the first 50)
P21_1 P21_2 P21_3","","","","","","","","","","","","","","","",""
"315","10","3283.sav","3283","01-06-2020","","BARÓMETRO DE JUNIO 2020","2020-06-01","P23_1","P23_2","P23_3","","Error in read.spss(data, use.value.label = TRUE, to.data.frame = TRUE) :
error reading system-file header
In addition: There were 50 or more warnings (use warnings() to see the first 50)
P23_1 P23_2 P23_3","","","","","","","","","","","","","","","",""
"315","10","3283.sav","3283","01-06-2020","","BARÓMETRO DE JUNIO 2020","2020-06-01","P12_1","P12_2","P12_3","","","","","","","","","","","","","","","","","",""
"316","9","3288.sav","3288","01-07-2020","","BARÓMETRO DE JULIO 2020","2020-07-01","P12_1","P12_2","P12_3","","","","","","","","","","","","","","","","","",""
"317","8","3292.sav","3292","01-09-2020","","BARÓMETRO DE SEPTIEMBRE 2020","2020-09-01","P17_1","P17_2","P17_3","","","","","","","","","","","","","","","","","",""
"318","7","3296.sav","3296","01-10-2020","","BARÓMETRO DE OCTUBRE 2020","2020-10-01","P15_1","P15_2","P15_3","","","","","","","","","","","","","","","","","",""
"319","6","3300.sav","3300","03-11-2020","","BARÓMETRO DE NOVIEMBRE 2020","2020-11-03","P15_1","P15_2","P15_3","","","","","","","","","","","","","","","","","",""
"320","5","3303.sav","3303","01-12-2020","","BARÓMETRO DE DICIEMBRE 2020","2020-12-01","P15_1","P15_2","P15_3","","","","","","","","","","","","","","","","","",""
"321","4","3307.sav","3307","07-01-2021","","BARÓMETRO DE ENERO 2021","2021-01-07","P14_1","P14_2","P14_3","","","","","","","","","","","","","","","","","",""
"322","3","3309.sav","3309","03-02-2021","","BARÓMETRO DE FEBRERO 2021","2021-02-03","P15_1","P15_2","P15_3","","","","","","","","","","","","","","","","","",""
"323","2","3313.sav","3313","01-03-2021","","BARÓMETRO DE MARZO 2021","2021-03-01","P14_1","P14_2","P14_3","","","","","","","","","","","","","","","","","",""
"324","1","3318.sav","3318","05-04-2021","","BARÓMETRO DE ABRIL 2021","2021-04-05","P16_1","P16_2","P16_3","","","","","","","","","","","","","","","","","",""
"322","3","3309.sav","3309","03-02-2021","","BARÓMETRO DE FEBRERO 2021","2021-02-03","PESPANNA1","PESPANNA2","PESPANNA3","","","","","","","","","","","","","","","","","",""
"323","2","3313.sav","3313","01-03-2021","","BARÓMETRO DE MARZO 2021","2021-03-01","PESPANNA1","PESPANNA2","PESPANNA3","","","","","","","","","","","","","","","","","",""
"324","1","3318.sav","3318","05-04-2021","","BARÓMETRO DE ABRIL 2021","2021-04-05","PESPANNA1","PESPANNA2","PESPANNA3","","","","","","","","","","","","","","","","","",""
"","","_2220.sav","","","","NA","","","","","","","","","","","","","","","","","","","","","",""
"","","3242.sav","3242","","","NA","","","","","","","","","","","","","","","","","","","","","",""
"","","3245.sav","3245","","","NA","","","","","","","","","","","","","","","","","","","","","",""
......
......@@ -330,18 +330,18 @@
"3271.sav","3271","02-01-2020","BARÓMETRO DE ENERO 2020",2020-01-02,"A9_1","A9_2","A9_3"
"3273.sav","3273","01-02-2020","BARÓMETRO DE FEBRERO 2020",2020-02-01,"A9_1","A9_2","A9_3"
"3277.sav","3277","01-03-2020","BARÓMETRO DE MARZO 2020",2020-03-01,"A9_1","A9_2","A9_3"
"3279.sav","3279","01-04-2020","BARÓMETRO DE ABRIL 2020",2020-04-01,"","",""
"3281.sav","3281","04-05-2020","BARÓMETRO DE MAYO 2020",2020-05-04,"","",""
"3283.sav","3283","01-06-2020","BARÓMETRO DE JUNIO 2020",2020-06-01,"P23_1","P23_2","P23_3"
"3279.sav","3279","01-04-2020","BARÓMETRO DE ABRIL 2020",2020-04-01,"P23_1","P23_2","P23_3"
"3281.sav","3281","04-05-2020","BARÓMETRO DE MAYO 2020",2020-05-04,"P23_1","P23_2","P23_3"
"3283.sav","3283","01-06-2020","BARÓMETRO DE JUNIO 2020",2020-06-01,"P12_1","P12_2","P12_3"
"3288.sav","3288","01-07-2020","BARÓMETRO DE JULIO 2020",2020-07-01,"P12_1","P12_2","P12_3"
"3292.sav","3292","01-09-2020","BARÓMETRO DE SEPTIEMBRE 2020",2020-09-01,"P17_1","P17_2","P17_3"
"3296.sav","3296","01-10-2020","BARÓMETRO DE OCTUBRE 2020",2020-10-01,"P15_1","P15_2","P15_3"
"3300.sav","3300","03-11-2020","BARÓMETRO DE NOVIEMBRE 2020",2020-11-03,"P15_1","P15_2","P15_3"
"3303.sav","3303","01-12-2020","BARÓMETRO DE DICIEMBRE 2020",2020-12-01,"P15_1","P15_2","P15_3"
"3307.sav","3307","07-01-2021","BARÓMETRO DE ENERO 2021",2021-01-07,"P14_1","P14_2","P14_3"
"3309.sav","3309","03-02-2021","BARÓMETRO DE FEBRERO 2021",2021-02-03,"P15_1","P15_2","P15_3"
"3313.sav","3313","01-03-2021","BARÓMETRO DE MARZO 2021",2021-03-01,"P14_1","P14_2","P14_3"
"3318.sav","3318","05-04-2021","BARÓMETRO DE ABRIL 2021",2021-04-05,"P16_1","P16_2","P16_3"
"3309.sav","3309","03-02-2021","BARÓMETRO DE FEBRERO 2021",2021-02-03,"PESPANNA1","PESPANNA2","PESPANNA3"
"3313.sav","3313","01-03-2021","BARÓMETRO DE MARZO 2021",2021-03-01,"PESPANNA1","PESPANNA2","PESPANNA3"
"3318.sav","3318","05-04-2021","BARÓMETRO DE ABRIL 2021",2021-04-05,"PESPANNA1","PESPANNA2","PESPANNA3"
"1990-consavtodos","1990-cotodos",NA,NA,NA,NA,NA,NA
"1991-consavtodos","1991-cotodos",NA,NA,NA,NA,NA,NA
"1992-consavtodos","1992-cotodos",NA,NA,NA,NA,NA,NA
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment