Commit 28d5fe32 authored by numeroteca's avatar numeroteca

initial commit analyze barometro CIS

parents
Pipeline #75 failed with stages
# Analysis of CIS barometro data http://bilbaodatalab.wikitoki.org/wiki/barometro-del-cis/
# Load libraries
# install.packages("foreign")
library(foreign)
library(tidyverse)
# Load data ------------------------
# 201801 file (uncomment to analyse it instead of the 201905 one):
# df <- read.spss("DA3203.sav", use.value.labels = TRUE, to.data.frame = TRUE)
# 201905 file. Argument names are written in full instead of relying on
# partial argument matching. With use.value.labels = TRUE the labelled SPSS
# variables are read as factors, so answers are compared by their label text.
df <- read.spss(
  "../../data/CIS/barometro/1905/3247.sav",
  use.value.labels = TRUE,
  to.data.frame = TRUE
)
# Preview of the data ----------------
summary(df)
head(df)
# First respondent, all variables
df[1, ]
# Tables and quick plots
# Number of respondents per province and per autonomous community (CCAA)
table(df$PROV)
table(df$CCAA)
plot(df$CCAA)
# Answers to P701, P702 and P703.
# NOTE(review): the per-group sections further down use P901-P903 and say
# "change to P701 for 201801"; confirm that P701-P703 are the right columns
# for the 201905 file loaded above.
table(df$P701)
plot(df$P701)
plot(df$P702)
plot(df$P703)
# P701 answers within a single autonomous community. which() keeps only rows
# where the comparison is TRUE, so respondents with a missing CCAA are skipped.
table(df$P701[which(df$CCAA == "País Vasco")])
table(df$P701[which(df$CCAA == "Madrid (Comunidad de)")])
# Share (0-1) of all respondents giving this answer in each of the three
# columns. sum(..., na.rm = TRUE) counts only real matches: subsetting the
# data frame with a logical vector that contains NA returns one all-NA row per
# NA, and nrow() would count those rows as if they were matches.
sum(df$P701 == "La corrupción y el fraude", na.rm = TRUE) / nrow(df)
sum(df$P702 == "La corrupción y el fraude", na.rm = TRUE) / nrow(df)
sum(df$P703 == "La corrupción y el fraude", na.rm = TRUE) / nrow(df)
# Percentage of respondents who answered that "____" was one of -------------------------
# the three main problems that currently exist in Spain

# Percentage of the rows of `data` whose answer equals `answer`, adding up the
# matches found in each of `cols`.
#   data:   data frame of respondents.
#   answer: answer label to look for.
#   cols:   names of the answer columns to scan.
# Missing answers are ignored (na.rm = TRUE) instead of being counted as
# matches, which is what nrow(data[data$col == answer, ]) does when the
# comparison yields NA.
# NOTE(review): later sections use P901-P903 for the 201905 file; confirm the
# default columns.
pct_mentions <- function(data, answer, cols = c("P701", "P702", "P703")) {
  mentions <- vapply(
    cols,
    function(col) sum(data[[col]] == answer, na.rm = TRUE),
    numeric(1)
  )
  sum(mentions) / nrow(data) * 100
}

pct_mentions(df, "La corrupción y el fraude")
pct_mentions(df, "La educación")
pct_mentions(df, "El paro")
# For one single Comunidad Autónoma ------
# which() keeps only the rows where the comparison is TRUE; a plain logical
# index would add one all-NA row for every respondent whose CCAA is missing.
euskadi <- df[which(df$CCAA == "País Vasco"), ]
nrow(euskadi)
# Percentage of these respondents answering "La educación" in P701, P702 or
# P703 (matches are added across the three columns; missing answers are not
# counted as matches).
(sum(euskadi$P701 == "La educación", na.rm = TRUE) +
  sum(euskadi$P702 == "La educación", na.rm = TRUE) +
  sum(euskadi$P703 == "La educación", na.rm = TRUE)
) / nrow(euskadi) * 100
# Results for each CCAA ------
# One row per CCAA level, with two percentages of that CCAA's respondents:
#   results_t:   answered "La corrupción y el fraude" in P901, P902 or P903
#                (matches added across the three columns)
#   results_1st: answered it in P901
ccaa <- data.frame(nombre = levels(df$CCAA))
ccaa$results_t <- 0
ccaa$results_1st <- 0
# change to P701, P702, P703 for 201801
problem <- "La corrupción y el fraude"
# calculates the answers by CCAA
for (i in seq_len(nrow(ccaa))) {
  print(i)
  # Respondents of this CCAA; a missing CCAA counts as "not in this CCAA".
  in_ccaa <- !is.na(df$CCAA) & df$CCAA == as.character(ccaa$nombre[i])
  n_ccaa <- sum(in_ccaa)
  # na.rm = TRUE: a missing answer is not a match.
  n_first <- sum(in_ccaa & df$P901 == problem, na.rm = TRUE)
  n_second <- sum(in_ccaa & df$P902 == problem, na.rm = TRUE)
  n_third <- sum(in_ccaa & df$P903 == problem, na.rm = TRUE)
  ccaa$results_t[i] <- round(
    (n_first + n_second + n_third) / n_ccaa * 100,
    digits = 2
  )
  ccaa$results_1st[i] <- round(n_first / n_ccaa * 100, digits = 2)
}
# `ccaa$results` partially matches both results_t and results_1st, so `$`
# returns NULL and plot() fails; the full column name is required.
# NOTE(review): results_t assumed to be the intended column - confirm.
plot(ccaa$results_t)
plot(ccaa)
# Results for each RECUERDO ----------------
# One row per level of df$RECUERDO (presumably vote recall - TODO confirm),
# with the percentage of that group's respondents who answered
# "La corrupción y el fraude" in P901, P902 or P903.
recuerdo <- data.frame(nombre = levels(df$RECUERDO))
recuerdo$recuerdo <- 0
problem <- "La corrupción y el fraude"
# %in% never returns NA, so a missing answer counts as "not mentioned"
# instead of producing an NA row that nrow() would count as a match.
mentions_problem <- df$P901 %in% problem |
  df$P902 %in% problem |
  df$P903 %in% problem
# calculates the answers by recuerdo
for (i in seq_len(nrow(recuerdo))) {
  print(i)
  in_group <- df$RECUERDO %in% as.character(recuerdo$nombre[i])
  recuerdo$recuerdo[i] <- round(
    sum(in_group & mentions_problem) / sum(in_group) * 100,
    digits = 2
  )
}
# Results for each "would vote for in general elections" answer (P20R) ----------------
# One row per level of df$P20R, with the percentage of that group's
# respondents who answered "La corrupción y el fraude" in P901, P902 or P903.
votaria <- data.frame(nombre = levels(df$P20R))
# Create the result column up front, as the RECUERDO section does, rather than
# relying on the first indexed assignment to create and recycle it.
votaria$votaria <- 0
problem <- "La corrupción y el fraude"
# %in% never returns NA, so a missing answer counts as "not mentioned"
# instead of producing an NA row that nrow() would count as a match.
mentions_problem <- df$P901 %in% problem |
  df$P902 %in% problem |
  df$P903 %in% problem
# calculates the answers by votaria
for (i in seq_len(nrow(votaria))) {
  print(i)
  in_group <- df$P20R %in% as.character(votaria$nombre[i])
  votaria$votaria[i] <- round(
    sum(in_group & mentions_problem) / sum(in_group) * 100,
    digits = 2
  )
}
# Join results -----------
# Combine the RECUERDO and P20R tables on their shared "nombre" column.
# merge() keeps only the names present in both tables (its default, all = FALSE).
rec_vot <- merge(x = recuerdo, y = votaria, by = "nombre")
# network analysis ¿Meleiro? -------------------------------------------------------
library(foreign)
library(tidyverse)
library(networkD3)
# install.packages("networkD3")
# Folder that holds the CIS file. The path is passed to read.spss() directly
# so the script does not change the session's working directory.
cis_dir <- "/Users/meleiro/Dropbox/MASTER/DATOS/R/CIS"
cis <- read.spss(
  file.path(cis_dir, "3210.sav"),
  use.value.labels = TRUE,
  to.data.frame = TRUE
)
# Number of respondents for every (P26AR, VOTOSIMG) pair. summarise() drops the
# last grouping level, so pct is the share within each P26AR value.
p <- cis %>%
  group_by(P26AR, VOTOSIMG) %>%
  summarise(N = n()) %>%
  mutate(pct = N / sum(N))
p <- p[!is.na(p$P26AR), ]
# Nodes: every label found in either column. The two vectors are combined with
# c() before unique(); unique(a, b) would treat `b` as the `incomparables`
# argument and return only the labels of `a`.
node_names <- unique(c(as.character(p$P26AR), as.character(p$VOTOSIMG)))
nodes <- data.frame(
  name = node_names[!is.na(node_names)],
  stringsAsFactors = FALSE
)
links <- data.frame(p)
names(links) <- c("source", "target", "value", "value_pct")
# sankeyNetwork() expects source/target to be zero-based row indices into
# `nodes`. match() derives them for every label, so each index always points at
# the matching row of `nodes`.
# NOTE(review): a label present in both columns maps to a single node, as in
# the previous hand-written mapping; confirm this is the intended layout.
links$source <- match(as.character(links$source), nodes$name) - 1L
links$target <- match(as.character(links$target), nodes$name) - 1L
# select available data (drop links whose source or target is missing)
links_clean <- links[!is.na(links$target) & !is.na(links$source), ]
# plot
sankeyNetwork(Links = links_clean, Nodes = nodes,
              Source = "source", Target = "target",
              Value = "value", NodeID = "name",
              fontSize = 12, nodeWidth = 30)
# Example Sankey diagram built from the energy JSON data in the networkD3
# repository.
URL <- paste0('https://cdn.rawgit.com/christophergandrud/networkD3/',
              'master/JSONdata/energy.json')
energy <- jsonlite::fromJSON(URL)
# The link and node tables must exist before sankeyNetwork() is called.
linksx <- energy$links
nodesx <- energy$nodes
# Plot
sankeyNetwork(Links = linksx, Nodes = nodesx, Source = 'source',
              Target = 'target', Value = 'value', NodeID = 'name',
              units = 'TWh', fontSize = 12, nodeWidth = 30)
# Toy edge list for a simple network: src[k] is linked to target[k].
src <- rep(c("A", "B", "C", "D"), times = c(4, 2, 2, 1))
target <- c(
  "B", "C", "D", "J",
  "E", "F",
  "G", "H",
  "I"
)
networkData <- data.frame(src = src, target = target)
# Draw the network
simpleNetwork(networkData)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment