---
title: Midlevel analysis ($\chi^2$)
author: "Marcin"
editor_options:
  chunk_output_type: console
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, tidy = FALSE, size = 'footnotesize',
                      fig.align = 'center', out.width = '0.7\\textwidth',
                      cache = FALSE)
```

```{r echo=FALSE, message=FALSE}
library('tidyverse')
theme_set(theme_bw())
```

```{r}
data <- read.csv('data_matrix_items.csv')
# Strip the first '_1' from each category label and store the labels as a
# factor.
data$dim <- as.factor(str_replace(data$dim, '_1', ''))
```

Number of stimuli per subject and category:

```{r}
# TRUE only if every (category, subject) cell holds exactly 12 rows.
all(xtabs(~ dim + id, data) == 12)
```

Number of subjects:

```{r}
length(unique(data$id))
```

Number of unique stimuli:

```{r}
length(unique(data$item))
```

The above should be 48 - is there a duplicate?

```{r}
# Count rows per (subject, item), keep the pairs that occur more than once,
# then tally how many subjects are affected per item and repetition count.
data %>%
  group_by(id, item) %>%
  summarize(reps_per_subj = n(), .groups = 'drop') %>%
  filter(reps_per_subj > 1) %>%
  count(item, reps_per_subj)
```

Remove the second occurrence of the duplicate stimulus:

```{r}
# Within each (subject, item) pair keep only the row with the smallest X.
# ungroup() so that the grouping does not leak into the rest of the document.
data <- data %>%
  group_by(id, item) %>%
  arrange(X) %>%
  slice(1) %>%
  ungroup()
```

# Overall plot

```{r}
# Reshape every column except the identifiers to long format and plot, per
# category and coded variable, the percentage given by 100 * sum / count.
data %>%
  pivot_longer(!c('id', 'dim', 'item', 'X')) %>%
  group_by(dim, name) %>%
  summarize(perc_ones = 100 * sum(value) / n(), .groups = 'drop') %>%
  ggplot(aes(x = dim, y = perc_ones)) +
  geom_col() +
  facet_wrap(~ name, ncol = 5) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
```

# $\chi^2$ tests

Function to run multiple comparisons and correct p-values:

```{r}
# Compare the first row of a count table against every other row.
#
# df: table (coerced with as.data.frame()) whose first column labels the rows
#     and whose remaining columns hold counts (here: yes / no).
#
# Returns a numeric matrix with one row per comparison (named after the
# non-reference row) and the columns
#   chisq       - test statistic from chisq.test()
#   p_value     - uncorrected p-value
#   p_value_cor - Bonferroni-corrected p-value (multiplied by the number of
#                 comparisons, capped at 1)
# A table with a single row yields a matrix with zero rows.
chisq_multicomp <- function(df) {
  df <- as.data.frame(df)
  n_comp <- max(nrow(df) - 1, 0)
  res <- matrix(
    NA_real_, n_comp, 3,
    dimnames = list(as.character(df[-1, 1]),
                    c('chisq', 'p_value', 'p_value_cor'))
  )
  # seq_len() keeps the loop empty when there is nothing to compare, whereas
  # 2:nrow(df) would count backwards for a one-row table.
  for (i in seq_len(n_comp)) {
    # Two-row table: reference row vs. row i + 1; chisq.test() is called with
    # its default settings.
    test <- chisq.test(df[c(1, i + 1), -1])
    res[i, 1] <- test$statistic
    res[i, 2] <- test$p.value
  }
  # Bonferroni correction; pmin() caps at 1 and leaves NA p-values as NA.
  res[, 3] <- pmin(1, res[, 2] * n_comp)
  res
}
```

# Common ground

```{r}
# Make 'dim10_low' the first factor level so that it becomes the first row of
# the summary, i.e. the reference row used by chisq_multicomp().
data$dim <- relevel(data$dim, ref = 'dim10_low')
common_ground <- group_by(data, dim) %>%
  summarize(yes = sum(working.towards.common.ground), no = n() - yes)
common_ground
ggplot(common_ground, aes(x = dim, y = yes / (yes + no))) +
  geom_col()
chisq_multicomp(common_ground)
```

# Explanation

```{r}
# Reference category for this section: 'dim10_high'.
data$dim <- relevel(data$dim, ref = 'dim10_high')
explanation <- group_by(data, dim) %>%
  summarize(yes = sum(explanation), no = n() - yes)
explanation
ggplot(explanation, aes(x = dim, y = yes / (yes + no))) +
  geom_col()
chisq_multicomp(explanation)
```

# Self-directed

```{r}
# Reference category for this section: 'dim05_low'.
data$dim <- relevel(data$dim, ref = 'dim05_low')
self_dir <- group_by(data, dim) %>%
  summarize(yes = sum(self.directed), no = n() - yes)
self_dir
ggplot(self_dir, aes(x = dim, y = yes / (yes + no))) +
  geom_col()
chisq_multicomp(self_dir)
```

# Other-directed

```{r}
# Reference category for this section: 'dim05_high'.
data$dim <- relevel(data$dim, ref = 'dim05_high')
other_dir <- group_by(data, dim) %>%
  summarize(yes = sum(other.directed), no = n() - yes)
other_dir
ggplot(other_dir, aes(x = dim, y = yes / (yes + no))) +
  geom_col()
chisq_multicomp(other_dir)
```