Ch. 7 Data Wrangling - Special considerations with survey data

7.1 Overview

In this example, we will work with the qualtRics package to read in Qualtrics exports, and apply minimal pre-processing.

7.2 Load data

Read in survey and split StartDate into two columns for easy filtering (we don’t need pilot data)

library(qualtRics)
library(tidyverse)

# set filename of survey file
fn = "data/demo_qualtrics_export.csv"

# filter out pilot data
PILOT_START_DATE = as.Date("2022-06-21")
qualtrics_df <- read_survey(fn) %>%
  separate(StartDate, into=c("dt_date", "dt_time"), sep=" ", remove=F) %>%
  mutate(is_pilot_data = dt_date < PILOT_START_DATE) %>%
  filter(!is_pilot_data) %>%
  mutate(participant_id = as.character(ResponseId))

## 
## ── Column specification ─────────────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   StartDate = col_datetime(format = ""),
##   EndDate = col_datetime(format = ""),
##   Progress = col_double(),
##   `Duration (in seconds)` = col_double(),
##   Finished = col_logical(),
##   RecordedDate = col_datetime(format = ""),
##   RecipientLastName = col_logical(),
##   RecipientFirstName = col_logical(),
##   RecipientEmail = col_logical(),
##   ExternalReference = col_logical(),
##   LocationLatitude = col_double(),
##   LocationLongitude = col_double(),
##   demo_yob = col_double(),
##   `demo_timer_First Click` = col_double(),
##   `demo_timer_Last Click` = col_double(),
##   `demo_timer_Page Submit` = col_double(),
##   `demo_timer_Click Count` = col_double(),
##   `tipi_timer_First Click` = col_double(),
##   `tipi_timer_Last Click` = col_double(),
##   `tipi_timer_Page Submit` = col_double()
##   # ... with 1 more columns
## )
## ℹ Use `spec()` for the full column specifications.

7.3 Split demographics into seperate dataset

df_demo = qualtrics_df %>%
select(ResponseId, contains("demo"))

knitr::kable(df_demo)# %>% kableExtra::kable_paper(.)

ResponseId	demo_yob	demo_state	demo_educ	demo_timer_First Click	demo_timer_Last Click	demo_timer_Page Submit	demo_timer_Click Count
R_24ratHMHlPXg31b	1991	FL	DOCTORAL DEGREE (EXAMPLE: PhD, EdD)	1.066	6.14	6.898	3

7.4 Split out survey metadata

df_metadata = qualtrics_df %>%
select(ResponseId, contains("Date"), contains("duration"), contains("progress"))

knitr::kable(df_metadata)# %>% kableExtra::kable_paper(.)

ResponseId	StartDate	dt_date	EndDate	RecordedDate	Duration (in seconds)	Progress
R_24ratHMHlPXg31b	2022-06-22 10:59:58	2022-06-22	2022-06-22 11:00:15	2022-06-22 11:00:15	17	100

7.5 Split timer data into seperate dataset

df_timings = qualtrics_df %>%
select(ResponseId, contains("timer"))

knitr::kable(df_timings)# %>% kableExtra::kable_paper(.)

ResponseId	demo_timer_First Click	demo_timer_Last Click	demo_timer_Page Submit	demo_timer_Click Count	tipi_timer_First Click	tipi_timer_Last Click	tipi_timer_Page Submit	tipi_timer_Click Count
R_24ratHMHlPXg31b	1.066	6.14	6.898	3	0.731	5.849	6.542	10

7.6 Score TIPI data

7.6.1 Create reverse-scoring function

reverse_code <- function(df, ques_label) {
  out_df = df %>%
    filter(tipi_question == ques_label) %>%
    mutate(response_n = recode(response, 
                               `Disagree strongly` = 7,
                               `Disagree moderately` = 6, 
                               `Disagree a little` = 5,
                               `Neither agree nor disagree` = 4,
                               `Agree a little` = 3, 
                               `Agree moderately` = 2,
                               `Agree strongly` = 1,
                               .default=-999))
  return(out_df)
}

7.6.2 Seperate TIPI data from main `qualtrics_df`

# create TIPI dataset -----
tipi_df = qualtrics_df %>%
  select(ResponseId, contains("TIPI"), -contains("_Timer"))

7.6.3 Transform TIPI from wide to long

tipi_long = tipi_df %>%
  pivot_longer(cols=TIPI_1:TIPI_10, names_to="tipi_question", values_to="response")

7.6.4 Recode values

tipi_long_r = tipi_long %>%
  mutate(response_n = recode(response, 
                             `Disagree strongly` = 1,
                             `Disagree moderately` = 2, 
                             `Disagree a little` = 3,
                             `Neither agree nor disagree` = 4,
                             `Agree a little` = 5, 
                             `Agree moderately` = 6,
                             `Agree strongly` = 7,
                             .default=-999))

7.6.5 Apply Reverse Coding

NOTE: Extraversion: 1, 6R; Agreeableness: 2R, 7; Conscientiousness; 3, 8R; Emotional Stability: 4R, 9; Openness to Experiences: 5, 10R

tipi_long_recode2 = reverse_code(tipi_long_r, "TIPI_2")
tipi_long_recode4 = reverse_code(tipi_long_r, "TIPI_4")
tipi_long_recode6 = reverse_code(tipi_long_r, "TIPI_6")
tipi_long_recode8 = reverse_code(tipi_long_r, "TIPI_8")
tipi_long_recode10 = reverse_code(tipi_long_r, "TIPI_10")

tipi_long_nonrecoded = tipi_long_r %>%
  filter(tipi_question %in% c("TIPI_1","TIPI_3","TIPI_5","TIPI_7","TIPI_9"))

tipi_long_recode_all = bind_rows(tipi_long_nonrecoded,
                                 tipi_long_recode2,
                                 tipi_long_recode4,
                                 tipi_long_recode6,
                                 tipi_long_recode8,
                                 tipi_long_recode10)

7.6.6 Finally transform to wide format

tipi_wide_recoded = tipi_long_recode_all %>%
  select(-response) %>%
  pivot_wider(names_from="tipi_question", values_from="response_n") %>%
  rowwise() %>%
  mutate(tipi_extraversion = mean(c(TIPI_1, TIPI_6), na.rm=T)) %>%
  mutate(tipi_agreeableness = mean(c(TIPI_2, TIPI_7), na.rm=T),
         tipi_conscientiousness = mean(c(TIPI_3, TIPI_8), na.rm=T),
         tipi_emostability = mean(c(TIPI_4, TIPI_9), na.rm=T),
         tipi_openexperience = mean(c(TIPI_5, TIPI_10), na.rm=T))

knitr::kable(tipi_wide_recoded %>% select(contains("TIPI_", ignore.case = F)))# %>% kableExtra::kable_paper(.)

TIPI_1	TIPI_3	TIPI_5	TIPI_7	TIPI_9	TIPI_2	TIPI_4	TIPI_6	TIPI_8	TIPI_10
1	7	1	7	7	1	7	1	7	7

knitr::kable(tipi_wide_recoded %>% select(contains("tipi_", ignore.case = F)))# %>% kableExtra::kable_paper(.)

tipi_extraversion	tipi_agreeableness	tipi_conscientiousness	tipi_emostability	tipi_openexperience
1	4	7	7	4

6 Working with Dates

8 Data Wrangling and Visualization of Big Data