Ch. 10 Working with JSON data
As part of my research program, I am developing a chatbot to assess cognitive function. For this example, we will work with a sample output file from the chatbot, in JSON format.
10.2 Pre-process data (tidy it)
# separate data ----
# Pull the message and keystroke records out of `cogbot_d` (created outside
# this excerpt) into their own one-column tibbles.
# The `$` expressions are passed to tibble() verbatim on purpose: the
# resulting columns are named after them (`cogbot_d$messages`,
# `cogbot_d$keystrokes`), and the pre-processing step refers to those names.
message_df <- tibble(cogbot_d$messages)
keystroke_df <- tibble(cogbot_d$keystrokes)
# pre-process data ----
# Split each raw message record into its six fields and parse the timestamp.
message_pp <- message_df %>%
  separate(
    `cogbot_d$messages`,
    sep = ",",
    into = c("ts", "session_uuid", "msg_uuid", "msg_id", "sender", "msg"),
    # With the default `extra = "warn"`, a record that yields more than six
    # comma-delimited pieces (presumably a comma inside the message text)
    # triggers "Expected 6 pieces. Additional pieces discarded" and loses the
    # tail of its last field. "merge" splits at most six times, so the
    # remainder stays in `msg` and no data is dropped.
    extra = "merge"
  ) %>%
  # Remove the leading "[" and the double quotes from the raw timestamp
  # string, then parse it into a date-time.
  mutate(ts_c = anytime::anytime(gsub('"', "", gsub("\\[", "", ts)))) %>%
  # Drop the raw timestamp string and move the parsed one to the front.
  select(-ts) %>%
  select(ts_c, everything())
head(message_pp)
## # A tibble: 6 × 6
## ts_c session_uuid msg_uuid msg_id sender msg
## <dttm> <chr> <chr> <chr> <chr> <chr>
## 1 2022-02-21 01:13:24 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"icmeu… 0 "\"bo… "\"W…
## 2 2022-02-21 01:14:01 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"z9wa6… 1 "\"us… "\"a…
## 3 2022-02-21 01:14:02 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"ei2xk… 2 "\"bo… "\"W…
## 4 2022-02-21 01:14:03 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"74jz1… 3 "\"us… "\"3…
## 5 2022-02-21 01:14:04 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"bad8d… 4 "\"bo… "\"O…
## 6 2022-02-21 01:14:05 "\"5ihdywx2ocnmvpiu8xaqyf\"" "\"m0kb3… 5 "\"us… "\"3…
# Split each raw keystroke record into its 15 comma-delimited fields, add a
# numeric copy of the event timestamp (`e_ts_c`) and a parsed date-time for
# the record timestamp (`ts_c`), then put `ts_c` first and drop the raw `ts`.
keystroke_pp <- keystroke_df %>%
  separate(
    `cogbot_d$keystrokes`,
    into = c(
      "ts", "session_uuid", "msg_uuid", "event_uuid",
      "e_ts", "e_loc", "e_key", "e_type", "e_keycode",
      "e_which", "e_ctrlkey", "e_shiftkey", "e_altkey", "e_metakey",
      "e_repeat"
    ),
    sep = ","
  ) %>%
  mutate(
    e_ts_c = as.numeric(e_ts),
    # Strip the double quotes and the leading "[" (literal matches) before
    # handing the string to the date-time parser.
    ts_c = anytime::anytime(
      gsub("[", "", gsub('"', "", ts, fixed = TRUE), fixed = TRUE)
    )
  ) %>%
  # `ts_c` first, every other column in its existing order, raw `ts` removed.
  select(ts_c, everything(), -ts)

head(keystroke_pp)
## # A tibble: 6 × 16
## ts_c session_uuid msg_uuid event_uuid e_ts e_loc e_key e_type
## <dttm> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2022-02-21 01:13:24 "\"\"" "\"\"" "\"l4j6w2… 6059… null "nul… "\"cl…
## 2 2022-02-21 01:14:00 "\"5ihdywx2o… "\"icme… "\"o1kbj1… 4223… 0 "\"a… "\"ke…
## 3 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"4mb7bw… 4267… 0 "\"p… "\"ke…
## 4 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"e9f346… 4282… 0 "\"p… "\"ke…
## 5 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"06c6qa… 4296… 0 "\"l… "\"ke…
## 6 2022-02-21 01:14:01 "\"5ihdywx2o… "\"icme… "\"2ezwmt… 4307… 0 "\"e… "\"ke…
## # … with 8 more variables: e_keycode <chr>, e_which <chr>, e_ctrlkey <chr>,
## # e_shiftkey <chr>, e_altkey <chr>, e_metakey <chr>, e_repeat <chr>,
## # e_ts_c <dbl>
10.3 Visualize data
10.3.1 Keypresses over time
# Scatter plot of key events: numeric event timestamp (`e_ts_c`) on the
# x-axis, key (`e_key`) on the y-axis, one point per row of `both_pp`.
ggplot(data = both_pp, mapping = aes(x = e_ts_c, y = e_key)) +
  geom_point() +
  theme_bw() +
  # Applied after theme_bw() so the complete theme does not reset the
  # rotated x-axis labels.
  theme(axis.text.x = element_text(angle = 90))
## Warning: Removed 8 rows containing missing values (geom_point).
10.3.2 Keypresses per question asked by the chatbot
# Same key-event scatter plot (`e_ts_c` vs. `e_key`), split into one column
# of panels per value of `msg`.
ggplot(data = both_pp, mapping = aes(x = e_ts_c, y = e_key)) +
  geom_point() +
  theme_bw() +
  # Applied after theme_bw() so the complete theme does not reset the
  # rotated x-axis labels.
  theme(axis.text.x = element_text(angle = 90)) +
  facet_grid(cols = vars(msg))
## Warning: Removed 8 rows containing missing values (geom_point).