Title: | Tidy Handling and Navigation of the Student-Life Dataset |
---|---|
Description: | Download, navigate and analyse the Student-Life dataset. The Student-Life dataset contains passive and automatic sensing data from the phones of a class of 48 Dartmouth college students. It was collected over a 10 week term. Additionally, the dataset contains ecological momentary assessment results along with pre-study and post-study mental health surveys. The intended use is to assess mental health, academic performance and behavioral trends. The raw dataset and additional information are available at <https://studentlife.cs.dartmouth.edu/>. |
Authors: | Daniel Fryer [aut, cre] |
Maintainer: | Daniel Fryer <[email protected]> |
License: | GPL-3 |
Version: | 1.1.0 |
Built: | 2025-02-23 03:16:33 UTC |
Source: | https://github.com/frycast/studentlife |
Classify observations from an SL_tibble
into block labels using available
date-time information. See more information
about "blocks" under the details section.
Daylight saving time (which started on 31st March 2013) is ignored.
add_block_labels( tab, type = c("hour_in_day", "epoch", "day", "week", "weekday", "month", "date"), interval = "start", warning = TRUE, start_date = getOption("SL_start"), epoch_levels = getOption("SL_epoch_levels"), epoch_ubs = getOption("SL_epoch_ubs"), unsafe = F )
add_block_labels( tab, type = c("hour_in_day", "epoch", "day", "week", "weekday", "month", "date"), interval = "start", warning = TRUE, start_date = getOption("SL_start"), epoch_levels = getOption("SL_epoch_levels"), epoch_ubs = getOption("SL_epoch_ubs"), unsafe = F )
tab |
An |
type |
A character vector of block label types to include. Can be one or more of "hour_in_day", "epoch", "day", "week", "weekday", "month" and "date". Any block label types that are not inferrable from the available date-time data are ignored. |
interval |
A character string that decides how block
membership is decided when |
warning |
Logical. If |
start_date |
Date. The date that the StudentLife study started. |
epoch_levels |
A character vector of epoch levels. |
epoch_ubs |
An integer vector that defines the hour that is the upper boundary of each epoch. |
unsafe |
A logical. Default is |
Block label types can be one or more of "epoch"
(giving labels morning, evening, afternoon and night),
"day" (giving number of days since the start_date
of the
StudentLife study),
"week" (giving integer number of weeks since the first week of the
StudentLife study, rounded down),
"weekday" (giving the day of the week),
"month" (giving integer number of months since the start of the
StudentLife study, rounded down) and "date".
d <- tempdir() download_studentlife(location = d, url = "testdata") tab <- load_SL_tibble( loc = d, schema = "sensing", table = "activity", csv_nrows = 10) b_tab <- add_block_labels(tab) b_tab
d <- tempdir() download_studentlife(location = d, url = "testdata") tab <- load_SL_tibble( loc = d, schema = "sensing", table = "activity", csv_nrows = 10) b_tab <- add_block_labels(tab) b_tab
Download the entire StudentLife dataset or a smaller sample dataset for testing.
download_studentlife( url = "dartmouth", location = ".", unzip = TRUE, untar = TRUE )
download_studentlife( url = "dartmouth", location = ".", unzip = TRUE, untar = TRUE )
url |
A character string. Either
"rdata" for the URL to the (more efficient)
RData format version hosted on Zenodo, or
"dartmouth" for the (original) Dartmouth URL, or
"testdata" for a small sample dataset. Otherwise
a full URL of your choice can be specified leading to
the StudentLife dataset as a |
location |
The destination path. If the path does
not exist it is created with |
unzip |
Logical. If |
untar |
Logical. If |
If url = "rdata"
then data will be downloaded
from <https://zenodo.org/record/3529253>
If url = "dartmouth"
then data will be downloaded
from <https://studentlife.cs.dartmouth.edu/dataset/dataset.tar.bz2>
If url = "testdata"
then data will be downloaded
from the test data at the studentlife GitHub repository
<https://github.com/frycast/studentlife>
d <- tempdir() download_studentlife(location = d, url = "testdata") ## Not run: ## With menu load_SL_tibble(location = d) ## End(Not run) ## Without menu SL_tables load_SL_tibble(schema = "EMA", table = "PAM", location = d)
d <- tempdir() download_studentlife(location = d, url = "testdata") ## Not run: ## With menu load_SL_tibble(location = d) ## End(Not run) ## Without menu SL_tables load_SL_tibble(schema = "EMA", table = "PAM", location = d)
Get the EMA questions from a StudentLife tibble whose schema is "EMA".
get_EMA_questions(x)
get_EMA_questions(x)
x |
A StudentLife tibble whose schema is
EMA, as output by the function
|
The EMA_questions attribute of x
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns "PAM" get_EMA_questions(tab_PAM)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns "PAM" get_EMA_questions(tab_PAM)
Retrieve the schema name from a StudentLife tibble
get_schema(x)
get_schema(x)
x |
An object of class StudentLife tibble
( |
A character string indicating the schema name
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns "EMA" get_schema(tab_PAM)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns "EMA" get_schema(tab_PAM)
Retrieve the table name from a StudentLife tibble
get_table(x)
get_table(x)
x |
An object of class StudentLife tibble
( |
A character string indicating the table name
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns "PAM" get_table(tab_PAM)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns "PAM" get_table(tab_PAM)
Confirm that an object is a dateless StudentLife tibble
is_dateless_SL_tibble(x)
is_dateless_SL_tibble(x)
x |
Any object |
Logical
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_S <- load_SL_tibble( schema = "survey", table = "BigFive", location = d) # Returns TRUE is_dateless_SL_tibble(tab_S)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_S <- load_SL_tibble( schema = "survey", table = "BigFive", location = d) # Returns TRUE is_dateless_SL_tibble(tab_S)
Confirm that an object is a date-only StudentLife tibble
is_dateonly_SL_tibble(x)
is_dateonly_SL_tibble(x)
x |
Any object |
Logical
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_DL <- load_SL_tibble( schema = "education", table = "deadlines", location = d) # Returns TRUE is_dateonly_SL_tibble(tab_DL)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_DL <- load_SL_tibble( schema = "education", table = "deadlines", location = d) # Returns TRUE is_dateonly_SL_tibble(tab_DL)
Confirm that an object is an interval StudentLife tibble
is_interval_SL_tibble(x)
is_interval_SL_tibble(x)
x |
Any object |
Logical
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_con <- load_SL_tibble( schema = "sensing", table = "conversation", location = d, csv_nrow = 10) # Returns TRUE is_interval_SL_tibble(tab_con)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_con <- load_SL_tibble( schema = "sensing", table = "conversation", location = d, csv_nrow = 10) # Returns TRUE is_interval_SL_tibble(tab_con)
Confirm that an object is a regularised StudentLife tibble
is_reg_SL_tibble(x)
is_reg_SL_tibble(x)
x |
Any object |
Logical
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) reg_PAM <- regularise_time( tab_PAM, blocks = c("day", "epoch"), m = mean(picture_idx, na.rm = TRUE)) # Returns TRUE is_reg_SL_tibble(reg_PAM)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) reg_PAM <- regularise_time( tab_PAM, blocks = c("day", "epoch"), m = mean(picture_idx, na.rm = TRUE)) # Returns TRUE is_reg_SL_tibble(reg_PAM)
Confirm that an object is a StudentLife tibble
is_SL_tibble(x)
is_SL_tibble(x)
x |
Any object |
Logical
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns TRUE is_SL_tibble(tab_PAM)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns TRUE is_SL_tibble(tab_PAM)
Confirm that an object is a timestamped StudentLife tibble
is_timestamp_SL_tibble(x)
is_timestamp_SL_tibble(x)
x |
Any object |
Logical
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns TRUE is_timestamp_SL_tibble(tab_PAM)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) # Returns TRUE is_timestamp_SL_tibble(tab_PAM)
Import a chosen StudentLife table as
a tibble. Leave schema
and table
unspecified to choose interactively via a
menu. This function is only intended for use
with the studentlife dataset in its original
format, with the original directory structure.
See the examples below for the recommended alternative approach
to loading tables when the RData format is used.
load_SL_tibble( schema, table, location = ".", time_options = c("interval", "timestamp", "dateonly", "dateless"), vars, csv_nrows, datafolder = "dataset", uid_range = getOption("SL_uids") )
load_SL_tibble( schema, table, location = ".", time_options = c("interval", "timestamp", "dateonly", "dateless"), vars, csv_nrows, datafolder = "dataset", uid_range = getOption("SL_uids") )
schema |
A character string. The menu 1 choice. Leave blank to choose interactively. |
table |
A character string. The menu 2 choice. Leave blank to choose interactively. |
location |
The path to a copy of the StudentLife dataset. |
time_options |
A character vector specifying which table types (out of "interval", "timestamp", "dateonly" and "dateless") to include in the menu. This allows you to restrict menu options according to the amount of date-time information present in the data. The default includes all data. Note this parameter only has an effect when used with the interactive menu. |
vars |
Character vector of variable
names to import for all students. Leave
blank and this will be chosen interactively
if necessary. If |
csv_nrows |
An integer specifying the number of rows to read per student if the target is a csv. The largest files in StudentLife are csv files, so this allows code testing with less overhead. |
datafolder |
Specifies the subfolder of |
uid_range |
An integer vector. The range of uids in the StudentLife study. |
An object of class SL_tibble
is returned. These inherit
properties from class tibble
and
class data.frame
.
Depending on the date-time information available, the object
may also be a timestamp_SL_tibble
,
interval_SL_tibble
or
dateonly_SL_tibble
(which are all
subclasses of SL_tibble
).
## Example that uses RData format to efficiently ## download and load tables, as an alternative ## to using this function. ## Not run: d <- tempdir() download_studentlife(location = d, url = "rdata") # Choose the schema and table from the list SL_tables: SL_tables # Example with activity table from sensing schema schema <- "sensing" table <- "activity" act <- readRDS(paste0(d, "/dataset_rds/", schema, "/", table, ".Rds")) act ## End(Not run) ## Example that uses the studentlife dataset in ## its original format. # Use url = "dartmouth" for the full original dataset d <- tempdir() download_studentlife(location = d, url = "testdata") ## Not run: ## With menu load_SL_tibble(location = d) ## End(Not run) ## Without menu SL_tables PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) ## Load less data for testing with less overhead act <- load_SL_tibble(schema = "sensing", table = "activity", location = d, csv_nrows = 10) ## Not run: ## Browse all tables with timestamps (non-interval) load_SL_tibble(location = d, time_options = "timestamp") ## Browse all tables with intervals load_SL_tibble(location = d, time_options = "interval") ## Browse all dateless tables load_SL_tibble(location = d, time_options = "dateless") ## End(Not run)
## Example that uses RData format to efficiently ## download and load tables, as an alternative ## to using this function. ## Not run: d <- tempdir() download_studentlife(location = d, url = "rdata") # Choose the schema and table from the list SL_tables: SL_tables # Example with activity table from sensing schema schema <- "sensing" table <- "activity" act <- readRDS(paste0(d, "/dataset_rds/", schema, "/", table, ".Rds")) act ## End(Not run) ## Example that uses the studentlife dataset in ## its original format. # Use url = "dartmouth" for the full original dataset d <- tempdir() download_studentlife(location = d, url = "testdata") ## Not run: ## With menu load_SL_tibble(location = d) ## End(Not run) ## Without menu SL_tables PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) ## Load less data for testing with less overhead act <- load_SL_tibble(schema = "sensing", table = "activity", location = d, csv_nrows = 10) ## Not run: ## Browse all tables with timestamps (non-interval) load_SL_tibble(location = d, time_options = "timestamp") ## Browse all tables with intervals load_SL_tibble(location = d, time_options = "interval") ## Browse all dateless tables load_SL_tibble(location = d, time_options = "dateless") ## End(Not run)
Categorise Photographic Affect Meter (PAM) scores into 4 categories by either PAM Quadrant, Valence or Arousal (or multiple of these).
PAM_categorise( tab, pam_name = "picture_idx", types = c("quadrant", "valence", "arousal") )
PAM_categorise( tab, pam_name = "picture_idx", types = c("quadrant", "valence", "arousal") )
tab |
A data.frame (or tibble) with a column representing Photographic Affect Meter (PAM) score. |
pam_name |
Character. The name of the column representing PAM. |
types |
Character vector containing the categories, one or more of "quadrant", "valence" and "arousal" into which to code PAM scores. |
The 4 Quadrant categories are as follows: Quadrant 1: negative valence, low arousal. Quadrant 2: negative valence, high arousal. Quadrant 3: positive valence, low arousal. Quadrant 4: positive valence, high arousal.
Valence and arousal are traditionally scores from -2 to 2, measuring displeasure to pleasure, and state of activation respectively. However, here we map those scores to positive numbers so (-2,-1,1,2) -> (1,2,3,4).
The data.frame (or tibble) tab
with extra columns
pam_q
, pam_v
, and pam_a
for
quadrant, valence and arousal respectively.
Pollak, J. P., Adams, P., & Gay, G. (2011, May). PAM: a photographic affect meter for frequent, in situ measurement of affect. In Proceedings of the SIGCHI conference on Human factors in computing systems (pp. 725-734). ACM.
d <- tempdir() download_studentlife(location = d, url = "testdata") tab <- load_SL_tibble( loc = d, schema = "EMA", table = "PAM", csv_nrows = 10) PAM_categorise(tab)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab <- load_SL_tibble( loc = d, schema = "EMA", table = "PAM", csv_nrows = 10) PAM_categorise(tab)
Transform an SL_tibble
(as produced by
load_SL_tibble
)
in such a way that the observations are aggregated in
equal length intervals called 'blocks' (for more
information on blocks see
add_block_labels
).
regularise_time( tab, ..., blocks = c("epoch", "day"), add_NAs = TRUE, unsafe = F, study_duration = getOption("SL_duration"), start_date = getOption("SL_start"), epoch_levels = getOption("SL_epoch_levels"), epoch_ubs = getOption("SL_epoch_ubs"), uid_range = getOption("SL_uids"), date_range = seq(from = start_date, by = 1, length.out = study_duration) )
regularise_time( tab, ..., blocks = c("epoch", "day"), add_NAs = TRUE, unsafe = F, study_duration = getOption("SL_duration"), start_date = getOption("SL_start"), epoch_levels = getOption("SL_epoch_levels"), epoch_ubs = getOption("SL_epoch_ubs"), uid_range = getOption("SL_uids"), date_range = seq(from = start_date, by = 1, length.out = study_duration) )
tab |
An |
... |
Arguments passed to |
blocks |
A character vector naming one or more of the
block options "hour_in_day", "epoch", "day", "week", "weekday",
"month" or "date".
If not present as column names in
|
add_NAs |
A logical. If TRUE then NAs will be introduced to fill missing blocks. |
unsafe |
A logical. Default is |
study_duration |
Integer. The duration of the StudentLife
study in days. This parameter does nothing if |
start_date |
Date. The date that the StudentLife study started. |
epoch_levels |
A character vector of epoch labels. |
epoch_ubs |
An integer vector that defines the hour that is the upper boundary of each epoch. |
uid_range |
An integer vector. The range of uids in the StudentLife study. |
date_range |
A vector of dates to be
used if |
d <- tempdir() download_studentlife(location = d, url = "testdata") tab <- load_SL_tibble( loc = d, schema = "sensing", table = "activity", csv_nrows = 10) r_tab <- regularise_time( tab, blocks = c("day","weekday"), act_inf = max(activity_inference), add_NAs = FALSE) r_tab
d <- tempdir() download_studentlife(location = d, url = "testdata") tab <- load_SL_tibble( loc = d, schema = "sensing", table = "activity", csv_nrows = 10) r_tab <- regularise_time( tab, blocks = c("day","weekday"), act_inf = max(activity_inference), add_NAs = FALSE) r_tab
This function produces a histogram that visualizes the frequencies of observations within hourly blocks, or blocks of multiple hours.
response_hour_hist( tab, break_hours = 10, xlab = "Hours into study", main = paste0("Distribution of ", attr(tab, "table"), " response times"), ... )
response_hour_hist( tab, break_hours = 10, xlab = "Hours into study", main = paste0("Distribution of ", attr(tab, "table"), " response times"), ... )
tab |
A StudentLife tibble with time information,
(i.e., an object of class |
break_hours |
Specify the width in hours of each histogram bin. |
xlab |
Argument passed to |
main |
Argument passed to |
... |
Arguments passed to |
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) response_hour_hist(tab_PAM)
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) response_hour_hist(tab_PAM)
This command returns a 5-element list. Each of the five elements is named after one of the schemas of the StudentLife dataset. Each element is a vector of strings, where each string is the name of a table within the respective schema.
SL_tables
SL_tables
An object of class list
of length 5.
https://studentlife.cs.dartmouth.edu/
Download, navigate and analyse the Student-Life dataset. The Student-Life dataset contains passive and automatic sensing data from the phones of a class of 48 de-identified Dartmouth college students. It was collected over a 10 week term. Additionally, the dataset contains Ecological Momentary Assessment results along with pre- and post-study mental health surveys, such as the PHQ-9. The intended use is to assess mental health, academic performance and behavioral trends. The raw dataset and additional information are available at <https://studentlife.cs.dartmouth.edu/>.
Details on the Student-Life dataset as well as the dataset itself are available at https://studentlife.cs.dartmouth.edu/.
Current updates are available through URL: https://github.com/frycast/studentlife
https://github.com/frycast/studentlife/issues
Daniel Fryer [email protected]
Produce a visualisation of the number of missing values among each student in a regularised SL_tbl.
vis_NAs( tab, response, main = paste0("Missing values by student (", attr(tab, "table"), ") (blocks: ", paste0(attr(tab, "blocks"), collapse = ", "), ")"), show_perc_col = FALSE, ... )
vis_NAs( tab, response, main = paste0("Missing values by student (", attr(tab, "table"), ") (blocks: ", paste0(attr(tab, "blocks"), collapse = ", "), ")"), show_perc_col = FALSE, ... )
tab |
A regularised StudentLife tibble (i.e., an object of class
|
response |
A character string naming one of the columns
in |
main |
The plot title, passed to |
show_perc_col |
Logical passed to |
... |
Arguments passed to |
A ggplot object.
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) reg_PAM <- regularise_time( tab_PAM, blocks = c("day", "epoch"), m = mean(picture_idx, na.rm = TRUE)) vis_NAs(reg_PAM, response = "m")
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) reg_PAM <- regularise_time( tab_PAM, blocks = c("day", "epoch"), m = mean(picture_idx, na.rm = TRUE)) vis_NAs(reg_PAM, response = "m")
Produce an ordered bar plot of the total number of responses for each student in a regularised SL_tbl.
vis_response_counts( tab, response, main = paste0("Total responses by student (", attr(tab, "table"), ")"), xlab = "Student UID", ylab = "Response count", ... )
vis_response_counts( tab, response, main = paste0("Total responses by student (", attr(tab, "table"), ")"), xlab = "Student UID", ylab = "Response count", ... )
tab |
A regularised StudentLife tibble (i.e., an object of class
|
response |
A character string naming one of the columns
in |
main |
The plot title, passed to |
xlab |
The x axis label, passed to |
ylab |
The y axis label, passed to |
... |
Arguments passed to |
A named numeric vector of response counts, sorted in descending order.
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) reg_PAM <- regularise_time( tab_PAM, blocks = c("day", "epoch"), m = mean(picture_idx, na.rm = TRUE)) vis_response_counts(reg_PAM, response = "m")
d <- tempdir() download_studentlife(location = d, url = "testdata") tab_PAM <- load_SL_tibble(schema = "EMA", table = "PAM", location = d) reg_PAM <- regularise_time( tab_PAM, blocks = c("day", "epoch"), m = mean(picture_idx, na.rm = TRUE)) vis_response_counts(reg_PAM, response = "m")