3.14 Miscellaneous Checks (S)
The following are some highly specific Special Checks that fall outside the common categories. While these exact checks are unlikely to be reusable as written, their structure can serve as a reference when creating your own custom Special Checks.
#Miscellaneous Check Examples (with example hints)
#Example 1 (From HW 6, Q3): Checking whether the column `mocodes` has been correctly split into multiple columns (named `mocodes_1`, `mocodes_2`, etc.) that each contain at most one 4-digit M.O. code (e.g., the entry "1049 1304 1000" should be split into "1049" in `mocodes_1`, "1304" in `mocodes_2`, and "1000" in `mocodes_3`). The `separate_wider_delim()` function is the most direct way to achieve this.
#Column Check
else if (ncol(variable_name) != ncol(variable_name_test)) {
  #The answer and the reference differ in their number of columns.
  #Start with the generic hint so that this branch always leaves feedback,
  #then replace it with the more specific `delim` hint when it applies.
  test.results[2, 4] <- "`variable_name` has the incorrect number of columns. Hint: Look at the function `separate_wider_delim()`."
  if (any(str_detect(colnames(variable_name), "mocodes"))) {
    #Checks for a mistaken `delim` argument: a cell with five or more digits
    #(with anything in between) cannot be a single 4-digit M.O. code, so it
    #still holds more than one code.
    #Counts the offending cells per `mocodes` column; NA cells are ignored.
    #Note: the native pipe `|>` has no `.` placeholder, so the piped value is
    #passed to `colSums()` implicitly as its first argument.
    mocodes_col <- variable_name |>
      select(contains("mocodes")) |>
      mutate(across(everything(), ~ str_detect(.x, "\\d.*\\d.*\\d.*\\d.*\\d"))) |>
      colSums(na.rm = TRUE)
    if (sum(mocodes_col, na.rm = TRUE) > 0) {
      test.results[2, 4] <- "Each `mocodes` column should have at most one M.O. code. Hint: Look at the argument `delim` in `separate_wider_delim()`."
    }
  }
}
#Column Name Check
else if (!all(colnames(variable_name_test) %in% colnames(variable_name))) {
  #At least one column name of the reference is absent from the answer.
  answer_cols <- colnames(variable_name)
  expected_cols <- colnames(variable_name_test)
  mocodes_cols <- answer_cols[str_detect(answer_cols, "mocodes")]
  if (!all(str_detect(mocodes_cols, "_"))) {
    #Checks for a mistaken `names_sep` argument: some `mocodes` column in the
    #answer has no underscore in its name.
    test.results[2, 4] <- "Every `mocodes` column should have an `_` between `mocodes` and its corresponding number. Hint: Look at the argument `names_sep` in `separate_wider_delim()`."
  } else {
    #Otherwise list every reference column that the answer is missing
    #(non-empty here because of the branch condition).
    missing_cols <- expected_cols[!(expected_cols %in% answer_cols)]
    test.results[2, 4] <- paste0(
      "The following column(s) should be in `variable_name`, but they were not found in your answer: ",
      paste0(missing_cols, collapse = " "),
      ". Hint: Look at the function `separate_wider_delim()`."
    )
  }
}
#Example 2 (From H14, Q4a): Dynamically checking if a column contains the correct values (there are 4 different values/rows):
else if (!isTRUE(all.equal(variable_name$column_name |> sort(na.last = TRUE), variable_name_test$column_name |> sort(na.last = TRUE)))) {
  #The sorted values of `column_name` differ between answer and reference.
  check_names <- variable_name$column_name
  correct_names <- variable_name_test$column_name
  #A value is "missing" when it occurs in the reference column but nowhere in
  #the answer column. The mask must therefore be computed over the reference
  #values (not over the answer values), so it lines up with the vector that
  #it indexes regardless of row order or length.
  missing_names <- unique(correct_names[!(correct_names %in% check_names)])
  correct_check_names <- paste0(missing_names, collapse = " ")
  test.results[2, 4] <- paste0("The following values are missing in `column_name`: ", correct_check_names, ". Hint: (a) Look at the function `case_when()`. (b) Make sure you converted the values correctly according to the prompt.")
}
#Example 3 (From Final, P1 Q1b): Dynamically checking if the columns of a tibble were correctly cleaned by removing the punctuation symbols `‡`, `–`, `†`.
else if (any(vapply(c("‡", "–", "†"), \(symbol) any(variable_name |> summarise(across(everything(), \(column) symbol %in% column))), logical(1)))) {
  #At least one of the symbols appears as a cell value somewhere in
  #`variable_name`. Recompute the per-symbol flags to report which ones.
  symbols <- c("‡", "–", "†")
  symbol_found <- vapply(
    symbols,
    \(symbol) {
      #One TRUE/FALSE per column: is the symbol one of the column's values?
      per_column <- variable_name |>
        summarise(across(everything(), \(column) symbol %in% column))
      any(per_column)
    },
    logical(1)
  )
  found_symbols <- paste0(symbols[symbol_found], collapse = " ")
  test.results[2, 4] <- paste0("The following special characters were found in `variable_name`: ", found_symbols, ". Please clean the data before proceeding.")
}