3.14 Miscellaneous Checks (S)

The following are some highly specific Special Checks that fall outside the common categories. While they may not be practical, their structure could serve as a reference when creating your own custom Special Checks.

#Miscellaneous Check Examples (with example hints)

#Example 1 (From HW 6, Q3): Checking if the column `mocodes` has correctly split its observations into multiple columns (named `mocodes_1`, `mocodes_2`, etc.) that each contain at most one 4-digit M.O. code (e.g., the entry "1049 1304 1000" should be split into "1049" in `mocodes_1`, "1304" in `mocodes_2`, and "1000" in `mocodes_3`). The `separate_wider_delim()` function would be optimal to achieve this. 
#Column Check 
else if (ncol(variable_name) != ncol(variable_name_test)) {
  #Reached when the answer and the reference differ in their number of columns.
  #If the answer has any column whose name contains "mocodes", look for cells
  #that still hold more than one M.O. code (points to a mistaken `delim` argument).
  if (any(str_detect(colnames(variable_name), "mocodes"))) {
    #A cell containing five or more digits cannot be a single 4-digit M.O. code.
    #`colSums()` is called without a `.` placeholder: the native pipe `|>` already
    #passes the left-hand side as the first argument, and a literal `.` would be
    #treated as an extra (undefined) positional argument.
    mocodes_col <- variable_name |>
      select(contains("mocodes")) |>
      mutate_all(~ str_detect(., "\\d.*\\d.*\\d.*\\d.*\\d")) |>
      colSums(na.rm = TRUE)
    if (sum(as.vector(mocodes_col), na.rm = TRUE) > 0) {
      test.results[2, 4] <- "Each `mocodes` column should have at most one M.O. code. Hint: Look at the argument `delim` in `separate_wider_delim()`."
    }
    #NOTE(review): when no cell holds more than one code, this branch writes no hint
    #even though the column counts differ - confirm that this is intended.
  } else {
    #No `mocodes` column at all: report the column-count mismatch itself.
    test.results[2, 4] <- "`variable_name` has the incorrect number of columns. Hint: Look at the function `separate_wider_delim()`."
  }
}
#Column Name Check  
else if (any(!(colnames(variable_name_test) %in% colnames(variable_name)))) {
  #Reached when at least one reference column name is absent from the answer.
  submitted_cols <- colnames(variable_name)
  expected_cols <- colnames(variable_name_test)
  mocodes_names <- submitted_cols[str_detect(submitted_cols, "mocodes")]

  if (!all(str_detect(mocodes_names, "_"))) {
    #A `mocodes` column name without `_` suggests a mistaken `names_sep` argument.
    test.results[2, 4] <- "Every `mocodes` column should have an `_` between `mocodes` and its corresponding number. Hint: Look at the argument `names_sep` in `separate_wider_delim().`"
  } else {
    #Otherwise list every reference column name that the answer lacks.
    absent_cols <- expected_cols[!(expected_cols %in% submitted_cols)]
    header_and_cols <- paste(
      c("The following column(s) should be in `variable_name`, but they were not found in your answer:",
        absent_cols),
      collapse = "  "
    )
    test.results[2, 4] <- paste0(header_and_cols, ". Hint: Look at the function `separate_wider_delim().`")
  }
}

#Example 2 (From H14, Q4a): Dynamically checking if a column contains the correct values (there are 4 different values/rows): 
else if (!isTRUE(all.equal(sort(variable_name$column_name, na.last = TRUE),
                           sort(variable_name_test$column_name, na.last = TRUE)))) {
  #Reached when the sorted values of `column_name` differ between the answer
  #and the reference.
  submitted_values <- variable_name$column_name
  expected_values <- variable_name_test$column_name

  #Reference values that do not occur anywhere in the answer. The mask is built
  #over the reference values themselves so that it lines up with the vector it
  #indexes, regardless of the row order in the answer.
  missing_values <- unique(expected_values[!(expected_values %in% submitted_values)])
  missing_listing <- paste0(missing_values, collapse = " ")

  test.results[2, 4] <- paste0("The following values are missing in `column_name`: ", missing_listing, ". Hint: (a) Look at the function `case_when()`. (b) Make sure you converted the values correctly according to the prompt.")
}
    
#Example 3 (From Final, P1 Q1b): Dynamically checking if the columns of a tibble were correctly cleaned by removing the punctuation symbols `‡`, `–`, `†`. 
else if (any(vapply(c('‡', '–', '†'), function(value) {
  any(variable_name |> summarise_all(~ value %in% .))
}, logical(1)))) {
  #Reached when at least one of the three symbols occurs as an entire cell value
  #in some column of `variable_name`.
  symbols <- c('‡', '–', '†')

  #One flag per symbol: TRUE when any column contains it as a value.
  symbol_found <- vapply(symbols, function(value) {
    any(variable_name |> summarise_all(~ value %in% .))
  }, logical(1))

  found_listing <- paste(symbols[symbol_found], collapse = " ")

  test.results[2, 4] <- paste0("The following special characters were found in `variable_name`: ", found_listing, ". Please clean the data before proceeding.")
}