5.1 Example: Tibble
Consider the following question from Homework 9.2
Part 1: Coding Assignment
- (Public Question) Next, using `un_data_tfr`, create a new tibble called `un_data_tfr_gdp` that contains a column for the year, a column for the region, and a column for the average GDP per capita in each region for each year. The new column should be called `gdp_per_capita` and should be created by dividing the sum of GDP by the sum of population for each region and year.
The answer should look something like:
# Example answer: one row per (year, region) holding the population-weighted
# mean of `gdp_per_capita`. `na.rm = TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
un_data_tfr_gdp <- un_data_tfr |>
  summarise(
    gdp_per_capita = weighted.mean(gdp_per_capita, population, na.rm = TRUE),
    .by = c(year, region)
  )
Assume this question is worth 20 points and that `un_data_tfr` was created in Question 2.
Then, the autograder code for this question could look like:
# Autograder Code for Part 1 Question 4 ----------
#
# Grades the student's `un_data_tfr_gdp` against the reference answer
# `un_data_tfr_gdp_test`. Row 4 of `test.results` holds, in order: the question
# label, the points earned, the points possible, and the feedback message.
#
# The checks form a single if / else-if chain, so only the FIRST failing check
# writes its feedback. Points are awarded only in the final branch, when the
# student's tibble matches the reference.

# Initializing `test.results[4, ]`: zero points and a generic hint. This default
# feedback is what the student sees if every targeted check passes but the
# final full comparison still fails.
test.results[4, ] <- c("Part 1 Question 4 (Public)", 0, 20, "Try again. Hint: Start with summarize().")

# Prerequisite Check: Question 2 (row 2, points-earned column) must have scored.
if (test.results[2, 2] == 0) {
  test.results[4, 4] <- "This question depends on `un_data_tfr` from Question 2 being correct. Try again."

# Name Check
# NOTE(review): `is.error()` is defined outside this snippet; presumably it
# returns TRUE when evaluating its argument fails (object not found) - confirm.
} else if (is.error(un_data_tfr_gdp)) {
  test.results[4, 4] <- "`un_data_tfr_gdp` is not found. Please make sure to name the variable correctly. Hint: Check spelling and capitalization."

# Structure Check (for a tibble)
} else if (!is_tibble(un_data_tfr_gdp)) {
  test.results[4, 4] <- "`un_data_tfr_gdp` is not a tibble. Please make sure it is a tibble."

# Column Name Check (1/2): every reference column must be present.
} else if (!all(colnames(un_data_tfr_gdp_test) %in% colnames(un_data_tfr_gdp))) {
  test.results[4, 4] <- paste0(
    c(
      "The following column(s) should be in un_data_tfr_gdp, but they were not found in your answer:",
      # Names of the reference columns missing from the student's tibble.
      colnames(un_data_tfr_gdp_test)[!(colnames(un_data_tfr_gdp_test) %in% colnames(un_data_tfr_gdp))],
      ". Hint: Use summarize() with appropriate .by = argument (besides gdp_per_capita, the other two columns that you need will be\nyour .by argument, in the form of a vector.)"
    ),
    collapse = " "
  )

# Column Name Check (2/2): no columns beyond the reference ones are allowed.
} else if (!all(colnames(un_data_tfr_gdp) %in% colnames(un_data_tfr_gdp_test))) {
  test.results[4, 4] <- paste0(
    c(
      "The following column(s) should not be in un_data_tfr_gdp, but they were found in your answer :",
      # Names of the student's columns that are absent from the reference.
      colnames(un_data_tfr_gdp)[!(colnames(un_data_tfr_gdp) %in% colnames(un_data_tfr_gdp_test))],
      ". Hint: Use summarize() with appropriate .by= argument (besides gdp_per_capita, the other two columns that you need will be\nyour .by argument, in the form of a vector.)"
    ),
    collapse = " "
  )

# NA Check
} else if (sum(is.na(un_data_tfr_gdp$gdp_per_capita)) > 0) {
  test.results[4, 4] <- "There are `NA` values in your `gdp_per_capita` column. Hint: You can do one of the following: (1) filter out all rows with missing gdp (or gdp_per_capita) and population values first, (2) set na.rm correctly when necessary."

# Type Check (numeric): must precede the arithmetic in the Calculation Check.
} else if (!is.numeric(un_data_tfr_gdp$gdp_per_capita)) {
  test.results[4, 4] <- "`gdp_per_capita` should be a numeric column."

# Row Check: runs BEFORE the value comparisons. `all.equal()` on vectors of
# different lengths is never TRUE, so if this check came after the Value Check
# it could never fire and a wrong row count would get the value hint instead.
} else if (nrow(un_data_tfr_gdp) != nrow(un_data_tfr_gdp_test)) {
  test.results[4, 4] <- "The number of rows in un_data_tfr_gdp is not correct. Hint: Did you use the summarize() function correctly?"

# Calculation Check: detects the specific mistake where the student's values
# are exactly 1000 times the reference values (compared after sorting).
} else if (isTRUE(all.equal(
  (un_data_tfr_gdp$gdp_per_capita / 1000) |> sort(),
  un_data_tfr_gdp_test$gdp_per_capita |> sort()
))) {
  test.results[4, 4] <- "The `gdp_per_capita` column is incorrect. Hint: If you used `gdp` to calculate `gdp_per_capita` column in summarize(), don't forget that `population` is in terms of thousands of people."

# Value Check: sorted values must match the reference within the tolerance.
} else if (!isTRUE(all.equal(
  un_data_tfr_gdp$gdp_per_capita |> sort(),
  un_data_tfr_gdp_test$gdp_per_capita |> sort(),
  tolerance = 0.001
))) {
  test.results[4, 4] <- "The `gdp_per_capita` column is incorrect. Hint: Within summarize(), try weighted.mean(). Or you can use sum() to implement a weighted average. Note: If you used sum(), you will need to remove rows that have a missing gdp or population value in the first place."

# Correct Check: compare the whole tibbles after ungrouping, putting columns in
# the reference order and sorting rows, so column/row order does not matter.
} else if (isTRUE(all.equal(
  un_data_tfr_gdp |>
    ungroup() |>
    select(colnames(un_data_tfr_gdp_test)) |>
    arrange(across(everything())),
  un_data_tfr_gdp_test |>
    ungroup() |>
    select(colnames(un_data_tfr_gdp_test)) |>
    arrange(across(everything())),
  tolerance = 0.001,
  check.attributes = FALSE
))) {
  test.results[4, 2] <- 20
  test.results[4, 4] <- "Well done!"
}
Technically, this is the fifth question of the assignment, but I have modified it to be the fourth to align with its question number in `test.results[#, ]`.↩︎