Jun 21, 2024
tidyverse, medicaldata
Create New Project:
File -> New Project -> New Directory -> New Project
intro_wrangling_analysis
Create R Markdown Document:
File -> New File -> R Markdown
Chapter 2
tidyverse and medicaldata packages
opt and polyps
data() and ? functions
tidyverse
select
new_data <- original_data %>% select(column1, column2)
opt_select <- opt %>% select(Clinic, Age, Education)
opt_rename <- opt %>% select(newName = oldName)
opt_select <- opt %>% select(2, 4, 3, 10)
opt_deselect <- opt %>% select(-columnName)
filter
filtered_data <- original_data %>% filter(condition)
filtered_data <- opt %>% filter(Clinic == "NY")
bmi_diabetes <- opt %>% filter(BMI >= 30 & Diabetes == "Yes")
bmi_bgs <- opt %>% filter(Diabetes == "Yes" | BMI >= 30)
& (AND) and | (OR), correct order of logical operators, correct handling of text cases
arrange
sorted_data <- original_data %>% arrange(column1, column2)
arranged_data <- polyps %>% arrange(Sex, Baseline)
mutate
new_data <- original_data %>% mutate(new_column = expression)
polyps2 <- polyps %>% mutate(treatment1 = Baseline - three_months)
case_when for Conditional Mutation:
# Add an Improvement column to polyps2 whose label depends on the sign of
# Total: positive -> "Decline", zero -> "No Change", negative -> "Improvement".
polyps3 <- polyps2 %>%
  mutate(
    Improvement = case_when(
      Total > 0 ~ "Decline",
      Total == 0 ~ "No Change",
      Total < 0 ~ "Improvement"
    )
  )
polyps %>% summarize(missing_baseline = sum(is.na(Baseline)), ...)
polyp6 <- polyps %>% drop_na(column_name)
summary_data <- polyps %>% summarize(mean_value = mean(column_name, na.rm = TRUE))
polyps7 <- polyps %>% mutate(column_name = replace_na(column_name, 0))
summarize
summary_data <- original_data %>% summarize(mean_col = mean(column_name), ...)
polyps_summary <- polyps %>% summarize(mean_baseline = mean(Baseline), ...)
group_by and summarize
grouped_data <- original_data %>% group_by(column_name) %>% summarize(mean_col = mean(column_name), ...)
grouped_summary <- polyps %>% group_by(Sex) %>% summarize(mean_total = mean(Total))
# Round half away from zero ("commercial" rounding). Base round() follows
# IEC 60559 and rounds halves to the even digit, so round(2.5) is 2 whereas
# round2(2.5) is 3.
#
# x:      numeric vector to round; NA values propagate to the result.
# digits: number of decimal places to keep (default 0, as in round()).
#
# Returns the rounded values as a numeric vector.
round2 <- function(x, digits = 0) {
  # Remember the sign and work on magnitudes so that negative values are
  # also rounded away from zero.
  posneg <- sign(x)
  z <- abs(x) * 10^digits
  # The small tolerance compensates for binary representation error: without
  # it, 1.005 * 100 evaluates to 100.49999999999999 and would be rounded down.
  z <- z + 0.5 + sqrt(.Machine$double.eps)
  z <- trunc(z)
  z <- z / 10^digits
  z * posneg
}
rounded_data <- original_data %>% mutate(new_column = round2(column_name, 1))
tidyverse functions