From 82b7d176955180686ecad60785e7036aa8efc18d Mon Sep 17 00:00:00 2001 From: yyh Date: Thu, 20 Mar 2025 22:55:45 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20exam02=5Fp1.Rmd?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- exam02_p1.Rmd | 206 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 168 insertions(+), 38 deletions(-) diff --git a/exam02_p1.Rmd b/exam02_p1.Rmd index 2a9bf01..695edc2 100644 --- a/exam02_p1.Rmd +++ b/exam02_p1.Rmd @@ -9,15 +9,12 @@ 数据分析: ```{r} -spider_data <- read.csv("../redback_spider.csv", stringsAsFactors = TRUE) +spider_data <- read.csv("C:\\Users\\31598\\Desktop\\BSI_exam\\worms.csv", stringsAsFactors = TRUE) str(spider_data) head(spider_data) -spider_table <- table(spider_data$fate_first_mate, spider_data$second_mate) -spider_table - -prop.table(spider_table, margin = 1) +spider_table <- table(spider_data) ``` ###Visualization @@ -46,6 +43,7 @@ fisher_test # 检查卡方检验的期望频数 chisq_test$expected ``` + 两种统计检验并不同样适合这些数据。卡方检验通常要求每个单元格的期望频数大于5,而从上面的输出可以看到,有些单元格的期望频数小于5。因此,Fisher精确检验更适合这种小样本量的情况,它不依赖于大样本近似。 ### Conclusion and Discussion @@ -89,61 +87,193 @@ Fisher精确检验的p值为`r fisher_test$p.value`,卡方检验的p值为`r c 数据分析: ```{r} # 读取数据 -dolphins <- read.csv("../dolphin_oxygen.csv", stringsAsFactors = TRUE) +data <- read.csv("C:\\Users\\31598\\Desktop\\BSI_exam\\crickets.csv", stringsAsFactors = TRUE) +colnames(data) <- c("group", "value") # 统一列名,便于后续分析 + +# 确保分组变量是因子类型 +data$group <- factor(data$group) # 查看数据结构 -str(dolphins) -head(dolphins) +str(data) +head(data) -# 计算平均氧气消耗量 -mean_nonfeeding <- mean(dolphins$oxygenUseNonfeeding) -mean_feeding <- mean(dolphins$oxygenUseFeeding) +# 计算每组的描述性统计量 +group_summary <- tapply(data$value, data$group, summary) +group_sd <- tapply(data$value, data$group, sd) +print(group_summary) +print(group_sd) -cat("非捕食潜水的平均氧气消耗量:", mean_nonfeeding, "ml O2/kg\n") -cat("捕食潜水的平均氧气消耗量:", mean_feeding, "ml O2/kg\n") +# 计算样本量 +sample_sizes <- table(data$group) +print(sample_sizes) + +# 计算组均值(用于后续分析) +group_means <- tapply(data$value, data$group, mean) +print(group_means) + +# 获取组别名称(用于后续分析) +group_levels <- levels(data$group) ``` ###Visualization ```{r} +plot_title <- "[两组比较的标题]" +y_label <- "[测量变量]" -library(reshape2) -dolphins_long <- melt(dolphins, id.vars = "individual", - variable.name = "dive_type", - value.name = "oxygen_use") - - -boxplot(oxygen_use ~ dive_type, data = dolphins_long, - main = "钟乳石海豚在不同潜水类型下的氧气消耗量", - xlab = "潜水类型", - ylab = "氧气消耗量 (ml O2/kg)", +# 箱线图与散点图组合 +boxplot(value ~ group, data = data, + main = plot_title, + xlab = "组别", + ylab = y_label, col = c("lightblue", "salmon"), - names = c("非捕食潜水", "捕食潜水")) + border = "black") +# 添加点以显示原始数据 +stripchart(value ~ group, data = data, + method = "jitter", + vertical = TRUE, + add = TRUE, + pch = 19, + col = "darkblue") -plot(dolphins$oxygenUseNonfeeding, dolphins$oxygenUseFeeding, - main = "钟乳石海豚的氧气消耗量比较", - xlab = "非捕食潜水氧气消耗量 (ml O2/kg)", - ylab = "捕食潜水氧气消耗量 (ml O2/kg)", - pch = 19, col = "blue") -abline(0, 1, lty = 2) # 添加对角线,表示相等的情况 +# 直方图 +par(mfrow = c(1, 2)) +hist(data$value[data$group == group_levels[1]], + main = paste(group_levels[1], "组直方图"), + xlab = y_label, + col = "lightblue") +hist(data$value[data$group == group_levels[2]], + main = paste(group_levels[2], "组直方图"), + xlab = y_label, + col = "salmon") +par(mfrow = c(1, 1)) + +# 小提琴图(可选) +if(!require(vioplot)) install.packages("vioplot") +library(vioplot) +with(data, + vioplot(value[group==group_levels[1]], + value[group==group_levels[2]], + names = group_levels, + col = c("lightblue", "salmon"), + main = plot_title, + xlab = "组别", + ylab = y_label)) + +# 点图加均值和95%置信区间 +if(!require(ggplot2)) install.packages("ggplot2") +library(ggplot2) +ggplot(data, aes(x = group, y = value, color = group)) + + geom_point(position = position_jitter(width = 0.2), size = 3, alpha = 0.7) + + stat_summary(fun = mean, geom = "point", shape = 18, size = 5, color = "black") + + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + + labs(title = plot_title, + x = "组别", + y = y_label) + + theme_classic() + + scale_color_manual(values = c("blue", "red")) ``` ###检查配对t检验的假设 ```{r} -diff <- dolphins$oxygenUseFeeding - dolphins$oxygenUseNonfeeding -shapiro.test(diff) +# 1. 检查正态性 +# 按组分别检查 +shapiro_test1 <- shapiro.test(data$value[data$group == group_levels[1]]) +shapiro_test2 <- shapiro.test(data$value[data$group == group_levels[2]]) -qqnorm(diff) -qqline(diff) +cat(group_levels[1], "组Shapiro-Wilk正态性检验 p值:", shapiro_test1$p.value, "\n") +cat(group_levels[2], "组Shapiro-Wilk正态性检验 p值:", shapiro_test2$p.value, "\n") -hist(diff, breaks = 5, main = "差异的直方图", xlab = "氧气消耗量差异") +# 确定是否满足正态性假设 +normality_assumption_met <- shapiro_test1$p.value > 0.05 & shapiro_test2$p.value > 0.05 +cat("正态性假设是否满足:", normality_assumption_met, "\n") + +# Q-Q图 +par(mfrow = c(1, 2)) +qqnorm(data$value[data$group == group_levels[1]], + main = paste(group_levels[1], "组Q-Q图")) +qqline(data$value[data$group == group_levels[1]]) +qqnorm(data$value[data$group == group_levels[2]], + main = paste(group_levels[2], "组Q-Q图")) +qqline(data$value[data$group == group_levels[2]]) +par(mfrow = c(1, 1)) + +# 2. 检查方差同质性 +var_test <- var.test(value ~ group, data = data) +print(var_test) + +# 确定是否满足方差同质性假设 +variance_homogeneity_met <- var_test$p.value > 0.05 +cat("方差同质性假设是否满足:", variance_homogeneity_met, "\n") + +# 3. 确定检验方向 +# 查看两组均值差异 +cat(group_levels[1], "组均值与", group_levels[2], "组均值的差异:", + group_means[1] - group_means[2], "\n") + +# 设置检验方向(根据研究假设修改) +# "two.sided" - 双侧检验(默认) +# "greater" - 单侧检验,假设第一组大于第二组 +# "less" - 单侧检验,假设第一组小于第二组 +test_alternative <- "two.sided" # 可根据研究假设修改 ``` ###统计检验: ```{r} +if(normality_assumption_met) { + cat("数据满足正态性假设,使用t检验\n") + + # 根据方差同质性检查结果选择t检验类型 + if(variance_homogeneity_met) { + cat("数据满足方差同质性假设,使用等方差t检验(var.equal = TRUE)\n") + t_test <- t.test(value ~ group, data = data, + alternative = test_alternative, + var.equal = TRUE) + } else { + cat("数据不满足方差同质性假设,使用Welch's t检验(var.equal = FALSE)\n") + t_test <- t.test(value ~ group, data = data, + alternative = test_alternative, + var.equal = FALSE) + } + + print(t_test) + + # 计算效应量(Cohen's d) + if(!require(effsize)) install.packages("effsize") + library(effsize) + cohen_d <- cohen.d(value ~ group, data = data) + print(cohen_d) + + # 保存检验结果 + test_result <- t_test + effect_size <- cohen_d$estimate + effect_size_interpretation <- ifelse(abs(effect_size) < 0.2, "小", + ifelse(abs(effect_size) < 0.5, "小到中", + ifelse(abs(effect_size) < 0.8, "中", + ifelse(abs(effect_size) < 1.2, "大", "非常大")))) + test_name <- ifelse(variance_homogeneity_met, "等方差t检验", "Welch's t检验") + +} else { + cat("数据不满足正态性假设,使用非参数检验(Wilcoxon秩和检验)\n") + wilcox_test <- wilcox.test(value ~ group, data = data, + alternative = test_alternative) + print(wilcox_test) + + # 计算非参数效应量 + if(!require(rstatix)) install.packages("rstatix") + library(rstatix) + wilcox_effsize <- wilcox_effsize(value ~ group, data = data) + print(wilcox_effsize) + + # 保存检验结果 + test_result <- wilcox_test + effect_size <- wilcox_effsize$effsize + effect_size_interpretation <- wilcox_effsize$magnitude + test_name <- "Wilcoxon秩和检验" +} -t_test <- t.test(dolphins$oxygenUseFeeding, dolphins$oxygenUseNonfeeding, - paired = TRUE, alternative = "greater") -t_test +# 保存p值和显著性结论 +p_value <- test_result$p.value +significance <- ifelse(p_value < 0.05, "显著", "不显著") ``` ###结论和讨论: