更新 exam02_p1.Rmd
This commit is contained in:
parent
8c42c72cd9
commit
82b7d17695
206
exam02_p1.Rmd
206
exam02_p1.Rmd
|
|
@ -9,15 +9,12 @@
|
||||||
数据分析:
|
数据分析:
|
||||||
```{r}
|
```{r}
|
||||||
|
|
||||||
spider_data <- read.csv("../redback_spider.csv", stringsAsFactors = TRUE)
|
spider_data <- read.csv("C:\\Users\\31598\\Desktop\\BSI_exam\\worms.csv", stringsAsFactors = TRUE)
|
||||||
|
|
||||||
str(spider_data)
|
str(spider_data)
|
||||||
head(spider_data)
|
head(spider_data)
|
||||||
|
|
||||||
spider_table <- table(spider_data$fate_first_mate, spider_data$second_mate)
|
spider_table <- table(spider_data)
|
||||||
spider_table
|
|
||||||
|
|
||||||
prop.table(spider_table, margin = 1)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
###Visualization
|
###Visualization
|
||||||
|
|
@ -46,6 +43,7 @@ fisher_test
|
||||||
# 检查卡方检验的期望频数
|
# 检查卡方检验的期望频数
|
||||||
chisq_test$expected
|
chisq_test$expected
|
||||||
```
|
```
|
||||||
|
|
||||||
两种统计检验并不同样适合这些数据。卡方检验通常要求每个单元格的期望频数大于5,而从上面的输出可以看到,有些单元格的期望频数小于5。因此,Fisher精确检验更适合这种小样本量的情况,它不依赖于大样本近似。
|
两种统计检验并不同样适合这些数据。卡方检验通常要求每个单元格的期望频数大于5,而从上面的输出可以看到,有些单元格的期望频数小于5。因此,Fisher精确检验更适合这种小样本量的情况,它不依赖于大样本近似。
|
||||||
|
|
||||||
### Conclusion and Discussion
|
### Conclusion and Discussion
|
||||||
|
|
@ -89,61 +87,193 @@ Fisher精确检验的p值为`r fisher_test$p.value`,卡方检验的p值为`r c
|
||||||
数据分析:
|
数据分析:
|
||||||
```{r}
|
```{r}
|
||||||
# 读取数据
|
# 读取数据
|
||||||
dolphins <- read.csv("../dolphin_oxygen.csv", stringsAsFactors = TRUE)
|
data <- read.csv("C:\\Users\\31598\\Desktop\\BSI_exam\\crickets.csv", stringsAsFactors = TRUE)
|
||||||
|
colnames(data) <- c("group", "value") # 统一列名,便于后续分析
|
||||||
|
|
||||||
|
# 确保分组变量是因子类型
|
||||||
|
data$group <- factor(data$group)
|
||||||
|
|
||||||
# 查看数据结构
|
# 查看数据结构
|
||||||
str(dolphins)
|
str(data)
|
||||||
head(dolphins)
|
head(data)
|
||||||
|
|
||||||
# 计算平均氧气消耗量
|
# 计算每组的描述性统计量
|
||||||
mean_nonfeeding <- mean(dolphins$oxygenUseNonfeeding)
|
group_summary <- tapply(data$value, data$group, summary)
|
||||||
mean_feeding <- mean(dolphins$oxygenUseFeeding)
|
group_sd <- tapply(data$value, data$group, sd)
|
||||||
|
print(group_summary)
|
||||||
|
print(group_sd)
|
||||||
|
|
||||||
cat("非捕食潜水的平均氧气消耗量:", mean_nonfeeding, "ml O2/kg\n")
|
# 计算样本量
|
||||||
cat("捕食潜水的平均氧气消耗量:", mean_feeding, "ml O2/kg\n")
|
sample_sizes <- table(data$group)
|
||||||
|
print(sample_sizes)
|
||||||
|
|
||||||
|
# 计算组均值(用于后续分析)
|
||||||
|
group_means <- tapply(data$value, data$group, mean)
|
||||||
|
print(group_means)
|
||||||
|
|
||||||
|
# 获取组别名称(用于后续分析)
|
||||||
|
group_levels <- levels(data$group)
|
||||||
```
|
```
|
||||||
|
|
||||||
###Visualization
|
###Visualization
|
||||||
```{r}
|
```{r}
|
||||||
|
plot_title <- "[两组比较的标题]"
|
||||||
|
y_label <- "[测量变量]"
|
||||||
|
|
||||||
library(reshape2)
|
# 箱线图与散点图组合
|
||||||
dolphins_long <- melt(dolphins, id.vars = "individual",
|
boxplot(value ~ group, data = data,
|
||||||
variable.name = "dive_type",
|
main = plot_title,
|
||||||
value.name = "oxygen_use")
|
xlab = "组别",
|
||||||
|
ylab = y_label,
|
||||||
|
|
||||||
boxplot(oxygen_use ~ dive_type, data = dolphins_long,
|
|
||||||
main = "钟乳石海豚在不同潜水类型下的氧气消耗量",
|
|
||||||
xlab = "潜水类型",
|
|
||||||
ylab = "氧气消耗量 (ml O2/kg)",
|
|
||||||
col = c("lightblue", "salmon"),
|
col = c("lightblue", "salmon"),
|
||||||
names = c("非捕食潜水", "捕食潜水"))
|
border = "black")
|
||||||
|
|
||||||
|
# 添加点以显示原始数据
|
||||||
|
stripchart(value ~ group, data = data,
|
||||||
|
method = "jitter",
|
||||||
|
vertical = TRUE,
|
||||||
|
add = TRUE,
|
||||||
|
pch = 19,
|
||||||
|
col = "darkblue")
|
||||||
|
|
||||||
plot(dolphins$oxygenUseNonfeeding, dolphins$oxygenUseFeeding,
|
# 直方图
|
||||||
main = "钟乳石海豚的氧气消耗量比较",
|
par(mfrow = c(1, 2))
|
||||||
xlab = "非捕食潜水氧气消耗量 (ml O2/kg)",
|
hist(data$value[data$group == group_levels[1]],
|
||||||
ylab = "捕食潜水氧气消耗量 (ml O2/kg)",
|
main = paste(group_levels[1], "组直方图"),
|
||||||
pch = 19, col = "blue")
|
xlab = y_label,
|
||||||
abline(0, 1, lty = 2) # 添加对角线,表示相等的情况
|
col = "lightblue")
|
||||||
|
hist(data$value[data$group == group_levels[2]],
|
||||||
|
main = paste(group_levels[2], "组直方图"),
|
||||||
|
xlab = y_label,
|
||||||
|
col = "salmon")
|
||||||
|
par(mfrow = c(1, 1))
|
||||||
|
|
||||||
|
# 小提琴图(可选)
|
||||||
|
if(!require(vioplot)) install.packages("vioplot")
|
||||||
|
library(vioplot)
|
||||||
|
with(data,
|
||||||
|
vioplot(value[group==group_levels[1]],
|
||||||
|
value[group==group_levels[2]],
|
||||||
|
names = group_levels,
|
||||||
|
col = c("lightblue", "salmon"),
|
||||||
|
main = plot_title,
|
||||||
|
xlab = "组别",
|
||||||
|
ylab = y_label))
|
||||||
|
|
||||||
|
# 点图加均值和95%置信区间
|
||||||
|
if(!require(ggplot2)) install.packages("ggplot2")
|
||||||
|
library(ggplot2)
|
||||||
|
ggplot(data, aes(x = group, y = value, color = group)) +
|
||||||
|
geom_point(position = position_jitter(width = 0.2), size = 3, alpha = 0.7) +
|
||||||
|
stat_summary(fun = mean, geom = "point", shape = 18, size = 5, color = "black") +
|
||||||
|
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) +
|
||||||
|
labs(title = plot_title,
|
||||||
|
x = "组别",
|
||||||
|
y = y_label) +
|
||||||
|
theme_classic() +
|
||||||
|
scale_color_manual(values = c("blue", "red"))
|
||||||
```
|
```
|
||||||
###检查配对t检验的假设
|
###检查配对t检验的假设
|
||||||
```{r}
|
```{r}
|
||||||
diff <- dolphins$oxygenUseFeeding - dolphins$oxygenUseNonfeeding
|
# 1. 检查正态性
|
||||||
shapiro.test(diff)
|
# 按组分别检查
|
||||||
|
shapiro_test1 <- shapiro.test(data$value[data$group == group_levels[1]])
|
||||||
|
shapiro_test2 <- shapiro.test(data$value[data$group == group_levels[2]])
|
||||||
|
|
||||||
qqnorm(diff)
|
cat(group_levels[1], "组Shapiro-Wilk正态性检验 p值:", shapiro_test1$p.value, "\n")
|
||||||
qqline(diff)
|
cat(group_levels[2], "组Shapiro-Wilk正态性检验 p值:", shapiro_test2$p.value, "\n")
|
||||||
|
|
||||||
hist(diff, breaks = 5, main = "差异的直方图", xlab = "氧气消耗量差异")
|
# 确定是否满足正态性假设
|
||||||
|
normality_assumption_met <- shapiro_test1$p.value > 0.05 & shapiro_test2$p.value > 0.05
|
||||||
|
cat("正态性假设是否满足:", normality_assumption_met, "\n")
|
||||||
|
|
||||||
|
# Q-Q图
|
||||||
|
par(mfrow = c(1, 2))
|
||||||
|
qqnorm(data$value[data$group == group_levels[1]],
|
||||||
|
main = paste(group_levels[1], "组Q-Q图"))
|
||||||
|
qqline(data$value[data$group == group_levels[1]])
|
||||||
|
qqnorm(data$value[data$group == group_levels[2]],
|
||||||
|
main = paste(group_levels[2], "组Q-Q图"))
|
||||||
|
qqline(data$value[data$group == group_levels[2]])
|
||||||
|
par(mfrow = c(1, 1))
|
||||||
|
|
||||||
|
# 2. 检查方差同质性
|
||||||
|
var_test <- var.test(value ~ group, data = data)
|
||||||
|
print(var_test)
|
||||||
|
|
||||||
|
# 确定是否满足方差同质性假设
|
||||||
|
variance_homogeneity_met <- var_test$p.value > 0.05
|
||||||
|
cat("方差同质性假设是否满足:", variance_homogeneity_met, "\n")
|
||||||
|
|
||||||
|
# 3. 确定检验方向
|
||||||
|
# 查看两组均值差异
|
||||||
|
cat(group_levels[1], "组均值与", group_levels[2], "组均值的差异:",
|
||||||
|
group_means[1] - group_means[2], "\n")
|
||||||
|
|
||||||
|
# 设置检验方向(根据研究假设修改)
|
||||||
|
# "two.sided" - 双侧检验(默认)
|
||||||
|
# "greater" - 单侧检验,假设第一组大于第二组
|
||||||
|
# "less" - 单侧检验,假设第一组小于第二组
|
||||||
|
test_alternative <- "two.sided" # 可根据研究假设修改
|
||||||
```
|
```
|
||||||
|
|
||||||
###统计检验:
|
###统计检验:
|
||||||
```{r}
|
```{r}
|
||||||
|
if(normality_assumption_met) {
|
||||||
|
cat("数据满足正态性假设,使用t检验\n")
|
||||||
|
|
||||||
|
# 根据方差同质性检查结果选择t检验类型
|
||||||
|
if(variance_homogeneity_met) {
|
||||||
|
cat("数据满足方差同质性假设,使用等方差t检验(var.equal = TRUE)\n")
|
||||||
|
t_test <- t.test(value ~ group, data = data,
|
||||||
|
alternative = test_alternative,
|
||||||
|
var.equal = TRUE)
|
||||||
|
} else {
|
||||||
|
cat("数据不满足方差同质性假设,使用Welch's t检验(var.equal = FALSE)\n")
|
||||||
|
t_test <- t.test(value ~ group, data = data,
|
||||||
|
alternative = test_alternative,
|
||||||
|
var.equal = FALSE)
|
||||||
|
}
|
||||||
|
|
||||||
|
print(t_test)
|
||||||
|
|
||||||
|
# 计算效应量(Cohen's d)
|
||||||
|
if(!require(effsize)) install.packages("effsize")
|
||||||
|
library(effsize)
|
||||||
|
cohen_d <- cohen.d(value ~ group, data = data)
|
||||||
|
print(cohen_d)
|
||||||
|
|
||||||
|
# 保存检验结果
|
||||||
|
test_result <- t_test
|
||||||
|
effect_size <- cohen_d$estimate
|
||||||
|
effect_size_interpretation <- ifelse(abs(effect_size) < 0.2, "小",
|
||||||
|
ifelse(abs(effect_size) < 0.5, "小到中",
|
||||||
|
ifelse(abs(effect_size) < 0.8, "中",
|
||||||
|
ifelse(abs(effect_size) < 1.2, "大", "非常大"))))
|
||||||
|
test_name <- ifelse(variance_homogeneity_met, "等方差t检验", "Welch's t检验")
|
||||||
|
|
||||||
|
} else {
|
||||||
|
cat("数据不满足正态性假设,使用非参数检验(Wilcoxon秩和检验)\n")
|
||||||
|
wilcox_test <- wilcox.test(value ~ group, data = data,
|
||||||
|
alternative = test_alternative)
|
||||||
|
print(wilcox_test)
|
||||||
|
|
||||||
|
# 计算非参数效应量
|
||||||
|
if(!require(rstatix)) install.packages("rstatix")
|
||||||
|
library(rstatix)
|
||||||
|
wilcox_effsize <- wilcox_effsize(value ~ group, data = data)
|
||||||
|
print(wilcox_effsize)
|
||||||
|
|
||||||
|
# 保存检验结果
|
||||||
|
test_result <- wilcox_test
|
||||||
|
effect_size <- wilcox_effsize$effsize
|
||||||
|
effect_size_interpretation <- wilcox_effsize$magnitude
|
||||||
|
test_name <- "Wilcoxon秩和检验"
|
||||||
|
}
|
||||||
|
|
||||||
t_test <- t.test(dolphins$oxygenUseFeeding, dolphins$oxygenUseNonfeeding,
|
# 保存p值和显著性结论
|
||||||
paired = TRUE, alternative = "greater")
|
p_value <- test_result$p.value
|
||||||
t_test
|
significance <- ifelse(p_value < 0.05, "显著", "不显著")
|
||||||
```
|
```
|
||||||
|
|
||||||
###结论和讨论:
|
###结论和讨论:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue