rm(list = ls())
library(dplyr)
library(broom)
library(ggplot2)
# 设置工作目录
setwd("C:\\Users\\Administrator\\Desktop\\machine learning\\Multiple Linear Regression")
# 读取多样性数据
diversity_data <- read.table("alpha_diversity.txt", header = TRUE, sep = "\t")
# 读取分组数据
group_data <- read.table("group.txt", header = TRUE, sep = "\t")
# 合并两个数据集
merged_data <- merge(diversity_data, group_data, by.x = "SampleID", by.y = "SampleID")
# 确保因变量为数值型,自变量为因子型
merged_data$Gene <- as.factor(merged_data$Gene)
# 生成所有模型(包含交互项)
diversity_metrics <- c("Shannon", "Simpson", "Pielou")
results <- lapply(diversity_metrics, function(metric) {
# 构建回归模型公式,包含基因型和时间的交互项
formula <- as.formula(paste(metric, "~ Gene"))
# 拟合线性模型
model <- lm(formula, data = merged_data)
# 结果整理并加上指标名称
tidy(model) %>%
mutate(Metric = metric) # 添加指标名称
})
# 合并所有结果
final_table <- bind_rows(results)
# 筛选需要的列并重命名
final_table <- final_table %>%
select(Metric, term, estimate, std.error, p.value) %>%
rename(Variable = term, β = estimate, SE = std.error, p = p.value)
# 输出结果表格
print(final_table)
# 将结果保存为CSV文件
write.csv(final_table, "alpha_regression_results.csv", row.names = TRUE)
#########################################################
# 定义一个函数来提取模型评估参数
extract_metrics <- function(model, metric_name) {
model_summary <- summary(model)
data.frame(
Metric = metric_name,
Residual_SE = model_summary$sigma,
R_squared = model_summary$r.squared,
Adjusted_R_squared = model_summary$adj.r.squared,
F_statistic = model_summary$fstatistic[1],
F_p_value = pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)
)
}
# 对多个模型应用
metrics_list <- lapply(diversity_metrics, function(metric) {
formula <- as.formula(paste(metric, "~ Gene"))
model <- lm(formula, data = merged_data)
extract_metrics(model, metric)
})
# 合并结果到一个表格
metrics_table <- do.call(rbind, metrics_list)
# 查看结果
print(metrics_table)
write.csv(metrics_table, "alpha_metrics_table.csv", row.names = FALSE)
标签:library,多样性,table,线性,alpha,csv,final From: https://www.cnblogs.com/wzbzk/p/18574457