gene_x 0 like s 8 view s
Tags: scripts
# https://bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc//MicrobiotaProcess.html
# -----------------------------------
# ---- prepare the R environment ----
#Rscript MicrobiotaProcess.R
#NOTE: exit R script, then login again R-environment; rm -rf Phyloseq*_cache
#mkdir figures
#rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
#source("MicrobiotaProcess_Group3_vs_Group4.R")
# with #alpha = 2.0, running the following script further!
# -----------------------------
# ---- 3.1. bridges other tools
##https://github.com/YuLab-SMU/MicrobiotaProcess
##https://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess.html
##https://chiliubio.github.io/microeco_tutorial/intro.html#framework
##https://yiluheihei.github.io/microbiomeMarker/reference/plot_cladogram.html
#BiocManager::install("MicrobiotaProcess")
#install.packages("microeco")
#install.packages("ggalluvial")
#install.packages("ggh4x")
library(MicrobiotaProcess)
library(microeco)
library(ggalluvial)
library(ggh4x)
library(gghalves)
## Convert the phyloseq object to a MicrobiotaProcess object
#mp <- as.MicrobiotaProcess(ps.ng.tax)
#mt <- phyloseq2microeco(ps.ng.tax) #--> ERROR
#abundance_table <- mt$abun_table
#taxonomy_table <- mt$tax_table
#ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
#ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)
##OPTION1 (NOT_USED): take all samples, prepare ps.ng.tax_abund --> mpse_abund
##mpse <- ps.ng.tax %>% as.MPSE()
#mpse_abund <- ps.ng.tax_abund %>% as.MPSE()
##OPTION2 (USED!): take partial samples, prepare ps.ng.tax or ps.ng.tax_abund (2 replacements!)--> ps.ng.tax_sel --> mpse_abund
ps.ng.tax_sel <- ps.ng.tax_abund
##otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")]
##NOTE: Only choose Group2, Group4, Group6, Group8
#> ps.ng.tax_sel
#otu_table() OTU Table: [ 37465 taxa and 29 samples ]
#sample_data() Sample Data: [ 29 samples by 10 sample variables ]
#tax_table() Taxonomy Table: [ 37465 taxa by 7 taxonomic ranks ]
#phy_tree() Phylogenetic Tree: [ 37465 tips and 37461 internal nodes ]
#-Group4: "21","22","23","24","25","26","27","28",
#-Group8: , "47","48","49","50","52","53","55"
otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[,c("sample-C3","sample-C4","sample-C5","sample-C6","sample-C7", "sample-E4","sample-E5","sample-E6","sample-E7","sample-E8")]
mpse_abund <- ps.ng.tax_sel %>% as.MPSE()
# A MPSE-tibble (MPSE object) abstraction: 2,352 × 20
# NOTE mpse_abund contains 20 variables: OTU, Sample, Abundance, BarcodeSequence, LinkerPrimerSequence, FileInput, Group,
# Sex_age <chr>, pre_post_stroke <chr>, Conc <dbl>, Vol_50ng <dbl>, Vol_PCR <dbl>, Description <chr>,
# Domain <chr>, Phylum <chr>, Class <chr>, Order <chr>, Family <chr>, Genus <chr>, Species <chr>
# -----------------------------------
# ---- 3.2. alpha diversity analysis
# Rarefied species richness + RareAbundance
mpse_abund %<>% mp_rrarefy()
# 'chunks' represent the split number of each sample to calculate alpha
# diversity, default is 400. e.g. If a sample has total 40000
# reads, if chunks is 400, it will be split to 100 sub-samples
# (100, 200, 300,..., 40000), then alpha diversity index was
# calculated based on the sub-samples.
# '.abundance' the column name of abundance, if the '.abundance' is not be
# rarefied calculate rarecurve, user can specific 'force=TRUE'.
mpse_abund %<>%
mp_cal_rarecurve(
.abundance = RareAbundance,
chunks = 400
)
# The RareAbundanceRarecurve column will be added the colData slot
# automatically (default action="add")
#NOTE mpse_abund contains 22 varibles = 20 varibles + RareAbundance <dbl> + RareAbundanceRarecurve <list>
# default will display the confidence interval around smooth.
# se=TRUE
# NOTE that two colors #c("#00A087FF", "#3C5488FF") for .group = pre_post_stroke; four colors c("#1f78b4", "#33a02c", "#e31a1c", "#6a3d9a") for .group = Group;
p1 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = Observe,
)
p2 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = Observe,
.group = Group
) +
scale_color_manual(values=c("#1f78b4", "#e31a1c")) +
scale_fill_manual(values=c("#1f78b4", "#e31a1c"), guide="none")
# combine the samples belong to the same groups if plot.group=TRUE
p3 <- mpse_abund %>%
mp_plot_rarecurve(
.rare = RareAbundanceRarecurve,
.alpha = "Observe",
.group = Group,
plot.group = TRUE
) +
scale_color_manual(values=c("#1f78b4", "#e31a1c")) +
scale_fill_manual(values=c("#1f78b4", "#e31a1c"),guide="none")
png("rarefaction_of_samples_or_groups.png", width=1080, height=600)
p1 + p2 + p3
dev.off()
# ------------------------------------------
# 3.3. calculate alpha index and visualization
library(ggplot2)
library(MicrobiotaProcess)
mpse_abund %<>%
mp_cal_alpha(.abundance=RareAbundance)
mpse_abund
#NOTE mpse_abund contains 28 varibles = 22 varibles + Observe <dbl>, Chao1 <dbl>, ACE <dbl>, Shannon <dbl>, Simpson <dbl>, Pielou <dbl>
f1 <- mpse_abund %>%
mp_plot_alpha(
.group=Group,
.alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
) +
scale_fill_manual(values=c("#1f78b4", "#e31a1c"), guide="none") +
scale_color_manual(values=c("#1f78b4", "#e31a1c"), guide="none")
f2 <- mpse_abund %>%
mp_plot_alpha(
.alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
)
#ps.ng.tax_sel contais only pre samples --> f1 cannot be generated!
png("alpha_diversity_comparison.png", width=1400, height=600)
f1 / f2
dev.off()
# -------------------------------------------
# 3.4. The visualization of taxonomy abundance (Class)
mpse_abund %<>%
mp_cal_abundance( # for each samples
.abundance = RareAbundance
) %>%
mp_cal_abundance( # for each groups
.abundance=RareAbundance,
.group=Group
)
mpse_abund
#NOTE mpse_abund contains 29 varibles = 28 varibles + RelRareAbundanceBySample <dbl>
# visualize the relative abundance of top 20 phyla for each sample.
# .group=time,
p1 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
taxa.class = Class,
topn = 20,
relative = TRUE
)
# visualize the abundance (rarefied) of top 20 phyla for each sample.
# .group=time,
p2 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
taxa.class = Class,
topn = 20,
relative = FALSE
)
png("relative_abundance_and_abundance.png", width= 1200, height=600) #NOT PRODUCED!
p1 / p2
dev.off()
#----
h1 <- mpse_abund %>%
mp_plot_abundance(
.abundance = RareAbundance,
.group = Group,
taxa.class = Class,
relative = TRUE,
topn = 20,
geom = 'heatmap',
features.dist = 'euclidean',
features.hclust = 'average',
sample.dist = 'bray',
sample.hclust = 'average'
)
h2 <- mpse_abund %>%
mp_plot_abundance(
.abundance = RareAbundance,
.group = Group,
taxa.class = Class,
relative = FALSE,
topn = 20,
geom = 'heatmap',
features.dist = 'euclidean',
features.hclust = 'average',
sample.dist = 'bray',
sample.hclust = 'average'
)
# the character (scale or theme) of figure can be adjusted by set_scale_theme
# refer to the mp_plot_dist
png("relative_abundance_and_abundance_heatmap.png", width= 1200, height=600)
aplot::plot_list(gglist=list(h1, h2), tag_levels="A")
dev.off()
# visualize the relative abundance of top 20 class for each .group (Group)
p3 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group=Group,
taxa.class = Class,
topn = 20,
plot.group = TRUE
)
# visualize the abundance of top 20 phyla for each .group (time)
p4 <- mpse_abund %>%
mp_plot_abundance(
.abundance=RareAbundance,
.group= Group,
taxa.class = Class,
topn = 20,
relative = FALSE,
plot.group = TRUE
)
png("relative_abundance_and_abundance_groups.png", width= 1000, height=1000)
p3 / p4
dev.off()
# ---------------------------
# 3.5. Beta diversity analysis
# ---------------------------------------------
# 3.5.1 The distance between samples or groups
# standardization
# mp_decostand wraps the decostand of vegan, which provides
# many standardization methods for community ecology.
# default is hellinger, then the abundance processed will
# be stored to the assays slot.
mpse_abund %<>%
mp_decostand(.abundance=Abundance)
mpse_abund
#NOTE mpse_abund contains 30 varibles = 29 varibles + hellinger <dbl>
# calculate the distance between the samples.
# the distance will be generated a nested tibble and added to the
# colData slot.
mpse_abund %<>% mp_cal_dist(.abundance=hellinger, distmethod="bray")
mpse_abund
#NOTE mpse_abund contains 31 varibles = 30 varibles + bray <list>
# mp_plot_dist provides there methods to visualize the distance between the samples or groups
# when .group is not provided, the dot heatmap plot will be return
p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray)
png("distance_between_samples.png", width= 1000, height=1000)
p1
dev.off()
# when .group is provided, the dot heatmap plot with group information will be return.
p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = Group)
# The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function.
p2 %>% set_scale_theme(
x = scale_fill_manual(
values=c("#1f78b4", "#e31a1c"), #c("orange", "deepskyblue"),
guide = guide_legend(
keywidth = 1,
keyheight = 0.5,
title.theme = element_text(size=8),
label.theme = element_text(size=6)
)
),
aes_var = Group # specific the name of variable
) %>%
set_scale_theme(
x = scale_color_gradient(
guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
),
aes_var = bray
) %>%
set_scale_theme(
x = scale_size_continuous(
range = c(0.1, 3),
guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
),
aes_var = bray
)
png("distance_between_samples_with_group_info.png", width= 1000, height=1000)
p2
dev.off()
# when .group is provided and group.test is TRUE, the comparison of different groups will be returned
# Assuming p3 is a ggplot object after mp_plot_dist call
p3 <- mpse_abund %>%
mp_plot_dist(.distmethod = bray, .group = Group, group.test = TRUE, textsize = 6) +
theme(
axis.title.x = element_text(size = 14), # Customize x-axis label face = "bold"
axis.title.y = element_text(size = 14), # Customize y-axis label
axis.text.x = element_text(size = 14), # Customize x-axis ticks
axis.text.y = element_text(size = 14) # Customize y-axis ticks
)
# Save the plot with the new theme settings
png("Comparison_of_Bray_Distances_Group3-4.png", width = 1000, height = 1000)
print(p3) # Ensure that p3 is explicitly printed in the device
dev.off()
# Extract Bray-Curtis Distance Values and save them in a Excel-table.
library(dplyr)
library(tidyr)
library(openxlsx)
# Define the sample numbers vector
sample_numbers <- c("sample-C3","sample-C4","sample-C5","sample-C6","sample-C7", "sample-E4","sample-E5","sample-E6","sample-E7","sample-E8")
# Consolidate the list of tibbles using the actual sample numbers
bray_data <- bind_rows(
lapply(seq_along(mpse_abund$bray), function(i) {
tibble(
Sample1 = sample_numbers[i], # Use actual sample number
Sample2 = mpse_abund$bray[[i]]$braySampley,
BrayDistance = mpse_abund$bray[[i]]$bray
)
}),
.id = "PairID"
)
# Print the data frame to check the output
print(bray_data)
# Write the data frame to an Excel file
write.xlsx(bray_data, file = "Bray_Curtis_Distances.xlsx")
#DELETE the column "PairID" in Excel file
# -----------------------
# 3.5.2 The PCoA analysis
#install.packages("corrr")
library(corrr)
#install.packages("ggside")
library(ggside)
mpse_abund %<>%
mp_cal_pcoa(.abundance=hellinger, distmethod="bray")
# The dimensions of ordination analysis will be added the colData slot (default).
mpse_abund
mpse_abund %>% print(width=380, n=2)
#NOTE mpse_abund contains 34 varibles = 31 varibles + `PCo1 (30.16%)` <dbl>, `PCo2 (15.75%)` <dbl>, `PCo3 (10.53%)` <dbl>
#BUG why 36 variables in mpse_abund %>% print(width=380, n=1) [RareAbundanceBySample <list>, RareAbundanceByGroup <list>]
#> methods(class=class(mpse_abund))
# [1] [ [[<- [<-
# [4] $ $<- arrange
# [7] as_tibble as.data.frame as.phyloseq
#[10] coerce coerce<- colData<-
#[13] distinct filter group_by
#[16] left_join mp_adonis mp_aggregate_clade
#[19] mp_aggregate mp_anosim mp_balance_clade
#[22] mp_cal_abundance mp_cal_alpha mp_cal_cca
#[25] mp_cal_clust mp_cal_dca mp_cal_dist
#[28] mp_cal_nmds mp_cal_pca mp_cal_pcoa
#[31] mp_cal_pd_metric mp_cal_rarecurve mp_cal_rda
#[34] mp_cal_upset mp_cal_venn mp_decostand
#[37] mp_diff_analysis mp_diff_clade mp_envfit
#[40] mp_extract_abundance mp_extract_assays mp_extract_dist
#[43] mp_extract_feature mp_extract_internal_attr mp_extract_rarecurve
#[46] mp_extract_refseq mp_extract_sample mp_extract_taxonomy
#[49] mp_extract_tree mp_filter_taxa mp_mantel
#[52] mp_mrpp mp_plot_abundance mp_plot_alpha
#[55] mp_plot_diff_boxplot mp_plot_diff_res mp_plot_dist
#[58] mp_plot_ord mp_plot_rarecurve mp_plot_upset
#[61] mp_plot_venn mp_rrarefy mp_select_as_tip
#[64] mp_stat_taxa mutate otutree
#[67] otutree<- print pull
#[70] refsequence refsequence<- rename
#[73] rownames<- select show
# [ reached getOption("max.print") -- omitted 6 entries ]
#see '?methods' for accessing help and source code
# We also can perform adonis or anosim to check whether it is significant to the dissimilarities of groups.
mpse_abund %<>%
mp_adonis(.abundance=hellinger, .formula=~Group, distmethod="bray", permutations=9999, action="add")
mpse_abund %>% mp_extract_internal_attr(name=adonis)
#NOTE mpse_abund contains 34 varibles, no new variable, it has been saved in mpse_abund and can be extracted with "mpse_abund %>% mp_extract_internal_attr(name='adonis')"
#The result of adonis has been saved to the internal attribute !
#It can be extracted using this-object %>% mp_extract_internal_attr(name='adonis')
#The object contained internal attribute: PCoA ADONIS
#Permutation test for adonis under reduced model
#Terms added sequentially (first to last)
#Permutation: free
#Number of permutations: 9999
#
#vegan::adonis2(formula = .formula, data = sampleda, permutations = permutations, method = distmethod)
# Df SumOfSqs R2 F Pr(>F)
#Group 1 0.23448 0.22659 3.5158 5e-04 ***
#Residual 12 0.80032 0.77341
#Total 13 1.03480 1.00000
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# ("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")
#div.df2[div.df2 == "Group1"] <- "aged.post"
#div.df2[div.df2 == "Group3"] <- "young.post"
#div.df2[div.df2 == "Group5"] <- "aged.post"
#div.df2[div.df2 == "Group7"] <- "young.post"
# ("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")
#div.df2[div.df2 == "Group2"] <- "aged.pre"
#div.df2[div.df2 == "Group4"] <- "young.pre"
#div.df2[div.df2 == "Group6"] <- "aged.pre"
#div.df2[div.df2 == "Group8"] <- "young.pre"
#Group1: f.aged and post
#Group2: f.aged and pre
#Group3: f.young and post
#Group4: f.young and pre
#Group5: m.aged and post
#Group6: m.aged and pre
#Group7: m.young and post
#Group8: m.young and pre
#[,c("1","2","5","6","7", "8","9","10","12","13","14")]
#[,c("15","16","17","18","19","20", "21","22","23","24","25","26","27","28")]
#[,c("29","30","31","32", "33","34","35","36","37","38","39","51")]
#[,c("40","41","42","43","44","46", "47","48","49","50","52","53","55")]
#For the first set:
#a6cee3: This is a light blue color, somewhat pastel and soft.
#b2df8a: A soft, pale green, similar to a light lime.
#fb9a99: A soft pink, slightly peachy or salmon-like.
#cab2d6: A pale purple, reminiscent of lavender or a light mauve.
#For the second set:
#1f78b4: This is a strong, vivid blue, close to cobalt or a medium-dark blue.
#33a02c: A medium forest green, vibrant and leafy.
#e31a1c: A bright red, very vivid, similar to fire engine red.
#6a3d9a: This would be described as a deep purple, akin to a dark lavender or plum.
p1 <- mpse_abund %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = 4, # increase point size!
.alpha = 1,
ellipse = TRUE,
show.legend = FALSE
) +
scale_fill_manual(
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(
keywidth = 1.6,
keyheight = 1.6,
label.theme = element_text(size = 16)
)
) +
scale_color_manual(
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(
keywidth = 1.6,
keyheight = 1.6,
label.theme = element_text(size = 16)
)
) +
theme(
axis.text = element_text(size = 20),
axis.title = element_text(size = 22),
legend.text = element_text(size = 20),
legend.title = element_text(size = 22),
plot.title = element_text(size = 24, face = "bold"),
plot.subtitle = element_text(size = 20)
)
png("PCoA_Group3-4.png", width = 1200, height = 1000)
p1
dev.off()
pdf("PCoA_Group3-4.pdf")
p1
dev.off()
p2 <- mpse_abund %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = Shannon,
.alpha = Observe,
ellipse = TRUE,
show.legend = FALSE
) +
scale_fill_manual(
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_color_manual(
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_size_continuous(
range = c(2, 6), # increase size range!
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
theme(
axis.text = element_text(size = 20),
axis.title = element_text(size = 22),
legend.text = element_text(size = 20),
legend.title = element_text(size = 22),
plot.title = element_text(size = 24, face = "bold"),
plot.subtitle = element_text(size = 20)
)
png("PCoA2_Group3-4.png", width = 1200, height = 1000)
p2
dev.off()
pdf("PCoA2_Group3-4.pdf")
p2
dev.off()
# Extract sample names and add ShortLabel to colData
colData(mpse_abund)$ShortLabel <- gsub("sample-", "", mpse_abund@colData@rownames)
p3 <- mpse_abund %>%
mp_plot_ord(
.ord = pcoa,
.group = Group,
.color = Group,
.size = Shannon,
.alpha = Observe,
ellipse = TRUE,
show.legend = FALSE
) +
geom_text_repel(aes(label = ShortLabel), size = 5, max.overlaps = 100) +
scale_fill_manual(
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_color_manual(
values = c("#1f78b4", "#e31a1c"),
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
scale_size_continuous(
range = c(2, 6), # increase size range!
guide = guide_legend(
keywidth = 0.6,
keyheight = 0.6,
label.theme = element_text(size = 16)
)
) +
theme(
axis.text = element_text(size = 20),
axis.title = element_text(size = 22),
legend.text = element_text(size = 20),
legend.title = element_text(size = 22),
plot.title = element_text(size = 24, face = "bold"),
plot.subtitle = element_text(size = 20)
)
png("PCoA3_Group3-4.png", width = 1200, height = 1000)
p3
dev.off()
pdf("PCoA3_Group3-4.pdf")
p3
dev.off()
点赞本文的读者
还没有人对此文章表态
没有评论
Phyloseq.Rmd processing Data_Karoline_16S_2025
Cluster Analysis and Quantification of Segmented Volumes from IMARIS 3D Segmentation Data
© 2023 XGenes.com Impressum