Phyloseq + MicrobiotaProcess + PICRUSt2

gene_x 0 like s 1121 view s

Tags: R, metagenomics, 16S, tool, pipeline

  1. Phyloseq.Rmd

    1. author: ""
    2. date: '`r format(Sys.time(), "%d %m %Y")`'
    3. header-includes:
    4. - \usepackage{color, fancyvrb}
    5. output:
    6. rmdformats::readthedown:
    7. highlight: kate
    8. number_sections : yes
    9. pdf_document:
    10. toc: yes
    11. toc_depth: 2
    12. number_sections : yes
    13. ---
    14. ```{r, echo=FALSE, warning=FALSE}
    15. ## Global options
    16. # TODO: reproduce the html with the additional figure/SVN-files for editing.
    17. # IMPORTANT NOTE: needs before "mkdir figures"
    18. #rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
    19. ```
    20. ```{r load-packages, include=FALSE}
    21. library(knitr)
    22. library(rmdformats)
    23. library(readxl)
    24. library(dplyr)
    25. library(kableExtra)
    26. options(max.print="75")
    27. knitr::opts_chunk$set(fig.width=8,
    28. fig.height=6,
    29. eval=TRUE,
    30. cache=TRUE,
    31. echo=TRUE,
    32. prompt=FALSE,
    33. tidy=TRUE,
    34. comment=NA,
    35. message=FALSE,
    36. warning=FALSE)
    37. opts_knit$set(width=85)
    38. # Phyloseq R library
    39. #* Phyloseq web site : https://joey711.github.io/phyloseq/index.html
    40. #* See in particular tutorials for
    41. # - importing data: https://joey711.github.io/phyloseq/import-data.html
    42. # - heat maps: https://joey711.github.io/phyloseq/plot_heatmap-examples.html
    43. ```
    44. # Data
    45. Import raw data and assign sample key:
    46. ```{r, echo=TRUE, warning=FALSE}
    47. #extend map_corrected.txt with Diet and Flora
    48. #setwd("~/DATA/Data_Laura_16S_2/core_diversity_e4753")
    49. map_corrected <- read.csv("map_corrected.txt", sep="\t", row.names=1)
    50. knitr::kable(map_corrected) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    51. ```
    52. # Prerequisites to be installed
    53. * R : https://pbil.univ-lyon1.fr/CRAN/
    54. * R studio : https://www.rstudio.com/products/rstudio/download/#download
    55. ```R
    56. install.packages("dplyr") # To manipulate dataframes
    57. install.packages("readxl") # To read Excel files into R
    58. install.packages("ggplot2") # for high quality graphics
    59. install.packages("heatmaply")
    60. source("https://bioconductor.org/biocLite.R")
    61. biocLite("phyloseq")
    62. ```
    63. ```{r libraries, echo=TRUE, message=FALSE}
    64. library("readxl") # necessary to import the data from Excel file
    65. library("ggplot2") # graphics
    66. library("picante")
    67. library("microbiome") # data analysis and visualisation
    68. library("phyloseq") # also the basis of data object. Data analysis and visualisation
    69. library("ggpubr") # publication quality figures, based on ggplot2
    70. library("dplyr") # data handling, filter and reformat data frames
    71. library("RColorBrewer") # nice color options
    72. library("heatmaply")
    73. library(vegan)
    74. library(gplots)
    75. ```
    76. # Read the data and create phyloseq objects
    77. Three tables are needed
    78. * OTU
    79. * Taxonomy
    80. * Samples
    81. ```{r, echo=TRUE, warning=FALSE}
    82. #Change your working directory to where the files are located
    83. ps.ng.tax <- import_biom("./table_even42369.biom", "./clustering/rep_set.tre")
    84. sample <- read.csv("./map_corrected.txt", sep="\t", row.names=1)
    85. SAM = sample_data(sample, errorIfNULL = T)
    86. rownames(SAM) <-
    87. c("1","2","3","5","6","7","8","9","10","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","46","47","48","49","50","51","52","53","55")
    88. ps.ng.tax <- merge_phyloseq(ps.ng.tax, SAM)
    89. print(ps.ng.tax)
    90. colnames(tax_table(ps.ng.tax)) <- c("Domain","Phylum","Class","Order","Family","Genus","Species")
    91. saveRDS(ps.ng.tax, "./ps.ng.tax.rds")
    92. ```
    93. Visualize data
    94. ```{r, echo=TRUE, warning=FALSE}
    95. sample_names(ps.ng.tax)
    96. rank_names(ps.ng.tax)
    97. sample_variables(ps.ng.tax)
    98. ```
    99. Normalize number of reads in each sample using median sequencing depth.
    100. ```{r, echo=TRUE, warning=FALSE}
    101. # RAREFACTION
    102. #set.seed(9242) # This will help in reproducing the filtering and nomalisation.
    103. #ps.ng.tax <- rarefy_even_depth(ps.ng.tax, sample.size = 42369)
    104. #total <- 42369
    105. # NORMALIZE number of reads in each sample using median sequencing depth.
    106. total = median(sample_sums(ps.ng.tax))
    107. #> total
    108. #[1] 42369
    109. standf = function(x, t=total) round(t * (x / sum(x)))
    110. ps.ng.tax = transform_sample_counts(ps.ng.tax, standf)
    111. ps.ng.tax_rel <- microbiome::transform(ps.ng.tax, "compositional")
    112. saveRDS(ps.ng.tax, "./ps.ng.tax.rds")
    113. hmp.meta <- meta(ps.ng.tax)
    114. hmp.meta$sam_name <- rownames(hmp.meta)
    115. ```
    116. # Heatmaps
    117. ```{r, echo=TRUE, warning=FALSE}
    118. #MOVE_FROM_ABOVE: The number of reads used for normalization is **`r sprintf("%.0f", total)`**.
    119. #A basic heatmap using the default parameters.
    120. # plot_heatmap(ps.ng.tax, method = "NMDS", distance = "bray")
    121. #NOTE that giving the correct OTU numbers in the text (1%, 0.5%, ...)!!!
    122. ```
    123. We consider the most abundant OTUs for heatmaps. For example one can only take OTUs that represent at least 1% of reads in at least one sample. Remember we normalized all the sampples to median number of reads (total). We are left with only 168 OTUS which makes the reading much more easy.
    124. ```{r, echo=TRUE, warning=FALSE}
    125. # Custom function to plot a heatmap with the specified sample order
    126. #plot_heatmap_custom <- function(ps, sample_order, method = "NMDS", distance = "bray") {
    127. ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
    128. kable(otu_table(ps.ng.tax_abund)) %>%
    129. kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    130. # Calculate the relative abundance for each sample
    131. ps.ng.tax_abund_rel <- transform_sample_counts(ps.ng.tax_abund, function(x) x / sum(x))
    132. datamat_ = as.data.frame(otu_table(ps.ng.tax_abund))
    133. #datamat <- datamat_[c("1","2","5","6","7", "8","9","10","12","13","14", "15","16","17","18","19","20", "21","22","23","24","25","26","27","28", "29","30","31","32", "33","34","35","36","37","38","39","51", "40","41","42","43","44","46", "47","48","49","50","52","53","55")]
    134. datamat <- datamat_[c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
    135. hr <- hclust(as.dist(1-cor(t(datamat), method="pearson")), method="complete")
    136. hc <- hclust(as.dist(1-cor(datamat, method="spearman")), method="complete")
    137. mycl = cutree(hr, h=max(hr$height)/1.08)
    138. mycol = c("YELLOW", "DARKBLUE", "DARKORANGE", "DARKMAGENTA", "DARKCYAN", "DARKRED", "MAROON", "DARKGREEN", "LIGHTBLUE", "PINK", "MAGENTA", "LIGHTCYAN","LIGHTGREEN", "BLUE", "ORANGE", "CYAN", "RED", "GREEN");
    139. mycol = mycol[as.vector(mycl)]
    140. sampleCols <- rep('GREY',ncol(datamat))
    141. #names(sampleCols) <- c("Group1", "Group1", "Group1", "Group1", "Group1", "Group2", "Group2", "Group2", "Group2", "Group2","Group2", "Group3", "Group3", "Group3", "Group3", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1", "Group1")
    142. #sampleCols[colnames(datamat)=='1'] <- '#a6cee3'
    143. #sampleCols[colnames(datamat)=='2'] <- '#a6cee3'
    144. #sampleCols[colnames(datamat)=='5'] <- '#a6cee3'
    145. #sampleCols[colnames(datamat)=='6'] <- '#a6cee3'
    146. #sampleCols[colnames(datamat)=='7'] <- '#a6cee3'
    147. sampleCols[colnames(datamat)=='8'] <- '#1f78b4'
    148. sampleCols[colnames(datamat)=='9'] <- '#1f78b4'
    149. sampleCols[colnames(datamat)=='10'] <- '#1f78b4'
    150. sampleCols[colnames(datamat)=='12'] <- '#1f78b4'
    151. sampleCols[colnames(datamat)=='13'] <- '#1f78b4'
    152. sampleCols[colnames(datamat)=='14'] <- '#1f78b4'
    153. #sampleCols[colnames(datamat)=='15'] <- '#b2df8a'
    154. #sampleCols[colnames(datamat)=='16'] <- '#b2df8a'
    155. #sampleCols[colnames(datamat)=='17'] <- '#b2df8a'
    156. #sampleCols[colnames(datamat)=='18'] <- '#b2df8a'
    157. #sampleCols[colnames(datamat)=='19'] <- '#b2df8a'
    158. #sampleCols[colnames(datamat)=='20'] <- '#b2df8a'
    159. sampleCols[colnames(datamat)=='21'] <- '#33a02c'
    160. sampleCols[colnames(datamat)=='22'] <- '#33a02c'
    161. sampleCols[colnames(datamat)=='23'] <- '#33a02c'
    162. sampleCols[colnames(datamat)=='24'] <- '#33a02c'
    163. sampleCols[colnames(datamat)=='25'] <- '#33a02c'
    164. sampleCols[colnames(datamat)=='26'] <- '#33a02c'
    165. sampleCols[colnames(datamat)=='27'] <- '#33a02c'
    166. sampleCols[colnames(datamat)=='28'] <- '#33a02c'
    167. #sampleCols[colnames(datamat)=='29'] <- '#fb9a99'
    168. #sampleCols[colnames(datamat)=='30'] <- '#fb9a99'
    169. #sampleCols[colnames(datamat)=='31'] <- '#fb9a99'
    170. #sampleCols[colnames(datamat)=='32'] <- '#fb9a99'
    171. sampleCols[colnames(datamat)=='33'] <- '#e31a1c'
    172. sampleCols[colnames(datamat)=='34'] <- '#e31a1c'
    173. sampleCols[colnames(datamat)=='35'] <- '#e31a1c'
    174. sampleCols[colnames(datamat)=='36'] <- '#e31a1c'
    175. sampleCols[colnames(datamat)=='37'] <- '#e31a1c'
    176. sampleCols[colnames(datamat)=='38'] <- '#e31a1c'
    177. sampleCols[colnames(datamat)=='39'] <- '#e31a1c'
    178. sampleCols[colnames(datamat)=='51'] <- '#e31a1c'
    179. #sampleCols[colnames(datamat)=='40'] <- '#cab2d6'
    180. #sampleCols[colnames(datamat)=='41'] <- '#cab2d6'
    181. #sampleCols[colnames(datamat)=='42'] <- '#cab2d6'
    182. #sampleCols[colnames(datamat)=='43'] <- '#cab2d6'
    183. #sampleCols[colnames(datamat)=='44'] <- '#cab2d6'
    184. #sampleCols[colnames(datamat)=='46'] <- '#cab2d6'
    185. sampleCols[colnames(datamat)=='47'] <- '#6a3d9a'
    186. sampleCols[colnames(datamat)=='48'] <- '#6a3d9a'
    187. sampleCols[colnames(datamat)=='49'] <- '#6a3d9a'
    188. sampleCols[colnames(datamat)=='50'] <- '#6a3d9a'
    189. sampleCols[colnames(datamat)=='52'] <- '#6a3d9a'
    190. sampleCols[colnames(datamat)=='53'] <- '#6a3d9a'
    191. sampleCols[colnames(datamat)=='55'] <- '#6a3d9a'
    192. #bluered(75)
    193. #color_pattern <- colorRampPalette(c("blue", "white", "red"))(100)
    194. library(RColorBrewer)
    195. custom_palette <- colorRampPalette(brewer.pal(9, "Blues"))
    196. heatmap_colors <- custom_palette(100)
    197. #colors <- heatmap_color_default(100)
    198. png("figures/heatmap.png", width=1200, height=2400)
    199. #par(mar=c(2, 2, 2, 2)) , lwid=1 lhei=c(0.7, 10)) # Adjust height of color keys keysize=0.3,
    200. heatmap.2(as.matrix(datamat),Rowv=as.dendrogram(hr),Colv = NA, dendrogram = 'row',
    201. scale='row',trace='none',col=heatmap_colors, cexRow=1.2, cexCol=1.5,
    202. RowSideColors = mycol, ColSideColors = sampleCols, srtCol=15, labRow=row.names(datamat), key=TRUE, margins=c(10, 15), lhei=c(0.7, 15), lwid=c(1,8))
    203. dev.off()
    204. ```
    205. ```{r, echo=TRUE, warning=FALSE, fig.cap="Heatmap", out.width = '100%', fig.align= "center"}
    206. knitr::include_graphics("./figures/heatmap.png")
    207. ```
    208. ```{r, echo=FALSE, warning=FALSE}
    209. #It is possible to use different distances and different multivaraite methods. Many different built-in distances can be used.
    210. #dist_methods <- unlist(distanceMethodList)
    211. #print(dist_methods)
    212. ```
    213. \pagebreak
    214. # Taxonomic summary
    215. ## Bar plots in phylum level
    216. ```{r, echo=FALSE, warning=FALSE}
    217. #Make the bargraph nicer by removing OTUs boundaries. This is done by adding ggplot2 modifier.
    218. # 1: uniform color. Color is for the border, fill is for the inside
    219. #ggplot(mtcars, aes(x=as.factor(cyl) )) +
    220. # geom_bar(color="blue", fill=rgb(0.1,0.4,0.5,0.7) )
    221. # 2: Using Hue
    222. #ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
    223. # geom_bar( ) +
    224. # scale_fill_hue(c = 40) +
    225. # theme(legend.position="none")
    226. # 3: Using RColorBrewer
    227. #ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
    228. # geom_bar( ) +
    229. # scale_fill_brewer(palette = "Set1") +
    230. # theme(legend.position="none")
    231. # 4: Using greyscale:
    232. #ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
    233. # geom_bar( ) +
    234. # scale_fill_grey(start = 0.25, end = 0.75) +
    235. # theme(legend.position="none")
    236. # 5: Set manualy
    237. #ggplot(mtcars, aes(x=as.factor(cyl), fill=as.factor(cyl) )) +
    238. # geom_bar( ) +
    239. # scale_fill_manual(values = c("red", "green", "blue") ) +
    240. # theme(legend.position="none")
    241. #NOT SUCCESSFUL!
    242. #allGroupsColors<- c(
    243. # "grey0", "grey50", "dodgerblu", "deepskyblue",
    244. # "red", "darkred", "green", "green4")
    245. # plot_bar(ps.ng.tax_rel, fill="Phylum") +
    246. # geom_bar(stat="identity", position="stack") + scale_color_manual(values = allGroupsColors) #, fill=Phylum + scale_fill_brewer(palette = "Set1")
    247. # ##### Keep only the most abundant phyla and
    248. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Actinobacteria")) #1.57
    249. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Bacteroidetes")) #27.27436
    250. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Cyanobacteria")) #0.02244249
    251. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Epsilonbacteraeota")) #0.01309145
    252. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Euryarchaeota")) #0.1210024
    253. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Firmicutes")) #32.50589
    254. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Lentisphaerae")) #0.0001870208
    255. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Patescibacteria")) #0.008789976
    256. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Planctomycetes")) #0.01365252
    257. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Proteobacteria")) #6.769216
    258. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Synergistetes")) #0.005049561
    259. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Tenericutes")) #0.0005610623
    260. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Verrucomicrobia")) #2.076304
    261. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c(NA)) #sum(otu_table(ps.ng.tax_most)) = 2.619413
    262. ```
    263. ```{r, echo=TRUE, warning=FALSE}
    264. library(ggplot2)
    265. geom.text.size = 6
    266. theme.size = 8 #(14/5) * geom.text.size
    267. #ps.ng.tax_most <- subset_taxa(ps.ng.tax_rel, Phylum %in% c("D_1__Actinobacteria", "D_1__Bacteroidetes", "D_1__Firmicutes", "D_1__Proteobacteria", "D_1__Verrucomicrobia", NA))
    268. ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)
    269. #CONSOLE(OPTIONAL): for sampleid in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73; do
    270. #echo "otu_table(ps.ng.tax_most)[,${sampleid}]=otu_table(ps.ng.tax_most)[,${sampleid}]/sum(otu_table(ps.ng.tax_most)[,${sampleid}])" done
    271. #OR
    272. ps.ng.tax_most_ = transform_sample_counts(ps.ng.tax_most, function(x) x / sum(x))
    273. ```
    274. ```{r, echo=FALSE, warning=FALSE}
    275. ##--Creating 100% stacked bar plots with less abundant taxa in a sub-category #901--
    276. ##https://github.com/joey711/phyloseq/issues/901
    277. ##ps.ng.tax_most_df <- psmelt(ps.ng.tax_most_) #5986x19
    278. #glom <- tax_glom(ps.ng.tax_most_, taxrank = 'Phylum')
    279. #tax_table(glom) # should list # taxa as # phyla
    280. #data <- psmelt(glom) # create dataframe from phyloseq object
    281. #data$Phylum <- as.character(data$Phylum) #convert to character
    282. ##simple way to rename phyla with < 1% abundance
    283. #data$Phylum[data$Abundance < 0.001] <- "< 0.1% abund."
    284. #
    285. #library(plyr)
    286. #medians <- ddply(data, ~Phylum, function(x) c(median=median(x$Abundance)))
    287. #remainder <- medians[medians$median <= 0.001,]$Phylum
    288. #data[data$Phylum %in% remainder,]$Phylum <- "Phyla < 0.1% abund."
    289. #data$Phylum[data$Abundance < 0.001] <- "Phyla < 0.1% abund."
    290. ##--> data are not used!
    291. #
    292. ##in class level
    293. #glom <- tax_glom(ps.ng.tax_most_, taxrank = 'Class')
    294. #tax_table(glom) # should list # taxa as # phyla
    295. #data <- psmelt(glom) # create dataframe from phyloseq object
    296. #data$Class <- as.character(data$Class) #convert to character
    297. #
    298. ##simple way to rename phyla with < 1% abundance
    299. #data$Class[data$Abundance < 0.001] <- "< 0.1% abund."
    300. #Count = length(unique(data$Class))
    301. #
    302. ##unique(data$Class)
    303. ##data$Class <- factor(data$Class, levels = c("Bacilli", "Bacteroidia", "Verrucomicrobiae", "Clostridia", "Gammaproteobacteria", "Alphaproteobacteria", "Actinobacteria", "Negativicutes", "Erysipelotrichia", "Methanobacteria", "< 0.1% abund."))
    304. ##------- Creating 100% stacked bar plots END --------
    305. library(stringr)
    306. #FITTING1:
    307. # tax_table(ps.ng.tax_most_)[1,"Domain"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Domain"], "__")[[1]][2]
    308. # ... ...
    309. # tax_table(ps.ng.tax_most_)[167,"Species"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Species"], "__")[[1]][2]
    310. #ps.ng.tax_most_
    311. #in total [ 89 taxa and 55 samples ]
    312. #otu_table() OTU Table: [ 166 taxa and 54 samples ]
    313. #otu_table() OTU Table: [ 168 taxa and 50 samples ]
    314. #for id in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166; do
    315. #echo "tax_table(ps.ng.tax_most_)[${id},\"Domain\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Domain\"], \"__\")[[1]][2]"
    316. #echo "tax_table(ps.ng.tax_most_)[${id},\"Phylum\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Phylum\"], \"__\")[[1]][2]"
    317. #echo "tax_table(ps.ng.tax_most_)[${id},\"Class\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Class\"], \"__\")[[1]][2]"
    318. #echo "tax_table(ps.ng.tax_most_)[${id},\"Order\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Order\"], \"__\")[[1]][2]"
    319. #echo "tax_table(ps.ng.tax_most_)[${id},\"Family\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Family\"], \"__\")[[1]][2]"
    320. #echo "tax_table(ps.ng.tax_most_)[${id},\"Genus\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Genus\"], \"__\")[[1]][2]"
    321. #echo "tax_table(ps.ng.tax_most_)[${id},\"Species\"] <- str_split(tax_table(ps.ng.tax_most_)[${id},\"Species\"], \"__\")[[1]][2]"
    322. #done
    323. tax_table(ps.ng.tax_most_)[1,"Domain"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Domain"], "__")[[1]][2]
    324. tax_table(ps.ng.tax_most_)[1,"Phylum"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Phylum"], "__")[[1]][2]
    325. tax_table(ps.ng.tax_most_)[1,"Class"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Class"], "__")[[1]][2]
    326. tax_table(ps.ng.tax_most_)[1,"Order"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Order"], "__")[[1]][2]
    327. tax_table(ps.ng.tax_most_)[1,"Family"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Family"], "__")[[1]][2]
    328. tax_table(ps.ng.tax_most_)[1,"Genus"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Genus"], "__")[[1]][2]
    329. tax_table(ps.ng.tax_most_)[1,"Species"] <- str_split(tax_table(ps.ng.tax_most_)[1,"Species"], "__")[[1]][2]
    330. #... ...
    331. tax_table(ps.ng.tax_most_)[167,"Domain"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Domain"], "__")[[1]][2]
    332. tax_table(ps.ng.tax_most_)[167,"Phylum"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Phylum"], "__")[[1]][2]
    333. tax_table(ps.ng.tax_most_)[167,"Class"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Class"], "__")[[1]][2]
    334. tax_table(ps.ng.tax_most_)[167,"Order"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Order"], "__")[[1]][2]
    335. tax_table(ps.ng.tax_most_)[167,"Family"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Family"], "__")[[1]][2]
    336. tax_table(ps.ng.tax_most_)[167,"Genus"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Genus"], "__")[[1]][2]
    337. tax_table(ps.ng.tax_most_)[167,"Species"] <- str_split(tax_table(ps.ng.tax_most_)[167,"Species"], "__")[[1]][2]
    338. ```
    339. ```{r, echo=TRUE, warning=FALSE}
    340. #aes(color="Phylum", fill="Phylum") --> aes()
    341. #ggplot(data=data, aes(x=Sample, y=Abundance, fill=Phylum))
    342. plot_bar(ps.ng.tax_most_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") +
    343. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2)) #6 instead of theme.size
    344. ```
    345. ```{r, echo=FALSE, warning=FALSE}
    346. #png("abc.png")
    347. #knitr::include_graphics("./Phyloseq_files/figure-html/unnamed-chunk-7-1.png")
    348. #dev.off()
    349. ```
    350. \pagebreak
    351. Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
    352. ```{r, echo=TRUE, warning=FALSE}
    353. ps.ng.tax_most_pre_post_stroke <- merge_samples(ps.ng.tax_most_, "pre_post_stroke")
    354. ps.ng.tax_most_pre_post_stroke_ = transform_sample_counts(ps.ng.tax_most_pre_post_stroke, function(x) x / sum(x))
    355. #plot_bar(ps.ng.tax_most_SampleType_, fill = "Phylum") + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack")
    356. plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Phylum") + geom_bar(aes(), stat="identity", position="stack") +
    357. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black"))
    358. ```
    359. \pagebreak
    360. Use color according to phylum. Do separate panels Stroke and Sex_age.
    361. ```{r, echo=TRUE, warning=FALSE}
    362. ps.ng.tax_most_copied <- data.table::copy(ps.ng.tax_most_)
    363. #FITTING6: regulate the bar height if it has replicates: 5+6+6+8+4+8+6+7=25+25=50
    364. otu_table(ps.ng.tax_most_)[,c("1")] <- otu_table(ps.ng.tax_most_)[,c("1")]/5
    365. otu_table(ps.ng.tax_most_)[,c("2")] <- otu_table(ps.ng.tax_most_)[,c("2")]/5
    366. otu_table(ps.ng.tax_most_)[,c("5")] <- otu_table(ps.ng.tax_most_)[,c("5")]/5
    367. otu_table(ps.ng.tax_most_)[,c("6")] <- otu_table(ps.ng.tax_most_)[,c("6")]/5
    368. otu_table(ps.ng.tax_most_)[,c("7")] <- otu_table(ps.ng.tax_most_)[,c("7")]/5
    369. otu_table(ps.ng.tax_most_)[,c("8")] <- otu_table(ps.ng.tax_most_)[,c("8")]/6
    370. otu_table(ps.ng.tax_most_)[,c("9")] <- otu_table(ps.ng.tax_most_)[,c("9")]/6
    371. otu_table(ps.ng.tax_most_)[,c("10")] <- otu_table(ps.ng.tax_most_)[,c("10")]/6
    372. otu_table(ps.ng.tax_most_)[,c("12")] <- otu_table(ps.ng.tax_most_)[,c("12")]/6
    373. otu_table(ps.ng.tax_most_)[,c("13")] <- otu_table(ps.ng.tax_most_)[,c("13")]/6
    374. otu_table(ps.ng.tax_most_)[,c("14")] <- otu_table(ps.ng.tax_most_)[,c("14")]/6
    375. otu_table(ps.ng.tax_most_)[,c("15")] <- otu_table(ps.ng.tax_most_)[,c("15")]/6
    376. otu_table(ps.ng.tax_most_)[,c("16")] <- otu_table(ps.ng.tax_most_)[,c("16")]/6
    377. otu_table(ps.ng.tax_most_)[,c("17")] <- otu_table(ps.ng.tax_most_)[,c("17")]/6
    378. otu_table(ps.ng.tax_most_)[,c("18")] <- otu_table(ps.ng.tax_most_)[,c("18")]/6
    379. otu_table(ps.ng.tax_most_)[,c("19")] <- otu_table(ps.ng.tax_most_)[,c("19")]/6
    380. otu_table(ps.ng.tax_most_)[,c("20")] <- otu_table(ps.ng.tax_most_)[,c("20")]/6
    381. otu_table(ps.ng.tax_most_)[,c("21")] <- otu_table(ps.ng.tax_most_)[,c("21")]/8
    382. otu_table(ps.ng.tax_most_)[,c("22")] <- otu_table(ps.ng.tax_most_)[,c("22")]/8
    383. otu_table(ps.ng.tax_most_)[,c("23")] <- otu_table(ps.ng.tax_most_)[,c("23")]/8
    384. otu_table(ps.ng.tax_most_)[,c("24")] <- otu_table(ps.ng.tax_most_)[,c("24")]/8
    385. otu_table(ps.ng.tax_most_)[,c("25")] <- otu_table(ps.ng.tax_most_)[,c("25")]/8
    386. otu_table(ps.ng.tax_most_)[,c("26")] <- otu_table(ps.ng.tax_most_)[,c("26")]/8
    387. otu_table(ps.ng.tax_most_)[,c("27")] <- otu_table(ps.ng.tax_most_)[,c("27")]/8
    388. otu_table(ps.ng.tax_most_)[,c("28")] <- otu_table(ps.ng.tax_most_)[,c("28")]/8
    389. otu_table(ps.ng.tax_most_)[,c("29")] <- otu_table(ps.ng.tax_most_)[,c("29")]/4
    390. otu_table(ps.ng.tax_most_)[,c("30")] <- otu_table(ps.ng.tax_most_)[,c("30")]/4
    391. otu_table(ps.ng.tax_most_)[,c("31")] <- otu_table(ps.ng.tax_most_)[,c("31")]/4
    392. otu_table(ps.ng.tax_most_)[,c("32")] <- otu_table(ps.ng.tax_most_)[,c("32")]/4
    393. otu_table(ps.ng.tax_most_)[,c("33")] <- otu_table(ps.ng.tax_most_)[,c("33")]/8
    394. otu_table(ps.ng.tax_most_)[,c("34")] <- otu_table(ps.ng.tax_most_)[,c("34")]/8
    395. otu_table(ps.ng.tax_most_)[,c("35")] <- otu_table(ps.ng.tax_most_)[,c("35")]/8
    396. otu_table(ps.ng.tax_most_)[,c("36")] <- otu_table(ps.ng.tax_most_)[,c("36")]/8
    397. otu_table(ps.ng.tax_most_)[,c("37")] <- otu_table(ps.ng.tax_most_)[,c("37")]/8
    398. otu_table(ps.ng.tax_most_)[,c("38")] <- otu_table(ps.ng.tax_most_)[,c("38")]/8
    399. otu_table(ps.ng.tax_most_)[,c("39")] <- otu_table(ps.ng.tax_most_)[,c("39")]/8
    400. otu_table(ps.ng.tax_most_)[,c("51")] <- otu_table(ps.ng.tax_most_)[,c("51")]/8
    401. otu_table(ps.ng.tax_most_)[,c("40")] <- otu_table(ps.ng.tax_most_)[,c("40")]/6
    402. otu_table(ps.ng.tax_most_)[,c("41")] <- otu_table(ps.ng.tax_most_)[,c("41")]/6
    403. otu_table(ps.ng.tax_most_)[,c("42")] <- otu_table(ps.ng.tax_most_)[,c("42")]/6
    404. otu_table(ps.ng.tax_most_)[,c("43")] <- otu_table(ps.ng.tax_most_)[,c("43")]/6
    405. otu_table(ps.ng.tax_most_)[,c("44")] <- otu_table(ps.ng.tax_most_)[,c("44")]/6
    406. otu_table(ps.ng.tax_most_)[,c("46")] <- otu_table(ps.ng.tax_most_)[,c("46")]/6
    407. otu_table(ps.ng.tax_most_)[,c("47")] <- otu_table(ps.ng.tax_most_)[,c("47")]/7
    408. otu_table(ps.ng.tax_most_)[,c("48")] <- otu_table(ps.ng.tax_most_)[,c("48")]/7
    409. otu_table(ps.ng.tax_most_)[,c("49")] <- otu_table(ps.ng.tax_most_)[,c("49")]/7
    410. otu_table(ps.ng.tax_most_)[,c("50")] <- otu_table(ps.ng.tax_most_)[,c("50")]/7
    411. otu_table(ps.ng.tax_most_)[,c("52")] <- otu_table(ps.ng.tax_most_)[,c("52")]/7
    412. otu_table(ps.ng.tax_most_)[,c("53")] <- otu_table(ps.ng.tax_most_)[,c("53")]/7
    413. otu_table(ps.ng.tax_most_)[,c("55")] <- otu_table(ps.ng.tax_most_)[,c("55")]/7
    414. #plot_bar(ps.ng.tax_most_swab_, x="Phylum", fill = "Phylum", facet_grid = Patient~RoundDay) + geom_bar(aes(color=Phylum, fill=Phylum), stat="identity", position="stack") + theme(axis.text = element_text(size = theme.size, colour="black"))
    415. plot_bar(ps.ng.tax_most_, x="Phylum", fill="Phylum", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
    416. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=2))
    417. ```
    418. ```{r, echo=FALSE, warning=FALSE}
    419. #knitr::include_graphics("./Phyloseq_files/figure-html/unnamed-chunk-10-1.png")
    420. #> tax_table(carbom)
    421. #Taxonomy Table: [205 taxa by 7 taxonomic ranks]:
    422. # Domain Supergroup Division Class
    423. #Otu001 "Eukaryota" "Archaeplastida" "Chlorophyta" "Mamiellophyceae"
    424. # Order Family Genus
    425. #Otu001 "Mamiellales" "Bathycoccaceae" "Ostreococcus"
    426. #sample_data(ps.ng.tax)
    427. ```
    428. ## Bar plots in class level
    429. ```{r, echo=TRUE, warning=FALSE}
    430. plot_bar(ps.ng.tax_most_copied, fill="Class") + geom_bar(aes(), stat="identity", position="stack") +
    431. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3))
    432. ```
    433. Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
    434. ```{r, echo=TRUE, warning=FALSE}
    435. plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Class") + geom_bar(aes(), stat="identity", position="stack") +
    436. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black"))
    437. ```
    438. \pagebreak
    439. Use color according to class. Do separate panels Stroke and Sex_age.
    440. ```{r, echo=TRUE, warning=FALSE}
    441. #-- If existing replicates, to be processed as follows --
    442. plot_bar(ps.ng.tax_most_, x="Class", fill="Class", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
    443. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=3))
    444. ```
    445. ## Bar plots in order level
    446. ```{r, echo=TRUE, warning=FALSE}
    447. plot_bar(ps.ng.tax_most_copied, fill="Order") + geom_bar(aes(), stat="identity", position="stack") +
    448. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=4))
    449. ```
    450. Regroup together pre vs post stroke and normalize number of reads in each group using median sequencing depth.
    451. ```{r, echo=TRUE, warning=FALSE}
    452. plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Order") + geom_bar(aes(), stat="identity", position="stack") +
    453. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = theme.size, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=4))
    454. ```
    455. \pagebreak
    456. Use color according to order. Do separate panels Stroke and Sex_age.
    457. ```{r, echo=TRUE, warning=FALSE}
    458. #FITTING7: regulate the bar height if it has replicates
    459. plot_bar(ps.ng.tax_most_, x="Order", fill="Order", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
    460. scale_fill_manual(values = c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue","royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=4))
    461. ```
    462. ## Bar plots in family level
    463. ```{r, echo=TRUE, warning=FALSE}
    464. plot_bar(ps.ng.tax_most_copied, fill="Family") + geom_bar(aes(), stat="identity", position="stack") +
    465. scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = 5, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=8))
    466. ```
    467. Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
    468. ```{r, echo=TRUE, warning=FALSE}
    469. plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Family") + geom_bar(aes(), stat="identity", position="stack") +
    470. scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = theme.size, colour="black")) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=8))
    471. ```
    472. \pagebreak
    473. Use color according to family. Do separate panels Stroke and Sex_age.
    474. ```{r, echo=TRUE, warning=FALSE}
    475. #-- If existing replicates, to be processed as follows --
    476. plot_bar(ps.ng.tax_most_, x="Family", fill="Family", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
    477. scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = 5, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=8))
    478. ```
    479. ```{r, echo=FALSE, warning=FALSE}
    480. #MOVE_FROM_ABOVE: ## Bar plots in genus level
    481. #MOVE_FROM_ABOVE: Regroup together pre vs post stroke samples and normalize number of reads in each group using median sequencing depth.
    482. #plot_bar(ps.ng.tax_most_pre_post_stroke_, fill="Genus") + geom_bar(aes(), stat="identity", position="stack") +
    483. #scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = theme.size, colour="black")) + theme(legend.position="bottom")
    484. ```
    485. \pagebreak
    486. ```{r, echo=FALSE, warning=FALSE}
    487. #MOVE_FROM_ABOVE: Use color according to genus. Do separate panels Stroke and Sex_age.
    488. ##-- If existing replicates, to be processed as follows --
    489. #plot_bar(ps.ng.tax_most_, x="Genus", fill="Genus", facet_grid = pre_post_stroke~Sex_age) + geom_bar(aes(), stat="identity", position="stack") +
    490. #scale_fill_manual(values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#FFFFFF", "#FFFF00", "#00FFFF", "#FFA500", "#00FF00", "#808080", "#FF00FF", "#800080", "#FDD017", "#0000A0", "#3BB9FF", "#008000", "#800000", "#ADD8E6", "#F778A1", "#800517", "#736F6E", "#F52887", "#C11B17", "#5CB3FF", "#A52A2A", "#FF8040", "#2B60DE", "#736AFF", "#1589FF", "#98AFC7", "#8D38C9", "#307D7E", "#F6358A", "#151B54", "#6D7B8D", "#FDEEF4", "#FF0080", "#F88017", "#2554C7", "#FFF8C6", "#D4A017", "#306EFF", "#151B8D", "#9E7BFF", "#EAC117", "#E0FFFF", "#15317E", "#6C2DC7", "#FBB917", "#FCDFFF", "#15317E", "#254117", "#FAAFBE", "#357EC7")) + theme(axis.text = element_text(size = 6, colour="black"), axis.text.x=element_blank(), axis.ticks=element_blank()) + theme(legend.position="bottom") + guides(fill=guide_legend(nrow=18))
    491. ```
    492. \pagebreak
    493. # Alpha diversity
    494. Plot Chao1 richness estimator, Observed OTUs, Shannon index, and Phylogenetic diversity.
    495. Regroup together samples from the same group.
    496. ```{r, echo=FALSE, warning=FALSE}
    497. # using rarefied data
    498. #FITTING2: CONSOLE:
    499. #gunzip table_even4753.biom.gz
    500. #alpha_diversity.py -i table_even42369.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering/rep_set.tre
    501. #gunzip table_even4753.biom.gz
    502. #alpha_diversity.py -i table_even4753.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering_stool/rep_set.tre
    503. #gunzip table_even4753.biom.gz
    504. #alpha_diversity.py -i table_even4753.biom --metrics chao1,observed_otus,shannon,PD_whole_tree -o adiv_even.txt -t ../clustering_swab/rep_set.tre
    505. ```
    506. ```{r, echo=TRUE, warning=FALSE}
    507. hmp.div_qiime <- read.csv("adiv_even.txt", sep="\t")
    508. colnames(hmp.div_qiime) <- c("sam_name", "chao1", "observed_otus", "shannon", "PD_whole_tree")
    509. row.names(hmp.div_qiime) <- hmp.div_qiime$sam_name
    510. div.df <- merge(hmp.div_qiime, hmp.meta, by = "sam_name")
    511. div.df2 <- div.df[, c("Group", "chao1", "shannon", "observed_otus", "PD_whole_tree")]
    512. colnames(div.df2) <- c("Group", "Chao-1", "Shannon", "OTU", "Phylogenetic Diversity")
    513. #colnames(div.df2)
    514. options(max.print=999999)
    515. #27 H47 830.5000 5.008482 319 10.60177
    516. #FITTING4: if occuring "Computation failed in `stat_signif()`:not enough 'y' observations"
    517. #means: the patient H47 contains only one sample, it should be removed for the statistical p-values calculations.
    518. #delete H47(1)
    519. #div.df2 <- div.df2[-c(3), ]
    520. #div.df2 <- div.df2[-c(55,54, 45,40,39,27,26,25,1), ]
    521. knitr::kable(div.df2) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    522. #https://uc-r.github.io/t_test
    523. #We can perform the test with t.test and transform our data and we can also perform the nonparametric test with the wilcox.test function.
    524. stat.test.Shannon <- compare_means(
    525. Shannon ~ Group, data = div.df2,
    526. method = "t.test"
    527. )
    528. knitr::kable(stat.test.Shannon) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    529. div_df_melt <- reshape2::melt(div.df2)
    530. #head(div_df_melt)
    531. #https://plot.ly/r/box-plots/#horizontal-boxplot
    532. #http://www.sthda.com/english/wiki/print.php?id=177
    533. #https://rpkgs.datanovia.com/ggpubr/reference/as_ggplot.html
    534. #http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/82-ggplot2-easy-way-to-change-graphical-parameters/
    535. #https://plot.ly/r/box-plots/#horizontal-boxplot
    536. #library("gridExtra")
    537. #par(mfrow=c(4,1))
    538. p <- ggboxplot(div_df_melt, x = "Group", y = "value",
    539. facet.by = "variable",
    540. scales = "free",
    541. width = 0.5,
    542. fill = "gray", legend= "right")
    543. #ggpar(p, xlab = FALSE, ylab = FALSE)
    544. lev <- levels(factor(div_df_melt$Group)) # get the variables
    545. #FITTING4: delete H47(1) in lev
    546. #lev <- lev[-c(3)]
    547. # make a pairwise list that we want to compare.
    548. #my_stat_compare_means
    549. #https://stackoverflow.com/questions/47839988/indicating-significance-with-ggplot2-in-a-boxplot-with-multiple-groups
    550. L.pairs <- combn(seq_along(lev), 2, simplify = FALSE, FUN = function(i) lev[i]) #%>% filter(p.signif != "ns")
    551. my_stat_compare_means <- function (mapping = NULL, data = NULL, method = NULL, paired = FALSE,
    552. method.args = list(), ref.group = NULL, comparisons = NULL,
    553. hide.ns = FALSE, label.sep = ", ", label = NULL, label.x.npc = "left",
    554. label.y.npc = "top", label.x = NULL, label.y = NULL, tip.length = 0.03,
    555. symnum.args = list(), geom = "text", position = "identity",
    556. na.rm = FALSE, show.legend = NA, inherit.aes = TRUE, ...)
    557. {
    558. if (!is.null(comparisons)) {
    559. method.info <- ggpubr:::.method_info(method)
    560. method <- method.info$method
    561. method.args <- ggpubr:::.add_item(method.args, paired = paired)
    562. if (method == "wilcox.test")
    563. method.args$exact <- FALSE
    564. pms <- list(...)
    565. size <- ifelse(is.null(pms$size), 0.3, pms$size)
    566. color <- ifelse(is.null(pms$color), "black", pms$color)
    567. map_signif_level <- FALSE
    568. if (is.null(label))
    569. label <- "p.format"
    570. if (ggpubr:::.is_p.signif_in_mapping(mapping) | (label %in% "p.signif")) {
    571. if (ggpubr:::.is_empty(symnum.args)) {
    572. map_signif_level <- c(`****` = 1e-04, `***` = 0.001,
    573. `**` = 0.01, `*` = 0.05, ns = 1)
    574. } else {
    575. map_signif_level <- symnum.args
    576. }
    577. if (hide.ns)
    578. names(map_signif_level)[5] <- " "
    579. }
    580. step_increase <- ifelse(is.null(label.y), 0.12, 0)
    581. ggsignif::geom_signif(comparisons = comparisons, y_position = label.y,
    582. test = method, test.args = method.args, step_increase = step_increase,
    583. size = size, color = color, map_signif_level = map_signif_level,
    584. tip_length = tip.length, data = data)
    585. } else {
    586. mapping <- ggpubr:::.update_mapping(mapping, label)
    587. layer(stat = StatCompareMeans, data = data, mapping = mapping,
    588. geom = geom, position = position, show.legend = show.legend,
    589. inherit.aes = inherit.aes, params = list(label.x.npc = label.x.npc,
    590. label.y.npc = label.y.npc, label.x = label.x,
    591. label.y = label.y, label.sep = label.sep, method = method,
    592. method.args = method.args, paired = paired, ref.group = ref.group,
    593. symnum.args = symnum.args, hide.ns = hide.ns,
    594. na.rm = na.rm, ...))
    595. }
    596. }
    597. p2 <- p +
    598. stat_compare_means(
    599. method="t.test",
    600. comparisons = list(c("Group1", "Group2"), c("Group1", "Group3"), c("Group1", "Group4"), c("Group1", "Group6"), c("Group1", "Group8"), c("Group2", "Group5"),c("Group4", "Group5"),c("Group4", "Group6"),c("Group4", "Group7"),c("Group6", "Group7")),
    601. label = "p.signif",
    602. symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))
    603. )
    604. #comparisons = L.pairs,
    605. #symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05), symbols = c("****", "***", "**", "*")),
    606. #stat_pvalue_manual
    607. #print(p2)
    608. #https://stackoverflow.com/questions/20500706/saving-multiple-ggplots-from-ls-into-one-and-separate-files-in-r
    609. #FITTING3: mkdir figures
    610. ggsave("./figures/alpha_diversity_Group.png", device="png", height = 10, width = 12)
    611. ggsave("./figures/alpha_diversity_Group.svg", device="svg", height = 10, width = 12)
    612. p3 <- p +
    613. stat_compare_means(
    614. method="t.test",
    615. comparisons = list(c("Group2", "Group4"), c("Group2", "Group6"), c("Group4", "Group8"), c("Group6", "Group8")),
    616. label = "p.signif",
    617. symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns")),
    618. )
    619. #symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05), symbols = c("****", "***", "**", "*")),
    620. #stat_pvalue_manual
    621. #print(p2)
    622. #https://stackoverflow.com/questions/20500706/saving-multiple-ggplots-from-ls-into-one-and-separate-files-in-r
    623. #FITTING3: mkdir figures
    624. ggsave("./figures/alpha_diversity_Group2.png", device="png", height = 10, width = 12)
    625. ggsave("./figures/alpha_diversity_Group2.svg", device="svg", height = 10, width = 12)
    626. ```
    627. # Selected alpha diversity
    628. ```{r, echo=TRUE, warning=FALSE, fig.cap="Alpha diversity", out.width = '100%', fig.align= "center"}
    629. knitr::include_graphics("./figures/alpha_diversity_Group2.png")
    630. selected_alpha_diversities<-read.csv("selected_alpha_diversities.txt",sep="\t")
    631. knitr::kable(selected_alpha_diversities) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    632. ```
    633. # Beta diversity
    634. ```{r, echo=TRUE, warning=FALSE, fig.cap="Beta diversity", out.width = '100%', fig.align= "center"}
    635. #file:///home/jhuang/DATA/Data_Marius_16S/core_diversity_e42369/bdiv_even42369_Group/unweighted_unifrac_boxplots/Group_Stats.txt
    636. beta_diversity_group_stats<-read.csv("unweighted_unifrac_boxplots_Group_Stats.txt",sep="\t")
    637. knitr::kable(beta_diversity_group_stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    638. #NOTE: Run this Phyloseq0.Rmd, then run the code of MicrobiotaProcess.R to manually generate PCoA.png, then run this Phyloseq.Rmd!
    639. #NOTE: AT_FIRST_DEACTIVATE_THIS_LINE: knitr::include_graphics("./figures/PCoA.png")
    640. ```
    641. # Differential abundance analysis
    642. Differential abundance analysis aims to find the differences in the abundance of each taxa between two groups of samples, assigning a significance value to each comparison.
    643. ## Group2 vs Group4
    644. ```{r, echo=TRUE, warning=FALSE}
    645. library("DESeq2")
    646. #ALTERNATIVE using ps.ng.tax_most_copied: ps.ng.tax (40594) vs. ps.ng.tax_most_copied (166)
    647. ps.ng.tax_sel <- ps.ng.tax
    648. #FITTING5: correct the id of the group members, see FITTING6
    649. otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28")]
    650. diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
    651. diagdds$Group <- relevel(diagdds$Group, "Group4")
    652. diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
    653. resultsNames(diagdds)
    654. res = results(diagdds, cooksCutoff = FALSE)
    655. alpha = 0.05
    656. sigtab = res[which(res$padj < alpha), ]
    657. sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
    658. sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
    659. kable(sigtab) %>%
    660. kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    661. #rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied))
    662. #subv %in% v
    663. ### returns a vector TRUE FALSE
    664. #is.element(subv, v)
    665. ### returns a vector TRUE FALSE
    666. library("ggplot2")
    667. theme_set(theme_bw())
    668. scale_fill_discrete <- function(palname = "Set1", ...) {
    669. scale_fill_brewer(palette = palname, ...)
    670. }
    671. x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
    672. x = sort(x)
    673. sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
    674. x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
    675. x = sort(x)
    676. sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
    677. ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
    678. theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
    679. #Error in checkForExperimentalReplicates(object, modelMatrix) :
    680. # The design matrix has the same number of samples and coefficients to fit,
    681. # so estimation of dispersion is not possible. Treating samples
    682. # as replicates was deprecated in v1.20 and no longer supported since v1.22.
    683. ```
    684. ## Group2 vs Group6
    685. ```{r, echo=TRUE, warning=FALSE}
    686. ps.ng.tax_sel <- ps.ng.tax
    687. otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("8","9","10","12","13","14", "33","34","35","36","37","38","39","51")]
    688. diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
    689. diagdds$Group <- relevel(diagdds$Group, "Group6")
    690. diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
    691. resultsNames(diagdds)
    692. res = results(diagdds, cooksCutoff = FALSE)
    693. alpha = 0.05
    694. sigtab = res[which(res$padj < alpha), ]
    695. sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
    696. sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
    697. kable(sigtab) %>%
    698. kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    699. library("ggplot2")
    700. theme_set(theme_bw())
    701. scale_fill_discrete <- function(palname = "Set1", ...) {
    702. scale_fill_brewer(palette = palname, ...)
    703. }
    704. x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
    705. x = sort(x)
    706. sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
    707. x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
    708. x = sort(x)
    709. sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
    710. ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
    711. theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
    712. ```
    713. ## Group4 vs Group8
    714. ```{r, echo=TRUE, warning=FALSE}
    715. ps.ng.tax_sel <- ps.ng.tax
    716. otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("21","22","23","24","25","26","27","28", "47","48","49","50","52","53","55")]
    717. diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
    718. diagdds$Group <- relevel(diagdds$Group, "Group8")
    719. diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
    720. resultsNames(diagdds)
    721. res = results(diagdds, cooksCutoff = FALSE)
    722. alpha = 0.05
    723. sigtab = res[which(res$padj < alpha), ]
    724. sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
    725. sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
    726. kable(sigtab) %>%
    727. kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    728. library("ggplot2")
    729. theme_set(theme_bw())
    730. scale_fill_discrete <- function(palname = "Set1", ...) {
    731. scale_fill_brewer(palette = palname, ...)
    732. }
    733. x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
    734. x = sort(x)
    735. sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
    736. x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
    737. x = sort(x)
    738. sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
    739. ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
    740. theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
    741. ```
    742. ## Group6 vs Group8
    743. ```{r, echo=TRUE, warning=FALSE}
    744. ps.ng.tax_sel <- ps.ng.tax
    745. otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
    746. diagdds = phyloseq_to_deseq2(ps.ng.tax_sel, ~Group)
    747. diagdds$Group <- relevel(diagdds$Group, "Group8")
    748. diagdds = DESeq(diagdds, test="Wald", fitType="parametric")
    749. resultsNames(diagdds)
    750. res = results(diagdds, cooksCutoff = FALSE)
    751. alpha = 0.05
    752. sigtab = res[which(res$padj < alpha), ]
    753. sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps.ng.tax_sel)[rownames(sigtab), ], "matrix"))
    754. sigtab <- sigtab[rownames(sigtab) %in% rownames(tax_table(ps.ng.tax_most_copied)), ]
    755. kable(sigtab) %>%
    756. kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    757. library("ggplot2")
    758. theme_set(theme_bw())
    759. scale_fill_discrete <- function(palname = "Set1", ...) {
    760. scale_fill_brewer(palette = palname, ...)
    761. }
    762. x = tapply(sigtab$log2FoldChange, sigtab$Order, function(x) max(x))
    763. x = sort(x)
    764. sigtab$Order = factor(as.character(sigtab$Order), levels=names(x))
    765. x = tapply(sigtab$log2FoldChange, sigtab$Family, function(x) max(x))
    766. x = sort(x)
    767. sigtab$Family = factor(as.character(sigtab$Family), levels=names(x))
    768. ggplot(sigtab, aes(x=log2FoldChange, y=Family, color=Order)) + geom_point(aes(size=padj)) + scale_size_continuous(name="padj",range=c(8,4))+
    769. theme(axis.text.x = element_text(angle = -25, hjust = 0, vjust=0.5))
    770. ```
  2. MicrobiotaProcess.R

    1. # -----------------------------------
    2. # ---- prepare the R environment ----
    3. #Rscript MicrobiotaProcess.R
    4. #NOTE: exit R script, then login again R-environment; rm -rf Phyloseq*_cache
    5. rmarkdown::render('Phyloseq.Rmd',output_file='Phyloseq.html')
    6. # -----------------------------
    7. # ---- 3.1. bridges other tools
    8. ##https://github.com/YuLab-SMU/MicrobiotaProcess
    9. ##https://www.bioconductor.org/packages/release/bioc/vignettes/MicrobiotaProcess/inst/doc/MicrobiotaProcess.html
    10. ##https://chiliubio.github.io/microeco_tutorial/intro.html#framework
    11. ##https://yiluheihei.github.io/microbiomeMarker/reference/plot_cladogram.html
    12. #BiocManager::install("MicrobiotaProcess")
    13. #install.packages("microeco")
    14. #install.packages("ggalluvial")
    15. #install.packages("ggh4x")
    16. library(MicrobiotaProcess)
    17. library(microeco)
    18. library(ggalluvial)
    19. library(ggh4x)
    20. library(gghalves)
    21. ## Convert the phyloseq object to a MicrobiotaProcess object
    22. #mp <- as.MicrobiotaProcess(ps.ng.tax)
    23. #mt <- phyloseq2microeco(ps.ng.tax) #--> ERROR
    24. #abundance_table <- mt$abun_table
    25. #taxonomy_table <- mt$tax_table
    26. #ps.ng.tax_abund <- phyloseq::filter_taxa(ps.ng.tax, function(x) sum(x > total*0.01) > 0, TRUE)
    27. #ps.ng.tax_most = phyloseq::filter_taxa(ps.ng.tax_rel, function(x) mean(x) > 0.001, TRUE)
    28. ##OPTION1: take all samples, prepare mpse_abund!
    29. ##mpse <- ps.ng.tax %>% as.MPSE()
    30. #mpse_abund <- ps.ng.tax_abund %>% as.MPSE()
    31. ##OPTION2: take partial samples, prepare mpse_abund
    32. ps.ng.tax_sel <- ps.ng.tax #IMPORTANT
    33. ##otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")]
    34. ##NOTE: Only choose Group2, Group4, Group6, Group8
    35. #> ps.ng.tax_sel
    36. #otu_table() OTU Table: [ 37465 taxa and 29 samples ]
    37. #sample_data() Sample Data: [ 29 samples by 10 sample variables ]
    38. #tax_table() Taxonomy Table: [ 37465 taxa by 7 taxonomic ranks ]
    39. #phy_tree() Phylogenetic Tree: [ 37465 tips and 37461 internal nodes ]
    40. otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax)[,c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
    41. #For quick calculation
    42. #otu_table(ps.ng.tax_sel) <- otu_table(ps.ng.tax_abund)[,c("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")]
    43. mpse_abund <- ps.ng.tax_sel %>% as.MPSE()
    44. # -----------------------------------
    45. # ---- 3.2. alpha diversity analysis
    46. # Rarefied species richness + RareAbundance
    47. mpse_abund %<>% mp_rrarefy()
    48. # 'chunks' represent the split number of each sample to calculate alpha
    49. # diversity, default is 400. e.g. If a sample has total 40000
    50. # reads, if chunks is 400, it will be split to 100 sub-samples
    51. # (100, 200, 300,..., 40000), then alpha diversity index was
    52. # calculated based on the sub-samples.
    53. # '.abundance' the column name of abundance, if the '.abundance' is not be
    54. # rarefied calculate rarecurve, user can specific 'force=TRUE'.
    55. # + RareAbundance
    56. mpse_abund %<>%
    57. mp_cal_rarecurve(
    58. .abundance = RareAbundance,
    59. chunks = 400
    60. )
    61. # The RareAbundanceRarecurve column will be added the colData slot
    62. # automatically (default action="add")
    63. mpse_abund %>% print(width=180, n=100)
    64. # default will display the confidence interval around smooth.
    65. # se=TRUE
    66. p1 <- mpse_abund %>%
    67. mp_plot_rarecurve(
    68. .rare = RareAbundanceRarecurve,
    69. .alpha = Observe,
    70. )
    71. p2 <- mpse_abund %>%
    72. mp_plot_rarecurve(
    73. .rare = RareAbundanceRarecurve,
    74. .alpha = Observe,
    75. .group = pre_post_stroke
    76. ) +
    77. scale_color_manual(values=c("#00A087FF", "#3C5488FF")) +
    78. scale_fill_manual(values=c("#00A087FF", "#3C5488FF"), guide="none")
    79. # combine the samples belong to the same groups if plot.group=TRUE
    80. p3 <- mpse_abund %>%
    81. mp_plot_rarecurve(
    82. .rare = RareAbundanceRarecurve,
    83. .alpha = "Observe",
    84. .group = pre_post_stroke,
    85. plot.group = TRUE
    86. ) +
    87. scale_color_manual(values=c("#00A087FF", "#3C5488FF")) +
    88. scale_fill_manual(values=c("#00A087FF", "#3C5488FF"),guide="none")
    89. png("rarefaction_of_samples_or_groups.png", width=1080, height=600)
    90. p1 + p2 + p3
    91. dev.off()
    92. # ------------------------------------------
    93. # 3.3. calculate alpha index and visualization
    94. library(ggplot2)
    95. library(MicrobiotaProcess)
    96. mpse_abund %<>%
    97. mp_cal_alpha(.abundance=RareAbundance)
    98. mpse_abund
    99. f1 <- mpse_abund %>%
    100. mp_plot_alpha(
    101. .group=pre_post_stroke,
    102. .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
    103. ) +
    104. scale_fill_manual(values=c("#00A087FF", "#3C5488FF"), guide="none") +
    105. scale_color_manual(values=c("#00A087FF", "#3C5488FF"), guide="none")
    106. f2 <- mpse_abund %>%
    107. mp_plot_alpha(
    108. .alpha=c(Observe, Chao1, ACE, Shannon, Simpson, Pielou)
    109. )
    110. png("alpha_diversity_comparison.png", width=1000, height=1000)
    111. f1 / f2
    112. dev.off()
    113. # -------------------------------------------
    114. # 3.4. The visualization of taxonomy abundance (Class)
    115. mpse_abund %<>%
    116. mp_cal_abundance( # for each samples
    117. .abundance = RareAbundance
    118. ) %>%
    119. mp_cal_abundance( # for each groups
    120. .abundance=RareAbundance,
    121. .group=pre_post_stroke
    122. )
    123. mpse_abund
    124. # visualize the relative abundance of top 20 phyla for each sample.
    125. p1 <- mpse_abund %>%
    126. mp_plot_abundance(
    127. .abundance=RareAbundance,
    128. .group=time,
    129. taxa.class = Class,
    130. topn = 20,
    131. relative = TRUE
    132. )
    133. # visualize the abundance (rarefied) of top 20 phyla for each sample.
    134. p2 <- mpse_abund %>%
    135. mp_plot_abundance(
    136. .abundance=RareAbundance,
    137. .group=time,
    138. taxa.class = Class,
    139. topn = 20,
    140. relative = FALSE
    141. )
    142. png("relative_abundance_and_abundance.png", width= 1200, height=600) #NOT PRODUCED!
    143. p1 / p2
    144. dev.off()
    145. h1 <- mpse_abund %>%
    146. mp_plot_abundance(
    147. .abundance = RareAbundance,
    148. .group = pre_post_stroke,
    149. taxa.class = Class,
    150. relative = TRUE,
    151. topn = 20,
    152. geom = 'heatmap',
    153. features.dist = 'euclidean',
    154. features.hclust = 'average',
    155. sample.dist = 'bray',
    156. sample.hclust = 'average'
    157. )
    158. h2 <- mpse_abund %>%
    159. mp_plot_abundance(
    160. .abundance = RareAbundance,
    161. .group = pre_post_stroke,
    162. taxa.class = Class,
    163. relative = FALSE,
    164. topn = 20,
    165. geom = 'heatmap',
    166. features.dist = 'euclidean',
    167. features.hclust = 'average',
    168. sample.dist = 'bray',
    169. sample.hclust = 'average'
    170. )
    171. # the character (scale or theme) of figure can be adjusted by set_scale_theme
    172. # refer to the mp_plot_dist
    173. png("relative_abundance_and_abundance_heatmap.png", width= 1200, height=600)
    174. aplot::plot_list(gglist=list(h1, h2), tag_levels="A")
    175. dev.off()
    176. # visualize the relative abundance of top 20 class for each .group (pre_post_stroke)
    177. p3 <- mpse_abund %>%
    178. mp_plot_abundance(
    179. .abundance=RareAbundance,
    180. .group=pre_post_stroke,
    181. taxa.class = Class,
    182. topn = 20,
    183. plot.group = TRUE
    184. )
    185. # visualize the abundance of top 20 phyla for each .group (time)
    186. p4 <- mpse_abund %>%
    187. mp_plot_abundance(
    188. .abundance=RareAbundance,
    189. .group= pre_post_stroke,
    190. taxa.class = Class,
    191. topn = 20,
    192. relative = FALSE,
    193. plot.group = TRUE
    194. )
    195. png("relative_abundance_and_abundance_groups.png", width= 1000, height=1000)
    196. p3 / p4
    197. dev.off()
    198. # ---------------------------
    199. # 3.5. Beta diversity analysis
    200. # ---------------------------------------------
    201. # 3.5.1 The distance between samples or groups
    202. # standardization
    203. # mp_decostand wraps the decostand of vegan, which provides
    204. # many standardization methods for community ecology.
    205. # default is hellinger, then the abundance processed will
    206. # be stored to the assays slot.
    207. mpse_abund %<>%
    208. mp_decostand(.abundance=Abundance)
    209. mpse_abund
    210. # calculate the distance between the samples.
    211. # the distance will be generated a nested tibble and added to the
    212. # colData slot.
    213. mpse_abund %<>% mp_cal_dist(.abundance=hellinger, distmethod="bray")
    214. mpse_abund
    215. # mp_plot_dist provides there methods to visualize the distance between the samples or groups
    216. # when .group is not provided, the dot heatmap plot will be return
    217. p1 <- mpse_abund %>% mp_plot_dist(.distmethod = bray)
    218. png("distance_between_samples.png", width= 1000, height=1000)
    219. p1
    220. dev.off()
    221. # when .group is provided, the dot heatmap plot with group information will be return.
    222. p2 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = pre_post_stroke)
    223. # The scale or theme of dot heatmap plot can be adjusted using set_scale_theme function.
    224. p2 %>% set_scale_theme(
    225. x = scale_fill_manual(
    226. values=c("orange", "deepskyblue"),
    227. guide = guide_legend(
    228. keywidth = 1,
    229. keyheight = 0.5,
    230. title.theme = element_text(size=8),
    231. label.theme = element_text(size=6)
    232. )
    233. ),
    234. aes_var = pre_post_stroke # specific the name of variable
    235. ) %>%
    236. set_scale_theme(
    237. x = scale_color_gradient(
    238. guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
    239. ),
    240. aes_var = bray
    241. ) %>%
    242. set_scale_theme(
    243. x = scale_size_continuous(
    244. range = c(0.1, 3),
    245. guide = guide_legend(keywidth = 0.5, keyheight = 0.5)
    246. ),
    247. aes_var = bray
    248. )
    249. png("distance_between_samples_with_group_info.png", width= 1000, height=1000)
    250. p2
    251. dev.off()
    252. # when .group is provided and group.test is TRUE, the comparison of different groups will be returned
    253. p3 <- mpse_abund %>% mp_plot_dist(.distmethod = bray, .group = pre_post_stroke, group.test=TRUE, textsize=2)
    254. png("comparison_of_distance.png", width= 1000, height=1000)
    255. p3
    256. dev.off()
    257. # -----------------------
    258. # 3.5.2 The PCoA analysis
    259. #install.packages("corrr")
    260. library(corrr)
    261. #install.packages("ggside")
    262. library(ggside)
    263. mpse_abund %<>%
    264. mp_cal_pcoa(.abundance=hellinger, distmethod="bray")
    265. # The dimensions of ordination analysis will be added the colData slot (default).
    266. mpse_abund
    267. #> methods(class=class(mpse_abund))
    268. # [1] [ [[<- [<-
    269. # [4] $ $<- arrange
    270. # [7] as_tibble as.data.frame as.phyloseq
    271. #[10] coerce coerce<- colData<-
    272. #[13] distinct filter group_by
    273. #[16] left_join mp_adonis mp_aggregate_clade
    274. #[19] mp_aggregate mp_anosim mp_balance_clade
    275. #[22] mp_cal_abundance mp_cal_alpha mp_cal_cca
    276. #[25] mp_cal_clust mp_cal_dca mp_cal_dist
    277. #[28] mp_cal_nmds mp_cal_pca mp_cal_pcoa
    278. #[31] mp_cal_pd_metric mp_cal_rarecurve mp_cal_rda
    279. #[34] mp_cal_upset mp_cal_venn mp_decostand
    280. #[37] mp_diff_analysis mp_diff_clade mp_envfit
    281. #[40] mp_extract_abundance mp_extract_assays mp_extract_dist
    282. #[43] mp_extract_feature mp_extract_internal_attr mp_extract_rarecurve
    283. #[46] mp_extract_refseq mp_extract_sample mp_extract_taxonomy
    284. #[49] mp_extract_tree mp_filter_taxa mp_mantel
    285. #[52] mp_mrpp mp_plot_abundance mp_plot_alpha
    286. #[55] mp_plot_diff_boxplot mp_plot_diff_res mp_plot_dist
    287. #[58] mp_plot_ord mp_plot_rarecurve mp_plot_upset
    288. #[61] mp_plot_venn mp_rrarefy mp_select_as_tip
    289. #[64] mp_stat_taxa mutate otutree
    290. #[67] otutree<- print pull
    291. #[70] refsequence refsequence<- rename
    292. #[73] rownames<- select show
    293. # [ reached getOption("max.print") -- omitted 6 entries ]
    294. #see '?methods' for accessing help and source code
    295. # We also can perform adonis or anosim to check whether it is significant to the dissimilarities of groups.
    296. mpse_abund %<>%
    297. mp_adonis(.abundance=hellinger, .formula=~Group, distmethod="bray", permutations=9999, action="add")
    298. mpse_abund %>% mp_extract_internal_attr(name=adonis)
    299. # ("1","2","5","6","7", "15","16","17","18","19","20", "29","30","31","32", "40","41","42","43","44","46")
    300. #div.df2[div.df2 == "Group1"] <- "aged.post"
    301. #div.df2[div.df2 == "Group3"] <- "young.post"
    302. #div.df2[div.df2 == "Group5"] <- "aged.post"
    303. #div.df2[div.df2 == "Group7"] <- "young.post"
    304. # ("8","9","10","12","13","14", "21","22","23","24","25","26","27","28", "33","34","35","36","37","38","39","51", "47","48","49","50","52","53","55")
    305. #div.df2[div.df2 == "Group2"] <- "aged.pre"
    306. #div.df2[div.df2 == "Group4"] <- "young.pre"
    307. #div.df2[div.df2 == "Group6"] <- "aged.pre"
    308. #div.df2[div.df2 == "Group8"] <- "young.pre"
    309. #Group1: f.aged and post
    310. #Group2: f.aged and pre
    311. #Group3: f.young and post
    312. #Group4: f.young and pre
    313. #Group5: m.aged and post
    314. #Group6: m.aged and pre
    315. #Group7: m.young and post
    316. #Group8: m.young and pre
    317. #[,c("1","2","5","6","7", "8","9","10","12","13","14")]
    318. #[,c("15","16","17","18","19","20", "21","22","23","24","25","26","27","28")]
    319. #[,c("29","30","31","32", "33","34","35","36","37","38","39","51")]
    320. #[,c("40","41","42","43","44","46", "47","48","49","50","52","53","55")]
    321. p1 <- mpse_abund %>%
    322. mp_plot_ord(
    323. .ord = pcoa,
    324. .group = Group,
    325. .color = Group,
    326. .size = 2.4,
    327. .alpha = 1,
    328. ellipse = TRUE,
    329. show.legend = FALSE # don't display the legend of stat_ellipse
    330. ) +
    331. scale_fill_manual(
    332. #values = c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
    333. #values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
    334. values = c("#1f78b4", "#33a02c", "#e31a1c", "#6a3d9a"),
    335. guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12))
    336. ) +
    337. scale_color_manual(
    338. #values=c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99", "#e31a1c", "#cab2d6", "#6a3d9a"),
    339. #values = c("#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"),
    340. values = c("#1f78b4", "#33a02c", "#e31a1c", "#6a3d9a"),
    341. guide = guide_legend(keywidth=1.6, keyheight=1.6, label.theme=element_text(size=12))
    342. )
    343. #scale_fill_manual(values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500")) +
    344. #scale_color_manual(values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"))
    345. #scale_fill_manual(values=c("#00A087FF", "#3C5488FF")) +
    346. #scale_color_manual(values=c("#00A087FF", "#3C5488FF"))
    347. #png("PCoA.png", width= 1000, height=1000)
    348. svg("PCoA.svg", width= 11, height=10)
    349. #svg("PCoA_.svg", width=10, height=10)
    350. #svg("PCoA.svg")
    351. pdf("PCoA.pdf")
    352. p1
    353. dev.off()
    354. # The size of point also can be mapped to other variables such as Observe, or Shannon
    355. # Then the alpha diversity and beta diversity will be displayed simultaneously.
    356. p2 <- mpse_abund %>%
    357. mp_plot_ord(
    358. .ord = pcoa,
    359. .group = Group,
    360. .color = Group,
    361. .size = Shannon,
    362. .alpha = Observe,
    363. ellipse = TRUE,
    364. show.legend = FALSE # don't display the legend of stat_ellipse
    365. ) +
    366. scale_fill_manual(
    367. values = c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
    368. guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
    369. ) +
    370. scale_color_manual(
    371. values=c("#FF0000", "#000000", "#0000FF", "#C0C0C0", "#00FF00", "#FFFF00", "#00FFFF", "#FFA500"),
    372. guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
    373. ) +
    374. scale_size_continuous(
    375. range=c(0.5, 3),
    376. guide = guide_legend(keywidth=0.6, keyheight=0.6, label.theme=element_text(size=8))
    377. )
    378. # ------------------------------------------
    379. # 3.5.3 Hierarchical cluster (tree) analysis
    380. #input should contain hellinger!
    381. mpse_abund %<>%
    382. mp_cal_clust(
    383. .abundance = hellinger,
    384. distmethod = "bray",
    385. hclustmethod = "average", # (UPGAE)
    386. action = "add" # action is used to control which result will be returned
    387. )
    388. mpse_abund
    389. # if action = 'add', the result of hierarchical cluster will be added to the MPSE object
    390. # mp_extract_internal_attr can extract it. It is a treedata object, so it can be visualized
    391. # by ggtree.
    392. sample.clust <- mpse_abund %>% mp_extract_internal_attr(name='SampleClust')
    393. sample.clust
    394. library(ggtree)
    395. p_cluster <- ggtree(sample.clust) +
    396. geom_tippoint(aes(color=pre_post_stroke)) +
    397. geom_tiplab(as_ylab = TRUE) +
    398. ggplot2::scale_x_continuous(expand=c(0, 0.01))
    399. png("hierarchical_cluster1.png", width= 1000, height=800)
    400. p_cluster
    401. dev.off()
    402. library(ggtreeExtra)
    403. library(ggplot2)
    404. # # Extract relative abundance of phyla
    405. # phyla.tb <- mouse.time.mpse %>%
    406. # mp_extract_abundance(taxa.class=Phylum, topn=30)
    407. # # The abundance of each samples is nested, it can be flatted using the unnest of tidyr.
    408. # phyla.tb %<>% tidyr::unnest(cols=RareAbundanceBySample) %>% dplyr::rename(Phyla="label")
    409. # phyla.tb
    410. #
    411. # p1 <- p +
    412. # geom_fruit(
    413. # data=phyla.tb,
    414. # geom=geom_col,
    415. # mapping = aes(x = RelRareAbundanceBySample,
    416. # y = Sample,
    417. # fill = Phyla
    418. # ),
    419. # orientation = "y",
    420. # #offset = 0.4,
    421. # pwidth = 3,
    422. # axis.params = list(axis = "x",
    423. # title = "The relative abundance of phyla (%)",
    424. # title.size = 4,
    425. # text.size = 2,
    426. # vjust = 1),
    427. # grid.params = list()
    428. # )
    429. # png("hierarchical_cluster2.png", width = 1000, height = 800)
    430. # p1
    431. # dev.off()
    432. # Extract relative abundance of classes
    433. mpse_abund %>% print(width=150)
    434. class.tb <- mpse_abund %>%
    435. mp_extract_abundance(taxa.class = Class, topn = 30)
    436. # Flatten and rename the columns
    437. class.tb %<>% tidyr::unnest(cols = RareAbundanceBySample) %>% dplyr::rename(Class = "label")
    438. # View the data frame
    439. class.tb
    440. # Create the plot
    441. p1 <- p +
    442. geom_fruit(
    443. data = class.tb,
    444. geom = geom_col,
    445. mapping = aes(x = RelRareAbundanceBySample,
    446. y = Sample,
    447. fill = Class
    448. ),
    449. orientation = "y",
    450. pwidth = 3,
    451. axis.params = list(axis = "x",
    452. title = "The relative abundance of classes (%)",
    453. title.size = 4,
    454. text.size = 2,
    455. vjust = 1),
    456. grid.params = list()
    457. )
    458. # Save the plot to a file
    459. png("hierarchical_cluster2.png", width = 1000, height = 800)
    460. print(p1)
    461. dev.off()
    462. # -----------------------
    463. # 3.6 Biomarker discovery
    464. library(ggtree)
    465. library(ggtreeExtra)
    466. library(ggplot2)
    467. library(MicrobiotaProcess)
    468. library(tidytree)
    469. library(ggstar)
    470. library(forcats)
    471. mpse_abund %>% print(width=150)
    472. mpse_abund %<>%
    473. mp_cal_abundance( # for each samples
    474. .abundance = RareAbundance
    475. ) %>%
    476. mp_cal_abundance( # for each groups
    477. .abundance=RareAbundance,
    478. .group=pre_post_stroke
    479. )
    480. mpse_abund
    481. mpse_abund %<>%
    482. mp_diff_analysis(
    483. .abundance = RelRareAbundanceBySample,
    484. .group = pre_post_stroke,
    485. first.test.alpha = 0.01
    486. )
    487. # The result is stored to the taxatree or otutree slot, you can use mp_extract_tree to extract the specific slot.
    488. taxa.tree <- mpse_abund %>%
    489. mp_extract_tree(type="taxatree")
    490. taxa.tree
    491. ## And the result tibble of different analysis can also be extracted
    492. ## with tidytree (>=0.3.5)
    493. taxa.tree %>% select(label, nodeClass, LDAupper, LDAmean, LDAlower, Sign_pre_post_stroke, pvalue, fdr) %>% dplyr::filter(!is.na(fdr))
    494. taxa.tree %>% print(width=150, n=100)
    495. #.data, layout, tree.type, .taxa.class, tiplab.size, offset.abun, pwidth.abun, offset.effsize, pwidth.effsize, group.abun, tiplab.linetype
    496. p <- mpse_abund %>%
    497. mp_plot_diff_res(
    498. group.abun = TRUE,
    499. pwidth.abun=0.05,
    500. offset.abun=0.02,
    501. pwidth.effsize=0.3,
    502. offset.effsize=0.46,
    503. tiplab.size = 4.9
    504. ) +
    505. scale_fill_manual(values=c("deepskyblue", "orange")) +
    506. scale_fill_manual(
    507. aesthetics = "fill_new", # The fill aes was renamed to "fill_new" for the abundance dotplot layer
    508. values = c("deepskyblue", "orange")
    509. ) +
    510. scale_fill_manual(
    511. aesthetics = "fill_new_new", # The fill aes for hight light layer of tree was renamed to 'fill_new_new'
    512. values = c("#E41A1C", "#377EB8", "#4DAF4A",
    513. "#984EA3", "#FF7F00", "#FFFF33",
    514. "#A65628", "#F781BF", "#00FFFF", "#999999"
    515. )
    516. ) +
    517. theme(
    518. axis.title = element_text(size = 28), # Font size for axis titles
    519. axis.text = element_text(size = 28), # Font size for axis text
    520. plot.title = element_text(size = 28), # Font size for plot title
    521. legend.title = element_text(size = 16), # Font size for legend title
    522. legend.text = element_text(size = 14) # Font size for legend text
    523. )
    524. #p$layers[[2]]$geom <- geom_tiplab(fontsize = 22) # Change 12 to the desired font size
    525. png("differently_expressed_otu.png", width=2000, height=2000)
    526. #svg("p7.svg",width=8, height=8)
    527. p
    528. dev.off()
    529. f <- mpse_abund %>%
    530. mp_plot_diff_cladogram(
    531. label.size = 2.5,
    532. hilight.alpha = .3,
    533. bg.tree.size = .5,
    534. bg.point.size = 2,
    535. bg.point.stroke = .25
    536. ) +
    537. scale_fill_diff_cladogram( # set the color of different group.
    538. values = c('deepskyblue', 'orange')
    539. ) +
    540. scale_size_continuous(range = c(1, 4))
    541. #png("cladogram.png", width=1000, height=1000)
    542. svg("cladogram.svg", width=10, height=10)
    543. f
    544. dev.off()
    545. ## Extract the OTU table and taxonomy table from the phyloseq object
    546. #otu_table <- phyloseq::otu_table(ps.ng.tax_abund) %>% as.data.frame() %>% as.matrix()
    547. #tax_table <- phyloseq::tax_table(ps.ng.tax_abund) %>% as.data.frame() %>% as.matrix()
    548. #write.csv(otu_table, file="otu_table.csv")
    549. #write.csv(tax_table, file="tax_table.csv")
    550. #~/Tools/csv2xls-0.4/csv_to_xls.py otu_table.csv tax_table.csv -d',' -o otu_tax.xls

3.1. Environment Setup: It sets up a Conda environment named picrust2, using the conda create command and then activates this environment using conda activate picrust2.

  1. #https://github.com/picrust/picrust2/wiki/PICRUSt2-Tutorial-(v2.2.0-beta)#minimum-requirements-to-run-full-tutorial
  2. conda create -n picrust2 -c bioconda -c conda-forge picrust2=2.2.0_b
  3. conda activate picrust2

3.2. Data Preparation: The script creates a new directory called picrust2_out, then enters it using mkdir and cd commands. It then identifies input files that are needed for the analysis: metadata.tsv, seqs.fna, table.biom. The biom commands are used to inspect and convert the BIOM format files.

  1. mkdir picrust2_out
  2. cd picrust2_out
  3. # Identifying input data
  4. # Note: Replace the paths and filenames with your actual data if different
  5. # metadata.tsv == ../map_corrected.txt
  6. # seqs.fna == ../clustering/seqs.fna
  7. # table.biom == ../core_diversity_e42369/table_even42369.biom
  8. # Inspect and convert the BIOM format files
  9. biom head -i ../core_diversity_e42369/table_even42369.biom
  10. biom summarize-table -i ../core_diversity_e42369/table_even42369.biom
  11. biom convert -i ../core_diversity_e42369/table_even42369.biom -o table_even42369.tsv --to-tsv

3.3. Running PiCRUST2: The place_seqs.py command aligns the input sequences to a reference tree. The hsp.py commands generate hidden state prediction for multiple functional categories.

  1. #insert reads into reference tree using EPA-NG
  2. cp ../clustering/rep_set.fna ./
  3. grep ">" rep_set.fna | wc -l #44238
  4. vim table_even42369.tsv #40596-2
  5. samtools faidx rep_set.fna
  6. cut -f1-1 table_even42369.tsv > table_even42369.id
  7. #manually modify table_even42369.id by replacing "\n" with " >> seqs.fna\nsamtools faidx rep_set.fna "
  8. run table_even42369.id
  9. rm -rf intermediate/
  10. place_seqs.py -s seqs.fna -o out.tre -p 64 --intermediate intermediate/place_seqs
  11. #castor: Efficient Phylogenetics on Large Trees
  12. #https://github.com/picrust/picrust2/wiki/Hidden-state-prediction
  13. hsp.py -i 16S -t out.tre -o 16S_predicted_and_nsti.tsv.gz -p 100 -n
  14. hsp.py -i COG -t out.tre -o COG_predicted.tsv.gz -p 100
  15. hsp.py -i PFAM -t out.tre -o PFAM_predicted.tsv.gz -p 100
  16. hsp.py -i KO -t out.tre -o KO_predicted.tsv.gz -p 100
  17. hsp.py -i EC -t out.tre -o EC_predicted.tsv.gz -p 100
  18. hsp.py -i TIGRFAM -t out.tre -o TIGRFAM_predicted.tsv.gz -p 100
  19. hsp.py -i PHENO -t out.tre -o PHENO_predicted.tsv.gz -p 100
  20. >In this table the predicted copy number of all Enzyme Classification (EC) numbers is shown for each ASV. The NSTI values per ASV are not in this table since we did not specify the -n option. EC numbers are a type of gene family defined based on the chemical reactions they catalyze. For instance, EC:1.1.1.1 corresponds to alcohol dehydrogenase. In this tutorial we are focusing on EC numbers since they can be used to infer MetaCyc pathway levels (see below).
  21. zless -S EC_predicted.tsv.gz
  22. sequence EC:1.1.1.1 EC:1.1.1.10 EC:1.1.1.100 ...
  23. 20e568023c10eaac834f1c110aacea18 2 0 3 ...
  24. 23fe12a325dfefcdb23447f43b6b896e 0 0 1 ...
  25. 288c8176059111c4c7fdfb0cd5afce64 1 0 1 ...
  26. ...
  27. ##Why import the tsv file to MyData?
  28. #MyData <- read.csv(file="./COG_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 4598 e.g. COG5665
  29. #MyData <- read.csv(file="./PFAM_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 11089 e.g. PF17225
  30. #MyData <- read.csv(file="./KO_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 10543 e.g. K19791
  31. #MyData <- read.csv(file="./EC_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 2913 e.g. EC.6.6.1.2
  32. #MyData <- read.csv(file="./16S_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 1 e.g. X16S_rRNA_Count
  33. #MyData <- read.csv(file="./TIGRFAM_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 4287 e.g. TIGR04571
  34. #MyData <- read.csv(file="./PHENO_predicted.tsv", header=TRUE, sep="\t", row.names=1) #6806 41 e.g. Use_of_nitrate_as_electron_acceptor, Xylose_utilizing

3.4. The metagenome_pipeline.py commands perform metagenomic prediction for several functional categories. Predicted gene families weighted by the relative abundance of ASVs in their community. In other words, we are interested in inferring the metagenomes of the communities.

  1. #Generate metagenome predictions using EC numbers https://en.wikipedia.org/wiki/List_of_enzymes#Category:EC_1.1_(act_on_the_CH-OH_group_of_donors)
  2. metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f COG_predicted.tsv.gz -o COG_metagenome_out --strat_out
  3. metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f EC_predicted.tsv.gz -o EC_metagenome_out --strat_out
  4. metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f KO_predicted.tsv.gz -o KO_metagenome_out --strat_out
  5. metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f PFAM_predicted.tsv.gz -o PFAM_metagenome_out --strat_out
  6. metagenome_pipeline.py -i ../core_diversity_e42369/table_even42369.biom -m 16S_predicted_and_nsti.tsv.gz -f TIGRFAM_predicted.tsv.gz -o TIGRFAM_metagenome_out --strat_out

3.5. Pathway-level inference: By default this script infers MetaCyc pathway abundances based on EC number abundances, although different gene families and pathways can also be optionally specified. This script performs a number of steps by default, which are based on the approach implemented in HUMAnN2:

  • Regroups EC numbers to MetaCyc reactions.
  • Infers which MetaCyc pathways are present based on these reactions with MinPath.
  • Calculates and returns the abundance of pathways identified as present.
    1. pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_contrib.tsv.gz -o pathways_out -p 15
    2. #Note that the path of map files is under /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles
    3. pathway_pipeline.py -i COG_metagenome_out/pred_metagenome_contrib.tsv.gz -o KEGG_pathways_out -p 15 --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
    4. #Mapping predicted KO abundances to legacy KEGG pathways (with stratified output that represents contributions to community-wide abundances):
    5. pathway_pipeline.py -i KO_metagenome_out/pred_metagenome_strat.tsv.gz -o KEGG_pathways_out --no_regroup --map /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/pathway_mapfiles/KEGG_pathways_to_KO.tsv
    6. #Map EC numbers to MetaCyc pathways and get stratified output corresponding to contribution of predicted gene family abundances within each predicted genome:
    7. pathway_pipeline.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -o pathways_out_per_seq --per_sequence_contrib --per_sequence_abun EC_metagenome_out/seqtab_norm.tsv.gz --per_sequence_function EC_predicted.tsv.gz

3.6. Add functional descriptions: Finally, it can be useful to have a description of each functional id in the output abundance tables. The below commands will add these descriptions as new column in gene family and pathway abundance tables

  1. #--6.1. Add descriptions in gene family tables
  2. add_descriptions.py -i COG_metagenome_out/pred_metagenome_unstrat.tsv.gz -m COG -o COG_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
  3. add_descriptions.py -i EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -m EC -o EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
  4. add_descriptions.py -i KO_metagenome_out/pred_metagenome_unstrat.tsv.gz -m KO -o KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz # EC and METACYC is a pair, EC for gene_annotation and METACYC for pathway_annotation
  5. add_descriptions.py -i PFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m PFAM -o PFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
  6. add_descriptions.py -i TIGRFAM_metagenome_out/pred_metagenome_unstrat.tsv.gz -m TIGRFAM -o TIGRFAM_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz
  7. #--6.2. Add descriptions in pathway abundance tables
  8. add_descriptions.py -i pathways_out/path_abun_unstrat.tsv.gz -m METACYC -o pathways_out/path_abun_unstrat_descrip.tsv.gz
  9. gunzip path_abun_unstrat_descrip.tsv.gz
  10. #Error - no rows remain after regrouping input table. The default pathway and regroup mapfiles are meant for EC numbers. Note that KEGG pathways are not supported since KEGG is a closed-source database, but you can input custom pathway mapfiles if you have access. If you are using a custom function database did you mean to set the --no-regroup flag and/or change the default pathways mapfile used?
  11. #If ERROR --> USE the METACYC for downstream analyses!!!
  12. add_descriptions.py -i pathways_out/path_abun_unstrat.tsv.gz -o KEGG_pathways_out/path_abun_unstrat_descrip.tsv.gz --custom_map_table /home/jhuang/anaconda3/envs/picrust2/lib/python3.6/site-packages/picrust2/default_files/description_mapfiles/KEGG_pathways_info.tsv.gz

3.7. Visualization

  1. #7.1 STAMP
  2. #https://github.com/picrust/picrust2/wiki/STAMP-example
  3. conda activate stamp; #import path_abun_unstrat_descrip.tsv to STAMP
  4. conda deactivate
  5. conda install -c bioconda stamp
  6. #conda install -c bioconda stamp
  7. #sudo pip install pyqi
  8. #sudo apt-get install libblas-dev liblapack-dev gfortran
  9. #sudo apt-get install freetype* python-pip python-dev python-numpy python-scipy python-matplotlib
  10. #sudo pip install STAMP
  11. #conda install -c bioconda stamp
  12. conda create -n stamp -c bioconda/label/cf201901 stamp
  13. brew install pyqt
  14. #DEBUG the environment
  15. conda install pyqt=4
  16. #conda install icu=56
  17. e.g. path_abun_unstrat_descrip.tsv.gz and metadata.tsv from the tutorial)
  18. cut -d$'\t' -f1 map_corrected.txt > 1
  19. cut -d$'\t' -f5 map_corrected.txt > 5
  20. cut -d$'\t' -f6 map_corrected.txt > 6
  21. paste -d$'\t' 1 5 > 1_5
  22. paste -d$'\t' 1_5 6 > metadata.tsv
  23. #SampleID --> SampleID
  24. SampleID Facility Genotype
  25. 100CHE6KO PaloAlto KO
  26. 101CHE6WT PaloAlto WT
  27. #7.2. ALDEx2
  28. https://bioconductor.org/packages/release/bioc/html/ALDEx2.html
  29. #7.3. Convert png to svg and pdf
  30. inkscape error_bar.png --export-plain-svg=error_bar.svg (embbed)
  31. sudo apt update
  32. sudo apt install autotrace
  33. sudo apt-get install -y libpng-dev libtiff-dev imagemagick
  34. git clone https://github.com/autotrace/autotrace.git
  35. cd autotrace
  36. #sudo apt install intltool
  37. #sudo apt install gettext libglib2.0-dev
  38. #sudo apt install libtool libtool-bin
  39. #sudo apt install automake
  40. sudo apt-get install libxml-parser-perl
  41. ./autogen.sh
  42. ./configure
  43. make
  44. autotrace -output-format svg -output-file error_bar.svg error_bar.png

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum