Gene Set Variation Analysis (GSVA) and Visualization of Gene Sets from Excel Signatures

gene_x 0 like s 1000 view s

Tags: plot, R, scripts

  1. #install.packages("readxl")
  2. library(readxl)
  3. # Path to the Excel file
  4. file_path <- "Signatures.xls"
  5. #example of a signature:
  6. #geneSymbol geneEntrezID ENSEMBL GeneSet
  7. #CD160 11126 ENSG00000117281 Anergic or act. T cells
  8. #CD244 51744 ENSG00000122223 Anergic or act. T cells
  9. #CTLA4 1493 ENSG00000163599 Anergic or act. T cells
  10. #HAVCR2 84868 ENSG00000135077 Anergic or act. T cells
  11. #ICOS 29851 ENSG00000163600 Anergic or act. T cells
  12. #KLRG1 10219 ENSG00000139187 Anergic or act. T cells
  13. #LAG3 3902 ENSG00000089692 Anergic or act. T cells
  14. #PDCD1 5133 ENSG00000188389 Anergic or act. T cells
  15. #PDCD1 5133 ENSG00000276977 Anergic or act. T cells
  16. # Get the names of the sheets
  17. sheet_names <- excel_sheets(file_path)
  18. # Initialize an empty list to hold gene sets
  19. geneSets <- list()
  20. # Loop over each sheet, extract the ENSEMBL IDs, and add to the list
  21. for (sheet in sheet_names) {
  22. # Read the sheet
  23. data <- read_excel(file_path, sheet = sheet)
  24. # Process the GeneSet names (replacing spaces with underscores, for example)
  25. gene_set_name <- gsub(" ", "_", unique(data$GeneSet)[1])
  26. # Add ENSEMBL IDs to the list
  27. geneSets[[gene_set_name]] <- as.character(data$ENSEMBL)
  28. }
  29. # Print the result to check
  30. print(geneSets)
  31. # 1. Compute GSVA scores:
  32. gsva_scores <- gsva(exprs, geneSets, method="gsva")
  33. # 2. Convert to data.frame for ggplot:
  34. gsva_df <- as.data.frame(t(gsva_scores))
  35. # 3. Add conditions to gsva_df:
  36. gsva_df$Condition <- dds$condition
  37. # 4. Filter the gsva_df to retain only the desired conditions:
  38. gsva_df_filtered <- gsva_df[gsva_df$Condition %in% c("Ace2_mock_2h", "Ace2_inf_24h"), ]
  39. # 5. Define a function to plot violin plots:
  40. # Update the condition levels in gsva_df_filtered to ensure the desired order on x-axis:
  41. gsva_df_filtered$Condition <- gsub("Ace2_mock_2h", "mock", gsva_df_filtered$Condition)
  42. gsva_df_filtered$Condition <- gsub("Ace2_inf_24h", "infection", gsva_df_filtered$Condition)
  43. gsva_df_filtered$Condition <- factor(gsva_df_filtered$Condition, levels = c("mock", "infection"))
  44. plot_violin <- function(data, gene_name) {
  45. # Calculate the t-test p-value for the two conditions
  46. condition1_data <- data[data$Condition == "mock", gene_name]
  47. condition2_data <- data[data$Condition == "infection", gene_name]
  48. p_value <- t.test(condition1_data, condition2_data)$p.value
  49. # Convert p-value to annotation
  50. p_annotation <- ifelse(p_value < 0.01, "**", ifelse(p_value < 0.05, "*", ""))
  51. rounded_p_value <- paste0("p = ", round(p_value, 2))
  52. plot_title <- gsub("_", " ", gene_name)
  53. p <- ggplot(data, aes(x=Condition, y=!!sym(gene_name))) +
  54. geom_violin(linewidth=1.2) +
  55. labs(title=plot_title, y="GSVA Score") +
  56. ylim(-1, 1) +
  57. theme_light() +
  58. theme(
  59. axis.title.x = element_text(size=12),
  60. axis.title.y = element_text(size=12),
  61. axis.text.x = element_text(size=10),
  62. axis.text.y = element_text(size=10),
  63. plot.title = element_text(size=12, hjust=0.5)
  64. )
  65. # Add p-value annotation to the plot
  66. p <- p + annotate("text", x=1.5, y=0.9, label=paste0(p_annotation, " ", rounded_p_value), size=5, hjust=0.5)
  67. return(p)
  68. }
  69. # 6. Generate the list of plots:
  70. #genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
  71. #plots_list <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))
  72. # 6. Generate the list of plots in a predefined order:
  73. desired_order <- c("Platelets","Granulocytes","LDG","pDC","Anti-inflammation", "Pro-inflam._IL-1","Dendritic_cells","MHC_II","Alt._complement","TNF", "NLRP3_inflammasome","Unfolded_protein","B_cells","Monocyte_cell_surface","Inflammasome", "Monocyte_secreted","IL-1_cytokines","SNOR_low_UP","CD40_activated","Lectin_complement", "Classical_complement","Cell_cycle","Plasma_cells","IG_chains","Erythrocytes", "IL-6R_complex","IFN","TCRB","TCRA","Cyt._act._T_cells", "TCRG","T_cells","CD8T-NK-NKT","Anergic_or_act._T_cells","T_activated", "NK_cells","TCRD","T_regs","SNOR_low_DOWN","Monocytes", "Myeloid_cells","Neutrophils")
  74. genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
  75. genes <- genes[match(desired_order, genes)]
  76. plots_list <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))
  77. # 7. Pad the list of plots:
  78. remaining_plots <- 6 - (length(plots_list) %% 6)
  79. if (remaining_plots != 6) {
  80. plots_list <- c(plots_list, rep(list(NULL), remaining_plots))
  81. }
  82. # 8. Create the plots and arrange them in a grid:
  83. library(gridExtra)
  84. plots_matrix <- matrix(plots_list, ncol=6, byrow=T)
  85. #do.call("grid.arrange", c(plots_matrix, list(ncol=6)))
  86. # 9. Save the plots to a PNG:
  87. png("All_Violin_Plots.png", width=1000, height=1000)
  88. do.call("grid.arrange", c(plots_matrix, list(ncol=6)))
  89. dev.off()

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum