Gene Set Variation Analysis (GSVA) and Visualization of Gene Sets from Excel Signatures

gene_x 0 like s 1268 view s
Tags: plot, R, scripts
#install.packages("readxl")
library(readxl)

# Path to the Excel file
file_path <- "Signatures.xls"

#example of a signature:
#geneSymbol geneEntrezID    ENSEMBL GeneSet
#CD160  11126   ENSG00000117281 Anergic or act. T cells
#CD244  51744   ENSG00000122223 Anergic or act. T cells
#CTLA4  1493    ENSG00000163599 Anergic or act. T cells
#HAVCR2 84868   ENSG00000135077 Anergic or act. T cells
#ICOS   29851   ENSG00000163600 Anergic or act. T cells
#KLRG1  10219   ENSG00000139187 Anergic or act. T cells
#LAG3   3902    ENSG00000089692 Anergic or act. T cells
#PDCD1  5133    ENSG00000188389 Anergic or act. T cells
#PDCD1  5133    ENSG00000276977 Anergic or act. T cells

# Get the names of the sheets
sheet_names <- excel_sheets(file_path)

# Initialize an empty list to hold gene sets
geneSets <- list()

# Loop over each sheet, extract the ENSEMBL IDs, and add to the list
for (sheet in sheet_names) {
  # Read the sheet
  data <- read_excel(file_path, sheet = sheet)

  # Process the GeneSet names (replacing spaces with underscores, for example)
  gene_set_name <- gsub(" ", "_", unique(data$GeneSet)[1])

  # Add ENSEMBL IDs to the list
  geneSets[[gene_set_name]] <- as.character(data$ENSEMBL)
}

# Print the result to check
print(geneSets)

# 1. Compute GSVA scores:
gsva_scores <- gsva(exprs, geneSets, method="gsva")

# 2. Convert to data.frame for ggplot:
gsva_df <- as.data.frame(t(gsva_scores))

# 3. Add conditions to gsva_df:
gsva_df$Condition <- dds$condition

# 4. Filter the gsva_df to retain only the desired conditions:
gsva_df_filtered <- gsva_df[gsva_df$Condition %in% c("Ace2_mock_2h", "Ace2_inf_24h"), ]

# 5. Define a function to plot violin plots:
# Update the condition levels in gsva_df_filtered to ensure the desired order on x-axis:
gsva_df_filtered$Condition <- gsub("Ace2_mock_2h", "mock", gsva_df_filtered$Condition)
gsva_df_filtered$Condition <- gsub("Ace2_inf_24h", "infection", gsva_df_filtered$Condition)
gsva_df_filtered$Condition <- factor(gsva_df_filtered$Condition, levels = c("mock", "infection"))
plot_violin <- function(data, gene_name) {
  # Calculate the t-test p-value for the two conditions
  condition1_data <- data[data$Condition == "mock", gene_name]
  condition2_data <- data[data$Condition == "infection", gene_name]
  p_value <- t.test(condition1_data, condition2_data)$p.value

  # Convert p-value to annotation
  p_annotation <- ifelse(p_value < 0.01, "**", ifelse(p_value < 0.05, "*", ""))
  rounded_p_value <- paste0("p = ", round(p_value, 2))

  plot_title <- gsub("_", " ", gene_name)
  p <- ggplot(data, aes(x=Condition, y=!!sym(gene_name))) +
    geom_violin(linewidth=1.2) + 
    labs(title=plot_title, y="GSVA Score") +
    ylim(-1, 1) +
    theme_light() +
    theme(
      axis.title.x = element_text(size=12),
      axis.title.y = element_text(size=12),
      axis.text.x  = element_text(size=10),
      axis.text.y  = element_text(size=10),
      plot.title   = element_text(size=12, hjust=0.5)
    )

  # Add p-value annotation to the plot
  p <- p + annotate("text", x=1.5, y=0.9, label=paste0(p_annotation, " ", rounded_p_value), size=5, hjust=0.5)

  return(p)
}

# 6. Generate the list of plots:
#genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
#plots_list <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))

# 6. Generate the list of plots in a predefined order:
desired_order <- c("Platelets","Granulocytes","LDG","pDC","Anti-inflammation",  "Pro-inflam._IL-1","Dendritic_cells","MHC_II","Alt._complement","TNF",  "NLRP3_inflammasome","Unfolded_protein","B_cells","Monocyte_cell_surface","Inflammasome",  "Monocyte_secreted","IL-1_cytokines","SNOR_low_UP","CD40_activated","Lectin_complement",  "Classical_complement","Cell_cycle","Plasma_cells","IG_chains","Erythrocytes",  "IL-6R_complex","IFN","TCRB","TCRA","Cyt._act._T_cells",  "TCRG","T_cells","CD8T-NK-NKT","Anergic_or_act._T_cells","T_activated",  "NK_cells","TCRD","T_regs","SNOR_low_DOWN","Monocytes",  "Myeloid_cells","Neutrophils")
genes <- colnames(gsva_df_filtered)[!colnames(gsva_df_filtered) %in% "Condition"]
genes <- genes[match(desired_order, genes)]
plots_list <- lapply(genes, function(gene) plot_violin(gsva_df_filtered, gene))


# 7. Pad the list of plots:
remaining_plots <- 6 - (length(plots_list) %% 6)
if (remaining_plots != 6) {
  plots_list <- c(plots_list, rep(list(NULL), remaining_plots))
}

# 8. Create the plots and arrange them in a grid:
library(gridExtra)
plots_matrix <- matrix(plots_list, ncol=6, byrow=T)
#do.call("grid.arrange", c(plots_matrix, list(ncol=6)))

# 9. Save the plots to a PNG:
png("All_Violin_Plots.png", width=1000, height=1000)
do.call("grid.arrange", c(plots_matrix, list(ncol=6)))
dev.off()
like unlike
点赞本文的读者
还没有人对此文章表态
本文有评论

没有评论
Gene Set Variation Analysis (GSVA) and Visualization of Gene Sets from Excel Signatures

本文有评论

看文章，发评论，不要沉默

最受欢迎文章

最新文章

最多评论文章

推荐相似文章