ChIP-seq using HOMER (-style factor, findPeaks + default getDifferentialPeaksReplicates.pl)

gene_x 0 like s 846 view s

Tags:

  1. nextflow ChIP-seq run for NHDF_p783

    1. #under Raw_Data for ChIP-seq
    2. ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf859/3_NHDF_Donor_1_p783_input_S5_R1_001.fastq.gz p783_input_DonorI.fastq.gz
    3. ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf860/4_NHDF_Donor_2_p783_input_S6_R1_001.fastq.gz p783_input_DonorII.fastq.gz
    4. ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf861/5_NHDF_Donor_1_p783_ChIP_S7_R1_001.fastq.gz p783_ChIP_DonorI.fastq.gz
    5. ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf862/6_NHDF_Donor_2_p783_ChIP_S8_R1_001.fastq.gz p783_ChIP_DonorII.fastq.gz
    6. #'hg38' { bwa = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/"
    7. # blacklist = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/blacklists/hg38-blacklist.bed"
    8. # gtf = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
    9. # }
    10. ln -s /home/jhuang/Tools/NGI-ChIPseq/ .
    11. (chipseq) nextflow run NGI-ChIPseq/main.nf --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data/*.fastq.gz' --genome hg38 --macsconfig macs.config --singleEnd --blacklist_filtering -profile standard --project Denise_LT_DNA_Bindung --outdir results_LT_DNA_Bindung_hg38 -resume
    12. #By the way: nextflow RNA-seq run for NHDF_p783 (NOT the topic of this post).
    13. #under Raw_Data for RNA-seq
    14. cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz ./
    15. cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ./
    16. #under Raw_Data_p783_RNAseq for RNA-seq
    17. ln -s ../Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz ctrl_DonorI.fastq.gz
    18. ln -s ../Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ctrl_DonorII.fastq.gz
    19. ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf857/1_NHDF_Donor_1_p783_S1_R1_001.fastq.gz p783_DonorI.fastq.gz
    20. ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf858/2_NHDF_Donor_2_p783_S2_R1_001.fastq.gz p783_DonorII.fastq.gz
    21. #Note that we need to regenerate MultiQC.html after ignoring 'Biotype Counts', since --fcGroupFeaturesType gene_name cannot generate the real biotype counts!
    22. (rnaseq_2021) nextflow run rnaseq --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data_p783/RNA_seq/*.fastq.gz' --fasta "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" --gtf "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" --bed12 "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" --singleEnd -profile standard --aligner star --saveReference -resume --saveAlignedIntermediates --skip_rseqc --skip_dupradar --skip_genebody_coverage --skip_preseq --skip_edger --fcGroupFeaturesType gene_name
  2. nextflow ChIP-seq run for the WaGa and HEK293 data (cells expressing truncated LT-Ag + sT)

    1. #160719_SN7001212_0156_AC8K76ACXX
    2. cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L003_R1_001.fastq.gz > HEK293_Input_p197_r1.fastq.gz
    3. cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L003_R1_001.fastq.gz > HEK293_Input_p197_r2.fastq.gz
    4. cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L003_R1_001.fastq.gz > HEK293_Input_p197_r3.fastq.gz
    5. cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L003_R1_001.fastq.gz > HEK293_LT_p197_r1.fastq.gz
    6. cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L003_R1_001.fastq.gz > HEK293_LT_p197_r2.fastq.gz
    7. cat ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L002_R1_001.fastq.gz ../160719_SN7001212_0156_AC8K76ACXX/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L003_R1_001.fastq.gz > HEK293_LT_p197_r3.fastq.gz
    8. #140117_SN7001212_0097_AC3ECBACXX
    9. cat ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_igg/waga_igg_TAGCTT_L003_R1_001.fastq.gz ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_igg/waga_igg_TAGCTT_L004_R1_001.fastq.gz > WaGa_IgG.fastq.gz
    10. cat ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_lt/waga_lt_GGTAGC_L003_R1_001.fastq.gz ../140117_SN7001212_0097_AC3ECBACXX/Sample_waga_lt/waga_lt_GGTAGC_L004_R1_001.fastq.gz > WaGa_LT.fastq.gz
    11. ln -s /home/jhuang/Tools/NGI-ChIPseq/ .
    12. (chipseq) nextflow run NGI-ChIPseq/main.nf --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/LTtr-ChIP/Raw_Data/*.fastq.gz' --genome hg38 --macsconfig macs.config --singleEnd --blacklist_filtering -profile standard --project Denise_LTtr_DNA_Bindung --outdir results_LTtr_DNA_Bindung_hg38 -resume
  3. makeTagDirectory

    1. conda activate myperl
    2. mkdir results_ChIPseq_K331A_hg38/homer; cd results_ChIPseq_K331A_hg38/homer
    3. #makeTagDirectory <output directory> <input file> -genome hg38
    4. for sample in p783_ChIP_DonorI p783_ChIP_DonorII p783_input_DonorI p783_input_DonorII; do
    5. makeTagDirectory ${sample} ../picard/${sample}.dedup.sorted.bam -genome hg38
    6. done
  4. generate bigwigs

    1. #makeUCSCfile peaks.txt -f peaks.bed -o auto -noadj -bigWig sample.bw -genome hg38
    2. for sample in p783_ChIP_DonorI p783_ChIP_DonorII p783_input_DonorI p783_input_DonorII; do
    3. makeUCSCfile ${sample} -pseudo 1 -bigWig /home/jhuang/REFs/hg38.chromSizes -o auto -style chipseq -norm 1e7 -normLength 100 -fsize 1
    4. done
    5. mv ./p783_ChIP_DonorI/p783_ChIP_DonorI.ucsc.bigWig ./p783_ChIP_DonorI/LT_K331A_DI.bigWig
    6. mv ./p783_ChIP_DonorII/p783_ChIP_DonorII.ucsc.bigWig ./p783_ChIP_DonorII/LT_K331A_DII.bigWig
    7. mv ./p783_input_DonorI/p783_input_DonorI.ucsc.bigWig ./p783_input_DonorI/LT_K331A_DI_input.bigWig
    8. mv ./p783_input_DonorII/p783_input_DonorII.ucsc.bigWig ./p783_input_DonorII/LT_K331A_DII_input.bigWig
  5. peak calling, get peaks.txt

    1. #findPeaks <tag directory> -i <input file> -o <output file> -genome hg38
    2. findPeaks p783_ChIP_DonorI -style factor -o auto -i p783_input_DonorI
    3. findPeaks p783_ChIP_DonorII -style factor -o auto -i p783_input_DonorII
    4. cp -r ../reproduce_2023/tagDirectories/ ./
    5. cd homer
    6. ln -s ../tagDirectories/NHDF_LT_Donor1 ./
    7. ln -s ../tagDirectories/NHDF_LT_Donor2 ./
    8. ln -s ../tagDirectories/NHDF_LT_Donor1_Input ./
    9. ln -s ../tagDirectories/NHDF_LT_Donor2_Input ./
    10. ln -s ../tagDirectories/Pfsk-1B_LT+sT_r1 ./
    11. ln -s ../tagDirectories/Pfsk-1B_LT+sT_r2 ./
    12. ln -s ../tagDirectories/Pfsk-1B_LT+sT_r1_Input ./
    13. ln -s ../tagDirectories/Pfsk-1B_LT+sT_r2_Input ./
    14. ln -s ../tagDirectories/HEK293_LT+sT_r2 ./
    15. ln -s ../tagDirectories/HEK293_LT+sT_r3 ./
    16. ln -s ../tagDirectories/HEK293_LT+sT_r2_Input ./
    17. ln -s ../tagDirectories/HEK293_LT+sT_r3_Input ./
    18. findPeaks NHDF_LT_Donor1 -style factor -o auto -i NHDF_LT_Donor1_Input
    19. findPeaks NHDF_LT_Donor2 -style factor -o auto -i NHDF_LT_Donor2_Input
    20. findPeaks Pfsk-1B_LT+sT_r1 -style factor -o auto -i Pfsk-1B_LT+sT_r1_Input
    21. findPeaks Pfsk-1B_LT+sT_r2 -style factor -o auto -i Pfsk-1B_LT+sT_r2_Input
    22. findPeaks HEK293_LT+sT_r2 -style factor -o auto -i HEK293_LT+sT_r2_Input
    23. findPeaks HEK293_LT+sT_r3 -style factor -o auto -i HEK293_LT+sT_r3_Input
  6. peak calling using getDifferentialPeaksReplicates.pl

    1. cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor1_Input ./
    2. cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor2_Input ./
    3. cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor1 ./
    4. cp -r ../../reproduce_2023/tagDirectories/NHDF_LT_Donor2 ./
    5. #-annStats annStats.txt
    6. conda activate myperl
    7. getDifferentialPeaksReplicates.pl -t p783_ChIP_DonorI p783_ChIP_DonorII -i p783_input_DonorI p783_input_DonorII -genome hg38 -use peaks.txt > peaks_K331A_LT.txt
    8. mv peaks_K331A_LT.txt peaks_NHDF_K331A_LT.txt
    9. getDifferentialPeaksReplicates.pl -t NHDF_LT_Donor1 NHDF_LT_Donor2 -i NHDF_LT_Donor1_Input NHDF_LT_Donor2_Input -genome hg38 -use peaks.txt > peaks_NHDF_LT.txt
    10. getDifferentialPeaksReplicates.pl -t Pfsk-1B_LT+sT_r1 Pfsk-1B_LT+sT_r2 -i Pfsk-1B_LT+sT_r1_Input Pfsk-1B_LT+sT_r2_Input -genome hg38 -use peaks.txt > peaks_PFSK-1_LT+sT.txt
    11. getDifferentialPeaksReplicates.pl -t HEK293_LT+sT_r2 HEK293_LT+sT_r3 -i HEK293_LT+sT_r2_Input HEK293_LT+sT_r3_Input -genome hg38 -use peaks.txt > peaks_HEK293_LT+sT.txt
  7. merge peaks (tried -d 0, 200, 500, 1000, 2000; the command below uses -d 1000)

    1. #http://homer.ucsd.edu/homer/ngs/mergePeaks.html
    2. mergePeaks -d 1000 peaks_PFSK-1_LT+sT.txt peaks_HEK293_LT+sT.txt peaks_NHDF_LT.txt -prefix celllines -venn celllines.txt -matrix celllines
    3. #-- generate bed files --
    4. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' peaks_NHDF_LT.txt > peaks_NHDF.bed;
    5. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' peaks_HEK293_LT+sT.txt > peaks_HEK293.bed;
    6. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' peaks_PFSK-1_LT+sT.txt > peaks_PFSK-1.bed;
    7. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_HEK293_LT+sT.txt > peaks_HEK293_only.bed;
    8. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt > peaks_HEK293_NHDF.bed;
    9. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_NHDF_LT.txt > peaks_NHDF_only.bed;
    10. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt > peaks_PFSK-1_only.bed;
    11. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt > peaks_PFSK-1_HEK293.bed;
    12. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt > peaks_PFSK-1_HEK293_NHDF.bed;
    13. awk 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' celllines_peaks_PFSK-1_LT+sT.txt_peaks_NHDF_LT.txt > peaks_PFSK-1_NHDF.bed;
    14. #-- annotate the peaks --
    15. annotatePeaks.pl peaks_NHDF_LT.txt hg38 > annotatedPeaks_NHDF.txt
    16. annotatePeaks.pl peaks_HEK293_LT+sT.txt hg38 > annotatedPeaks_HEK293.txt
    17. annotatePeaks.pl peaks_PFSK-1_LT+sT.txt hg38 > annotatedPeaks_PFSK-1.txt
    18. annotatePeaks.pl celllines_peaks_HEK293_LT+sT.txt hg38 > annotatedPeaks_HEK293_only.txt
    19. annotatePeaks.pl celllines_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt hg38 > annotatedPeaks_HEK293_NHDF.txt
    20. annotatePeaks.pl celllines_peaks_NHDF_LT.txt hg38 > annotatedPeaks_NHDF_only.txt
    21. annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt hg38 > annotatedPeaks_PFSK-1_only.txt
    22. annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt hg38 > annotatedPeaks_PFSK-1_HEK293.txt
    23. annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt hg38 > annotatedPeaks_PFSK-1_HEK293_NHDF.txt
    24. annotatePeaks.pl celllines_peaks_PFSK-1_LT+sT.txt_peaks_NHDF_LT.txt hg38 > annotatedPeaks_PFSK-1_NHDF.txt
    25. mkdir ../beds_PFSK-1_HEK293_NHDF;
    26. for sample in peaks_HEK293_only peaks_PFSK-1_only peaks_NHDF_only peaks_HEK293 peaks_PFSK-1 peaks_NHDF peaks_PFSK-1_HEK293 peaks_PFSK-1_NHDF peaks_HEK293_NHDF peaks_PFSK-1_HEK293_NHDF; do
    27. grep -v "cmd" ${sample}.bed > ../beds_PFSK-1_HEK293_NHDF/${sample}_.bed
    28. done
    29. #Chr Start End PeakID (cmd=annotatePeaks.pl common_peaks_NHDF.txt hg38) Peak Score Strand
    30. ~/Tools/csv2xls-0.4/csv_to_xls.py celllines.txt annotatedPeaks_HEK293_only.txt annotatedPeaks_PFSK-1_only.txt annotatedPeaks_NHDF_only.txt annotatedPeaks_HEK293.txt annotatedPeaks_PFSK-1.txt annotatedPeaks_NHDF.txt annotatedPeaks_PFSK-1_HEK293.txt annotatedPeaks_PFSK-1_NHDF.txt annotatedPeaks_HEK293_NHDF.txt annotatedPeaks_PFSK-1_HEK293_NHDF.txt -d$'\t' -o annotatedPeaks_PFSK-1_HEK293_NHDF.xls
    31. #IMPORTANT: DELETE the column 'Strand' marked with '+' in the merged Excel file!

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum