ChIP-seq using HOMER (-style factor, findPeaks + default getDifferentialPeaksReplicates.pl)

gene_x 0 like s 1127 view s

Tags:

nextflow ChIP-seq run for NHDF_p783

# Run from the Raw_Data directory of the ChIP-seq project: give the four FASTQ
# files of sequencing run 230306_NB501882_0417_AHMVHHBGXN short sample names.
# The link targets are relative, so the links only resolve from inside Raw_Data.
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf859/3_NHDF_Donor_1_p783_input_S5_R1_001.fastq.gz p783_input_DonorI.fastq.gz
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf860/4_NHDF_Donor_2_p783_input_S6_R1_001.fastq.gz p783_input_DonorII.fastq.gz
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf861/5_NHDF_Donor_1_p783_ChIP_S7_R1_001.fastq.gz p783_ChIP_DonorI.fastq.gz
ln -s ./230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf862/6_NHDF_Donor_2_p783_ChIP_S8_R1_001.fastq.gz p783_ChIP_DonorII.fastq.gz

# Reference entry for 'hg38' (presumably copied from the pipeline's genome
# configuration -- TODO confirm the file it lives in):
#'hg38'      { bwa = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/"
#          blacklist = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/blacklists/hg38-blacklist.bed"
#          gtf = "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf"
#        }

# Make the pipeline checkout reachable as ./NGI-ChIPseq.
ln -s /home/jhuang/Tools/NGI-ChIPseq/ .

# The notes originally showed this command behind the prompt prefix "(chipseq)",
# i.e. it was run inside the conda environment "chipseq". A pasted prompt is a
# shell syntax error, so the environment is activated explicitly instead.
# The --reads glob stays single-quoted so that nextflow, not the shell, expands it.
conda activate chipseq
nextflow run NGI-ChIPseq/main.nf \
  --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data/*.fastq.gz' \
  --genome hg38 \
  --macsconfig macs.config \
  --singleEnd \
  --blacklist_filtering \
  -profile standard \
  --project Denise_LT_DNA_Bindung \
  --outdir results_LT_DNA_Bindung_hg38 \
  -resume

# Side note: nextflow RNA-seq run for NHDF_p783 (not the topic of this post).

# Run from Raw_Data (RNA-seq): copy the two p600 d8 FASTQ files from the
# earlier RNA-seq project.
cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz ./
cp ~/DATA/Data_Denise_tx_epi_MCPyV_PUBLISHING/Data_Denise_RNASeq/Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ./

# Run from Raw_Data_p783_RNAseq: link the p600 files as controls and the two
# p783 files from the sequencing run under short sample names.
ln -s ../Raw_Data/V_8_2_4_p600_d8_DonorI.fastq.gz ctrl_DonorI.fastq.gz
ln -s ../Raw_Data/V_8_2_3_p600_d8_DonorII.fastq.gz ctrl_DonorII.fastq.gz
ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf857/1_NHDF_Donor_1_p783_S1_R1_001.fastq.gz p783_DonorI.fastq.gz
ln -s ../Raw_Data/230306_NB501882_0417_AHMVHHBGXN/2023_022_nf_denise/nf858/2_NHDF_Donor_2_p783_S2_R1_001.fastq.gz p783_DonorII.fastq.gz

# NOTE(review): the links above are said to live in Raw_Data_p783_RNAseq, but
# --reads below points at .../Raw_Data_p783/RNA_seq/ -- confirm which one is right.
#
# MultiQC.html has to be regenerated with 'Biotype Counts' ignored, because
# --fcGroupFeaturesType gene_name cannot produce the real biotype counts.
#
# The notes originally showed this command behind the prompt prefix
# "(rnaseq_2021)", i.e. it was run inside that conda environment. A pasted prompt
# is a shell syntax error, so the environment is activated explicitly instead.
conda activate rnaseq_2021
nextflow run rnaseq \
  --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/Raw_Data_p783/RNA_seq/*.fastq.gz' \
  --fasta "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" \
  --gtf "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" \
  --bed12 "/home/jhuang/REFs/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" \
  --singleEnd \
  -profile standard \
  --aligner star \
  --saveReference \
  -resume \
  --saveAlignedIntermediates \
  --skip_rseqc --skip_dupradar --skip_genebody_coverage --skip_preseq --skip_edger \
  --fcGroupFeaturesType gene_name

nextflow ChIP-seq run for data of truncated LT-Ag + sT expression of WaGa and HEK293

# Each sample was sequenced on two lanes; the per-lane gzip files are joined with
# cat into one FASTQ per sample (concatenated gzip members form a valid gzip file).

#160719_SN7001212_0156_AC8K76ACXX  (HEK293, p197: lanes L002 + L003)
run_hek=../160719_SN7001212_0156_AC8K76ACXX

cat "${run_hek}/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L002_R1_001.fastq.gz" \
    "${run_hek}/Sample_293_input_1_10_p197_1/293_input_1_10_p197_1_GTAGAG_L003_R1_001.fastq.gz" \
    > HEK293_Input_p197_r1.fastq.gz
cat "${run_hek}/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L002_R1_001.fastq.gz" \
    "${run_hek}/Sample_293_input_1_10_p197_2/293_input_1_10_p197_2_GTCCGC_L003_R1_001.fastq.gz" \
    > HEK293_Input_p197_r2.fastq.gz
cat "${run_hek}/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L002_R1_001.fastq.gz" \
    "${run_hek}/Sample_293_input_1_10_p197_3/293_input_1_10_p197_3_GTGAAA_L003_R1_001.fastq.gz" \
    > HEK293_Input_p197_r3.fastq.gz

cat "${run_hek}/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L002_R1_001.fastq.gz" \
    "${run_hek}/Sample_293_lt_p197_1/293_lt_p197_1_TAGCTT_L003_R1_001.fastq.gz" \
    > HEK293_LT_p197_r1.fastq.gz
cat "${run_hek}/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L002_R1_001.fastq.gz" \
    "${run_hek}/Sample_293_lt_p197_2/293_lt_p197_2_GGCTAC_L003_R1_001.fastq.gz" \
    > HEK293_LT_p197_r2.fastq.gz
cat "${run_hek}/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L002_R1_001.fastq.gz" \
    "${run_hek}/Sample_293_lt_p197_3/293_lt_p197_3_AGTCAA_L003_R1_001.fastq.gz" \
    > HEK293_LT_p197_r3.fastq.gz

#140117_SN7001212_0097_AC3ECBACXX  (WaGa: lanes L003 + L004)
run_waga=../140117_SN7001212_0097_AC3ECBACXX

cat "${run_waga}/Sample_waga_igg/waga_igg_TAGCTT_L003_R1_001.fastq.gz" \
    "${run_waga}/Sample_waga_igg/waga_igg_TAGCTT_L004_R1_001.fastq.gz" \
    > WaGa_IgG.fastq.gz

cat "${run_waga}/Sample_waga_lt/waga_lt_GGTAGC_L003_R1_001.fastq.gz" \
    "${run_waga}/Sample_waga_lt/waga_lt_GGTAGC_L004_R1_001.fastq.gz" \
    > WaGa_LT.fastq.gz

# Make the pipeline checkout reachable as ./NGI-ChIPseq.
ln -s /home/jhuang/Tools/NGI-ChIPseq/ .

# The notes originally showed this command behind the prompt prefix "(chipseq)",
# i.e. it was run inside the conda environment "chipseq". A pasted prompt is a
# shell syntax error, so the environment is activated explicitly instead.
# The --reads glob stays single-quoted so that nextflow, not the shell, expands it.
conda activate chipseq
nextflow run NGI-ChIPseq/main.nf \
  --reads '/home/jhuang/DATA/Data_Denise_LT_DNA_Binding/LTtr-ChIP/Raw_Data/*.fastq.gz' \
  --genome hg38 \
  --macsconfig macs.config \
  --singleEnd \
  --blacklist_filtering \
  -profile standard \
  --project Denise_LTtr_DNA_Bindung \
  --outdir results_LTtr_DNA_Bindung_hg38 \
  -resume

makeTagDirectory

# HOMER is run from the conda environment "myperl".
conda activate myperl

# -p: do not fail when the directory already exists (re-runs) or the parent is
# missing; &&: only change directory when it is really there, so the tag
# directories are never written into the wrong place.
# NOTE(review): the nextflow runs above write to results_LT_DNA_Bindung_hg38 /
# results_LTtr_DNA_Bindung_hg38 -- confirm that results_ChIPseq_K331A_hg38 is the
# directory whose picard/ output is meant here.
mkdir -p results_ChIPseq_K331A_hg38/homer && cd results_ChIPseq_K331A_hg38/homer

#makeTagDirectory <output directory> <input file> -genome hg38
# One tag directory per sample, built from the de-duplicated, sorted BAM in ../picard.
for sample in p783_ChIP_DonorI p783_ChIP_DonorII p783_input_DonorI p783_input_DonorII; do
  makeTagDirectory "${sample}" "../picard/${sample}.dedup.sorted.bam" -genome hg38
done

generate bigwigs

# Each entry is "<tag directory>:<track name>". makeUCSCfile is run on every tag
# directory first; afterwards the bigWig it left at <tagdir>/<tagdir>.ucsc.bigWig
# is renamed to <tagdir>/<track name>.bigWig.
bigwig_tracks=(
  "p783_ChIP_DonorI:LT_K331A_DI"
  "p783_ChIP_DonorII:LT_K331A_DII"
  "p783_input_DonorI:LT_K331A_DI_input"
  "p783_input_DonorII:LT_K331A_DII_input"
)

#makeUCSCfile <tag directory> -bigWig <chrom.sizes file> -o auto [options]
for entry in "${bigwig_tracks[@]}"; do
  tagdir=${entry%%:*}
  makeUCSCfile "${tagdir}" -pseudo 1 -bigWig /home/jhuang/REFs/hg38.chromSizes -o auto -style chipseq -norm 1e7 -normLength 100 -fsize 1
done

for entry in "${bigwig_tracks[@]}"; do
  tagdir=${entry%%:*}
  track=${entry##*:}
  mv "./${tagdir}/${tagdir}.ucsc.bigWig" "./${tagdir}/${track}.bigWig"
done

peak calling, get peaks.txt

  #findPeaks <tag directory> -i <input file> -o <output file> -genome hg38
  # K331A samples: each donor's ChIP is called against the same donor's input.
  findPeaks p783_ChIP_DonorI  -style factor -o auto -i p783_input_DonorI
  findPeaks p783_ChIP_DonorII -style factor -o auto -i p783_input_DonorII

  # Fetch the previously built tag directories and link them into homer/.
  # NOTE(review): the relative paths imply these two commands are run from the
  # parent of a homer/ directory, not from inside the one entered earlier -- confirm.
  # cp needs -r to copy a directory; without it nothing is copied and every link
  # created below dangles. The source is written without a trailing slash so that
  # GNU and BSD cp both create ./tagDirectories rather than copying its contents.
  cp -r ../reproduce_2023/tagDirectories ./
  cd homer
  for tagdir in \
    NHDF_LT_Donor1 NHDF_LT_Donor2 NHDF_LT_Donor1_Input NHDF_LT_Donor2_Input \
    'Pfsk-1B_LT+sT_r1' 'Pfsk-1B_LT+sT_r2' 'Pfsk-1B_LT+sT_r1_Input' 'Pfsk-1B_LT+sT_r2_Input' \
    'HEK293_LT+sT_r2' 'HEK293_LT+sT_r3' 'HEK293_LT+sT_r2_Input' 'HEK293_LT+sT_r3_Input'; do
    ln -s "../tagDirectories/${tagdir}" ./
  done

  # Same call for every linked data set: <sample> against <sample>_Input.
  findPeaks NHDF_LT_Donor1 -style factor -o auto -i NHDF_LT_Donor1_Input
  findPeaks NHDF_LT_Donor2 -style factor -o auto -i NHDF_LT_Donor2_Input

  findPeaks Pfsk-1B_LT+sT_r1 -style factor -o auto -i Pfsk-1B_LT+sT_r1_Input
  findPeaks Pfsk-1B_LT+sT_r2 -style factor -o auto -i Pfsk-1B_LT+sT_r2_Input

  findPeaks HEK293_LT+sT_r2 -style factor -o auto -i HEK293_LT+sT_r2_Input
  findPeaks HEK293_LT+sT_r3 -style factor -o auto -i HEK293_LT+sT_r3_Input

peak calling using getDifferentialPeaksReplicates.pl

# Copy the four NHDF_LT tag directories (inputs first, then ChIP) into the
# current directory.
# NOTE(review): if these names already exist here as symlinks to directories,
# cp -r copies INTO the link target instead -- confirm this is run in a clean directory.
for tagdir in NHDF_LT_Donor1_Input NHDF_LT_Donor2_Input NHDF_LT_Donor1 NHDF_LT_Donor2; do
  cp -r "../../reproduce_2023/tagDirectories/${tagdir}" ./
done

# Unused optional argument kept for reference: -annStats annStats.txt
conda activate myperl

# One call per data set: -t lists the replicate ChIP tag directories, -i the
# matching input tag directories; the result table is captured from stdout.
getDifferentialPeaksReplicates.pl \
  -t p783_ChIP_DonorI p783_ChIP_DonorII \
  -i p783_input_DonorI p783_input_DonorII \
  -genome hg38 -use peaks.txt > peaks_K331A_LT.txt
# Final name carries the cell type as well.
mv peaks_K331A_LT.txt peaks_NHDF_K331A_LT.txt

getDifferentialPeaksReplicates.pl \
  -t NHDF_LT_Donor1 NHDF_LT_Donor2 \
  -i NHDF_LT_Donor1_Input NHDF_LT_Donor2_Input \
  -genome hg38 -use peaks.txt > peaks_NHDF_LT.txt

getDifferentialPeaksReplicates.pl \
  -t Pfsk-1B_LT+sT_r1 Pfsk-1B_LT+sT_r2 \
  -i Pfsk-1B_LT+sT_r1_Input Pfsk-1B_LT+sT_r2_Input \
  -genome hg38 -use peaks.txt > peaks_PFSK-1_LT+sT.txt

getDifferentialPeaksReplicates.pl \
  -t HEK293_LT+sT_r2 HEK293_LT+sT_r3 \
  -i HEK293_LT+sT_r2_Input HEK293_LT+sT_r3_Input \
  -genome hg38 -use peaks.txt > peaks_HEK293_LT+sT.txt

merge peaks: maximum distances (-d) of 0, 200, 500, 1000 and 2000 were tried; the command below uses 1000

#http://homer.ucsd.edu/homer/ngs/mergePeaks.html
# Merge the three per-cell-line peak tables (-d 1000). With -prefix celllines the
# per-combination tables used below are named celllines_<file>[_<file>...].
mergePeaks -d 1000 peaks_PFSK-1_LT+sT.txt peaks_HEK293_LT+sT.txt peaks_NHDF_LT.txt -prefix celllines -venn celllines.txt -matrix celllines

# Each entry is "<peak table>:<label>". Every table is converted to
# peaks_<label>.bed and annotated into annotatedPeaks_<label>.txt.
peak_sets=(
  "peaks_NHDF_LT.txt:NHDF"
  "peaks_HEK293_LT+sT.txt:HEK293"
  "peaks_PFSK-1_LT+sT.txt:PFSK-1"
  "celllines_peaks_HEK293_LT+sT.txt:HEK293_only"
  "celllines_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt:HEK293_NHDF"
  "celllines_peaks_NHDF_LT.txt:NHDF_only"
  "celllines_peaks_PFSK-1_LT+sT.txt:PFSK-1_only"
  "celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt:PFSK-1_HEK293"
  "celllines_peaks_PFSK-1_LT+sT.txt_peaks_HEK293_LT+sT.txt_peaks_NHDF_LT.txt:PFSK-1_HEK293_NHDF"
  "celllines_peaks_PFSK-1_LT+sT.txt_peaks_NHDF_LT.txt:PFSK-1_NHDF"
)

for entry in "${peak_sets[@]}"; do
  peak_file=${entry%%:*}
  label=${entry##*:}

  #-- generate bed file --
  # Output columns are input fields 2,3,4,1,6. The input is split on tabs only
  # (HOMER tables are tab-delimited), so a field that is empty or contains a
  # space cannot shift the columns. The header row is still emitted here; it is
  # removed further down.
  awk -F'\t' 'BEGIN {OFS="\t"} {print $2,$3,$4,$1,$6}' "${peak_file}" > "peaks_${label}.bed"

  #-- annotate the peaks --
  annotatePeaks.pl "${peak_file}" hg38 > "annotatedPeaks_${label}.txt"
done

# -p: re-running the block must not fail because the directory already exists.
mkdir -p ../beds_PFSK-1_HEK293_NHDF
# Write header-free copies of the bed files: the header row is the one carrying
# "(cmd=...)", so every line containing "cmd" is dropped.
for sample in peaks_HEK293_only peaks_PFSK-1_only peaks_NHDF_only \
              peaks_HEK293 peaks_PFSK-1 peaks_NHDF \
              peaks_PFSK-1_HEK293 peaks_PFSK-1_NHDF peaks_HEK293_NHDF \
              peaks_PFSK-1_HEK293_NHDF; do
  grep -v "cmd" "${sample}.bed" > "../beds_PFSK-1_HEK293_NHDF/${sample}_.bed"
done

#Chr     Start   End     PeakID (cmd=annotatePeaks.pl common_peaks_NHDF.txt hg38)        Peak Score      Strand
# Collect the Venn table and all annotation tables into one Excel workbook.
~/Tools/csv2xls-0.4/csv_to_xls.py celllines.txt \
  annotatedPeaks_HEK293_only.txt annotatedPeaks_PFSK-1_only.txt annotatedPeaks_NHDF_only.txt \
  annotatedPeaks_HEK293.txt annotatedPeaks_PFSK-1.txt annotatedPeaks_NHDF.txt \
  annotatedPeaks_PFSK-1_HEK293.txt annotatedPeaks_PFSK-1_NHDF.txt annotatedPeaks_HEK293_NHDF.txt \
  annotatedPeaks_PFSK-1_HEK293_NHDF.txt \
  -d$'\t' -o annotatedPeaks_PFSK-1_HEK293_NHDF.xls

#IMPORTANT: DELETE the column 'Strand' marked with '+' in the merged Excel file!

like unlike

点赞本文的读者

还没有人对此文章表态

本文有评论

没有评论

ChIP-seq using HOMER (-style factor, findPeaks + default getDifferentialPeaksReplicates.pl)

本文有评论

看文章，发评论，不要沉默

最受欢迎文章

最新文章

最多评论文章

推荐相似文章