1.reseq
#########################
# 2017 C&K Workshop     #
# Resequencing Code     #
# 2017. 11              #
#########################
#
# Whole-genome resequencing walkthrough for one paired-end sample:
#   FastQC -> Trimmomatic -> bowtie2 -> Picard -> GATK 3 indel realignment and
#   base-quality recalibration -> UnifiedGenotyper -> hard filtering -> snpEff.
#
# Inputs expected in WORK_DIR:
#   paired_end_sep1.fq, paired_end_sep2.fq   raw read pairs
#   Reference.fa                             reference genome
#   Reference.vcf                            known sites for BaseRecalibrator
# NOTE(review): GATK 3 normally also needs a sequence dictionary
# (Reference.dict) beside Reference.fa; nothing here creates one, so it is
# presumably shipped with the workshop data -- confirm.
#
# Run with bash as a script (e.g. `bash <this file>`). The strict mode set
# below makes the shell exit on the first failing step, so do not paste this
# into an interactive session unless the `set` line is dropped.

# Every step consumes the previous step's output, so stop at the first error
# instead of running later tools on missing or truncated files.
set -euo pipefail

# ---- Tool locations (workshop install layout) ------------------------------
readonly WORK_DIR=/home/workshop/1.WGS_practice/Genome_analysis
readonly TRIMMOMATIC_DIR=/home/Program/Trimmomatic-0.33
readonly BOWTIE2_DIR=/home/Program/bowtie2-2.2.5
readonly PICARD_JAR=/home/Program/picard-tools-1.138/picard.jar
readonly SAMTOOLS=/home/Program/samtools-1.2/samtools
readonly GATK_JAR=/home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar
readonly SNPEFF_JAR=/home/Program/snpEff/snpEff.jar
readonly IGV_JAR=/home/Program/IGV_2.3.80/igv.jar

# Run a Picard tool; all arguments are passed through unchanged.
picard() {
  java -jar "${PICARD_JAR}" "$@"
}

# Run a GATK 3 walker; all arguments are passed through unchanged.
gatk() {
  java -jar "${GATK_JAR}" "$@"
}

# Open IGV for manual inspection. It runs in the foreground, so the script
# waits here until the IGV process exits. The viewer's exit status says
# nothing about the data, so a failure only warns and the pipeline continues.
launch_igv() {
  java -Xmx1500m -jar "${IGV_JAR}" \
    || printf 'warning: IGV exited with a non-zero status; continuing\n' >&2
}

######## Resequencing ###########

# All relative file names below resolve against WORK_DIR.
cd "${WORK_DIR}" || {
  printf 'error: cannot cd to %s\n' "${WORK_DIR}" >&2
  exit 1
}

### FASTQC
# Quality report for the raw reads.
fastqc --nogroup paired_end_sep1.fq paired_end_sep2.fq

### TRIMMOMATIC
# Paired-end mode. Outputs are paired (1P/2P) and unpaired (1U/2U) reads.
# ILLUMINACLIP removes adapter sequence, TRAILING:20 trims low-quality 3' ends,
# MINLEN:75 discards reads shorter than 75 bp after trimming.
java -jar "${TRIMMOMATIC_DIR}/trimmomatic-0.33.jar" \
  PE paired_end_sep1.fq paired_end_sep2.fq \
  paired_end_1P.fastq.gz paired_end_1U.fastq.gz \
  paired_end_2P.fastq.gz paired_end_2U.fastq.gz \
  "ILLUMINACLIP:${TRIMMOMATIC_DIR}/adapters/total-TruSeq-PE.fa:2:30:10" \
  TRAILING:20 MINLEN:75

### REFERENCE INDEXING
"${BOWTIE2_DIR}/bowtie2-build" Reference.fa Reference

### READ MAPPING
# Only the surviving read pairs (1P/2P) are aligned. bowtie2's stderr is
# captured in paired_end.bowtie2stat.
"${BOWTIE2_DIR}/bowtie2" \
  -x Reference \
  -1 paired_end_1P.fastq.gz -2 paired_end_2P.fastq.gz \
  -S paired_end.sam \
  2> "${WORK_DIR}/paired_end.bowtie2stat"

### ADD_RG
# Attach read-group tags and write a coordinate-sorted BAM in one pass.
picard AddOrReplaceReadGroups \
  INPUT=paired_end.sam \
  OUTPUT=paired_end_RG.bam \
  SORT_ORDER=coordinate \
  RGID=paired_end RGLB=paired_end RGPL=illumina RGPU=non RGSM=paired_end \
  VALIDATION_STRINGENCY=LENIENT

### REMOVE_DU
# REMOVE_DUPLICATES=true drops duplicate reads from the output rather than
# only flagging them.
picard MarkDuplicates \
  INPUT=paired_end_RG.bam \
  OUTPUT=paired_end_RG_DU.bam \
  METRICS_FILE=paired_end_RG_DU.metrics \
  REMOVE_DUPLICATES=true ASSUME_SORTED=true \
  VALIDATION_STRINGENCY=LENIENT

### FIXMATE
picard FixMateInformation \
  INPUT=paired_end_RG_DU.bam \
  OUTPUT=paired_end_RG_DU_FIX.bam \
  SORT_ORDER=coordinate \
  VALIDATION_STRINGENCY=LENIENT

### INDEXING
"${SAMTOOLS}" faidx Reference.fa
"${SAMTOOLS}" index paired_end_RG_DU_FIX.bam

### REALIGN
# Two passes: find intervals that need realignment, then realign reads there.
gatk -T RealignerTargetCreator \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX.bam \
  -o paired_end_RG_DU_FIX_Realign.intervals

gatk -T IndelRealigner \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX.bam \
  -o paired_end_RG_DU_FIX_Realign.bam \
  -targetIntervals paired_end_RG_DU_FIX_Realign.intervals

### BQ_RECAL
# Two passes: build the recalibration table, then rewrite the reads with it.
gatk -T BaseRecalibrator \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX_Realign.bam \
  -cov ReadGroupCovariate -cov QualityScoreCovariate \
  -cov CycleCovariate -cov ContextCovariate \
  -knownSites Reference.vcf \
  -o paired_end_RECAL.grp

gatk -T PrintReads \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX_Realign.bam \
  -BQSR paired_end_RECAL.grp \
  -o paired_end_RECAL.bam

### INDEXING
"${SAMTOOLS}" index paired_end_RECAL.bam

### IGV
launch_igv

############ VARIANT CALLING #########

# -glm BOTH calls SNPs and indels together; they are split apart below so
# each class can be filtered with its own criteria.
gatk -T UnifiedGenotyper \
  -R Reference.fa \
  -I paired_end_RECAL.bam \
  -o sample01.raw.vcf \
  -glm BOTH

gatk \
  -R Reference.fa \
  -T SelectVariants \
  --variant sample01.raw.vcf \
  -o snp_sample01.raw.vcf \
  -selectType SNP

gatk \
  -R Reference.fa \
  -T SelectVariants \
  --variant sample01.raw.vcf \
  -o indel_sample01.raw.vcf \
  -selectType INDEL

### VARIANT FILTERING
# VariantFiltration labels failing records in the FILTER column; it does not
# remove them from the output VCF.
# Indels are filtered first because the SNP pass uses the result as a mask.
gatk \
  -R Reference.fa \
  -T VariantFiltration \
  --variant indel_sample01.raw.vcf \
  -o filtered_indel_sample01.vcf \
  --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0*DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" \
  --filterExpression "QUAL < 30" --filterName "QualFilter" \
  --filterExpression "QD < 5.0" --filterName "QD5" \
  --filterExpression "FS > 200.0" --filterName "FS200"

# SNPs: as above but without the QD filter, plus a cluster filter (3 SNPs
# within a 10 bp window) and an "InDel" label for SNPs that overlap the
# indel call set.
gatk \
  -R Reference.fa \
  -T VariantFiltration \
  --variant snp_sample01.raw.vcf \
  -o filtered_snp_sample01.vcf \
  --clusterSize 3 --clusterWindowSize 10 \
  --mask filtered_indel_sample01.vcf --maskName "InDel" \
  --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0*DP)) > 0.1)" --filterName "HARD_TO_VALIDATE" \
  --filterExpression "QUAL < 30" --filterName "QualFilter" \
  --filterExpression "FS > 200.0 " --filterName "FS200"

### VARIANT ANNOTATION
# Optional helpers, left disabled: list available databases / download one.
#java -jar /home/Program/snpEff/snpEff.jar databases | more
#java -jar /home/Program/snpEff/snpEff.jar -download GRCh38.82

# "Human3" is the snpEff genome database name -- presumably a database
# configured locally for the workshop; confirm it exists in snpEff.config.
# The annotated VCF goes to stdout (redirected); -s names the HTML summary.
java -jar "${SNPEFF_JAR}" Human3 filtered_snp_sample01.vcf \
  -s filtered_snp_sample01.ann.html > filtered_snp_sample01.ann.vcf

java -jar "${SNPEFF_JAR}" Human3 filtered_indel_sample01.vcf \
  -s filtered_indel_sample01.ann.html > filtered_indel_sample01.ann.vcf

### IGV
launch_igv

### END ###
1.reseq.txt · Last modified: by 127.0.0.1