# User Tools

# Site Tools


# 1.reseq
#########################
#   2017 C&K Workshop   #
#   Resequencing Code   #
#        2017. 11       #
#########################

########Resequencing ###########

# Work inside the practice directory. Every relative path used below (FASTQ,
# reference, BAM and VCF files) is resolved against it, so stop immediately if
# the directory cannot be entered instead of running the pipeline elsewhere.
cd /home/workshop/1.WGS_practice/Genome_analysis || {
  printf 'cannot cd to %s\n' /home/workshop/1.WGS_practice/Genome_analysis >&2
  exit 1
}

### FASTQC
# Quality report for both raw read files. --nogroup turns off FastQC's
# grouping of base positions, so the per-base plots show every position.
fastqc --nogroup \
  paired_end_sep1.fq \
  paired_end_sep2.fq

### TRIMMOMATIC
# Paired-end (PE) trimming of the two raw read files.
# Outputs: *_1P / *_2P receive pairs where both mates survived trimming,
#          *_1U / *_2U receive reads whose mate was discarded.
# Steps, applied in the order given:
#   ILLUMINACLIP  clip adapters listed in total-TruSeq-PE.fa
#                 (2 seed mismatches, palindrome threshold 30, simple threshold 10)
#   TRAILING:20   cut bases below quality 20 off the end of each read
#   MINLEN:75     drop reads shorter than 75 bases after trimming
java -jar /home/Program/Trimmomatic-0.33/trimmomatic-0.33.jar PE \
  paired_end_sep1.fq paired_end_sep2.fq \
  paired_end_1P.fastq.gz paired_end_1U.fastq.gz \
  paired_end_2P.fastq.gz paired_end_2U.fastq.gz \
  ILLUMINACLIP:/home/Program/Trimmomatic-0.33/adapters/total-TruSeq-PE.fa:2:30:10 \
  TRAILING:20 \
  MINLEN:75


### REFERENCE INDEXING
# Build the Bowtie 2 index from Reference.fa. The index files are written with
# the basename "Reference", which is what the mapping step passes to -x.
/home/Program/bowtie2-2.2.5/bowtie2-build \
  Reference.fa \
  Reference

### READ MAPPING
# Align the paired reads that survived trimming against the "Reference" index
# and write the alignments as SAM. Everything bowtie2 prints on stderr
# (including its alignment summary) is saved to paired_end.bowtie2stat.
/home/Program/bowtie2-2.2.5/bowtie2 \
  -x Reference \
  -1 paired_end_1P.fastq.gz \
  -2 paired_end_2P.fastq.gz \
  -S paired_end.sam \
  2> /home/workshop/1.WGS_practice/Genome_analysis/paired_end.bowtie2stat

### ADD_RG
# Attach a single read group to every read and convert the SAM into a
# coordinate-sorted BAM. The GATK tools used later expect read-group tags.
# VALIDATION_STRINGENCY=LENIENT makes Picard report, rather than abort on,
# SAM validation problems.
java -jar /home/Program/picard-tools-1.138/picard.jar AddOrReplaceReadGroups \
  INPUT=paired_end.sam \
  OUTPUT=paired_end_RG.bam \
  SORT_ORDER=coordinate \
  RGID=paired_end \
  RGLB=paired_end \
  RGPL=illumina \
  RGPU=non \
  RGSM=paired_end \
  VALIDATION_STRINGENCY=LENIENT

### REMOVE_DU
# Detect duplicate reads in the read-group BAM. With REMOVE_DUPLICATES=true the
# duplicates are left out of the output BAM instead of only being flagged.
# ASSUME_SORTED=true tells Picard to treat the input as coordinate-sorted (it
# was written with SORT_ORDER=coordinate by the previous step).
# Duplication statistics go to paired_end_RG_DU.metrics.
java -jar /home/Program/picard-tools-1.138/picard.jar MarkDuplicates \
  INPUT=paired_end_RG.bam \
  OUTPUT=paired_end_RG_DU.bam \
  METRICS_FILE=paired_end_RG_DU.metrics \
  REMOVE_DUPLICATES=true \
  ASSUME_SORTED=true \
  VALIDATION_STRINGENCY=LENIENT

### FIXMATE
# Re-synchronise mate information between the two reads of each pair in the
# de-duplicated BAM and write the result coordinate-sorted.
java -jar /home/Program/picard-tools-1.138/picard.jar FixMateInformation \
  INPUT=paired_end_RG_DU.bam \
  OUTPUT=paired_end_RG_DU_FIX.bam \
  SORT_ORDER=coordinate \
  VALIDATION_STRINGENCY=LENIENT

### INDEXING
# Create the indexes the GATK steps below need for random access:
#   faidx -> Reference.fa.fai (reference FASTA index)
#   index -> paired_end_RG_DU_FIX.bam.bai (BAM index)
# NOTE(review): GATK 3.x also expects a sequence dictionary (Reference.dict)
# next to the reference; nothing here creates one, so presumably it already
# exists in the working directory - confirm.
/home/Program/samtools-1.2/samtools faidx \
  Reference.fa
/home/Program/samtools-1.2/samtools index \
  paired_end_RG_DU_FIX.bam

### REALIGN
# Local realignment around indels, in two passes over the same BAM.

# Pass 1: list the intervals that look like they need realignment.
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T RealignerTargetCreator \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX.bam \
  -o paired_end_RG_DU_FIX_Realign.intervals

# Pass 2: realign the reads inside those intervals and write a new BAM.
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T IndelRealigner \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX.bam \
  -targetIntervals paired_end_RG_DU_FIX_Realign.intervals \
  -o paired_end_RG_DU_FIX_Realign.bam

### BQ_RECAL
# Base quality score recalibration, in two steps.

# Step 1: build the recalibration table (paired_end_RECAL.grp) from the
# realigned BAM using the four listed covariates. Sites present in
# Reference.vcf are treated as known variants rather than sequencing errors.
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T BaseRecalibrator \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX_Realign.bam \
  -cov ReadGroupCovariate \
  -cov QualityScoreCovariate \
  -cov CycleCovariate \
  -cov ContextCovariate \
  -knownSites Reference.vcf \
  -o paired_end_RECAL.grp

# Step 2: apply the table while re-writing the reads to paired_end_RECAL.bam.
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T PrintReads \
  -R Reference.fa \
  -I paired_end_RG_DU_FIX_Realign.bam \
  -BQSR paired_end_RECAL.grp \
  -o paired_end_RECAL.bam

### INDEXING
# Index the recalibrated BAM (writes paired_end_RECAL.bam.bai) so it can be
# opened in IGV and read by the variant caller.
/home/Program/samtools-1.2/samtools index \
  paired_end_RECAL.bam

### IGV
# Launch the IGV viewer with a 1500 MB maximum Java heap. The command runs in
# the foreground, so the steps below start only after IGV exits.
java -Xmx1500m \
  -jar /home/Program/IGV_2.3.80/igv.jar



############ VARIANT CALLING #########

# Call variants on the recalibrated BAM. -glm BOTH makes UnifiedGenotyper
# emit SNPs and indels into one raw VCF.
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T UnifiedGenotyper \
  -R Reference.fa \
  -I paired_end_RECAL.bam \
  -glm BOTH \
  -o sample01.raw.vcf

# Split the raw calls by type so each can be filtered with its own rules.
# SNPs only:
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T SelectVariants \
  -R Reference.fa \
  --variant sample01.raw.vcf \
  -selectType SNP \
  -o snp_sample01.raw.vcf

# Indels only:
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T SelectVariants \
  -R Reference.fa \
  --variant sample01.raw.vcf \
  -selectType INDEL \
  -o indel_sample01.raw.vcf

### VARIANT FILTERING
# Hard filtering. VariantFiltration keeps every record and writes the name of
# each matching filter into the FILTER column. Every --filterExpression is
# paired with the --filterName that follows it, so their order must be kept.

# Indels: flag poorly mapped sites (MQ0), low QUAL, low QD and high FS.
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T VariantFiltration \
  -R Reference.fa \
  --variant indel_sample01.raw.vcf \
  -o filtered_indel_sample01.vcf \
  --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0*DP)) > 0.1)" \
  --filterName "HARD_TO_VALIDATE" \
  --filterExpression "QUAL < 30" \
  --filterName "QualFilter" \
  --filterExpression "QD < 5.0" \
  --filterName "QD5" \
  --filterExpression "FS > 200.0" \
  --filterName "FS200"

# SNPs: same MQ0 / QUAL / FS rules, plus two positional filters:
#   --clusterSize 3 --clusterWindowSize 10  flag clusters of 3+ SNPs in 10 bp
#   --mask ... --maskName "InDel"           flag SNPs overlapping records in
#                                           the filtered indel VCF written above
java -jar /home/Program/GenomeAnalysisTK-3.4-46/GenomeAnalysisTK.jar \
  -T VariantFiltration \
  -R Reference.fa \
  --variant snp_sample01.raw.vcf \
  -o filtered_snp_sample01.vcf \
  --clusterSize 3 \
  --clusterWindowSize 10 \
  --mask filtered_indel_sample01.vcf \
  --maskName "InDel" \
  --filterExpression "MQ0 >= 4 && ((MQ0 / (1.0*DP)) > 0.1)" \
  --filterName "HARD_TO_VALIDATE" \
  --filterExpression "QUAL < 30" \
  --filterName "QualFilter" \
  --filterExpression "FS > 200.0 " \
  --filterName "FS200"


### VARIANT ANNOTATION
# Optional helper commands, left disabled (list databases / fetch one):
#java -jar /home/Program/snpEff/snpEff.jar databases | more
#java -jar /home/Program/snpEff/snpEff.jar -download GRCh38.82

# Annotate the filtered SNP and indel VCFs with snpEff. The annotated VCF is
# written to stdout and redirected to *.ann.vcf; -s names the HTML summary.
# NOTE(review): "Human3" is the genome database name handed to snpEff here;
# presumably a locally installed database - confirm it exists.
java -jar /home/Program/snpEff/snpEff.jar Human3 \
  filtered_snp_sample01.vcf \
  -s filtered_snp_sample01.ann.html \
  > filtered_snp_sample01.ann.vcf

java -jar /home/Program/snpEff/snpEff.jar Human3 \
  filtered_indel_sample01.vcf \
  -s filtered_indel_sample01.ann.html \
  > filtered_indel_sample01.ann.vcf


### IGV
# Launch the IGV viewer again (1500 MB maximum Java heap), e.g. to inspect the
# called variants alongside the alignments.
java -Xmx1500m \
  -jar /home/Program/IGV_2.3.80/igv.jar

### END ###
# 1.reseq.txt · Last modified: by 127.0.0.1