myillu_01.trimpe.sh
Script <myIllu_trimPE.sh>
#!/bin/bash
#
# Takes two paired read files, runs Trimmomatic (PE mode) on them, and makes
# one interleaved file named <base>-trim.pe.fq, where <base> is the basename
# of the first input file with its last '_'-suffix removed.
#
# Usage: myIllu_trimPE.sh <reads_1> <reads_2>
#
# Exit status: 0 on success; 1 on wrong argument count, a missing input file,
# or failure of any processing step.
#
# Trimmomatic v0.32 manual:
# http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf
#
# Latest version of ngopt (a5_miseq): 20150522

# Abort on unhandled errors and on use of unset variables.
set -euo pipefail

readonly TRIMMOMATIC=/usr/local/apps/a5_miseq_linux_20140604/bin/trimmomatic.jar
readonly ADAPTER=/usr/local/apps/a5_miseq_linux_20140604/adapter.fasta
readonly INTERLEAVE=/usr/local/apps/khmer/khmerEnv/bin/interleave-reads.py
readonly THREADS=24
readonly MINLEN=75 # default MINLENG is 75 (for NGOPT, 36 bp)

# Print a diagnostic to stderr and terminate with exit status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $# -ne 2 ]]; then
  die "Please specifiy two file names!"
fi

# Check the files are found (code from SGA example script).
# Iterate over the quoted arguments directly so that paths containing
# whitespace or glob characters are tested as given.
for input in "$1" "$2"; do
  if [[ ! -f "$input" ]]; then
    die "Error input file $input not found"
  fi
done

# Using `basename` command is a good idea. Because the stripped BASE
# can be used as a output file prefix in "current" directory.
BASE=$(basename -- "$1") # delete any leading path
BASE=${BASE%_*}          # delete string from '_' to the end (shortest match)
# BASE=${1%%_*} makes difference. What is it? (longest match)
# BASE=${1/_*/} (longest match)
echo "File name base: $BASE"
BASE=${BASE}-trim
echo "File name base will be changed into ${BASE}"

# Stop here if trimming fails: the later steps depend on its output files.
java -jar "$TRIMMOMATIC" PE -threads "$THREADS" -phred33 \
  -baseout "${BASE}.fq.gz" "$1" "$2" \
  "ILLUMINACLIP:${ADAPTER}:2:30:10" LEADING:3 TRAILING:3 \
  SLIDINGWINDOW:4:20 "MINLEN:${MINLEN}" \
  || die "Error: Trimmomatic failed for $1 $2"
# compare to a5-miseq pipeline (my $trim_cmd):
# ILLUMINACLIP:$adapter:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36

echo "Writing interleaved file ${BASE}.pe.fq from ${BASE}_1P.fq.gz ${BASE}_2P.fq.gz..."
# Do not fall through to the cleanup below on failure; otherwise the trimmed
# paired files would be deleted without a usable interleaved result.
"$INTERLEAVE" -o "${BASE}.pe.fq" "${BASE}_1P.fq.gz" "${BASE}_2P.fq.gz" \
  || die "Error: interleaving failed; intermediate files ${BASE}_*.fq.gz were kept"

echo "Deleting intermediate files (including orphan files)..."
# '--' keeps a base name that starts with '-' from being parsed as an option.
rm -- "${BASE}_1U.fq.gz" "${BASE}_2U.fq.gz" "${BASE}_1P.fq.gz" "${BASE}_2P.fq.gz"
myillu_01.trimpe.sh.txt · Last modified: 2021/03/17 13:09 by 127.0.0.1