User Tools

Site Tools


myillu_01.trimpe.sh

Script <myIllu_trimPE.sh>

#!/bin/bash
#
# Takes two paired-end read files, runs Trimmomatic (PE mode), and writes one interleaved file.
# Trimmomatic v0.32 manual: 
#   http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/TrimmomaticManual_V0.32.pdf
#
# Latest version of ngopt (a5_miseq): 20150522

# Installation prefixes. The Trimmomatic jar and the adapter file both live
# under the a5_miseq tree; the interleave script lives in the khmer env.
A5_MISEQ_DIR=/usr/local/apps/a5_miseq_linux_20140604
KHMER_BIN_DIR=/usr/local/apps/khmer/khmerEnv/bin

TRIMMOMATIC="${A5_MISEQ_DIR}/bin/trimmomatic.jar"
ADAPTER="${A5_MISEQ_DIR}/adapter.fasta"
INTERLEAVE="${KHMER_BIN_DIR}/interleave-reads.py"

# Trimming parameters.
THREADS=24   # value handed to Trimmomatic's -threads option
MINLEN=75    # value for the MINLEN: step; default here is 75 (NGOPT uses 36 bp)

# Exactly two arguments (the two paired input files) are required.
# Diagnostics go to stderr so they are not mixed into captured stdout.
if [[ $# -ne 2 ]]; then
    echo "Please specify two file names!" >&2
    echo "Usage: ${0##*/} <paired_file_1> <paired_file_2>" >&2
    exit 1
fi

# Check that both input files exist (idea from the SGA example script).
# The positional parameters are iterated directly and quoted, so a path
# containing spaces or glob characters is tested as a single file name
# instead of being split into several words.
for input in "$1" "$2"; do
    if [[ ! -f "$input" ]]; then
        echo "Error input file $input not found" >&2
        exit 1
    fi
done

# Derive the output prefix from the first input file.
# `basename` removes any leading directory, so the prefix (and therefore
# every output file) ends up in the current directory.
input_name=$(basename "$1")

# ${var%_*} removes the SHORTEST trailing match of '_*', i.e. only the last
# underscore-delimited field: sample_L001_R1.fq.gz -> sample_L001.
# By contrast ${1%%_*} and ${1/_*/} remove the LONGEST match, cutting at the
# first underscore (-> sample), and they act on $1 with its path still attached.
printf 'File name base: %s\n' "${input_name%_*}"

# Tag the prefix so trimmed outputs are distinguishable from the inputs.
BASE="${input_name%_*}-trim"
printf 'File name base will be changed into %s\n' "${BASE}"

# Run Trimmomatic in paired-end mode on the two input files.
# -baseout gives the template name; the later steps of this script expect the
# resulting files ${BASE}_1P/_2P.fq.gz (paired) and ${BASE}_1U/_2U.fq.gz (orphans).
# All expansions are quoted so paths with spaces survive, and the script stops
# here if trimming fails rather than continuing with missing outputs.
java -jar "$TRIMMOMATIC" PE -threads "${THREADS}" -phred33 \
    -baseout "${BASE}.fq.gz" "$1" "$2" \
    "ILLUMINACLIP:${ADAPTER}:2:30:10" LEADING:3 TRAILING:3 \
    SLIDINGWINDOW:4:20 "MINLEN:${MINLEN}" || {
    echo "Error: Trimmomatic failed; aborting." >&2
    exit 1
}

# compare to a5-miseq pipeline (my $trim_cmd): 
#   ILLUMINACLIP:$adapter:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36

# Merge the two paired outputs into a single interleaved file.
echo "Writing interleaved file ${BASE}.pe.fq from ${BASE}_1P.fq.gz ${BASE}_2P.fq.gz..."
if ! "$INTERLEAVE" -o "${BASE}.pe.fq" "${BASE}_1P.fq.gz" "${BASE}_2P.fq.gz"; then
    # Keep the trimmed files: deleting them after a failed interleave would
    # force the whole trimming step to be repeated.
    echo "Error: interleaving failed; intermediate files were kept." >&2
    exit 1
fi

# Only reached when the interleaved file was written successfully.
# '--' stops option parsing in case a derived name starts with '-'.
echo "Deleting intermediate files (including orphan files)..."
rm -- "${BASE}_1U.fq.gz" "${BASE}_2U.fq.gz" "${BASE}_1P.fq.gz" "${BASE}_2P.fq.gz"
myillu_01.trimpe.sh.txt · Last modified: 2021/03/17 13:09 by 127.0.0.1