User Tools

Site Tools


custom_scripts

This is an old revision of the document!


my_rename1.pl

#!/usr/bin/perl
#
# my_rename1.pl
#
# Build an "id<TAB>name" table from a tab-separated assembly summary file.
#
# Usage:
#   my_rename1.pl SUMMARY_FILE > id2name.txt
#
# Only lines starting with GCF or GCA are processed.  For each such line the
# script prints the last '/'-separated component of column 19, a tab, and a
# file-name-safe label built from columns 7 and 8 (plus column 15 when
# column 8 is empty).  Column numbers are 0-based.
#
# NOTE(review): the column layout looks like NCBI's assembly_summary.txt
# (7 = organism name, 8 = infraspecific_name, 15 = asm_name, 19 = FTP path);
# confirm against the actual input file.
#
use strict;
use warnings;

my $summary_file = $ARGV[0];
defined $summary_file
    or die "Usage: $0 SUMMARY_FILE\n";

open my $in, '<', $summary_file
    or die "Can't open file $summary_file: $!";

while (my $line = <$in>) {
    chomp $line;
    next unless $line =~ /^(?:GCF|GCA)/;

    # Short lines leave trailing columns undefined; treat those as empty.
    my @data     = split /\t/, $line;
    my $organism = $data[7]  // '';
    my $strain   = $data[8]  // '';    # infraspecific_name
    my $asm_name = $data[15] // '';
    my $path     = $data[19] // '';

    $strain =~ s/strain=//;

    # Remove a redundant strain name from the end of the organism name.
    # \Q...\E makes the strain match literally; strain names often contain
    # regex metacharacters such as '(', ')' or '+'.
    $organism =~ s/ \Q$strain\E$//;

    my $name = $organism . ' ' . $strain;

    # With no strain the name is "Genus species " (trailing blank); append
    # asm_name so that the label still distinguishes assemblies.
    $name .= $asm_name if $name =~ /^\S+ \S+ $/;

    $name =~ s/:/-/g;
    $name =~ s/\s+/_/g;
    $name =~ s/;.*$//;     # remove semicolon and following information
    $name =~ s{/}{_}g;     # every slash, so the label is usable as a file name
    $name =~ s/_$//;

    # The key is the last component of the path column.
    my @path_parts = split m{/}, $path;
    my $key = @path_parts ? $path_parts[-1] : '';

    print $key, "\t", $name, "\n";
}

close $in;

my_rename2.pl

#!/usr/bin/perl
#
# my_rename2.pl
#
# Copy a sequence file to a descriptive name looked up in an id2name table.
#
# $ARGV[0] : id2name file (tab-separated: key, name)
#    GCF_000015065.1_ASM1506v1       Bt_str._Al_Hakam
#    GCF_000092165.1_ASM9216v1       Bt_BMB171
#    ...
# $ARGV[1] : fna file
#    GCF_000015065.1_ASM1506v1_genomic.fna
#
# The copy is named <name>_<accession>.<extension>, where <accession> is the
# first two '_'-separated parts of the file name and <extension> is the text
# after the last dot, e.g.
#    Bt_str._Al_Hakam_GCF_000015065.1.fna
# The script dies without copying when the key is not in the table.
#
use strict;
use warnings;
use File::Basename qw(basename);

@ARGV >= 2
    or die "Usage: $0 ID2NAME_FILE SEQUENCE_FILE\n";
my ($map_file, $source) = @ARGV;

# Load the key -> name table.
my %key2name;
open my $in, '<', $map_file
    or die "Can't open file $map_file: $!";
while (my $line = <$in>) {
    chomp $line;
    my ($id, $label) = split /\t/, $line;
    next unless defined $id && defined $label;    # skip blank/malformed lines
    $key2name{$id} = $label;
}
close $in;

# Work on the bare file name so that a directory component in $source does
# not end up in the lookup key or in the accession.
my @parts = split /_/, basename($source);
@parts >= 3
    or die "Unexpected file name '$source' (expected KEY_suffix.ext)\n";

my $suffix    = pop @parts;                     # e.g. "genomic.fna"
my $accession = $parts[0] . '_' . $parts[1];    # e.g. "GCF_000015065.1"
my $key       = join '_', @parts;               # e.g. "GCF_000015065.1_ASM1506v1"

# Greedy match: only the text after the LAST dot is kept as the extension.
my ($end) = $suffix =~ /^.+\.(.+)$/;
defined $end
    or die "Cannot find a file extension in '$source'\n";

my $name = $key2name{$key};
defined $name
    or die "No name for key '$key' in $map_file; $source not copied\n";

my $file = $name . '_' . $accession . '.' . $end;
print "$source ===> $file\n";

# List form of system() bypasses the shell, so names containing spaces,
# quotes or parentheses are passed to cp unchanged.
system('cp', '--', $source, $file) == 0
    or die "cp $source $file failed (status $?)\n";

safe_download.sh

#!/usr/bin/bash

# safe_download.sh
#
# Download one file from an assembly directory with wget, verify it against
# the directory's md5checksums.txt, and move it into TARGET_DIR when the
# checksum matches.  On any failure the download URL is appended to
# failed_download.txt and the script exits with status 1.
#
# argument (FtpPath_RefSeq) example
# ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/741/935/GCF_000741935.1_ASM74193v1
#
# How to run:
#   while read -r path; do bash ./THIS_SCRIPT "$path"; done < download_path.txt

if [ $# -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: $0 FtpPath" >&2
    exit 1
fi

TARGET_DIR=download

# Modify 'genomic.gbff.gz' as you want!
FILE_SUFFIX=genomic.gbff.gz

# Drop one trailing slash, then take the last path component as the
# assembly directory name (e.g. GCF_000741935.1_ASM74193v1).
BASE_URL=${1%/}
ASSEMBLY=${BASE_URL##*/}

case "$ASSEMBLY" in
    GC?_?*) ;;
    *)
        echo "Unexpected path (last component must look like GCF_... or GCA_...): $1" >&2
        exit 1
        ;;
esac

DOWNLOAD_PATH=${BASE_URL}/${ASSEMBLY}_${FILE_SUFFIX}
FILE=${DOWNLOAD_PATH##*/}
MD5SUM_FILE=${BASE_URL}/md5checksums.txt
CHECKSUM_FILE="$$checksum.txt"    # per-process name ($$ is this shell's PID)

# Without the directory, mv would rename the download to a plain file called
# "$TARGET_DIR" and every later download would overwrite it.
mkdir -p "$TARGET_DIR" || exit 1

STATUS=0

# -O forces the output name, so a stale md5checksums.txt or a leftover
# partial download cannot make wget save to "*.1" and leave the old file to
# be checked.  grep -F matches the file name literally (its dots are not
# regex wildcards).
if wget -O md5checksums.txt "$MD5SUM_FILE" \
    && grep -F -- "$FILE" md5checksums.txt > "$CHECKSUM_FILE" \
    && wget -O "$FILE" "$DOWNLOAD_PATH" \
    && md5sum -c "$CHECKSUM_FILE"; then
    echo "${FILE} download OK! Moving to ${TARGET_DIR}..."
    mv -- "$FILE" "$TARGET_DIR"/
else
    echo "${FILE} download FAIL!"
    echo "$DOWNLOAD_PATH" >> failed_download.txt
    STATUS=1
fi

rm -f md5checksums.txt* "$CHECKSUM_FILE"
exit "$STATUS"
custom_scripts.1687421549.txt.gz · Last modified: 2023/06/22 17:12 by hyjeong