Answers for "batch download rcsb pdb"

0

batch download rcsb pdb

See: https://www.rcsb.org/docs/programmatic-access/batch-downloads-with-shell-script
I changed the script from that page so that each PDB file is saved in its own folder, named after the PDB id.
How to run: ./batch_download.sh -f list_file.txt -p
Create list_file.txt (a comma-separated list of PDB ids) as described at the link above.
############################
#!/bin/bash

# Script to download files from RCSB http file download services.
# Use the -h switch to get help on usage.

# Abort early when curl is not installed: the download step invokes it for
# every file. The diagnostic goes to stderr so it never mixes with normal
# output that a caller may capture or pipe.
if ! command -v curl > /dev/null 2>&1; then
  echo "'curl' could not be found. You need to install 'curl' for this script to work." >&2
  exit 1
fi

# Name the script was invoked with; shown in the usage synopsis.
PROGNAME="${0}"
# Root URL of the RCSB file download service; remote file names are appended to it.
BASE_URL='https://files.rcsb.org/download'

#######################################
# Print the command-line help and terminate the script.
# Globals:   PROGNAME (read) - script name shown in the synopsis
# Arguments: none
# Outputs:   help text on stderr
# Returns:   never returns; exits the shell with status 1
#######################################
usage() {
  cat << EOF >&2
Usage: $PROGNAME -f <file> [-o <dir>] [-c] [-p] [-a] [-x] [-s] [-m] [-r]

 -f <file>: the input file containing a comma-separated list of PDB ids
 -o  <dir>: the output dir, default: current dir
 -c       : download a cif.gz file for each PDB id
 -p       : download a pdb.gz file for each PDB id into a folder named after the id (not available for large structures)
 -a       : download a pdb1.gz file (1st bioassembly) for each PDB id (not available for large structures)
 -x       : download a xml.gz file for each PDB id
 -s       : download a sf.cif.gz file for each PDB id (diffraction only)
 -m       : download a mr.gz file for each PDB id (NMR only)
 -r       : download a mr.str.gz for each PDB id (NMR only)
EOF
  exit 1
}

#######################################
# Download one file from the RCSB file download service.
# Globals:   BASE_URL (read) - prefix the remote file name is appended to
# Arguments: $1 - remote file name, e.g. 1abc.pdb.gz
#            $2 - local directory to save into; it must already exist,
#                 because curl is invoked without --create-dirs
# Outputs:   a progress line on stdout; a failure message on stderr
# Returns:   0 on success, otherwise curl's non-zero exit status
#######################################
download() {
  local url="$BASE_URL/$1"
  local out="$2/$1"
  local rc=0

  echo "Downloading $url to $out"
  # Quoted so that directories containing spaces reach curl as one argument.
  # -f makes curl fail on HTTP errors instead of saving the error page.
  curl -s -f "$url" -o "$out" || rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "Failed to download $url" >&2
  fi
  return "$rc"
}

# Option defaults: no list file, save into the current directory, and no file
# format selected until the matching switch is seen.
listfile=""
outdir="."
cif=false
pdb=false
pdb1=false
xml=false
sf=false
mr=false
mrstr=false

# The leading ':' puts getopts in silent mode, so bad options are reported
# here (on stderr) instead of by getopts itself. -h is accepted explicitly so
# that asking for help does not go through the "unknown option" path.
while getopts ':f:o:cpaxsmrh' opt; do
  case "$opt" in
    f) listfile=$OPTARG ;;
    o) outdir=$OPTARG ;;
    c) cif=true ;;
    p) pdb=true ;;
    a) pdb1=true ;;
    x) xml=true ;;
    s) sf=true ;;
    m) mr=true ;;
    r) mrstr=true ;;
    h) usage ;;
    :)
      # In silent mode OPTARG holds the option whose argument is missing.
      echo "Option -$OPTARG requires an argument" >&2
      usage
      ;;
    *)
      # In silent mode OPTARG holds the unrecognised option character.
      echo "Unknown option -$OPTARG" >&2
      usage
      ;;
  esac
done
# Drop the parsed options so only positional arguments (if any) remain.
shift "$((OPTIND - 1))"

# The list of PDB ids is the only mandatory input.
if [ -z "$listfile" ]; then
  echo "Parameter -f must be provided" >&2
  exit 1
fi
# Refuse to continue when the id list cannot be read.
if [[ ! -r "$listfile" ]]; then
  echo "Cannot read list file: $listfile" >&2
  exit 1
fi

# Split the whole file into ids. Commas are the documented separator; spaces,
# tabs, carriage returns and newlines are accepted as well, so padded
# ("1abc, 2xyz"), multi-line and CRLF lists are all read completely.
# With -d '' read consumes the entire file and then reports end-of-file as a
# non-zero status even though the array was filled, hence the "|| true".
tokens=()
IFS=$', \t\r\n' read -r -d '' -a tokens < "$listfile" || true

# Make sure the target directory exists before anything is written into it.
if ! mkdir -p -- "$outdir"; then
  echo "Cannot create output dir: $outdir" >&2
  exit 1
fi

# Becomes 1 as soon as any download (or folder creation) reports a failure.
failed=0

for token in "${tokens[@]}"; do
  # Consecutive separators (e.g. "1abc,,2xyz") yield empty entries; skip them.
  [[ -n "$token" ]] || continue

  if [[ "$cif" == true ]]; then
    download "${token}.cif.gz" "$outdir" || failed=1
  fi
  if [[ "$pdb" == true ]]; then
    # pdb files go into a folder named after the id, created inside the
    # output dir. No cd is involved, so a failed mkdir can never leave the
    # script in an unexpected working directory, and re-runs reuse the folder.
    pdbdir="$outdir/$token"
    if mkdir -p -- "$pdbdir"; then
      download "${token}.pdb.gz" "$pdbdir" || failed=1
    else
      echo "Cannot create folder: $pdbdir" >&2
      failed=1
    fi
  fi
  if [[ "$pdb1" == true ]]; then
    download "${token}.pdb1.gz" "$outdir" || failed=1
  fi
  if [[ "$xml" == true ]]; then
    download "${token}.xml.gz" "$outdir" || failed=1
  fi
  if [[ "$sf" == true ]]; then
    download "${token}-sf.cif.gz" "$outdir" || failed=1
  fi
  if [[ "$mr" == true ]]; then
    download "${token}.mr.gz" "$outdir" || failed=1
  fi
  if [[ "$mrstr" == true ]]; then
    download "${token}_mr.str.gz" "$outdir" || failed=1
  fi
done

# Every id is attempted; the exit status tells the caller whether any
# download reported a failure.
exit "$failed"
Posted by: Guest on August-11-2021

Code answers related to "Shell/Bash"

Browse Popular Code Answers by Language