RepeatMasker

Typical Usage

# Mask seq.fa against a custom repeat library (-lib) with 48 parallel
# workers (-pa); output files are written to the current directory (-dir).
# -xsmall is understood to soft-mask (lower-case) repeats - see RepeatMasker -h.
/exports/software/repeatmasker/repeatmasker-4-0-5/RepeatMasker \
  -pa 48 -xsmall \
  -lib taxon.repeatlib.fa \
  -dir . \
  seq.fa

Gridengine wrapper

repeatmasker.sh:

#!/bin/bash

#$ -V
#$ -cwd
#$ -j y
#$ -o $JOB_ID.log

# Gridengine wrapper for RepeatMasker: runs the job in a per-job scratch
# directory and copies the result files back to the data directory.
#
# Required environment, supplied at submission time:
#       export DATADIR=`pwd`       directory holding the input files
#              -v SEQFILE=seq.fa   sequence file to mask, relative to DATADIR
#              -v REPLIB=repeatLib.fa   repeat library, relative to DATADIR
# JOB_ID, NSLOTS and USER are expected to come from the job environment.
#
# Exit status: non-zero if setup, RepeatMasker or the copy-back fails.

REPEATMASKER=/exports/software/repeatmasker/repeatmasker-4-0-5/RepeatMasker

# Fail early, with a message in the job log, instead of building paths such
# as "/seq.fa" or "/scratch//repeatmasker/" from empty variables.
: "${DATADIR:?export DATADIR before calling qsub}"
: "${SEQFILE:?pass -v SEQFILE=<sequence file> to qsub}"
: "${REPLIB:?pass -v REPLIB=<repeat library> to qsub}"
: "${JOB_ID:?JOB_ID is not set - is this running under gridengine?}"
: "${NSLOTS:?NSLOTS is not set - is this running under gridengine?}"
: "${USER:?USER is not set}"

for input in "$DATADIR/$SEQFILE" "$DATADIR/$REPLIB"; do
  if [[ ! -r "$input" ]]; then
    printf 'repeatmasker.sh: cannot read input file: %s\n' "$input" >&2
    exit 1
  fi
done

WORKDIR="/scratch/$USER/repeatmasker/$JOB_ID"
mkdir -p -- "$WORKDIR" || exit
# Without this check a failed cd would make "-dir ." write into the
# submission directory instead of scratch.
cd -- "$WORKDIR" || exit

rm_status=0
"$REPEATMASKER" \
       -pa "$NSLOTS" \
       -lib "$DATADIR/$REPLIB" \
       -dir . \
       -xsmall \
       "$DATADIR/$SEQFILE" || rm_status=$?
if (( rm_status != 0 )); then
  printf 'repeatmasker.sh: RepeatMasker exited with status %d\n' "$rm_status" >&2
fi

# Output files are looked up by the input's base name so that SEQFILE may
# include a sub-directory. NOTE(review): assumes RepeatMasker names its
# outputs <basename of input>.* inside the -dir directory - confirm.
# The copy-back is still attempted after a RepeatMasker failure, so partial
# output remains available for inspection.
rsync -a -- "./${SEQFILE##*/}".* "$DATADIR/" || exit

# Report RepeatMasker's failure to the scheduler even if the copy succeeded.
exit "$rm_status"

qsub command

# Submit from the directory that holds the input files. DATADIR is exported
# for the wrapper script to read; SEQFILE and REPLIB are handed over with -v.
DATADIR=$(pwd)
export DATADIR
qsub \
  -pe smp 32 \
  -v SEQFILE=seq.fa \
  -v REPLIB=repeatLib.fa \
  /path/to/repeatmasker.sh

useful one-liners

Summarise genome size and repeat content across multiple results files in a
folder (the filename pattern is specific to lepbase naming conventions):
# Prints one tab-separated line per file: name (everything from "_-" onwards
# removed), first number after "total", and the first number after the next
# "bases" as a percentage of it. Files where these numbers cannot be found
# are reported on stderr and skipped instead of aborting the whole run with
# a division by zero.
perl -0777 -ne '$ARGV=~s/_-.+//;if(m/total.+?(\d+).+?bases.+?(\d+)/s&&$1>0){print "$ARGV\t$1\t",($2/$1*100),"\n"}else{warn "$ARGV: no totals found, skipped\n"}' *_-_scaffolds.fa.tbl