Simulate dataset with samtools, align with Bowtie, BWA, MAQ and pileup

This is an old blog post, migrated from my blogspot account. How much of this information is still valid I don’t know.

Place your reference in to phi.fa (phix reference). Bowtie in bowtie subdirectory. Samtools in samtools subdirectory. Bwa in bwa subdirectory. Maq in the maq subdirectory. cd in to each directory and type make to build them. The phix reference I used had additional characters in the reference tag line which confused samtools when used with bwa. The sed in the following script removes them. The script also aligns each end of the simulated pair independently.

#!/bin/bash

#Generate simulated dataset
./samtools/misc/wgsim ./phi.fa ./sim_phi1.fq ./sim_phi2.fq > sim_phi.info

# Bowtie Create binary reference for bowtie
./bowtie/bowtie-build ./phi.fa phi

# Bowtie Align ends independently
./bowtie/bowtie -t phi ./sim_phi1.fq ./sim_phi1.fq.bowtie.align
./bowtie/bowtie -t phi ./sim_phi2.fq ./sim_phi2.fq.bowtie.align

# Bowtie Convert to SAM
./samtools/misc/bowtie2sam.pl ./sim_phi1.fq.bowtie.align > ./sim_phi1.fq.bowtie.align.sam
./samtools/misc/bowtie2sam.pl ./sim_phi2.fq.bowtie.align > ./sim_phi2.fq.bowtie.align.sam

# Clean up SAM reference tags (which in this case were screwy)
sed 's/ Coliphage phiX174, complete genome//' ./sim_phi1.fq.bowtie.align.sam > ./sim_phi1.fq.bowtie.align.sam.clean
sed 's/ Coliphage phiX174, complete genome//' ./sim_phi2.fq.bowtie.align.sam > ./sim_phi2.fq.bowtie.align.sam.clean

# Create sam phi index
./samtools/samtools faidx phi.fa

# Bowtie Convert to BAM
./samtools/samtools import ./phi.fa.fai ./sim_phi1.fq.bowtie.align.sam.clean ./sim_phi1.fq.bowtie.align.bam
./samtools/samtools import ./phi.fa.fai ./sim_phi2.fq.bowtie.align.sam.clean ./sim_phi2.fq.bowtie.align.bam

# Bowtie Sort alignments
./samtools/samtools sort ./sim_phi1.fq.bowtie.align.bam ./sim_phi1.fq.bowtie.align.bam.sorted
./samtools/samtools sort ./sim_phi2.fq.bowtie.align.bam ./sim_phi2.fq.bowtie.align.bam.sorted

# Bowtie Index alignments
./samtools/samtools index ./sim_phi1.fq.bowtie.align.bam.sorted.bam
./samtools/samtools index ./sim_phi2.fq.bowtie.align.bam.sorted.bam

# Bowtie Pileup
./samtools/samtools pileup -cf ./phi.fa ./sim_phi1.fq.bowtie.align.bam.sorted.bam > ./sim_phi1.fq.bowtie.align.pileup
./samtools/samtools pileup -cf ./phi.fa ./sim_phi2.fq.bowtie.align.bam.sorted.bam > ./sim_phi2.fq.bowtie.align.pileup

# Bowtie Filter to SNP locations
./samtools/misc/samtools.pl varFilter -p -D 200000 ./sim_phi1.fq.bowtie.align.pileup &> ./sim_phi1.fq.bowtie.align.pileup.snps
./samtools/misc/samtools.pl varFilter -p -D 200000 ./sim_phi2.fq.bowtie.align.pileup &> ./sim_phi2.fq.bowtie.align.pileup.snps

# BWA Build index
./bwa/bwa index phi.fa

# BWA
./bwa/bwa aln phi.fa sim_phi1.fq > sim_phi1.fq.bwa.align.sai
./bwa/bwa aln phi.fa sim_phi2.fq > sim_phi2.fq.bwa.align.sai

# BWA Convert to SAM
./bwa/bwa samse phi.fa sim_phi1.fq.bwa.align.sai ./sim_phi1.fq > sim_phi1.fq.bwa.align.sam
./bwa/bwa samse phi.fa sim_phi2.fq.bwa.align.sai ./sim_phi2.fq > sim_phi2.fq.bwa.align.sam

# BWA Convert to BAM
./samtools/samtools import ./phi.fa.fai ./sim_phi1.fq.bwa.align.sam ./sim_phi1.fq.bwa.align.bam
./samtools/samtools import ./phi.fa.fai ./sim_phi2.fq.bwa.align.sam ./sim_phi2.fq.bwa.align.bam

# BWA Sort alignments
./samtools/samtools sort ./sim_phi1.fq.bwa.align.bam ./sim_phi1.fq.bwa.align.bam.sorted
./samtools/samtools sort ./sim_phi2.fq.bwa.align.bam ./sim_phi2.fq.bwa.align.bam.sorted

# BWA Index alignments
./samtools/samtools index ./sim_phi1.fq.bwa.align.bam.sorted.bam
./samtools/samtools index ./sim_phi2.fq.bwa.align.bam.sorted.bam

# BWA Pileup
./samtools/samtools pileup -cf ./phi.fa ./sim_phi1.fq.bwa.align.bam.sorted.bam > ./sim_phi1.fq.bwa.align.pileup
./samtools/samtools pileup -cf ./phi.fa ./sim_phi2.fq.bwa.align.bam.sorted.bam > ./sim_phi2.fq.bwa.align.pileup

# BWA Filter to SNP locations
./samtools/misc/samtools.pl varFilter -p -D 200000 ./sim_phi1.fq.bwa.align.pileup &> ./sim_phi1.fq.bwa.align.pileup.snps
./samtools/misc/samtools.pl varFilter -p -D 200000 ./sim_phi2.fq.bwa.align.pileup &> ./sim_phi2.fq.bwa.align.pileup.snps

# MAQ Convert reference to binary fasta format
./maq/maq fasta2bfa ./phi.fa ./phi.bfa

# MAQ Convert reads to binary fastq format
./maq/maq fastq2bfq ./sim_phi1.fq ./sim_phi1.bfq
./maq/maq fastq2bfq ./sim_phi2.fq ./sim_phi2.bfq

# MAQ Align read to the reference
./maq/maq match ./sim_phi1.maq.map ./phi.bfa sim_phi1.bfq
./maq/maq match ./sim_phi2.maq.map ./phi.bfa sim_phi2.bfq

# MAQ Mapcheck
./maq/maq mapcheck phi.bfa sim_phi1.maq.map &> sim_phi1.mapcheck
./maq/maq mapcheck phi.bfa sim_phi2.maq.map &> sim_phi2.mapcheck

# MAQ Call SNPs
./maq/maq assemble ./sim_phi1.maq.cns ./phi.bfa ./sim_phi1.maq.map &> ./sim_phi1.maq.cns.log
./maq/maq assemble ./sim_phi2.maq.cns ./phi.bfa ./sim_phi2.maq.map &> ./sim_phi2.maq.cns.log

# MAQ Build Consensus
./maq/maq cns2fq ./sim_phi1.maq.cns > ./sim_phi1.maq.cns.fq
./maq/maq cns2fq ./sim_phi2.maq.cns > ./sim_phi2.maq.cns.fq

# MAQ Extract SNPs
./maq/maq cns2snp ./sim_phi1.maq.cns > ./sim_phi1.maq.snps
./maq/maq cns2snp ./sim_phi2.maq.cns > ./sim_phi2.maq.snps

# MAQ Alignments to SAM
./samtools/misc/maq2sam-long ./sim_phi1.maq.map > ./sim_phi1.maq.sam
./samtools/misc/maq2sam-long ./sim_phi2.maq.map > ./sim_phi2.maq.sam

# MAQ SAM to BAM
./samtools/samtools import ./phi.fa.fai ./sim_phi1.maq.sam ./sim_phi1.maq.bam
./samtools/samtools import ./phi.fa.fai ./sim_phi2.maq.sam ./sim_phi2.maq.bam

# MAQ Sort alignments
./samtools/samtools sort ./sim_phi1.maq.bam ./sim_phi1.maq.bam.sorted
./samtools/samtools sort ./sim_phi2.maq.bam ./sim_phi2.maq.bam.sorted

# MAQ Index alignments
./samtools/samtools index ./sim_phi1.maq.bam.sorted.bam
./samtools/samtools index ./sim_phi2.maq.bam.sorted.bam

# MAQ Run MAQ alignments against samtools for SNP calling
./samtools/samtools pileup -cf ./phi.fa ./sim_phi1.maq.bam.sorted.bam > ./sim_phi1.maq.pileup
./samtools/samtools pileup -cf ./phi.fa ./sim_phi2.maq.bam.sorted.bam > ./sim_phi2.maq.pileup

# MAQ Filter to SNP locations
./samtools/misc/samtools.pl varFilter -p -D 200000 ./sim_phi1.maq.pileup &> ./sim_phi1.maq.pileup.snps
./samtools/misc/samtools.pl varFilter -p -D 200000 ./sim_phi2.maq.pileup &> ./sim_phi2.maq.pileup.snps