A Burrows Wheeler Transform Implementation

This is a simple, inefficient implementation of the Burrows-Wheeler transform in C++. I wrote it just to play around with the algorithm, not for any practical application. It takes a single text file as the input, then performs the transform and reverses it:



#include <iostream>
#include <fstream>
#include <string>
#include <vector>

using namespace std;

void bwt(string input,string &bwt_str,int &primaryidx) {

  vector<string> rotations;

  // 1. generate rotations of input
  cout << "rotations:" << endl;
  for(int n=0;n<input.size();n++) {
    string s;

    s += input[n];
    for(int i=n+1;i!=n;i++) {
      if(i==input.size()) i=0;
      s += input[i];
      if(i==input.size()-1) i=-1;
    }
    rotations.push_back(s);
    cout << s << endl;
  }

  // 2. sort
  cout << "sorted:" << endl;
  sort(rotations.begin(),rotations.end());
  for(size_t n=0;n<rotations.size();n++) {
    cout << rotations[n] << endl;
  }

  bwt_str.clear();
  for(size_t n=0;n<rotations.size();n++) {
    bwt_str += rotations[n].substr(rotations[n].size()-1,1);
    if(rotations[n] == input) {primaryidx = n;}
  }
  cout << "bwt string: " << bwt_str << endl;
  cout << "primaryidx: " << primaryidx << endl;
}

string rbwt(string bwt_str,int primaryidx) {

  vector<string> cur_str;

  for(size_t n=0;n<bwt_str.size();n++) {
    string s;
    s = bwt_str.substr(n,1);
    cur_str.push_back(s);
  }

  for(;cur_str[0].size() < bwt_str.size();) {
    vector<string> new_str = cur_str;
    sort(new_str.begin(),new_str.end());

    for(size_t n=0;n<cur_str.size();n++) {
      cur_str[n] = cur_str[n] + new_str[n].substr(new_str[n].size()-1,1);
    }
  }

  cout << "reversed transform:" << endl;
  for(size_t n=0;n<cur_str.size();n++) {
    cout << cur_str[n] << endl;
  }

  sort(cur_str.begin(),cur_str.end());

  cout << "Original String: " << cur_str[primaryidx] << endl;

  return cur_str[primaryidx]; 

}

int main(int argc,char **argv) {

  string s;

  ifstream infile(argv[1]);

  for(;!infile.eof();) {
    char c = (char)infile.get();

    if(!infile.eof()) s += string(1,c);
  }
  cout << "input: " << s << "END" << endl;

  string bwt_str;
  int primaryidx;

  bwt(s,bwt_str,primaryidx);
  
  rbwt(bwt_str,primaryidx);
}

Leave a Reply

Your email address will not be published.

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>