A Burrows Wheeler Transform Implementation
This is a simple, inefficient implementation of the Burrows-Wheeler transform in C++. I wrote it just to play around with the algorithm, not for any practical application. It takes a single text file as the input, then performs the transform and reverses it:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | #include <iostream> #include <fstream> #include <string> #include <vector> using namespace std; void bwt(string input,string &bwt_str, int &primaryidx) { vector<string> rotations; // 1. generate rotations of input cout << "rotations:" << endl; for ( int n=0;n<input.size();n++) { string s; s += input[n]; for ( int i=n+1;i!=n;i++) { if (i==input.size()) i=0; s += input[i]; if (i==input.size()-1) i=-1; } rotations.push_back(s); cout << s << endl; } // 2. sort cout << "sorted:" << endl; sort(rotations.begin(),rotations.end()); for ( size_t n=0;n<rotations.size();n++) { cout << rotations[n] << endl; } bwt_str.clear(); for ( size_t n=0;n<rotations.size();n++) { bwt_str += rotations[n].substr(rotations[n].size()-1,1); if (rotations[n] == input) {primaryidx = n;} } cout << "bwt string: " << bwt_str << endl; cout << "primaryidx: " << primaryidx << endl; } string rbwt(string bwt_str, int primaryidx) { vector<string> cur_str; for ( size_t n=0;n<bwt_str.size();n++) { string s; s = bwt_str.substr(n,1); cur_str.push_back(s); } for (;cur_str[0].size() < bwt_str.size();) { vector<string> new_str = cur_str; sort(new_str.begin(),new_str.end()); for ( size_t n=0;n<cur_str.size();n++) { cur_str[n] = cur_str[n] + new_str[n].substr(new_str[n].size()-1,1); } } cout << "reversed transform:" << endl; for ( size_t n=0;n<cur_str.size();n++) { cout << cur_str[n] << endl; } sort(cur_str.begin(),cur_str.end()); cout << "Original String: " << cur_str[primaryidx] << endl; return cur_str[primaryidx]; } int main( int argc, char **argv) { string s; ifstream infile(argv[1]); for (;!infile.eof();) { char c = ( char )infile.get(); if (!infile.eof()) s += string(1,c); } cout << "input: " << s << "END" << endl; string bwt_str; int primaryidx; bwt(s,bwt_str,primaryidx); rbwt(bwt_str,primaryidx); } |