{"id":842,"date":"2012-03-20T16:42:32","date_gmt":"2012-03-20T16:42:32","guid":{"rendered":"http:\/\/41j.com\/blog\/?p=842"},"modified":"2012-03-20T16:42:32","modified_gmt":"2012-03-20T16:42:32","slug":"a-burrows-wheeler-transform-implementation","status":"publish","type":"post","link":"https:\/\/41j.com\/blog\/2012\/03\/a-burrows-wheeler-transform-implementation\/","title":{"rendered":"A Burrows Wheeler Transform Implementation"},"content":{"rendered":"<p>This is a simple, inefficient implementation of the Burrows-Wheeler transform in C++. I wrote it just to play around with the algorithm, not for any practical application. It takes a single text file as the input, then performs the transform and reverses it:<\/p>\n<pre class=\"brush: cpp; title: ; notranslate\" title=\"\">\r\n\r\n\r\n#include &lt;iostream&gt;\r\n#include &lt;fstream&gt;\r\n#include &lt;string&gt;\r\n#include &lt;vector&gt;\r\n\r\nusing namespace std;\r\n\r\nvoid bwt(string input,string &amp;bwt_str,int &amp;primaryidx) {\r\n\r\n  vector&lt;string&gt; rotations;\r\n\r\n  \/\/ 1. generate rotations of input\r\n  cout &lt;&lt; &quot;rotations:&quot; &lt;&lt; endl;\r\n  for(int n=0;n&lt;input.size();n++) {\r\n    string s;\r\n\r\n    s += input&#x5B;n];\r\n    for(int i=n+1;i!=n;i++) {\r\n      if(i==input.size()) i=0;\r\n      s += input&#x5B;i];\r\n      if(i==input.size()-1) i=-1;\r\n    }\r\n    rotations.push_back(s);\r\n    cout &lt;&lt; s &lt;&lt; endl;\r\n  }\r\n\r\n  \/\/ 2. sort\r\n  cout &lt;&lt; &quot;sorted:&quot; &lt;&lt; endl;\r\n  sort(rotations.begin(),rotations.end());\r\n  for(size_t n=0;n&lt;rotations.size();n++) {\r\n    cout &lt;&lt; rotations&#x5B;n] &lt;&lt; endl;\r\n  }\r\n\r\n  bwt_str.clear();\r\n  for(size_t n=0;n&lt;rotations.size();n++) {\r\n    bwt_str += rotations&#x5B;n].substr(rotations&#x5B;n].size()-1,1);\r\n    if(rotations&#x5B;n] == input) {primaryidx = n;}\r\n  }\r\n  cout &lt;&lt; &quot;bwt string: &quot; &lt;&lt; bwt_str &lt;&lt; endl;\r\n  cout &lt;&lt; &quot;primaryidx: &quot; &lt;&lt; primaryidx &lt;&lt; endl;\r\n}\r\n\r\nstring rbwt(string bwt_str,int primaryidx) {\r\n\r\n  vector&lt;string&gt; cur_str;\r\n\r\n  for(size_t n=0;n&lt;bwt_str.size();n++) {\r\n    string s;\r\n    s = bwt_str.substr(n,1);\r\n    cur_str.push_back(s);\r\n  }\r\n\r\n  for(;cur_str&#x5B;0].size() &lt; bwt_str.size();) {\r\n    vector&lt;string&gt; new_str = cur_str;\r\n    sort(new_str.begin(),new_str.end());\r\n\r\n    for(size_t n=0;n&lt;cur_str.size();n++) {\r\n      cur_str&#x5B;n] = cur_str&#x5B;n] + new_str&#x5B;n].substr(new_str&#x5B;n].size()-1,1);\r\n    }\r\n  }\r\n\r\n  cout &lt;&lt; &quot;reversed transform:&quot; &lt;&lt; endl;\r\n  for(size_t n=0;n&lt;cur_str.size();n++) {\r\n    cout &lt;&lt; cur_str&#x5B;n] &lt;&lt; endl;\r\n  }\r\n\r\n  sort(cur_str.begin(),cur_str.end());\r\n\r\n  cout &lt;&lt; &quot;Original String: &quot; &lt;&lt; cur_str&#x5B;primaryidx] &lt;&lt; endl;\r\n\r\n  return cur_str&#x5B;primaryidx]; \r\n\r\n}\r\n\r\nint main(int argc,char **argv) {\r\n\r\n  string s;\r\n\r\n  ifstream infile(argv&#x5B;1]);\r\n\r\n  for(;!infile.eof();) {\r\n    char c = (char)infile.get();\r\n\r\n    if(!infile.eof()) s += string(1,c);\r\n  }\r\n  cout &lt;&lt; &quot;input: &quot; &lt;&lt; s &lt;&lt; &quot;END&quot; &lt;&lt; endl;\r\n\r\n  string bwt_str;\r\n  int primaryidx;\r\n\r\n  bwt(s,bwt_str,primaryidx);\r\n  \r\n  rbwt(bwt_str,primaryidx);\r\n}\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>This is a simple, inefficient implementation of the Burrows-Wheeler transform in C++. I wrote it just to play around with the algorithm, not for any practical application. It takes a single text file as the input, then performs the transform and reverses it: #include &lt;iostream&gt; #include &lt;fstream&gt; #include &lt;string&gt; #include &lt;vector&gt; using namespace std; void [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":false,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2}},"categories":[1],"tags":[],"class_list":["post-842","post","type-post","status-publish","format-standard","hentry","category-uncategorized"],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_shortlink":"https:\/\/wp.me\/p1RRoU-dA","jetpack_sharing_enabled":true,"_links":{"self":[{"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/posts\/842","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/comments?post=842"}],"version-history":[{"count":1,"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/posts\/842\/revisions"}],"predecessor-version":[{"id":843,"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/posts\/842\/revisions\/843"}],"wp:attachment":[{"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/media?parent=842"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/categories?post=842"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/41j.com\/blog\/wp-json\/wp\/v2\/tags?post=842"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}