A Burrows Wheeler Transform Implementation

This is a simple, inefficient implementation of the Burrows-Wheeler transform in C++. I wrote it just to play around with the algorithm, not for any practical application. It takes a single text file as the input, then performs the transform and reverses it:



#include <iostream>
#include <fstream>
#include <string>
#include <vector>

using namespace std;

void bwt(string input,string &bwt_str,int &primaryidx) {

  vector<string> rotations;

  // 1. generate rotations of input
  cout << "rotations:" << endl;
  for(int n=0;n<input.size();n++) {
    string s;

    s += input[n];
    for(int i=n+1;i!=n;i++) {
      if(i==input.size()) i=0;
      s += input[i];
      if(i==input.size()-1) i=-1;
    }
    rotations.push_back(s);
    cout << s << endl;
  }

  // 2. sort
  cout << "sorted:" << endl;
  sort(rotations.begin(),rotations.end());
  for(size_t n=0;n<rotations.size();n++) {
    cout << rotations[n] << endl;
  }

  bwt_str.clear();
  for(size_t n=0;n<rotations.size();n++) {
    bwt_str += rotations[n].substr(rotations[n].size()-1,1);
    if(rotations[n] == input) {primaryidx = n;}
  }
  cout << "bwt string: " << bwt_str << endl;
  cout << "primaryidx: " << primaryidx << endl;
}

string rbwt(string bwt_str,int primaryidx) {

  vector<string> cur_str;

  for(size_t n=0;n<bwt_str.size();n++) {
    string s;
    s = bwt_str.substr(n,1);
    cur_str.push_back(s);
  }

  for(;cur_str[0].size() < bwt_str.size();) {
    vector<string> new_str = cur_str;
    sort(new_str.begin(),new_str.end());

    for(size_t n=0;n<cur_str.size();n++) {
      cur_str[n] = cur_str[n] + new_str[n].substr(new_str[n].size()-1,1);
    }
  }

  cout << "reversed transform:" << endl;
  for(size_t n=0;n<cur_str.size();n++) {
    cout << cur_str[n] << endl;
  }

  sort(cur_str.begin(),cur_str.end());

  cout << "Original String: " << cur_str[primaryidx] << endl;

  return cur_str[primaryidx]; 

}

int main(int argc,char **argv) {

  string s;

  ifstream infile(argv[1]);

  for(;!infile.eof();) {
    char c = (char)infile.get();

    if(!infile.eof()) s += string(1,c);
  }
  cout << "input: " << s << "END" << endl;

  string bwt_str;
  int primaryidx;

  bwt(s,bwt_str,primaryidx);
  
  rbwt(bwt_str,primaryidx);
}

Random walks on a grid, Javascript

Some quick Javascript which plots random walks from a point…

View it here.

<html>
<head>

<script type="text/javascript">

  var ittr=8;
  var delta=-0.005;

  function draw_frame() {
    var canvas = document.getElementById("m_canvas");
    if(canvas.getContext) {
      var ctx = canvas.getContext("2d");
      ctx.lineWidth  = 0;
      ctx.fillStyle = "rgb(0,0,0)";


      var x=250;
      var y=250;
      for(var n=0;n<200;n++) {
        var r=Math.floor(Math.random()*4);
        if(r==0) { x+=1; y+=0; }
        if(r==1) { x+=0; y+=1; }
        if(r==2) { x-=1; y+=0; }
        if(r==3) { x-=0; y-=1; }
        ctx.fillRect(x*1,y*1,1,1);
      }

      ctx.fillStyle = "rgb(0,200,200)";
      ctx.fillRect(250,250,3,3);
    }
  }

</script>

</head>

<body  onload="setInterval(draw_frame,10);" >
<canvas id="m_canvas" width="500" height="500">You need a Javascipt</canvas>

</body>

</html>

A quick and dirty BST implementation

Another quick and dirty algorithm implementation in C++. This time a Binary search tree (no balancing):


#include <iostream>
#include <vector>

using namespace std;

class Node {

public:

  int value;

  Node *less;
  Node *more;
};

class BinarySearchTree {

public:

  BinarySearchTree() : root(NULL) {}

  void find_node(int value,Node **findnode,Node **parent) {

    *findnode = NULL;
    *parent   = NULL;

    if(root == NULL) {
      return;
    }

    Node *current = root;
    for(;;) {
      if(current->value == value) {*findnode = current; return;}

      if(value < current->value) {
        if(current->less != NULL) {*parent = current; current = current->less;}
        else {
          return;
        }
      } else {
        if(current->more != NULL) {*parent = current; current = current->more;}
        else {
          return;
        }
      }
    }
  }
 
  Node *getr(Node *c,Node **parent) {
    *parent = c;
    for(;c->more != NULL;) {
      *parent = c;
      c = c->more;
    }
    return c;
  }

  void remove(int value) {
  
    Node *delnode;
    Node *parent;

    find_node(value,&delnode,&parent);

    if(delnode == NULL) return;

    // leaf
    if((delnode->less == NULL) && (delnode->more == NULL)) {
      if(parent->less == delnode) {parent->less = NULL;}
      if(parent->more == delnode) {parent->more = NULL;}
      delete delnode;
      return;
    }

    // only one child
    if((delnode->less == NULL) || (delnode->more == NULL)) {
      if(delnode->less != NULL) {
        if(parent->less == delnode) {parent->less = delnode->less;}
        if(parent->more == delnode) {parent->more = delnode->less;}
        delete delnode; 
        return;
      }

      if(delnode->more != NULL) {
        if(parent->less == delnode) {parent->less = delnode->more;}
        if(parent->more == delnode) {parent->more = delnode->more;}
        delete delnode;
        return;
      }
    }

    // two children
    Node *i = getr(delnode->less,&parent);

    delnode->value = i->value;
    if(parent == i)       { delnode->less = NULL;}
    if(parent->less == i) { parent->less = NULL; }
                     else { parent->more = NULL; }
    delete i;
  }

  void insert(int value) {

    if(root == NULL) {
      Node *n = new Node();
      n->value = value;
      n->less = NULL;
      n->more = NULL;
      root = n;

      return;
    }

    Node *current = root;
    for(;;) {
      if(value < current->value) {
        if(current->less != NULL) current = current->less;
        else {
          Node *n = new Node();
          n->value = value;
          n->less = NULL;
          n->more = NULL;
          current->less = n;
          break;
        }
      } else {
        if(current->more != NULL) current = current->more;
        else {
          Node *n = new Node();
          n->value = value;
          n->less = NULL;
          n->more = NULL;
          current->more = n;
          break;
        }
      }
    }
  }

  bool exists(int value) {
    if(root == NULL) {
      return false;
    }

    Node *current = root;
    for(;;) {
      if(current->value == value) return true;

      if(value < current->value) {
        if(current->less != NULL) current = current->less;
        else {
          return false;
        }
      } else {
        if(current->more != NULL) current = current->more;
        else {
          return false;
        }
      }
    }
  }

  Node *root;
};

int main() {

  BinarySearchTree s;

  vector<int> testdata;

  for(size_t n=0;n<10;n++) { testdata.push_back(rand()%20); }

  for(size_t n=0;n<10;n++) { s.insert(testdata[n]); }

  cout << "testdata: " << endl;
  for(size_t n=0;n<10;n++) { cout << testdata[n] << endl; }

  cout << "existance check: " << endl;
  for(size_t n=0;n<10;n++) { if(s.exists(testdata[n])) cout << "existance: " << testdata[n] << " true" << endl; else cout << "existance: " << testdata[n] << " false" << endl;}
  for(size_t n=0;n<10;n++) { if(s.exists(testdata[n]+20)) cout << "existance: " << testdata[n]+20 << " true" << endl; else cout << "existance: " << testdata[n]+20 << " false" << endl;}

  cout << "remove: " << testdata[5] << endl;
  s.remove(testdata[5]);
  if(s.exists(testdata[5])) cout << testdata[5] << " still exists" << endl; else cout << testdata[5] << " no longer exists" << endl;

  cout << "remove: " << testdata[3] << endl;
  s.remove(testdata[3]);
  if(s.exists(testdata[3])) cout << testdata[3] << " still exists" << endl; else cout << testdata[3] << " no longer exists" << endl;

  cout << "remove: " << testdata[0] << endl;
  s.remove(testdata[0]);
  if(s.exists(testdata[0])) cout << testdata[0] << " still exists" << endl; else cout << testdata[0] << " no longer exists" << endl;
}

Isilon /var full usage at 100% but few files

I had this problem last week. The /var filesystem was full, but contained few files. This in term seemed to kill cifs and the web interface, though nfs was fine.

Long story short, it’s probably snmpd, there’s a bug in a version of the isilon os (possibly fixed now).

You can use fstat to find abnormally large open files (unfortunately lsof isn’t present, so I couldn’t see a way to locate unlinked files) and the process that has them open. You can then kill -9 snmpd. After that you can restart services as follows:

isi services apache2 disable
Isi services apache2 enable
isi services cifs disable
isi services cifs enable

You may also need to kill off webui/smbd (killall -9 isi_webui_d).