Select a random line from a file in a single pass

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#include <stdlib.h>

#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
 
using namespace std;
 
// Picks one line of a text file uniformly at random while reading the file
// only once (reservoir sampling with a reservoir of size one).
//
//   filename          path of the file to sample from
//   selected_line_no  out: 0-based index of the chosen line; set to 0 when no
//                     line could be read (empty or unopenable file)
//   selected_line     out: text of the chosen line; left untouched when no
//                     line could be read
//
// Randomness comes from rand(), so seeding with srand() is up to the caller.
void getrandline(std::string filename,size_t &selected_line_no,std::string &selected_line) {

  std::ifstream file(filename.c_str());
  selected_line_no = 0;

  std::string current_line;
  // Loop on the result of getline itself rather than on eof():
  //  - a final line without a trailing newline sets eofbit while still
  //    delivering data, so testing eof() after the read would drop it;
  //  - a stream that failed to open sets failbit but never eofbit, so an
  //    eof()-controlled loop would never terminate.
  for(size_t n=1;std::getline(file,current_line);n++) {

    // The n-th line replaces the current pick with probability 1/n, which
    // leaves every line seen so far equally likely to be the survivor.
    if(rand()%n == 0) { selected_line = current_line; selected_line_no = n-1; }
  }

}
 
int main(int argc,char **argv) {
 
  srand(time(NULL));
 
  vector<size_t> count(100,0); // just for testing, 100 should be > size of the file...
  for(size_t n=0;n<100000;n++) {
   size_t linenum;
   string line;
   getrandline(argv[1],linenum,line);
 
   count[linenum]++;
   cout << linenum << " " << line << endl;
  }
 
  for(size_t n=0;n<count.size();n++) {
    cout << n << " " << count[n] << endl;
  }
 
}

Leave a Reply