Archive for the ‘Uncategorized’ Category.

Some Awful Python Code

Here’s the modeling code from the substack post.

from math import comb
from math import log10
import sys


def tprb(n, k, t):

        totalp = 1

        for i in range(0,k,1):
                p = 1 - (t / (n - i))
                totalp = totalp * p

        return 1-totalp

ctoffset = 45
ctslope  = -3.36

def predictCT(fragments,ampregion,fragmentsize,genomesize):
	totalfragments    = genomesize-fragmentsize
	ontargetfragments = fragmentsize-ampregion
	targetfragments = fragments*(ontargetfragments/totalfragments)
	ct = ctoffset+log10(targetfragments)*ctslope
	return ct

genomesize      = 30000
totalng         = 10
qpcrampregion   = 90
basesperng      = 1771000000000 
rrnafraction    = 0.6
fragmentsize    = 1000
starttargetfraction  = 0.000000001
targetfraction  = starttargetfraction


totalfragments  = (basesperng*totalng)/fragmentsize
reads           = 100000
targetfragments = totalfragments*targetfraction

totalfragments_norRNA = ((totalfragments-targetfragments)*(1-rrnafraction)) + targetfragments

# Vary reads count
#print ("total fragments, reads, target fragments, probability of >1 read, probability of >1 read (no rRNA), predicted CT")
#for creads in range(reads,100000000,1000000):
#	print (totalfragments, " ", creads, " ", targetfragments, " ", tprb(totalfragments,creads,targetfragments), " ", tprb(totalfragments_norRNA,creads,targetfragments),predictCT(targetfragments,qpcrampregion,fragmentsize,genomesize))

# Vary target fraction
#print ("target fraction, total fragments, reads, target fragments, probability of >1 read, probability of >1 read (no rRNA), predicted CT")
#while targetfraction < 1:
#	targetfragments = totalfragments*targetfraction
#	print (targetfraction, " ", totalfragments, " ", reads, " ", targetfragments, " ", tprb(totalfragments,reads,targetfragments), " ", tprb(totalfragments_norRNA,reads,targetfragments),predictCT(targetfragments,qpcrampregion,fragmentsize,genomesize))
#	targetfraction *= 10

# Vary both
#print(", ", end='')
#while targetfraction < 1:
#	targetfragments = totalfragments*targetfraction
#	print(targetfraction, ", ", end='')
#	targetfraction *= 2
#print()

#for creads in range(reads,5000000,100000):
#	print(creads, ", ", end='')
#	targetfraction  = starttargetfraction
#	while targetfraction < 1:
#		targetfragments = totalfragments*targetfraction
#		print (tprb(totalfragments,creads,targetfragments), ", ", end='')
#		
#		targetfraction *= 2
#	print()

# Vary both - CT
print(", ", end='')
while targetfraction < 1:
	targetfragments = totalfragments*targetfraction
	print("{:.2f}".format(predictCT(targetfragments,qpcrampregion,fragmentsize,genomesize)), ", ", end='')
	targetfraction *= 2
print()

for creads in range(reads,5000000,100000):
	print(creads, ", ", end='')
	targetfraction  = starttargetfraction
	while targetfraction < 1:
		targetfragments = totalfragments*targetfraction
		print (tprb(totalfragments,creads,targetfragments), ", ", end='')
		
		targetfraction *= 2
	print()

Ion314 die images

N choose K, with T targets. Probability of choosing at least one target

Looking at n choose k, but you want to get at least one of a target t. For example you have a bag of blue marbles, which contains some number of red marbles. N is the total number of marbles in the bag. K is the number of marbles being chosen. T is the number of red marbles.

This is probably well known, but I worked through it a few different ways anyway…

First a simulation (C++):

#include <iostream>
#include <vector>

using namespace std;


int main(int argc,char **argv) {

  int n = 3000;
  int k = 50;
  int t = 30; // targets

  vector<int> items(n,0);

  for(int i=0;i<t;i++) {
    int itm=rand()%items.size();
    for(;items[itm] !=0;) itm = rand()%items.size();
    items[itm] = 1;
    //items[i] = 1;
  }

  //cout << "items: ";
  //for(int i=0;i<items.size();i++) cout << items[i] << " ";
  //cout << endl;

  vector<int> count(t,0);

  // rounds of choosing
  int rounds=100000;
  for(int j=0;j<rounds;j++) {
    cout << "round: " << j << endl;
    int tcount=0;

    vector<int> selected;
    for(int i=0;i<k;i++) {

      int itm = rand()%items.size();
      for(;std::count(selected.begin(), selected.end(), itm);) itm = rand()%items.size();
      selected.push_back(itm);
      //cout << "selected: " << itm << " " << items[itm] << endl;
      if(i%100000==0) cout << i << endl;
    }

    //count items on target
    for(int i=0;i<selected.size();i++) {
      if(items[selected[i]] == 1) tcount++;
    }
    cout << "tcount: " << tcount << endl;

    count[tcount]++;
  }

  int morezero=0;
  for(int i=0;i<t;i++) {
    if((count[i] > 0) || (i==0)) cout << i << " " << count[i] << endl;
    if(i>=1) morezero+=count[i];
  }
  cout << "One or more: " << morezero << endl;
  cout << "One or more fraction: " << (double)morezero/rounds << endl;
}

Next I calculated this by looking at the number of combinations calculating the fraction of combinations containing at least one target and total number of combinations:

from math import comb
import sys
def totalcomb(n, k, t):
        return comb(n,k)
def targetcomb(n, k, t):

        total = 0
        total += comb(t,k)
        for i in range(k-1,0,-1): #(i=k-1;i>0;i--)
                # print(t," ",i, ", ", n, " ", k-i)
                # Ways of choosing i items out of t. Multiply by
                total += comb(t,i)*comb(n-t,k-i)
        return total

n = int(sys.argv[1])
k = int(sys.argv[2])
t = int(sys.argv[3])
print (targetcomb(n,k,t)/totalcomb(n,k,t))
print (targetcomb(n,k,t))
print (totalcomb(n,k,t))

Then in terms of probability, using the probability that each draw does not contain a target:

from math import comb
import sys

def tprb(n, k, t):

        totalp = 1

        for i in range(0,k,1):
                p = 1 - (t / (n - i))
                totalp = totalp * p

        return 1-totalp

n = int(sys.argv[1])
k = int(sys.argv[2])
t = int(sys.argv[3])

print (tprb(n,k,t))

All approaches give the same answer, the last is the least computationally taxing…

Illumina iSeq (FireFly) IC Images

Comments will be posted over on the substack.