## N choose K, with T targets. Probability of choosing at least one target

Looking at n choose k, but you want to get at least one of a target t. For example you have a bag of blue marbles, which contains some number of red marbles. N is the total number of marbles in the bag. K is the number of marbles being chosen. T is the number of red marbles.

This is probably well known, but I worked through it a few different ways anyway…

First a simulation (C++):

``````#include <iostream>
#include <vector>

using namespace std;

int main(int argc,char **argv) {

int n = 3000;
int k = 50;
int t = 30; // targets

vector<int> items(n,0);

for(int i=0;i<t;i++) {
int itm=rand()%items.size();
for(;items[itm] !=0;) itm = rand()%items.size();
items[itm] = 1;
//items[i] = 1;
}

//cout << "items: ";
//for(int i=0;i<items.size();i++) cout << items[i] << " ";
//cout << endl;

vector<int> count(t,0);

// rounds of choosing
int rounds=100000;
for(int j=0;j<rounds;j++) {
cout << "round: " << j << endl;
int tcount=0;

vector<int> selected;
for(int i=0;i<k;i++) {

int itm = rand()%items.size();
for(;std::count(selected.begin(), selected.end(), itm);) itm = rand()%items.size();
selected.push_back(itm);
//cout << "selected: " << itm << " " << items[itm] << endl;
if(i%100000==0) cout << i << endl;
}

//count items on target
for(int i=0;i<selected.size();i++) {
if(items[selected[i]] == 1) tcount++;
}
cout << "tcount: " << tcount << endl;

count[tcount]++;
}

int morezero=0;
for(int i=0;i<t;i++) {
if((count[i] > 0) || (i==0)) cout << i << " " << count[i] << endl;
if(i>=1) morezero+=count[i];
}
cout << "One or more: " << morezero << endl;
cout << "One or more fraction: " << (double)morezero/rounds << endl;
}``````

Next I calculated this by looking at the number of combinations calculating the fraction of combinations containing at least one target and total number of combinations:

``````from math import comb
import sys
def totalcomb(n, k, t):
return comb(n,k)
def targetcomb(n, k, t):

total = 0
total += comb(t,k)
for i in range(k-1,0,-1): #(i=k-1;i>0;i--)
# print(t," ",i, ", ", n, " ", k-i)
# Ways of choosing i items out of t. Multiply by
total += comb(t,i)*comb(n-t,k-i)

n = int(sys.argv)
k = int(sys.argv)
t = int(sys.argv)
print (targetcomb(n,k,t)/totalcomb(n,k,t))
print (targetcomb(n,k,t))
print (totalcomb(n,k,t))``````

Then in terms of probability, using the probability that each draw does not contain a target:

``````from math import comb
import sys

def tprb(n, k, t):

totalp = 1

for i in range(0,k,1):
p = 1 - (t / (n - i))
totalp = totalp * p

return 1-totalp

n = int(sys.argv)
k = int(sys.argv)
t = int(sys.argv)

print (tprb(n,k,t))``````

All approaches give the same answer, the last is the least computationally taxing…