Les 10 - Empirical probability
Python dictionaries and simulation of probabilistic experiments
Downloads
Powerpoint
Excercises
Solutions
Excercises
import random
### GENERALIZING EVENT FROM ANY SAMPLE SPACE
# We will start by writing a function that returns random sample
# from any given sample space
# IDEA: count the number of elements in sample space,
# use random number for given range to select one sample
def event(sample_space):
sample_index = random.randint(0, len(sample_space)-1)
sample = list(sample_space)[sample_index]
return sample
# To test the function we create 2 sample spaces, one for die and one for coin
dice_sample_space = {1, 2, 3, 4, 5, 6}
coin_sample_space = {"H", "T"}
# Time to test the function. Uncomment the line and run the program
#s1 = event(dice_sample_space)
#s2 = event(dice_sample_space)
#s3 = event(coin_sample_space)
#s4 = event(coin_sample_space)
#print(s1, s2, s3, s4)
# EXERCISE 1:
# Define your own sample space (e.g. 4 sided die) or use one from the above
# Run event() function in a loop 100 times and print the return values
# ... write your code here ...
### GENERALIZING EVENTs with functional programming
# Let's generalize even further.
# Using functional programming, we write a function that returns event generating function
def make_event(sample_space):
def my_evt():
sample_index = random.randint(0, len(sample_space)-1)
sample = list(sample_space)[sample_index]
return sample
return my_evt
# in case we destroyed test sample spaces, we put them again (not needed)
dice_sample_space = {1, 2, 3, 4, 5, 6}
coin_sample_space = {"H", "T"}
# We create one 6 sided die function above
d6 = make_event(dice_sample_space)
# Time to test the function. Uncomment the line and run the program
#print("Testing function ... The result returned is: ", d6())
# or just type: d6() in console
# EXERCISE 2:
# call d6() function in a loop 100 times and print the results
# ... write your code here ...
# EXERCISE 3a:
# Let's use the same function to make a coin. Finish the code below
#c2 = make_event(...)
# ... write your code here ...
# EXERCISE 3b:
# call c2() function in a loop 100 times and print the results
# ... write your code here ...
### ADDING VARYING WEIGHTs
# The function used above assumes "fair" selection, with all elements conforming to UNIFORM DISTRIBUTION
# However, we want to simulate biased dice and coin. Therefore, we have to introduce variable weights - chances
# for picking each element
# Before doing that, let's create two helper functions ...
# We can use this function to convert any dictionary to function
def dict2fun(d):
def f_(k):
return d[k] if k in d else 0
return f_
# Also, it is common to make mistakes when using different weights.
# Therefore, we create a function to test if all probabilities add up to 1.0
# NOTE: we assume that argument weights is always a function and skip type checking
def dist_test(sample_space, weights):
sum = 0
for el in sample_space:
sum += weights(el)
return sum
# Time to extend make_event function to incorporate weights
def make_weight_event(sample_space, weights):
# before creating a function we will test distribution
# AND convert weights to function if given as dictionary
ERROR_TOLERANCE = 0.01 # this could be skipped, but we add this to allow small imprecision with fractions
if type(weights) == dict:
weights = dict2fun(weights)
if abs(dist_test(sample_space, weights)-1) > ERROR_TOLERANCE:
print ("Error! Given weights exceed tolerance levels for this sample space!")
return None
# Finally, here we create the function
# IDEA: calculate cumulative probability; then pick random value [0-1)
# finally, return one element with cumulative probability corresponding to the picked value
def my_w_evt():
selection = random.random()
sum = 0
for el in sample_space:
sum += weights(el)
if sum > selection:
return el
return my_w_evt
# Alternatively, you can try using built-in random.choices() function
# Time to test our function ...
# First let's create 2 dictionaries containing weights and 2 lambda functions with weights
die_dict1 = {1: 1/6, 2: 1/6, 3: 1/6, 4: 1/6, 5: 1/6, 6: 1/6}
die_dict2 = {1: 1/10, 2: 1/10, 3: 1/10, 4: 1/10, 5: 1/10, 6: 1/2}
l1 = lambda x: 1/6
l2 = lambda x: 1/4 if x == 1 or x == 2 else 1/8
# Uncomment the line(s) and run the program
#d6 = make_weight_event(dice_sample_space, die_dict1)
#print(d6())
#d6 = make_weight_event(dice_sample_space, die_dict2)
#print(d6())
#d6 = make_weight_event(dice_sample_space, l1)
#print(d6())
#d6 = make_weight_event(dice_sample_space, l2)
#print(d6())
# EXERCISE 4:
# Use make_weight_event() to generate two coins.
# The first one should have equal chances to return "HEAD" or "TAIL",
# while the second one should be biased to return "HEAD" 80% time
#c2_fair = ... ADD YOUR CODE HERE ...
#c2_bias = ... ADD YOUR CODE HERE ...
### GENERIC EXPERIMENTS
# Next, we need a function that can simulate statistical experiment.
# It should be able to take any generalized sample function as argument,
# execute it N times, counting occurrences and returning them as dictionary with frequencies
def experiment(event, N = 10000):
res_dict = {}
for i in range(N):
sample = event()
if sample in res_dict:
res_dict[sample] += 1
else:
res_dict[sample] = 1
return res_dict
# Testing experiment() function is much easier with visualization
# Here, we add one more function that uses matplotlib to create bar chart
def bar_graph(dict):
import matplotlib.pyplot as plt
height = [v for v in dict.values()]
bars = [k for k in dict.keys()]
x_pos = range(0,len(bars))
plt.bar(x_pos, height, align='center')
plt.xticks(x_pos, bars)
plt.margins(0.05, 0)
plt.show()
# Time to test the function. Uncomment the line(s) and run the program
EXPERIMENT_NO = 100000
#d6 = make_weight_event(dice_sample_space, lambda x: 1/4 if x == 1 or x == 2 else 1/8)
#freq = experiment(d6, EXPERIMENT_NO)
#print(freq)
#bar_graph(freq)
# EXERCISE 5:
# Use c2_bias you created in exercise 4 as argument for experiment() function
# Run experiment() for biased coin EXPERIMENT_NO times and print frequencies and display bar chart
# ... write your code here ...
### CALCULATING PROBABILITY
# Above, we used statistical experiments to determine probability and probability distribution
# In the same manner we can use analytical method to calculate probability
# Here, we will generalize such function
def a_prob(sample_space, outcome):
# NOTE: since python doesn't support argument overloading
# and we want this function to take as OUTCOME either another function or a set
# we have to check if OUTCOME is function first. To that end we use callable() function
# that returns TRUE if OUTCOME is function and we store it in boolean variable IS_FUNCTION
IS_FUNCTION = callable(outcome)
favorable = 0
for el in sample_space:
if IS_FUNCTION:
#if outcome was function, we just call it here
if outcome(el):
favorable += 1
else:
# else, if outcome was set, we test if element exists in a set
if el in outcome:
favorable += 1
probability = favorable / len(sample_space)
return probability
# To test a_prob() function, lets create some outcomes both as lambda functions and as sets
# We may want to find out all probabilities for: f1 - even numbers, f2 - values greater than 4,
# f3 - when result is only 6, set1 - also when result is only 6,
# and set2 - all outcomes that can be either 2 or 5
f1 = lambda x: x%2==0
f2 = lambda x: x>4
f3 = lambda x: x==6
set1 = {6}
set2 = {2,5}
p1 = a_prob(dice_sample_space, f1)
p2 = a_prob(dice_sample_space, f2)
p3 = a_prob(dice_sample_space, f3)
p4 = a_prob(dice_sample_space, set1)
p5 = a_prob(dice_sample_space, set2)
# we can also write our desired outcome directly as argument:
p6 = a_prob(dice_sample_space, lambda x: x<3)
p7 = a_prob(dice_sample_space, {1,3,4})
# Uncomment the line below to print probabilities
#print("Probability that we get even number: ", p1)
#print("Probability that we get number greater than 4: ", p2)
#print("Probability that we get 6: ", p3)
#print("Probability that we get 6: ", p4)
#print("Probability that we get 2 or 5: ", p5)
#print("Probability that we get number lower than 3: ", p6)
#print("Probability that we get either 1 or 3 or 4: ", p7)
# EXERCISE 6:
# Using a_prob() function, answer the following questions:
# 1) What is the probability of getting either 1 or 6 from rolling one 6 sided die?
# 2) What is the probability of getting TAIL when flipping fair 2 sided coin?
# ... write your code here ...
### CALCULATING PROBABILITY WITH BIAS
# Calculating probability with uniform distribution is not difficult.
# However, introducing bias / weights complicates the problem.
# We can extend a_prob() function to handle arbitrary weights
def aw_prob(sample_space, weights, outcome):
# first, we need two checks: is outcome function or set AND are weights dictionary or function
IS_FUNCTION = callable(outcome)
if type(weights) == dict:
weights = dict2fun(weights)
# as we are dealing with DISJOINT EVENTS ...
# ... we can calculate probability simply by summing probabilities of all favorable outcomes
probability = 0
for el in sample_space:
if IS_FUNCTION:
if outcome(el):
probability += weights(el)
else:
if el in outcome:
probability += weights(el)
return probability
# Let's run few tests ...
# This is a "fair" dice (uniform distribution, all events have 1/6 prob).
# We want to know all outcomes where the number is even (should be 0.5)
p10 = aw_prob(dice_sample_space, lambda x: 1/6, f1)
# Next we use "biased" die that has 50% chance of returning 6, and the equal opportunity for the remaining outcomes ( (1-0.5)/5 = 1/10 = 0.1)
# Therefore, for outcome '1' we must get 10% probability and for outcome '6' we must get 50% probability
p11 = aw_prob(dice_sample_space, die_dict2, {1})
p12 = aw_prob(dice_sample_space, die_dict2, {6})
# For the same "biased" die we want to know the probability of getting an even number
p13 = aw_prob(dice_sample_space, die_dict2, f1)
# And also to get either 2 or 6
p14 = aw_prob(dice_sample_space, die_dict2, {2,6})
# Uncomment the line to print probabilities
#print(p10, p11, p12, p13, p14)
# FINAL EXERCISE:
# It is your turn now. :)
# Assume that you are given a biased die with 1/3 probability to get either 1 or 2 and 1/12 prob. for the remaining numbers
w = lambda x: 1/3 if x == 1 or x == 2 else 1/12
# Answer the following questions:
# 1) What is the probability of getting an even number?
# 2) What is the probability of getting a number smaller than 4?
# 3) What is the probability of getting either 2 or 4?
# 4) Confirm all your answers through statistical experiment and plotting a bar chart
# ... write your code here ...
# BONUS:
# Write a decorator function for aw_prob so that result is given as percentage rounded to 2 decimal values
# ... write your code here ...