# Public and private liquor prices

Find this notebook on the web at
<a class="quarto-xref" href="https://resampling-stats.github.io/latest-python/probability_theory_3.html#nte-liquor_prices">Note&nbsp;<span>12.8</span></a>.

This notebook asks the question whether the difference in the means of
private and government-specified prices of a particular whiskey could
plausibly have come about as a result of random sampling.

In [None]:
# Import the array library
import numpy as np
# Create a NumPy random number generator.  No seed is given, so each
# run of the notebook draws a different sequence of random numbers.
rnd = np.random.default_rng()

# Import the plotting library
import matplotlib.pyplot as plt

In [None]:
# Number of simulation trials.
n_trials = 10000

# Array to store the mean difference from each trial.
fake_diffs = np.zeros(n_trials)

# Observed prices (in dollars) in the private-price group.
priv = np.array([
    4.82, 5.29, 4.89, 4.95, 4.55, 4.90, 5.25, 5.30, 4.29, 4.85, 4.54, 4.75,
    4.85, 4.85, 4.50, 4.75, 4.79, 4.85, 4.79, 4.95, 4.95, 4.75, 5.20, 5.10,
    4.80, 4.29])

# Observed prices (in dollars) in the government-price group.
govt = np.array([
    4.65, 4.55, 4.11, 4.15, 4.20, 4.55, 3.80, 4.00, 4.19, 4.75, 4.74, 4.50,
    4.10, 4.00, 5.05, 4.20])

# The difference in means that we actually observed.
actual_diff = np.mean(priv) - np.mean(govt)

# Join the two arrays of data into one array.
both = np.concatenate([priv, govt])

# Repeat the simulation trials
for i in range(n_trials):

    # Sample with replacement for the private group, drawing as many
    # values as there are in the real private group (26).  Use the
    # notebook's random number generator `rnd` rather than the legacy
    # global `np.random` state.
    fake_priv = rnd.choice(both, size=len(priv))

    # Sample with replacement for the govt. group, drawing as many
    # values as there are in the real govt. group (16).
    fake_govt = rnd.choice(both, size=len(govt))

    # Find the mean of the "private" group.
    p = np.mean(fake_priv)

    # Mean of the "govt." group
    g = np.mean(fake_govt)

    # Difference in the means
    diff = p - g

    # Keep score of the trials
    fake_diffs[i] = diff

# Graph of simulation results to compare with the observed result.
# The differences are in dollars; multiply by 100 so the histogram is in
# cents, matching the x-axis label and the actual difference in the title.
plt.hist(fake_diffs * 100)
plt.xlabel('Difference in average prices (cents)')
plt.title('Average price difference (Actual difference = '
f'{actual_diff * 100:.0f} cents)');