Python Statistics Module
Python Statistics Module: Complete Methods Guide with Examples
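Here is a detailed explanation of the most commonly used functions in Python's statistics module, with three practical examples for each. Note that correlation() and linear_regression() require Python 3.10 or later.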
1. Measures of Central Tendency
mean() – Arithmetic Average
python
import statistics as stats
# Example 1: Basic mean calculation
# With int data and an exact result, mean() returns an int, not a float.
data1 = [1, 2, 3, 4, 5]
result1 = stats.mean(data1)
print(f"Mean of {data1}: {result1}")  # Output: 3
# Example 2: Student grades
# The mean of ints that is not a whole number comes back as a float.
grades = [85, 92, 78, 96, 88]
average_grade = stats.mean(grades)
print(f"Average grade: {average_grade}")  # Output: 87.8
# Example 3: Temperature data
temperatures = [22.5, 23.1, 21.8, 24.2, 22.9]
avg_temp = stats.mean(temperatures)
print(f"Average temperature: {avg_temp:.1f}°C")  # Output: 22.9°C
harmonic_mean() – Reciprocal Average
python
import statistics as stats
# Example 1: Basic harmonic mean
data1 = [1, 2, 3, 4, 5]
result1 = stats.harmonic_mean(data1)
print(f"Harmonic mean of {data1}: {result1:.3f}")  # Output: 2.190
# Example 2: Speed calculation (rates)
# Assuming the same distance is covered at each speed, the harmonic mean
# is the true average speed over the whole trip.
speeds = [60, 40, 80]  # km/h
avg_speed = stats.harmonic_mean(speeds)
print(f"Average speed: {avg_speed:.1f} km/h")  # Output: 55.4 km/h
# Example 3: Investment returns
# Growth factors compound multiplicatively, so their average is the
# geometric mean; harmonic_mean() would not give the value this label names.
returns = [1.05, 1.08, 1.12]  # 5%, 8%, 12% returns
geo_avg_return = stats.geometric_mean(returns)
print(f"Geometric average return: {geo_avg_return:.3f}")  # Output: 1.083
median() – Middle Value
python
import statistics as stats
# --- Sample data ---
data1 = [1, 3, 5, 7, 9]  # odd number of values
data2 = [1, 2, 3, 4, 5, 6]  # even number of values
prices = [200000, 250000, 300000, 350000, 2000000]  # one extreme outlier

# Example 1: with an odd count the median is the single middle value.
result1 = stats.median(data1)
print(f"Median of {data1}: {result1}")  # Output: 5

# Example 2: with an even count the two middle values are averaged.
result2 = stats.median(data2)
print(f"Median of {data2}: {result2}")  # Output: 3.5

# Example 3: house prices - the 2,000,000 outlier does not move the median.
median_price = stats.median(prices)
print(f"Median house price: ${median_price:,}")  # Output: $300,000
median_low() – Lower Middle Value
python
import statistics as stats
# --- Sample data (every list has an even number of values) ---
data1 = [10, 20, 30, 40]
scores = [65, 70, 75, 80, 85, 90]
ages = [25, 30, 35, 40]

# Example 1: the smaller of the two middle values (20 and 30) is returned.
result1 = stats.median_low(data1)
print(f"Low median of {data1}: {result1}")  # Output: 20

# Example 2: test scores - the middle pair is 75 and 80.
low_median = stats.median_low(scores)
print(f"Low median score: {low_median}")  # Output: 75

# Example 3: age groups - the middle pair is 30 and 35.
low_median_age = stats.median_low(ages)
print(f"Low median age: {low_median_age}")  # Output: 30
median_high() – Higher Middle Value
python
import statistics as stats
# --- Sample data (every list has an even number of values) ---
data1 = [10, 20, 30, 40]
prices = [15, 20, 25, 30, 35, 40]
salaries = [50000, 55000, 60000, 65000]

# Example 1: the larger of the two middle values (20 and 30) is returned.
result1 = stats.median_high(data1)
print(f"High median of {data1}: {result1}")  # Output: 30

# Example 2: product prices - the middle pair is 25 and 30.
high_median_price = stats.median_high(prices)
print(f"High median price: ${high_median_price}")  # Output: $30

# Example 3: employee salaries - the middle pair is 55000 and 60000.
high_median_salary = stats.median_high(salaries)
print(f"High median salary: ${high_median_salary:,}")  # Output: $60,000
mode() – Most Frequent Value
python
import statistics as stats
# --- Sample data ---
data1 = [1, 1, 2, 2, 2, 3, 4]  # 2 appears three times
responses = ['Yes', 'No', 'Yes', 'Yes', 'Maybe']  # works on non-numeric data too
sales = ['A', 'B', 'A', 'C', 'A', 'B', 'A']  # 'A' appears four times

# Example 1: numeric data with one clear winner.
result1 = stats.mode(data1)
print(f"Mode of {data1}: {result1}")  # Output: 2

# Example 2: survey responses.
most_common = stats.mode(responses)
print(f"Most common response: {most_common}")  # Output: Yes

# Example 3: product sales.
popular_product = stats.mode(sales)
print(f"Most popular product: {popular_product}")  # Output: A
multimode() – All Modes
python
import statistics as stats
# --- Sample data (each list has a tie for the most frequent value) ---
data1 = [1, 1, 2, 2, 3, 4]
votes = ['A', 'B', 'A', 'C', 'B', 'A', 'B']
scores = [85, 90, 85, 92, 90, 88]

# Example 1: 1 and 2 both occur twice, so both are returned.
result1 = stats.multimode(data1)
print(f"All modes of {data1}: {result1}")  # Output: [1, 2]

# Example 2: voting results - 'A' and 'B' each received three votes.
all_modes = stats.multimode(votes)
print(f"All modal candidates: {all_modes}")  # Output: ['A', 'B']

# Example 3: test scores - 85 and 90 each occur twice.
common_scores = stats.multimode(scores)
print(f"Most common scores: {common_scores}")  # Output: [85, 90]
2. Measures of Spread/Dispersion
pstdev() – Population Standard Deviation
python
import statistics as stats
# Example 1: Basic population standard deviation
data1 = [1, 2, 3, 4, 5]
result1 = stats.pstdev(data1)
print(f"Population stdev of {data1}: {result1:.3f}")  # Output: 1.414
# Example 2: Manufacturing consistency
# pstdev() divides by n; the sample version stdev() would give 0.187 here.
measurements = [10.1, 10.2, 9.9, 10.0, 10.3, 9.8]
consistency = stats.pstdev(measurements)
print(f"Manufacturing consistency: ±{consistency:.3f} mm")  # Output: ±0.171 mm
# Example 3: Complete class test scores
# The whole class is measured, so the population formula applies.
class_scores = [78, 85, 92, 65, 90, 88, 76]
class_std = stats.pstdev(class_scores)
print(f"Class score spread: {class_std:.2f} points")  # Output: 8.86 points
pvariance() – Population Variance
python
import statistics as stats
# Example 1: Basic population variance
# With int data and an exact result, pvariance() returns an int.
data1 = [1, 2, 3, 4, 5]
result1 = stats.pvariance(data1)
print(f"Population variance of {data1}: {result1}")  # Output: 2
# Example 2: Investment risk (entire portfolio)
returns = [0.05, 0.08, -0.02, 0.12, 0.06]
portfolio_risk = stats.pvariance(returns)
print(f"Portfolio risk (variance): {portfolio_risk:.4f}")  # Output: 0.0021
# Example 3: Temperature variation (complete dataset)
temps = [22, 24, 19, 25, 23, 21]
temp_variance = stats.pvariance(temps)
print(f"Temperature variance: {temp_variance:.2f}")  # Output: 3.89
stdev() – Sample Standard Deviation
python
import statistics as stats
# Example 1: Basic sample standard deviation
# stdev() divides by n - 1, so it is larger than pstdev() on the same data.
data1 = [1, 2, 3, 4, 5]
result1 = stats.stdev(data1)
print(f"Sample stdev of {data1}: {result1:.3f}")  # Output: 1.581
# Example 2: Sample from larger population
sample_heights = [165, 170, 175, 168, 172]
height_std = stats.stdev(sample_heights)
print(f"Sample height deviation: {height_std:.1f} cm")  # Output: 3.8 cm
# Example 3: Market research sample
customer_ages = [25, 30, 35, 28, 32, 29]
age_std = stats.stdev(customer_ages)
print(f"Sample age deviation: {age_std:.1f} years")  # Output: 3.4 years
variance() – Sample Variance
python
import statistics as stats
# Example 1: Basic sample variance
data1 = [1, 2, 3, 4, 5]
result1 = stats.variance(data1)
print(f"Sample variance of {data1}: {result1}")  # Output: 2.5
# Example 2: Quality control sample
sample_weights = [99.8, 100.2, 100.1, 99.9, 100.3]
weight_variance = stats.variance(sample_weights)
print(f"Sample weight variance: {weight_variance:.4f}")  # Output: 0.0430
# Example 3: Response time sample
# variance() divides by n - 1; the population version would give 0.029.
response_times = [2.1, 1.8, 2.3, 1.9, 2.0, 2.2]
time_variance = stats.variance(response_times)
print(f"Response time variance: {time_variance:.3f}")  # Output: 0.035
3. Quantiles and Distribution
quantiles() – Data Division
python
import statistics as stats
# quantiles() returns the n - 1 cut points that split the data into n groups.
# All outputs below are for the default method='exclusive'.
# Example 1: Quartiles (4 equal parts)
data1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
quartiles = stats.quantiles(data1, n=4)
print(f"Quartiles of {data1}: {quartiles}")  # Output: [2.75, 5.5, 8.25]
# (method='inclusive' would give [3.25, 5.5, 7.75] for the same data.)
# Example 2: Income distribution (quintiles)
incomes = [30000, 35000, 42000, 50000, 55000, 60000, 75000, 80000, 95000, 120000]
quintiles = stats.quantiles(incomes, n=5)
print(f"Income quintiles: {[f'${q:,.0f}' for q in quintiles]}")
# Output: ['$36,400', '$52,000', '$69,000', '$92,000']
# Example 3: Test score deciles (10 equal parts)
scores = [65, 70, 75, 78, 80, 82, 85, 88, 90, 92, 95, 96]
deciles = stats.quantiles(scores, n=10)
print(f"Score deciles: {[f'{d:.1f}' for d in deciles]}")
# Output: ['66.5', '73.0', '77.7', '80.4', '83.5', '87.4', '90.2', '93.2', '95.7']
4. Advanced Statistical Functions
correlation() – Pearson Correlation
python
import statistics as stats
# Example 1: Perfect positive correlation
x1 = [1, 2, 3, 4, 5]
y1 = [2, 4, 6, 8, 10]
corr1 = stats.correlation(x1, y1)
print(f"Correlation: {corr1:.2f}") # Output: 1.00
# Example 2: Study time vs Grades
study_hours = [10, 15, 20, 25, 30]
exam_scores = [65, 70, 75, 85, 90]
study_corr = stats.correlation(study_hours, exam_scores)
print(f"Study-Score correlation: {study_corr:.2f}") # Output: ~0.98
# Example 3: Temperature vs Sales
temperature = [20, 22, 25, 28, 30]
ice_cream_sales = [50, 65, 80, 95, 110]
temp_sales_corr = stats.correlation(temperature, ice_cream_sales)
print(f"Temperature-Sales correlation: {temp_sales_corr:.2f}") # Output: ~1.00
linear_regression() – Simple Linear Regression
python
import statistics as stats
# Example 1: Basic linear relationship
x1 = [1, 2, 3, 4, 5]
y1 = [2, 4, 6, 8, 10]
slope1, intercept1 = stats.linear_regression(x1, y1)
print(f"Regression: y = {slope1:.1f}x + {intercept1:.1f}") # Output: y = 2.0x + 0.0
# Example 2: Sales prediction
months = [1, 2, 3, 4, 5] # Month numbers
sales = [100, 120, 150, 170, 190] # Units sold
slope2, intercept2 = stats.linear_regression(months, sales)
month_6_prediction = slope2 * 6 + intercept2
print(f"Month 6 prediction: {month_6_prediction:.0f} units") # Output: 210 units
# Example 3: House price prediction
size_sqft = [1000, 1200, 1500, 1800, 2000]
price = [200000, 240000, 300000, 350000, 380000]
slope3, intercept3 = stats.linear_regression(size_sqft, price)
price_2500 = slope3 * 2500 + intercept3
print(f"Predicted price for 2500 sqft: ${price_2500:,.0f}") # Output: $447,500
5. Complete Practical Example
python
import statistics as stats
def comprehensive_analysis(data, name="Dataset"):
    """Print a report of descriptive statistics for ``data``.

    The report covers central tendency (mean, median, mode), spread
    (population and sample variance / standard deviation) and quartiles.

    Args:
        data: A sequence of real numbers. It is passed to ``len()`` and read
            several times, so a one-shot iterator will not work.
        name: Label shown in the report header.

    Returns:
        None. The report is written to standard output.

    Raises:
        statistics.StatisticsError: If ``data`` has fewer than two values;
            the sample variance and sample standard deviation need at
            least two data points.
    """
    print(f"\n=== {name} Analysis ===")
    print(f"Data: {data}")
    print(f"Count: {len(data)}")
    # Central Tendency
    print(f"Mean: {stats.mean(data):.2f}")
    print(f"Median: {stats.median(data)}")
    # On a tie, mode() reports the first of the most frequent values.
    print(f"Mode: {stats.mode(data)}")
    # Spread
    print(f"Population Variance: {stats.pvariance(data):.2f}")
    print(f"Population Std Dev: {stats.pstdev(data):.2f}")
    print(f"Sample Variance: {stats.variance(data):.2f}")
    print(f"Sample Std Dev: {stats.stdev(data):.2f}")
    # Quantiles: the default n=4 yields the three quartile cut points.
    q1, q2, q3 = stats.quantiles(data)
    print(f"Quartiles - Q1: {q1:.2f}, Q2: {q2:.2f}, Q3: {q3:.2f}")
# Demo: run the full report on a small set of exam grades.
student_grades = [85, 92, 78, 96, 88, 85, 90, 78, 92, 85]
comprehensive_analysis(student_grades, name="Student Grades")
This guide provides practical examples for the most commonly used functions in the statistics module, making it easy to apply them to real-world data analysis problems!