Python Statistics Module

Python Statistics Module: Complete Methods Guide with Examples

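Here’s a detailed explanation of the most commonly used functions in the Python statistics module, with three practical examples for each (fmean(), geometric_mean(), median_grouped(), covariance() and the NormalDist class do not get their own section here):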


1. Measures of Central Tendency

mean() – Arithmetic Average

python

import statistics as stats

# Example 1: Basic mean calculation
# mean() keeps integer data integral when the average is a whole number,
# so this prints 3 (an int), not 3.0.
data1 = [1, 2, 3, 4, 5]
result1 = stats.mean(data1)
print(f"Mean of {data1}: {result1}")  # Output: 3

# Example 2: Student grades
# The average of these integers is not whole, so a float is returned.
grades = [85, 92, 78, 96, 88]
average_grade = stats.mean(grades)
print(f"Average grade: {average_grade}")  # Output: 87.8

# Example 3: Temperature data
# Float input always yields a float; format it to one decimal for display.
temperatures = [22.5, 23.1, 21.8, 24.2, 22.9]
avg_temp = stats.mean(temperatures)
print(f"Average temperature: {avg_temp:.1f}°C")  # Output: 22.9°C

harmonic_mean() – Reciprocal Average

python

import statistics as stats

# Example 1: Basic harmonic mean
# harmonic_mean = n / sum(1/x) = 5 / (137/60) = 300/137 ~ 2.18978
data1 = [1, 2, 3, 4, 5]
result1 = stats.harmonic_mean(data1)
print(f"Harmonic mean of {data1}: {result1:.3f}")  # Output: 2.190

# Example 2: Speed calculation (rates)
# Averaging speeds over equal distances is the classic harmonic-mean case.
speeds = [60, 40, 80]  # km/h
avg_speed = stats.harmonic_mean(speeds)
print(f"Average speed: {avg_speed:.1f} km/h")  # Output: 55.4 km/h

# Example 3: Investment returns
# Growth factors compound multiplicatively, so their average is the
# *geometric* mean (Python 3.8+), not the harmonic mean used for rates above.
# The cube of the result equals 1.05 * 1.08 * 1.12.
returns = [1.05, 1.08, 1.12]  # 5%, 8%, 12% returns
geo_avg_return = stats.geometric_mean(returns)
print(f"Geometric average return: {geo_avg_return:.3f}")  # Output: 1.083

median() – Middle Value

python

import statistics as stats

# Example 1: Odd number of elements
# With an odd count the median is the single middle value of the sorted data.
data1 = [1, 3, 5, 7, 9]
result1 = stats.median(data1)
print(f"Median of {data1}: {result1}")  # Output: 5

# Example 2: Even number of elements
# With an even count the two middle values (3 and 4) are averaged, so the
# result need not be a member of the data.
data2 = [1, 2, 3, 4, 5, 6]
result2 = stats.median(data2)
print(f"Median of {data2}: {result2}")  # Output: 3.5

# Example 3: House prices (resistant to outliers)
# The 2,000,000 outlier drags the mean of this list up to 620,000, while the
# median stays at the middle value.
prices = [200000, 250000, 300000, 350000, 2000000]
median_price = stats.median(prices)
print(f"Median house price: ${median_price:,}")  # Output: $300,000

median_low() – Lower Middle Value

python

import statistics as stats

# Example 1: Even dataset - lower value
# median_low() never interpolates: for an even count it returns the smaller
# of the two middle values (20 and 30 here), so the result is always a
# member of the data.
data1 = [10, 20, 30, 40]
result1 = stats.median_low(data1)
print(f"Low median of {data1}: {result1}")  # Output: 20

# Example 2: Test scores
# Six values: the two middle ones are 75 and 80, and the lower one is returned.
scores = [65, 70, 75, 80, 85, 90]
low_median = stats.median_low(scores)
print(f"Low median score: {low_median}")  # Output: 75

# Example 3: Age groups
# The middle pair is 30 and 35; median() would give 32.5, median_low() gives 30.
ages = [25, 30, 35, 40]
low_median_age = stats.median_low(ages)
print(f"Low median age: {low_median_age}")  # Output: 30

median_high() – Higher Middle Value

python

import statistics as stats

# Example 1: Even dataset - higher value
# median_high() is the counterpart of median_low(): for an even count it
# returns the larger of the two middle values (20 and 30 here).
data1 = [10, 20, 30, 40]
result1 = stats.median_high(data1)
print(f"High median of {data1}: {result1}")  # Output: 30

# Example 2: Product prices
# Six values: the two middle ones are 25 and 30, and the higher one is returned.
prices = [15, 20, 25, 30, 35, 40]
high_median_price = stats.median_high(prices)
print(f"High median price: ${high_median_price}")  # Output: $30

# Example 3: Employee salaries
# The middle pair is 55,000 and 60,000; the result is an actual salary from
# the list rather than their average.
salaries = [50000, 55000, 60000, 65000]
high_median_salary = stats.median_high(salaries)
print(f"High median salary: ${high_median_salary:,}")  # Output: $60,000

mode() – Most Frequent Value

python

import statistics as stats

# Example 1: Clear single mode
# 2 occurs three times, more often than any other value.
data1 = [1, 1, 2, 2, 2, 3, 4]
result1 = stats.mode(data1)
print(f"Mode of {data1}: {result1}")  # Output: 2

# Example 2: Survey responses
# mode() also works on non-numeric (nominal) data such as strings;
# 'Yes' occurs three times.
responses = ['Yes', 'No', 'Yes', 'Yes', 'Maybe']
most_common = stats.mode(responses)
print(f"Most common response: {most_common}")  # Output: Yes

# Example 3: Product sales
# 'A' occurs four times, 'B' twice and 'C' once.
sales = ['A', 'B', 'A', 'C', 'A', 'B', 'A']
popular_product = stats.mode(sales)
print(f"Most popular product: {popular_product}")  # Output: A

multimode() – All Modes

python

import statistics as stats

# NOTE: multimode() was added in Python 3.8. It returns a list of every
# value tied for the highest count, in the order first encountered.

# Example 1: Multiple modes
# 1 and 2 each occur twice.
data1 = [1, 1, 2, 2, 3, 4]
result1 = stats.multimode(data1)
print(f"All modes of {data1}: {result1}")  # Output: [1, 2]

# Example 2: Voting results
# 'A' and 'B' each receive three votes, 'C' one.
votes = ['A', 'B', 'A', 'C', 'B', 'A', 'B']
all_modes = stats.multimode(votes)
print(f"All modal candidates: {all_modes}")  # Output: ['A', 'B']

# Example 3: Test scores with ties
# 85 and 90 each occur twice; 85 is listed first because it appears first.
scores = [85, 90, 85, 92, 90, 88]
common_scores = stats.multimode(scores)
print(f"Most common scores: {common_scores}")  # Output: [85, 90]

2. Measures of Spread/Dispersion

pstdev() – Population Standard Deviation

python

import statistics as stats

# pstdev() divides by n (the data is the whole population); stdev() divides
# by n - 1 and therefore gives a slightly larger value for the same data.

# Example 1: Basic population standard deviation
# Population variance is 2, so the result is sqrt(2).
data1 = [1, 2, 3, 4, 5]
result1 = stats.pstdev(data1)
print(f"Population stdev of {data1}: {result1:.3f}")  # Output: 1.414

# Example 2: Manufacturing consistency
# Squared deviations from the mean (10.05) sum to 0.175; sqrt(0.175 / 6) ~ 0.171.
# (0.187 would be the *sample* standard deviation, sqrt(0.175 / 5).)
measurements = [10.1, 10.2, 9.9, 10.0, 10.3, 9.8]
consistency = stats.pstdev(measurements)
print(f"Manufacturing consistency: ±{consistency:.3f} mm")  # Output: ±0.171 mm

# Example 3: Complete class test scores
# Mean is 82 and squared deviations sum to 550; sqrt(550 / 7) ~ 8.86.
class_scores = [78, 85, 92, 65, 90, 88, 76]
class_std = stats.pstdev(class_scores)
print(f"Class score spread: {class_std:.2f} points")  # Output: 8.86 points

pvariance() – Population Variance

python

import statistics as stats

# Example 1: Basic population variance
# Squared deviations sum to 10; 10 / 5 = 2. With integer data and a
# whole-number result, an int is returned, so this prints 2 rather than 2.0.
data1 = [1, 2, 3, 4, 5]
result1 = stats.pvariance(data1)
print(f"Population variance of {data1}: {result1}")  # Output: 2

# Example 2: Investment risk (entire portfolio)
# Mean is 0.058 and squared deviations sum to 0.01048; 0.01048 / 5 = 0.002096.
returns = [0.05, 0.08, -0.02, 0.12, 0.06]
portfolio_risk = stats.pvariance(returns)
print(f"Portfolio risk (variance): {portfolio_risk:.4f}")  # Output: 0.0021

# Example 3: Temperature variation (complete dataset)
# Squared deviations sum to 23.33; 23.33 / 6 ~ 3.89.
temps = [22, 24, 19, 25, 23, 21]
temp_variance = stats.pvariance(temps)
print(f"Temperature variance: {temp_variance:.2f}")  # Output: 3.89

stdev() – Sample Standard Deviation

python

import statistics as stats

# stdev() treats the data as a sample and divides by n - 1 (Bessel's
# correction); it needs at least two data points.

# Example 1: Basic sample standard deviation
# Sample variance is 10 / 4 = 2.5, so the result is sqrt(2.5).
data1 = [1, 2, 3, 4, 5]
result1 = stats.stdev(data1)
print(f"Sample stdev of {data1}: {result1:.3f}")  # Output: 1.581

# Example 2: Sample from larger population
# Mean is 170 and squared deviations sum to 58; sqrt(58 / 4) ~ 3.81.
sample_heights = [165, 170, 175, 168, 172]
height_std = stats.stdev(sample_heights)
print(f"Sample height deviation: {height_std:.1f} cm")  # Output: 3.8 cm

# Example 3: Market research sample
# Squared deviations sum to 58.83; sqrt(58.83 / 5) ~ 3.43.
customer_ages = [25, 30, 35, 28, 32, 29]
age_std = stats.stdev(customer_ages)
print(f"Sample age deviation: {age_std:.1f} years")  # Output: 3.4 years

variance() – Sample Variance

python

import statistics as stats

# variance() is the sample variance: squared deviations divided by n - 1.

# Example 1: Basic sample variance
# Squared deviations sum to 10; 10 / 4 = 2.5.
data1 = [1, 2, 3, 4, 5]
result1 = stats.variance(data1)
print(f"Sample variance of {data1}: {result1}")  # Output: 2.5

# Example 2: Quality control sample
# Mean is 100.06 and squared deviations sum to 0.172; 0.172 / 4 = 0.043.
sample_weights = [99.8, 100.2, 100.1, 99.9, 100.3]
weight_variance = stats.variance(sample_weights)
print(f"Sample weight variance: {weight_variance:.4f}")  # Output: 0.0430

# Example 3: Response time sample
# Mean is 2.05 and squared deviations sum to 0.175; 0.175 / 5 = 0.035.
# (0.029 would be the *population* variance, 0.175 / 6.)
response_times = [2.1, 1.8, 2.3, 1.9, 2.0, 2.2]
time_variance = stats.variance(response_times)
print(f"Response time variance: {time_variance:.3f}")  # Output: 0.035

3. Quantiles and Distribution

quantiles() – Data Division

python

import statistics as stats

# NOTE: quantiles() was added in Python 3.8 and returns the n - 1 cut points
# that split the data into n equal-probability groups. The outputs below are
# for the default method='exclusive', which treats the data as a sample and
# can place cut points further out than method='inclusive' would.

# Example 1: Quartiles (4 equal parts)
# (With method='inclusive' the same data gives [3.25, 5.5, 7.75].)
data1 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
quartiles = stats.quantiles(data1, n=4)
print(f"Quartiles of {data1}: {quartiles}")  # Output: [2.75, 5.5, 8.25]

# Example 2: Income distribution (quintiles)
incomes = [30000, 35000, 42000, 50000, 55000, 60000, 75000, 80000, 95000, 120000]
quintiles = stats.quantiles(incomes, n=5)
print(f"Income quintiles: {[f'${q:,.0f}' for q in quintiles]}")
# Output: ['$36,400', '$52,000', '$69,000', '$92,000']

# Example 3: Test score percentiles (deciles)
scores = [65, 70, 75, 78, 80, 82, 85, 88, 90, 92, 95, 96]
deciles = stats.quantiles(scores, n=10)
print(f"Score deciles: {[f'{d:.1f}' for d in deciles]}")
# Output: ['66.5', '73.0', '77.7', '80.4', '83.5', '87.4', '90.2', '93.2', '95.7']

4. Advanced Statistical Functions

correlation() – Pearson Correlation

python

import statistics as stats

# Example 1: Perfect positive correlation
x1 = [1, 2, 3, 4, 5]
y1 = [2, 4, 6, 8, 10]
corr1 = stats.correlation(x1, y1)
print(f"Correlation: {corr1:.2f}")  # Output: 1.00

# Example 2: Study time vs Grades
study_hours = [10, 15, 20, 25, 30]
exam_scores = [65, 70, 75, 85, 90]
study_corr = stats.correlation(study_hours, exam_scores)
print(f"Study-Score correlation: {study_corr:.2f}")  # Output: ~0.98

# Example 3: Temperature vs Sales
temperature = [20, 22, 25, 28, 30]
ice_cream_sales = [50, 65, 80, 95, 110]
temp_sales_corr = stats.correlation(temperature, ice_cream_sales)
print(f"Temperature-Sales correlation: {temp_sales_corr:.2f}")  # Output: ~1.00

linear_regression() – Simple Linear Regression

python

import statistics as stats

# Example 1: Basic linear relationship
x1 = [1, 2, 3, 4, 5]
y1 = [2, 4, 6, 8, 10]
slope1, intercept1 = stats.linear_regression(x1, y1)
print(f"Regression: y = {slope1:.1f}x + {intercept1:.1f}")  # Output: y = 2.0x + 0.0

# Example 2: Sales prediction
months = [1, 2, 3, 4, 5]  # Month numbers
sales = [100, 120, 150, 170, 190]  # Units sold
slope2, intercept2 = stats.linear_regression(months, sales)
month_6_prediction = slope2 * 6 + intercept2
print(f"Month 6 prediction: {month_6_prediction:.0f} units")  # Output: 210 units

# Example 3: House price prediction
size_sqft = [1000, 1200, 1500, 1800, 2000]
price = [200000, 240000, 300000, 350000, 380000]
slope3, intercept3 = stats.linear_regression(size_sqft, price)
price_2500 = slope3 * 2500 + intercept3
print(f"Predicted price for 2500 sqft: ${price_2500:,.0f}")  # Output: $447,500

5. Complete Practical Example

python

import statistics as stats

def comprehensive_analysis(data, name="Dataset"):
    """Print a descriptive-statistics report for ``data`` to standard output.

    The report lists, in order: a title line, the raw data, the item count,
    mean / median / mode, population and sample variance and standard
    deviation, and the three quartile cut points.

    Args:
        data: Sequence of numbers; ``len()`` is called on it and it is passed
            unchanged to each ``statistics`` function.
        name: Label shown in the report title.

    Returns:
        None. Any exception raised by a ``statistics`` function propagates
        after the preceding lines have already been printed.
    """
    print(f"\n=== {name} Analysis ===")
    print(f"Data: {data}")
    print(f"Count: {len(data)}")

    # One row per report line: (label, statistic function, format spec).
    # An empty spec renders the value exactly like a plain ``{value}`` field.
    report_rows = (
        # Central tendency
        ("Mean", stats.mean, ".2f"),
        ("Median", stats.median, ""),
        ("Mode", stats.mode, ""),
        # Spread
        ("Population Variance", stats.pvariance, ".2f"),
        ("Population Std Dev", stats.pstdev, ".2f"),
        ("Sample Variance", stats.variance, ".2f"),
        ("Sample Std Dev", stats.stdev, ".2f"),
    )
    # Compute and print row by row so the output order matches the table.
    for label, statistic, spec in report_rows:
        print(f"{label}: {statistic(data):{spec}}")

    # Quantiles: the default call yields exactly three quartile cut points.
    first_q, second_q, third_q = stats.quantiles(data)
    print(f"Quartiles - Q1: {first_q:.2f}, Q2: {second_q:.2f}, Q3: {third_q:.2f}")

# Usage example: ten grades with repeats (85 appears three times, so mode()
# has a single clear winner); the report is printed to standard output.
student_grades = [85, 92, 78, 96, 88, 85, 90, 78, 92, 85]
comprehensive_analysis(student_grades, "Student Grades")

This guide provides practical examples for the most commonly used functions in the statistics module, making it easy to apply these statistical functions to real-world data analysis problems!

Similar Posts

Leave a Reply

Your email address will not be published. Required fields are marked *