Introduction to Linear Functions
Linear functions are fundamental to data science, forming the basis of regression analysis, trend analysis, and many machine learning algorithms. Understanding how to plot and interpret linear functions is essential for visualizing relationships between variables and communicating insights.
Key Concepts
- Linear Equation: y = mx + b (slope-intercept form)
- Slope (m): Rate of change, steepness of the line
- Intercept (b): Where the line crosses the y-axis
- Domain: Set of x-values (independent variable)
- Range: Set of y-values (dependent variable)
- Correlation: Strength of linear relationship
1. Basics of Linear Functions
The Linear Equation
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Set style: seaborn dark-grid look for every figure, and the "husl"
# palette so multiple overlaid lines get well-separated hues.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
def plot_linear_function(m, b, x_range=(-10, 10), title="Linear Function"):
    """Draw the line y = mx + b over ``x_range`` and mark both intercepts.

    Parameters
    ----------
    m : float
        Slope of the line.
    b : float
        y-intercept.
    x_range : tuple of float, optional
        (min, max) extent of the sampled x values.
    title : str, optional
        Title prefix shown above the equation.
    """
    xs = np.linspace(x_range[0], x_range[1], 100)
    ys = m * xs + b

    plt.figure(figsize=(10, 6))
    plt.plot(xs, ys, 'b-', linewidth=2, label=f'y = {m}x + {b}')

    # Reference axes through the origin.
    plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
    plt.grid(True, alpha=0.3)

    # Highlight where the line crosses the y-axis.
    plt.plot(0, b, 'ro', markersize=8, label=f'y-intercept: ({0}, {b})')

    # A non-horizontal line also crosses the x-axis at x = -b/m.
    if m != 0:
        x_cross = -b / m
        plt.plot(x_cross, 0, 'go', markersize=8, label=f'x-intercept: ({x_cross:.2f}, 0)')

    plt.xlabel('x', fontsize=12)
    plt.ylabel('y', fontsize=12)
    plt.title(f'{title}\n y = {m}x + {b}', fontsize=14)
    plt.legend()
    plt.axis('equal')
    plt.tight_layout()
    plt.show()


# Example plots
plot_linear_function(2, 3, title="Positive Slope")
plot_linear_function(-1.5, 5, title="Negative Slope")
plot_linear_function(0, 4, title="Zero Slope (Constant Function)")
Understanding Slope and Intercept
def explore_slope_intercept():
    """Show six panels illustrating how the slope changes a line's shape.

    A fixed intercept of 2 is combined with three positive and three
    negative slopes; each panel marks the y-intercept and annotates the
    slope's direction.
    """
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    xs = np.linspace(-5, 5, 100)

    # Different slopes with the same intercept.
    b = 2
    slope_values = [0.5, 1, 2, -0.5, -1, -2]

    for panel, m in zip(axes.flat, slope_values):
        panel.plot(xs, m * xs + b, 'b-', linewidth=2)
        panel.axhline(y=0, color='k', linestyle='-', alpha=0.3)
        panel.axvline(x=0, color='k', linestyle='-', alpha=0.3)
        panel.grid(True, alpha=0.3)
        panel.set_xlim(-5, 5)
        panel.set_ylim(-5, 5)
        panel.set_title(f'Slope = {m}, Intercept = {b}')
        panel.set_xlabel('x')
        panel.set_ylabel('y')

        # Mark the y-intercept.
        panel.plot(0, b, 'ro', markersize=6)

        # Annotate the direction of the slope.
        if m > 0:
            panel.text(2, 3, f'↗ Slope = {m}', fontsize=10, ha='center')
        elif m < 0:
            panel.text(2, -2, f'↘ Slope = {m}', fontsize=10, ha='center')
        else:
            panel.text(2, b, f'→ Slope = {m}', fontsize=10, ha='center')

    plt.suptitle('Effect of Slope on Linear Functions', fontsize=16)
    plt.tight_layout()
    plt.show()


explore_slope_intercept()
2. Plotting Multiple Linear Functions
Comparing Lines
def compare_linear_functions():
    """Overlay five lines with distinct slopes and intercepts on one figure."""
    xs = np.linspace(-5, 5, 100)

    # (slope, intercept, legend label, color) for each line to draw.
    specs = [
        (2, 1, 'y = 2x + 1', 'blue'),
        (1, 3, 'y = x + 3', 'green'),
        (-1, 2, 'y = -x + 2', 'red'),
        (0.5, -1, 'y = 0.5x - 1', 'orange'),
        (-2, -2, 'y = -2x - 2', 'purple'),
    ]

    plt.figure(figsize=(12, 8))
    for m, b, label, color in specs:
        plt.plot(xs, m * xs + b, label=label, color=color, linewidth=2)

    # Reference axes through the origin.
    plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
    plt.grid(True, alpha=0.3)
    plt.xlabel('x', fontsize=12)
    plt.ylabel('y', fontsize=12)
    plt.title('Comparison of Linear Functions', fontsize=14)
    plt.legend(loc='best')
    plt.axis('equal')
    plt.tight_layout()
    plt.show()


compare_linear_functions()
Family of Lines
def family_of_lines():
    """Plot two line families: varying slope (left) and varying intercept (right)."""
    fig, (ax_slope, ax_inter) = plt.subplots(1, 2, figsize=(14, 6))
    xs = np.linspace(-5, 5, 100)

    # Left panel: fixed intercept, sweep the slope.
    b = 2
    for m in [-2, -1, -0.5, 0, 0.5, 1, 2]:
        ax_slope.plot(xs, m * xs + b, linewidth=1.5, alpha=0.7, label=f'slope={m}')
    ax_slope.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    ax_slope.axvline(x=0, color='k', linestyle='-', alpha=0.3)
    ax_slope.grid(True, alpha=0.3)
    ax_slope.set_title('Family: Varying Slope, Fixed Intercept')
    ax_slope.set_xlabel('x')
    ax_slope.set_ylabel('y')
    ax_slope.legend(loc='best', fontsize=8)

    # Right panel: fixed slope, sweep the intercept.
    m = 1
    for b in [-3, -2, -1, 0, 1, 2, 3]:
        ax_inter.plot(xs, m * xs + b, linewidth=1.5, alpha=0.7, label=f'intercept={b}')
    ax_inter.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    ax_inter.axvline(x=0, color='k', linestyle='-', alpha=0.3)
    ax_inter.grid(True, alpha=0.3)
    ax_inter.set_title('Family: Fixed Slope, Varying Intercept')
    ax_inter.set_xlabel('x')
    ax_inter.set_ylabel('y')
    ax_inter.legend(loc='best', fontsize=8)

    plt.suptitle('Families of Linear Functions', fontsize=16)
    plt.tight_layout()
    plt.show()


family_of_lines()
3. Linear Regression Visualization
Simple Linear Regression
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
def plot_linear_regression():
    """Fit OLS to noisy synthetic data and visualize fit plus residuals.

    Left panel: scatter with the fitted regression line. Right panel:
    residuals versus predictions, annotated with R² and RMSE.

    Returns
    -------
    tuple
        ``(model, {'r2': ..., 'rmse': ...})`` for downstream reporting.
    """
    # Synthetic data drawn from y = 2x + 3 with Gaussian noise.
    np.random.seed(42)
    n_samples = 100
    x = np.random.uniform(-5, 5, n_samples)
    true_slope = 2
    true_intercept = 3
    noise = np.random.normal(0, 2, n_samples)
    y = true_slope * x + true_intercept + noise

    # Ordinary least squares fit (sklearn wants a 2-D design matrix).
    X = x.reshape(-1, 1)
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Scatter plus fitted line.
    axes[0].scatter(x, y, alpha=0.6, label='Data points')
    axes[0].plot(x, y_pred, 'r-', linewidth=2,
                 label=f'Regression line: y = {model.coef_[0]:.2f}x + {model.intercept_:.2f}')
    axes[0].axhline(y=0, color='k', linestyle='-', alpha=0.3)
    axes[0].axvline(x=0, color='k', linestyle='-', alpha=0.3)
    axes[0].set_xlabel('x', fontsize=12)
    axes[0].set_ylabel('y', fontsize=12)
    axes[0].set_title('Linear Regression Fit', fontsize=14)
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Residuals vs. predictions — should look patternless for a good fit.
    residuals = y - y_pred
    axes[1].scatter(y_pred, residuals, alpha=0.6)
    axes[1].axhline(y=0, color='r', linestyle='-', linewidth=2)
    axes[1].set_xlabel('Predicted values', fontsize=12)
    axes[1].set_ylabel('Residuals', fontsize=12)
    axes[1].set_title('Residual Plot', fontsize=14)
    axes[1].grid(True, alpha=0.3)

    # Annotate goodness-of-fit metrics.
    stats_text = f'R² = {r2_score(y, y_pred):.3f}\nRMSE = {np.sqrt(mean_squared_error(y, y_pred)):.3f}'
    axes[1].text(0.05, 0.95, stats_text, transform=axes[1].transAxes,
                 fontsize=10, verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    plt.tight_layout()
    plt.show()
    return model, {'r2': r2_score(y, y_pred), 'rmse': np.sqrt(mean_squared_error(y, y_pred))}


model, metrics = plot_linear_regression()
print(f"Model: y = {model.coef_[0]:.2f}x + {model.intercept_:.2f}")
print(f"R² Score: {metrics['r2']:.4f}")
print(f"RMSE: {metrics['rmse']:.4f}")
Confidence and Prediction Intervals
def plot_regression_intervals():
    """Plot an OLS fit with 95% confidence and prediction bands.

    The confidence band covers the mean response; the (wider) prediction
    band covers individual future observations. Standard errors use the
    classical simple-regression formulas with the t distribution.
    """
    from scipy import stats

    # Synthetic data: y = 1.5x + 2 plus Gaussian noise.
    np.random.seed(42)
    n = 50
    x = np.linspace(0, 10, n)
    true_slope = 1.5
    true_intercept = 2
    noise = np.random.normal(0, 1.5, n)
    y = true_slope * x + true_intercept + noise

    # Ordinary least squares fit.
    X = x.reshape(-1, 1)
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    # Residual standard error with n - m - 1 degrees of freedom.
    n = len(x)
    m = 1  # Number of predictors
    residual_std = np.sqrt(np.sum((y - y_pred)**2) / (n - m - 1))

    # Two-sided 95% critical value from the t distribution.
    t_value = stats.t.ppf(0.975, n - m - 1)

    # Confidence interval for the mean response at each x.
    se_mean = residual_std * np.sqrt(1/n + (x - x.mean())**2 / np.sum((x - x.mean())**2))
    ci_lower = y_pred - t_value * se_mean
    ci_upper = y_pred + t_value * se_mean

    # Prediction interval for individual observations (extra "1 +" term).
    se_pred = residual_std * np.sqrt(1 + 1/n + (x - x.mean())**2 / np.sum((x - x.mean())**2))
    pi_lower = y_pred - t_value * se_pred
    pi_upper = y_pred + t_value * se_pred

    plt.figure(figsize=(12, 8))
    plt.scatter(x, y, alpha=0.6, label='Data points')
    plt.plot(x, y_pred, 'r-', linewidth=2, label='Regression line')
    plt.fill_between(x, ci_lower, ci_upper, alpha=0.2, color='blue',
                     label='95% Confidence Interval')
    plt.fill_between(x, pi_lower, pi_upper, alpha=0.1, color='green',
                     label='95% Prediction Interval')
    plt.xlabel('x', fontsize=12)
    plt.ylabel('y', fontsize=12)
    plt.title('Linear Regression with Confidence and Prediction Intervals', fontsize=14)
    plt.legend(loc='best')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


plot_regression_intervals()
4. Advanced Linear Plots
Multiple Linear Regression Visualization
from mpl_toolkits.mplot3d import Axes3D
def plot_multiple_linear_regression():
    """Fit a two-feature OLS model and show the plane in 3D plus a contour map."""
    # Synthetic data from the plane y = 2*x1 + 3*x2 + 5 plus noise.
    np.random.seed(42)
    n = 200
    x1 = np.random.uniform(0, 10, n)
    x2 = np.random.uniform(0, 10, n)
    true_coef1 = 2
    true_coef2 = 3
    true_intercept = 5
    noise = np.random.normal(0, 3, n)
    y = true_coef1 * x1 + true_coef2 * x2 + true_intercept + noise

    # Fit on the stacked (n, 2) design matrix.
    X = np.column_stack([x1, x2])
    model = LinearRegression()
    model.fit(X, y)

    fig = plt.figure(figsize=(14, 6))

    # Left: 3D scatter with the fitted regression plane.
    ax_3d = fig.add_subplot(121, projection='3d')
    ax_3d.scatter(x1, x2, y, c=y, cmap='viridis', alpha=0.6)

    # Evaluate the fitted plane on a 20x20 grid.
    x1_grid, x2_grid = np.meshgrid(np.linspace(0, 10, 20), np.linspace(0, 10, 20))
    y_grid = model.intercept_ + model.coef_[0] * x1_grid + model.coef_[1] * x2_grid
    ax_3d.plot_surface(x1_grid, x2_grid, y_grid, alpha=0.3, color='red')
    ax_3d.set_xlabel('x₁')
    ax_3d.set_ylabel('x₂')
    ax_3d.set_zlabel('y')
    ax_3d.set_title('3D Multiple Linear Regression')

    # Right: same surface as filled contours.
    ax_2d = fig.add_subplot(122)
    contour = ax_2d.contourf(x1_grid, x2_grid, y_grid, levels=20, cmap='viridis')
    plt.colorbar(contour, ax=ax_2d, label='Predicted y')
    ax_2d.set_xlabel('x₁')
    ax_2d.set_ylabel('x₂')
    ax_2d.set_title('Contour Plot of Regression Surface')

    plt.tight_layout()
    plt.show()

    print(f"True coefficients: [{true_coef1}, {true_coef2}], intercept: {true_intercept}")
    print(f"Estimated coefficients: {model.coef_}, intercept: {model.intercept_:.2f}")
    print(f"R² Score: {model.score(X, y):.4f}")


plot_multiple_linear_regression()
Ridge and Lasso Regression
from sklearn.linear_model import Ridge, Lasso
def compare_regularization():
    """Trace Ridge and Lasso coefficient paths over a log-spaced alpha grid.

    Uses a sparse ground truth (only 3 of 10 features are nonzero) to show
    Lasso driving irrelevant coefficients exactly to zero while Ridge only
    shrinks them.
    """
    # Synthetic data with 10 features, sparse true coefficients.
    np.random.seed(42)
    n_samples = 100
    n_features = 10
    X = np.random.randn(n_samples, n_features)
    true_coef = np.zeros(n_features)
    true_coef[:3] = [2, -1.5, 0.5]
    y = X @ true_coef + np.random.randn(n_samples) * 0.1

    # Fit both models across a log-spaced range of regularization strengths.
    alphas = np.logspace(-3, 1, 10)
    ridge_coefs = []
    lasso_coefs = []
    for alpha in alphas:
        ridge = Ridge(alpha=alpha)
        ridge.fit(X, y)
        ridge_coefs.append(ridge.coef_)

        lasso = Lasso(alpha=alpha)
        lasso.fit(X, y)
        lasso_coefs.append(lasso.coef_)

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Left: Ridge coefficient paths.
    ridge_coefs = np.array(ridge_coefs)
    for i in range(n_features):
        axes[0].plot(alphas, ridge_coefs[:, i], label=f'β{i+1}', linewidth=2)
    axes[0].set_xscale('log')
    axes[0].set_xlabel('Alpha (log scale)', fontsize=12)
    axes[0].set_ylabel('Coefficient Value', fontsize=12)
    axes[0].set_title('Ridge Regression: Coefficient Paths', fontsize=14)
    axes[0].legend(loc='best', fontsize=8)
    axes[0].grid(True, alpha=0.3)

    # Right: Lasso coefficient paths.
    lasso_coefs = np.array(lasso_coefs)
    for i in range(n_features):
        axes[1].plot(alphas, lasso_coefs[:, i], label=f'β{i+1}', linewidth=2)
    axes[1].set_xscale('log')
    axes[1].set_xlabel('Alpha (log scale)', fontsize=12)
    axes[1].set_ylabel('Coefficient Value', fontsize=12)
    axes[1].set_title('Lasso Regression: Coefficient Paths', fontsize=14)
    axes[1].legend(loc='best', fontsize=8)
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
    print("True coefficients:", true_coef)


compare_regularization()
5. Real-World Applications
Trend Analysis
import pandas as pd
from datetime import datetime, timedelta
def plot_trend_analysis():
    """Fit and visualize a linear trend in a synthetic daily time series.

    Builds one year of data (trend + seasonal sine + noise), fits OLS on
    the day index, then shows: the series with its trend line, the
    detrended series, a residual scatter, and a Q-Q plot of residuals.
    """
    # One year of daily data: linear trend + seasonal component + noise.
    np.random.seed(42)
    dates = pd.date_range('2020-01-01', periods=365, freq='D')
    linear_trend = np.linspace(0, 100, 365)
    seasonal = 20 * np.sin(2 * np.pi * np.arange(365) / 365)
    noise = np.random.normal(0, 10, 365)
    y = linear_trend + seasonal + noise

    df = pd.DataFrame({'date': dates, 'value': y})

    # Regress the value on its integer day index to estimate the trend.
    x_numeric = np.arange(len(df)).reshape(-1, 1)
    model = LinearRegression()
    model.fit(x_numeric, df['value'].values)
    trend = model.predict(x_numeric)

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Top-left: raw series with the fitted trend line.
    axes[0, 0].plot(df['date'], df['value'], alpha=0.6, label='Data')
    axes[0, 0].plot(df['date'], trend, 'r-', linewidth=2,
                    label=f'Trend: y = {model.coef_[0]:.2f}x + {model.intercept_:.2f}')
    axes[0, 0].set_xlabel('Date')
    axes[0, 0].set_ylabel('Value')
    axes[0, 0].set_title('Time Series with Linear Trend')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)

    # Top-right: series with the trend removed (seasonal + noise remain).
    detrended = df['value'] - trend
    axes[0, 1].plot(df['date'], detrended, 'g-', alpha=0.6)
    axes[0, 1].axhline(y=0, color='r', linestyle='-', linewidth=1)
    axes[0, 1].set_xlabel('Date')
    axes[0, 1].set_ylabel('Detrended Value')
    axes[0, 1].set_title('Detrended Data')
    axes[0, 1].grid(True, alpha=0.3)

    # Bottom-left: residuals as a scatter over time.
    residuals = detrended
    axes[1, 0].scatter(df['date'], residuals, alpha=0.5)
    axes[1, 0].axhline(y=0, color='r', linestyle='-', linewidth=1)
    axes[1, 0].set_xlabel('Date')
    axes[1, 0].set_ylabel('Residuals')
    axes[1, 0].set_title('Residual Plot')
    axes[1, 0].grid(True, alpha=0.3)

    # Bottom-right: Q-Q plot to eyeball residual normality.
    from scipy import stats
    stats.probplot(residuals, dist="norm", plot=axes[1, 1])
    axes[1, 1].set_title('Q-Q Plot of Residuals')
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"Trend slope: {model.coef_[0]:.3f} units per day")
    print(f"Annual change: {model.coef_[0] * 365:.1f} units")


plot_trend_analysis()
Demand Forecasting
def plot_demand_forecast():
    """Fit a linear trend to 24 months of demand and forecast 12 more.

    Shows historical points, the fitted trend line, the forecast with a
    fixed ±20-unit uncertainty band, and a vertical marker where the
    forecast horizon begins.
    """
    # 24 months of history: base level + trend + seasonality + noise.
    np.random.seed(42)
    months = np.arange(1, 25)
    base_demand = 100
    trend = 3  # Increasing 3 units per month
    seasonal = 20 * np.sin(2 * np.pi * months / 12)
    noise = np.random.normal(0, 10, 24)
    demand = base_demand + trend * months + seasonal + noise

    # Fit OLS on the month index.
    X = months.reshape(-1, 1)
    model = LinearRegression()
    model.fit(X, demand)

    # Extrapolate the next 12 months.
    future_months = np.arange(25, 37)
    forecast = model.predict(future_months.reshape(-1, 1))

    plt.figure(figsize=(12, 8))

    # Historical data
    plt.scatter(months, demand, alpha=0.6, label='Historical data', s=50)
    plt.plot(months, demand, 'b-', alpha=0.3)

    # Historical trend line
    historical_trend = model.predict(X)
    plt.plot(months, historical_trend, 'g--', linewidth=2,
             label=f'Trend line: y = {model.coef_[0]:.2f}x + {model.intercept_:.2f}')

    # Forecast with a fixed ±20 uncertainty band.
    plt.plot(future_months, forecast, 'r-', linewidth=2, label='Forecast')
    plt.fill_between(future_months, forecast - 20, forecast + 20,
                     alpha=0.2, color='red', label='Forecast uncertainty')

    # Mark the forecast boundary BEFORE building the legend, so its label
    # actually appears (the original drew it after plt.legend(), which
    # silently dropped the 'Forecast start' legend entry).
    plt.axvline(x=24.5, color='k', linestyle='-', alpha=0.5,
                label='Forecast start')

    plt.xlabel('Month', fontsize=12)
    plt.ylabel('Demand', fontsize=12)
    plt.title('Demand Forecasting with Linear Regression', fontsize=14)
    plt.legend(loc='best')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    print(f"Projected growth: {model.coef_[0]:.2f} units/month")
    print(f"Forecast for next 12 months: {forecast.sum():.0f} total units")


plot_demand_forecast()
Break-Even Analysis
def plot_break_even_analysis():
    """Visualize break-even analysis with linear cost and revenue functions.

    Left panel: total cost vs. total revenue with profit/loss zones shaded
    and the break-even point marked. Right panel: the profit line itself.
    """
    # Business parameters.
    fixed_cost = 10000  # Fixed costs
    variable_cost_per_unit = 50  # Variable cost per unit
    price_per_unit = 100  # Selling price

    units = np.linspace(0, 400, 100)

    # C(x) = fixed + variable * x ; R(x) = price * x ; P(x) = R - C.
    total_cost = fixed_cost + variable_cost_per_unit * units
    total_revenue = price_per_unit * units
    profit = total_revenue - total_cost

    # Break-even where revenue equals cost: x = fixed / (price - variable).
    break_even_units = fixed_cost / (price_per_unit - variable_cost_per_unit)
    break_even_revenue = price_per_unit * break_even_units

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: cost vs. revenue with shaded profit/loss zones.
    axes[0].plot(units, total_cost, 'b-', linewidth=2, label='Total Cost')
    axes[0].plot(units, total_revenue, 'g-', linewidth=2, label='Total Revenue')
    axes[0].axhline(y=fixed_cost, color='b', linestyle='--', alpha=0.5,
                    label='Fixed Cost')
    axes[0].fill_between(units, total_cost, total_revenue,
                         where=(total_revenue >= total_cost),
                         color='green', alpha=0.3, label='Profit Zone')
    axes[0].fill_between(units, total_cost, total_revenue,
                         where=(total_revenue < total_cost),
                         color='red', alpha=0.3, label='Loss Zone')
    axes[0].plot(break_even_units, break_even_revenue, 'ro', markersize=10,
                 label=f'Break-even: {break_even_units:.0f} units')
    axes[0].set_xlabel('Units Sold', fontsize=12)
    axes[0].set_ylabel('Amount ($)', fontsize=12)
    axes[0].set_title('Break-Even Analysis: Cost vs Revenue', fontsize=14)
    axes[0].legend(loc='best')
    axes[0].grid(True, alpha=0.3)

    # Right panel: the profit function crossing zero at break-even.
    axes[1].plot(units, profit, 'r-', linewidth=2, label='Profit')
    axes[1].axhline(y=0, color='k', linestyle='-', alpha=0.5)
    axes[1].fill_between(units, 0, profit, where=(profit >= 0),
                         color='green', alpha=0.3, label='Profit')
    axes[1].fill_between(units, 0, profit, where=(profit < 0),
                         color='red', alpha=0.3, label='Loss')
    axes[1].plot(break_even_units, 0, 'ro', markersize=10)
    axes[1].set_xlabel('Units Sold', fontsize=12)
    axes[1].set_ylabel('Profit ($)', fontsize=12)
    axes[1].set_title('Profit Function', fontsize=14)
    axes[1].legend(loc='best')
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"Break-even point: {break_even_units:.0f} units")
    print(f"Revenue at break-even: ${break_even_revenue:,.0f}")
    print(f"Profit at 400 units: ${profit[-1]:,.0f}")


plot_break_even_analysis()
6. Interactive Plots
Interactive Linear Function Explorer
from ipywidgets import interact, FloatSlider, IntSlider
def interactive_linear_plot():
    """Interactive linear-function explorer driven by ipywidgets sliders.

    Sliders control slope, intercept, and the x window; the plot redraws
    on every change.
    """
    def plot_interactive(m=1.0, b=0.0, x_min=-5, x_max=5):
        x = np.linspace(x_min, x_max, 100)
        y = m * x + b

        plt.figure(figsize=(10, 6))
        plt.plot(x, y, 'b-', linewidth=2, label=f'y = {m}x + {b}')
        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
        plt.grid(True, alpha=0.3)
        plt.xlim(x_min, x_max)

        # Pad the y-range by 10% of the line's span.
        y_min = min(m * x_min + b, m * x_max + b)
        y_max = max(m * x_min + b, m * x_max + b)
        y_padding = (y_max - y_min) * 0.1
        if y_padding == 0:
            # Horizontal line (m == 0): y_min == y_max, so a 10% pad is 0
            # and ylim(b, b) would collapse to a zero-height view. Use a
            # fixed margin instead.
            y_padding = 1.0
        plt.ylim(y_min - y_padding, y_max + y_padding)

        plt.xlabel('x')
        plt.ylabel('y')
        plt.title(f'Linear Function Explorer\ny = {m:.2f}x + {b:.2f}')
        plt.legend()
        plt.tight_layout()
        plt.show()

    interact(plot_interactive,
             m=FloatSlider(min=-5, max=5, step=0.1, value=1, description='Slope (m)'),
             b=FloatSlider(min=-5, max=5, step=0.1, value=0, description='Intercept (b)'),
             x_min=FloatSlider(min=-10, max=0, step=1, value=-5, description='x min'),
             x_max=FloatSlider(min=0, max=10, step=1, value=5, description='x max'))

# Uncomment to run interactive plot
# interactive_linear_plot()
Regression Parameter Explorer
def interactive_regression_explorer():
    """Interactive exploration of regression slope/intercept against fixed data.

    Generates one noisy dataset from a known line, then lets sliders move
    a candidate line while the MSE and (optionally) the residual plot
    update live.
    """
    # Fixed synthetic dataset: y = 2x + 1 plus Gaussian noise.
    np.random.seed(42)
    n = 100
    x = np.random.uniform(-5, 5, n)
    true_slope = 2
    true_intercept = 1
    noise_level = 2
    y = true_slope * x + true_intercept + np.random.normal(0, noise_level, n)

    def plot_regression_interactive(slope=2, intercept=1, show_residuals=True):
        # Candidate line and its error against the fixed data.
        y_hat = slope * x + intercept
        residuals = y - y_hat
        mse = np.mean(residuals**2)

        fig, axes = plt.subplots(1, 2 if show_residuals else 1,
                                 figsize=(14 if show_residuals else 8, 5))
        if not show_residuals:
            axes = [axes]

        # Scatter plus both the true line and the candidate line.
        axes[0].scatter(x, y, alpha=0.6, label='Data points')
        x_sorted = np.sort(x)
        axes[0].plot(x_sorted, true_slope * x_sorted + true_intercept, 'g--', linewidth=2,
                     label=f'True: y = {true_slope}x + {true_intercept}')
        axes[0].plot(x_sorted, slope * x_sorted + intercept, 'r-', linewidth=2,
                     label=f'Model: y = {slope:.2f}x + {intercept:.2f}')
        axes[0].set_xlabel('x')
        axes[0].set_ylabel('y')
        axes[0].set_title(f'Linear Regression\nMSE = {mse:.2f}')
        axes[0].legend(loc='best')
        axes[0].grid(True, alpha=0.3)

        # Optional residual panel.
        if show_residuals:
            axes[1].scatter(x, residuals, alpha=0.6)
            axes[1].axhline(y=0, color='r', linestyle='-', linewidth=2)
            axes[1].set_xlabel('x')
            axes[1].set_ylabel('Residuals')
            axes[1].set_title('Residual Plot')
            axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

    interact(plot_regression_interactive,
             slope=FloatSlider(min=-1, max=5, step=0.1, value=2, description='Slope'),
             intercept=FloatSlider(min=-2, max=4, step=0.1, value=1, description='Intercept'),
             show_residuals=True)

# Uncomment to run interactive plot
# interactive_regression_explorer()
7. Statistical Visualization
Correlation and Regression
def plot_correlation_and_regression():
    """Show scatter + fit (top row) and residuals (bottom row) for five correlations.

    Each column targets a population correlation ρ in {-0.9, -0.5, 0, 0.5,
    0.9}; y is generated so its correlation with x is approximately ρ.
    """
    from scipy import stats

    np.random.seed(42)
    n = 200
    x = np.random.randn(n)
    correlations = [-0.9, -0.5, 0, 0.5, 0.9]

    fig, axes = plt.subplots(2, len(correlations), figsize=(16, 8))

    for col, rho in enumerate(correlations):
        # Mix x with independent noise so corr(x, y) ≈ rho.
        y = rho * x + np.sqrt(1 - rho**2) * np.random.randn(n)

        # Top row: scatter with the fitted regression line.
        axes[0, col].scatter(x, y, alpha=0.6)
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        x_line = np.linspace(x.min(), x.max(), 100)
        axes[0, col].plot(x_line, slope * x_line + intercept, 'r-', linewidth=2)
        axes[0, col].set_title(f'ρ = {rho}, r = {r_value:.3f}')
        axes[0, col].set_xlabel('x')
        axes[0, col].set_ylabel('y')
        axes[0, col].grid(True, alpha=0.3)

        # Bottom row: residuals from the fit.
        residuals = y - (slope * x + intercept)
        axes[1, col].scatter(x, residuals, alpha=0.6)
        axes[1, col].axhline(y=0, color='r', linestyle='-', linewidth=2)
        axes[1, col].set_xlabel('x')
        axes[1, col].set_ylabel('Residuals')
        axes[1, col].grid(True, alpha=0.3)

    plt.suptitle('Correlation and Regression Analysis', fontsize=16)
    plt.tight_layout()
    plt.show()


plot_correlation_and_regression()
Anscombe's Quartet
def plot_anscombe_quartet():
    """Plot Anscombe's quartet: four datasets with near-identical summary
    statistics but very different shapes, showing why visualization matters.

    Each panel shows one dataset with its regression line and the fitted
    r / equation annotated.
    """
    # `stats` was never imported here (other functions import it locally),
    # so the original raised NameError at stats.linregress; also dropped
    # the unused make_regression / pandas imports.
    from scipy import stats

    # Load Anscombe's quartet (bundled with seaborn's sample datasets).
    anscombe = sns.load_dataset('anscombe')

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()

    for i, dataset in enumerate(anscombe['dataset'].unique()):
        data = anscombe[anscombe['dataset'] == dataset]
        x = data['x']
        y = data['y']

        # Summary statistics are (nearly) identical across all four sets.
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)

        axes[i].scatter(x, y, alpha=0.6, s=50)

        # Fitted regression line.
        x_line = np.linspace(x.min(), x.max(), 100)
        y_line = slope * x_line + intercept
        axes[i].plot(x_line, y_line, 'r-', linewidth=2)

        # Annotate the (shared) statistics in each panel.
        stats_text = f'r = {r_value:.3f}\ny = {slope:.2f}x + {intercept:.2f}'
        axes[i].text(0.05, 0.95, stats_text, transform=axes[i].transAxes,
                     fontsize=9, verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        axes[i].set_title(f'Dataset {dataset}')
        axes[i].set_xlabel('x')
        axes[i].set_ylabel('y')
        axes[i].grid(True, alpha=0.3)

    plt.suptitle('Anscombe\'s Quartet: Same Statistics, Different Visualizations',
                 fontsize=14)
    plt.tight_layout()
    plt.show()


plot_anscombe_quartet()
8. Customization and Styling
Professional Plot Styling
def professional_linear_plot():
    """Create publication-quality side-by-side regression plots.

    Left: positive-slope data; right: negative-slope data; each with its
    fitted line and correlation coefficient in the legend.
    """
    # `stats` was never imported at module level (earlier functions import
    # it locally), so stats.linregress below raised NameError.
    from scipy import stats

    # Set professional style
    plt.style.use('seaborn-v0_8-paper')

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Synthetic data: one positive and one negative linear relationship.
    np.random.seed(42)
    x = np.linspace(0, 10, 100)
    y1 = 2 * x + 3 + np.random.normal(0, 1, 100)
    y2 = -1.5 * x + 8 + np.random.normal(0, 1.5, 100)

    # Least-squares fits for both panels.
    slope1, intercept1, r1, _, _ = stats.linregress(x, y1)
    slope2, intercept2, r2, _, _ = stats.linregress(x, y2)

    # Left panel: positive correlation.
    ax1.scatter(x, y1, alpha=0.5, s=30, label='Data', color='#2E86AB')
    ax1.plot(x, slope1 * x + intercept1, 'r-', linewidth=2,
             label=f'Fit: y = {slope1:.2f}x + {intercept1:.2f}\n(r = {r1:.3f})')
    ax1.set_xlabel('Independent Variable (x)', fontsize=10)
    ax1.set_ylabel('Dependent Variable (y)', fontsize=10)
    ax1.set_title('Positive Correlation', fontsize=12, fontweight='bold')
    ax1.legend(loc='lower right', frameon=True, fancybox=True, shadow=True)
    ax1.grid(True, alpha=0.3, linestyle='--')

    # Right panel: negative correlation.
    ax2.scatter(x, y2, alpha=0.5, s=30, label='Data', color='#A23B72')
    ax2.plot(x, slope2 * x + intercept2, 'r-', linewidth=2,
             label=f'Fit: y = {slope2:.2f}x + {intercept2:.2f}\n(r = {r2:.3f})')
    ax2.set_xlabel('Independent Variable (x)', fontsize=10)
    ax2.set_ylabel('Dependent Variable (y)', fontsize=10)
    ax2.set_title('Negative Correlation', fontsize=12, fontweight='bold')
    ax2.legend(loc='lower left', frameon=True, fancybox=True, shadow=True)
    ax2.grid(True, alpha=0.3, linestyle='--')

    fig.suptitle('Linear Regression Analysis', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()


professional_linear_plot()
Color Maps and Themes
def plot_color_themes():
    """Demonstrate linear scatter+fit plots under four color themes.

    Each panel draws five noisy lines with theme-mapped colors; the
    'Dark' theme also flips tick/label colors to white.
    """
    # `stats` was never imported at module level (earlier functions import
    # it locally), so stats.linregress below raised NameError.
    from scipy import stats

    np.random.seed(42)
    x = np.linspace(0, 10, 50)

    # Theme name -> colormap, background, optional text color.
    themes = {
        'viridis': {'colors': plt.cm.viridis, 'bg': '#f5f5f5'},
        'plasma': {'colors': plt.cm.plasma, 'bg': '#f5f5f5'},
        'coolwarm': {'colors': plt.cm.coolwarm, 'bg': '#f5f5f5'},
        'Dark': {'colors': plt.cm.viridis, 'bg': '#2E2E2E', 'text': 'white'}
    }

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    axes = axes.flatten()

    for idx, (theme_name, theme) in enumerate(themes.items()):
        ax = axes[idx]
        ax.set_facecolor(theme['bg'])

        # Five noisy lines with slopes -1.0 .. 1.0 in 0.5 steps.
        for i in range(5):
            slope = (i - 2) * 0.5
            intercept = 5
            y = slope * x + intercept + np.random.normal(0, 0.5, 50)
            color = theme['colors'](i / 5)
            ax.scatter(x, y, alpha=0.5, s=20, color=color)

            # Fit and overlay the regression line in the same color.
            slope_fit, intercept_fit, r, _, _ = stats.linregress(x, y)
            ax.plot(x, slope_fit * x + intercept_fit, color=color,
                    linewidth=1.5, alpha=0.8, label=f'Line {i+1}')

        ax.set_xlabel('x', fontsize=10, color=theme.get('text', 'black'))
        ax.set_ylabel('y', fontsize=10, color=theme.get('text', 'black'))
        ax.set_title(f'{theme_name} Theme', fontsize=12,
                     color=theme.get('text', 'black'))
        ax.grid(True, alpha=0.3, linestyle='--')
        if 'text' in theme:
            # Dark background needs white ticks for legibility.
            ax.tick_params(colors='white')

    plt.suptitle('Linear Plots with Different Color Themes', fontsize=14)
    plt.tight_layout()
    plt.show()


plot_color_themes()
9. Exporting and Saving
High-Quality Exports
def save_high_quality_plot():
    """Build one regression figure and save it as PNG (300 DPI), PDF, and SVG.

    Writes linear_plot.png / .pdf / .svg to the working directory, then
    shows the figure.
    """
    # `stats` was never imported at module level (earlier functions import
    # it locally), so stats.linregress below raised NameError.
    from scipy import stats

    # Synthetic data: y = 2x + 3 plus noise.
    np.random.seed(42)
    x = np.linspace(0, 10, 100)
    y = 2 * x + 3 + np.random.normal(0, 1.5, 100)

    # High-DPI canvas for print-quality raster output.
    fig, ax = plt.subplots(figsize=(8, 6), dpi=300)

    ax.scatter(x, y, alpha=0.6, s=30, label='Data points')
    slope, intercept, r, _, _ = stats.linregress(x, y)
    ax.plot(x, slope * x + intercept, 'r-', linewidth=2,
            label=f'Regression line: y = {slope:.2f}x + {intercept:.2f}')
    ax.set_xlabel('X Variable', fontsize=12)
    ax.set_ylabel('Y Variable', fontsize=12)
    ax.set_title('Linear Regression Analysis', fontsize=14, fontweight='bold')
    ax.legend(loc='best', frameon=True, fancybox=True)
    ax.grid(True, alpha=0.3)

    # Annotate fit statistics.
    stats_text = f'R² = {r**2:.3f}\nSlope: {slope:.3f}\nIntercept: {intercept:.3f}'
    ax.text(0.05, 0.95, stats_text, transform=ax.transAxes,
            fontsize=10, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    plt.tight_layout()

    # Save in different formats: high-DPI raster plus two vector formats.
    plt.savefig('linear_plot.png', dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none')
    plt.savefig('linear_plot.pdf', bbox_inches='tight')
    plt.savefig('linear_plot.svg', bbox_inches='tight')

    print("Plots saved as:")
    print(" - linear_plot.png (300 DPI)")
    print(" - linear_plot.pdf (vector format)")
    print(" - linear_plot.svg (vector format)")
    plt.show()


save_high_quality_plot()
Conclusion
Plotting linear functions is a fundamental skill in data science for understanding relationships between variables and communicating insights effectively.
Key Takeaways
- Understanding Components: Slope and intercept define the behavior of linear functions
- Regression Analysis: Linear regression provides a framework for modeling relationships
- Visualization Best Practices: Use appropriate scales, labels, and legends for clarity
- Interactive Exploration: Interactive plots help understand parameter effects
- Statistical Validation: Always check residuals and confidence intervals
- Real-World Applications: Linear functions model trends, costs, and forecasts
Quick Reference
| Plot Type | Use Case | Best Practice |
|---|---|---|
| Scatter + Line | Data with regression | Show confidence intervals |
| Residual Plot | Model diagnostics | Look for patterns |
| Coefficient Paths | Regularization | Compare different alphas |
| Time Series | Trend analysis | Decompose components |
| Interactive | Exploration | Use sliders for parameters |
Next Steps
- Explore polynomial and non-linear regression
- Study multivariate linear regression
- Investigate regularization techniques
- Learn about generalized linear models
- Practice with real datasets
Mastering linear function visualization is essential for building a strong foundation in data science and machine learning!