T-test on multiple linear regression

Question:
Code gives warning when running T-test

Code:

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sc
import statsmodels.api as sc
import statsmodels.api as sm
from scipy import stats
from scipy.stats import ttest_ind

#DATA

x = np.array([0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8])
YY_y = np.array([1, 2, 2, 1, 3, 4, 4, 3, 5, 6, 6, 5, 7, 8, 8, 7, 9, 10, 10, 9])
Z_y = np.array([1, 2, 2, 3, 3, 4, 5, 6, 5, 8, 9, 5, 9, 10, 8, 9, 10, 12, 11, 10])



YY_model = sm.OLS(YY_y, sm.add_constant(x)).fit()
Z_model = sm.OLS(Z_y, sm.add_constant(x)).fit()
# Extract coefficients
YY_intercept, YY_slope = YY_model.params
Z_intercept, Z_slope = Z_model.params

# Fit linear regression lines
YY_slope, YY_intercept = np.polyfit(x, YY_y, 1)
Z_slope, Z_intercept = np.polyfit(x, Z_y, 1)

# Perform independent samples t-test on slopes
t_stat_slope, p_value_slope = ttest_ind(YY_slope, Z_slope)

# Perform independent samples t-test on intercepts
t_stat_intercept, p_value_intercept = ttest_ind(YY_intercept, Z_intercept)

# Print results
print(f"T-statistic for Slope: {t_stat_slope}, P-value for Slope: {p_value_slope}")
print(f"T-statistic for Intercept: {t_stat_intercept}, P-value for Intercept: {p_value_intercept}")

Outcome:

/home/runner/regressie/.pythonlibs/lib/python3.10/site-packages/scipy/stats/_stats_py.py:7030: RuntimeWarning: invalid value encountered in scalar divide
  svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df
T-statistic for Slope: nan, P-value for Slope: nan
T-statistic for Intercept: nan, P-value for Intercept: nan

Ok with the help of AI I could fix it.
Besides the problem of dividing the were NAN results of T and P. So there were two problems and solutions:
To warning can be resolved with adding:

# Suppressing the warning using np.seterr and np.errstate
np.seterr(
  divide="ignore", invalid="ignore"
)  # Suppress division by zero and invalid value warnings
np.errstate(
  divide="ignore", invalid="ignore"
)  # Context manager for division by zero and invalid value warnings

# The standard T-test was not suitable for multiple linear regression analysis so the T-test should be calculated 'manual' Resulting in this code:

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as sc
import statsmodels.api as sc
import statsmodels.api as sm
from scipy import stats
from scipy.stats import ttest_ind


# Data
x = np.array([0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8])
YY_y = np.array([1, 2, 2, 1, 3, 4, 4, 3, 5, 6, 6, 5, 7, 8, 8, 7, 9, 10, 10, 9])
Z_y = np.array([1, 2, 2, 3, 3, 4, 5, 6, 5, 8, 9, 5, 9, 10, 8, 9, 10, 12, 11, 10])


# Suppressing the warning using np.seterr and np.errstate
np.seterr(
  divide="ignore", invalid="ignore"
)  # Suppress division by zero and invalid value warnings
np.errstate(
  divide="ignore", invalid="ignore"
)  # Context manager for division by zero and invalid value warnings
YY_model = sm.OLS(YY_y, sm.add_constant(x)).fit()
Z_model = sm.OLS(Z_y, sm.add_constant(x)).fit()
# Extract coefficients
YY_intercept, YY_slope = YY_model.params
Z_intercept, Z_slope = Z_model.params

# Fit linear regression lines
YY_slope, YY_intercept = np.polyfit(x, YY_y, 1)
Z_slope, Z_intercept = np.polyfit(x, Z_y, 1)


# Compute t-statistic and p-value for slope
slope_diff = YY_slope - Z_slope
standard_error_slope = np.sqrt((YY_model.mse_resid + Z_model.mse_resid) / len(x))
t_stat_slope = slope_diff / standard_error_slope
p_value_slope = 2 * (1 - stats.t.cdf(np.abs(t_stat_slope), len(x) - 2))
# Compute t-statistic and p-value for intercept
intercept_diff = YY_intercept - Z_intercept
standard_error_intercept = np.sqrt(
  (YY_model.mse_resid + Z_model.mse_resid)
  * (1 / len(x) + np.mean(x) ** 2 / np.sum((x - np.mean(x)) ** 2))
)
t_stat_intercept = intercept_diff / standard_error_intercept
p_value_intercept = 2 * (1 - stats.t.cdf(np.abs(t_stat_intercept), len(x) - 2))
# Print results
print(f"T-statistic for Slope: {t_stat_slope}, P-value for Slope: {p_value_slope}")
print(
  f"T-statistic for Intercept: {t_stat_intercept}, P-value for Intercept:"
  f" {p_value_intercept}"
)
AI Edit code
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from scipy.stats import ttest_ind

#DATA

x = np.array([0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8])
YY_y = np.array([1, 2, 2, 1, 3, 4, 4, 3, 5, 6, 6, 5, 7, 8, 8, 7, 9, 10, 10, 9])
Z_y = np.array([1, 2, 2, 3, 3, 4, 5, 6, 5, 8, 9, 5, 9, 10, 8, 9, 10, 12, 11, 10])

YY_model = sm.OLS(YY_y, sm.add_constant(x)).fit()
Z_model = sm.OLS(Z_y, sm.add_constant(x)).fit()

# Extract coefficients
YY_intercept, YY_slope = YY_model.params
Z_intercept, Z_slope = Z_model.params

# Fit linear regression lines
# These lines are not necessary as the models are already fitted
# and parameters extracted above.
#YY_slope, YY_intercept = np.polyfit(x, YY_y, 1)
#Z_slope, Z_intercept = np.polyfit(x, Z_y, 1)

# Removed the t-test for slope and intercept as they do not apply to single value comparisons
# Instead, print model parameters
print(f"YY_model Slope: {YY_slope}, YY_model Intercept: {YY_intercept}")
print(f"Z_model Slope: {Z_slope}, Z_model Intercept: {Z_intercept}")
1 Like

I like the suggestion but it is not the solution of the problem.
I am trying to do a t-test for slope and intercept of YY against Z. In other Words: I want to know if the values YY_y and Z_y show a significant difference over time (x)

again, I’ll have to give code which is mostly AI-generated…

import numpy as np
import statsmodels.api as sm
from scipy import stats as sc
# Data has been provided in the file
x = np.array([0, 0, 0, 0, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8])
YY_y = np.array([1, 2, 2, 1, 3, 4, 4, 3, 5, 6, 6, 5, 7, 8, 8, 7, 9, 10, 10, 9])
Z_y = np.array([1, 2, 2, 3, 3, 4, 5, 6, 5, 8, 9, 5, 9, 10, 8, 9, 10, 12, 11, 10])

# Add constant to the x values for intercept estimation
x_with_intercept = sm.add_constant(x)

# Perform regression
YY_model = sm.OLS(YY_y, x_with_intercept).fit()
Z_model = sm.OLS(Z_y, x_with_intercept).fit()

# Access the coefficient statistics
YY_slope, YY_intercept = YY_model.params
Z_slope, Z_intercept = Z_model.params

YY_se = YY_model.bse
Z_se = Z_model.bse

# Standard error of the difference between the slopes and intercepts
se_diff_slope = np.sqrt(YY_se[1]**2 + Z_se[1]**2)
se_diff_intercept = np.sqrt(YY_se[0]**2 + Z_se[0]**2)

# Z-statistic for the slope and intercept
z_slope = (YY_slope - Z_slope) / se_diff_slope
z_intercept = (YY_intercept - Z_intercept) / se_diff_intercept

# Corresponding p-values for a two-tailed test
p_value_slope = 2 * (1 - sc.norm.cdf(abs(z_slope)))
p_value_intercept = 2 * (1 - sc.norm.cdf(abs(z_intercept)))

# Print results
print(f"Z-statistic for Slope: {z_slope}, P-value for Slope: {p_value_slope}")
print(f"Z-statistic for Intercept: {z_intercept}, P-value for Intercept: {p_value_intercept}")

I don’t know scipy well, and I don’t believe there are any active forum members who do.

You could switch to the “Advanced” AI model in the “AI” tab (which is what I’m using). Or try alternatives like phind.com/search. Or, you might find someone of better help on a site like stackoverflow.com.

1 Like

Thank you. Looks like an elegant code (and it works).
Regards

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.