Module TDA_Regression
Expand source code
# importing toy & real world datasets from the scikit-learn library
from sklearn import datasets
def dataload():
    """
    Load a toy dataset from scikit-learn.

    This is currently a placeholder: no loading logic is implemented, so
    the function always returns ``None``.

    Returns:
        None, until the dataset loading is implemented.
    """
    data = None  # TODO: replace with an actual toy-dataset load
    return data
def datafetch(file_name):
    """
    Fetch a real-world dataset (described by the original author as coming
    from scikit-learn).

    This is currently a placeholder: it only announces the name it was
    given on stdout; nothing is read from ``file_name`` yet.

    Args:
        file_name: Name reported in the "reading data from:" message.

    Returns:
        None, until the dataset fetching is implemented.
    """
    data = None  # TODO: replace with the actual fetch/read
    print("reading data from:", file_name)
    return data
# standard descriptive statistic analysis of data
def descriptive_statistic(df):
    """
    Print a brief descriptive-statistics report for a dataset.

    The report has five labelled sections (type, shape, head, tail,
    describe). This is currently a placeholder: ``df`` is not inspected and
    every section prints ``None``.

    Args:
        df: The dataframe to summarise (unused for now).

    Returns:
        None. The report is written to stdout only.
    """
    # Each ``None`` below is a slot for the corresponding summary of ``df``.
    print("Type : ", None, "\n\n")
    print("Shape : ", None)
    print("Head -- \n", None)
    print("\n\n Tail -- \n", None)
    print("Describe : ", None)
# model selection
def model_selection(df):
    """
    Select a regression model by forward and backward stepwise regression.

    This is currently a placeholder: ``df`` is not used and no model is
    fitted, so both entries of the result are ``None``.

    Args:
        df: The dataframe to fit models on (unused for now).

    Returns:
        A 2-tuple ``(forward_step, backward_step)`` holding the best model
        found by each method; currently ``(None, None)``.
    """
    # TODO: fit a null (starting) model and run both stepwise searches.
    forward_step = None
    backward_step = None
    return forward_step, backward_step
# model accuracy
def MSE_fit(fit):
    """
    Calculate the MSE of a fitted model.

    This is currently a placeholder: ``fit`` is not used and the result is
    always ``None``.

    Args:
        fit: A fitted model (unused for now).

    Returns:
        The model's MSE; currently ``None``.
    """
    MSE = None  # TODO: compute from ``fit``
    return MSE
def accuracy_metrics(fit, MSE):
    """
    Collect regression accuracy metrics for model validation.

    This is currently a placeholder: neither argument is used and every
    metric in the returned dictionary is ``None``.

    Args:
        fit: A fitted model (unused for now).
        MSE: The MSE of the fitted model (unused for now).

    Returns:
        A dict with the keys ``'R2'``, ``'R2ad'``, ``'AIC'``, ``'BIC'``,
        ``'PRESS'`` and ``'Cp'``, in that order.
    """
    # TODO: the original scaffold reserved unused locals (sumObj, SSE, n, p,
    # pr) for intermediate quantities; reintroduce them once they are
    # actually computed from ``fit``.
    return {
        'R2': None,
        'R2ad': None,
        'AIC': None,
        'BIC': None,
        'PRESS': None,
        'Cp': None,
    }
# test code
# Smoke test: calls each function in turn at module level and prints
# whatever it returns.
file_name = 'data.csv'
a = datafetch(file_name)
print(a)
# descriptive_statistic has no return statement, so b is None.
b = descriptive_statistic(a)
print(b)
# model_selection returns a (forward, backward) pair, so c is a 2-tuple.
c = model_selection(a)
print(c)
# NOTE(review): c is the whole tuple, while MSE_fit and accuracy_metrics
# document a single fitted model as input — confirm the intended argument.
d = MSE_fit(c)
print(d)
print(accuracy_metrics(c, d))
Functions
def MSE_fit(fit)
-
Takes in a fitted model as the input. Calculates the MSE of the fitted model. Outputs the model's MSE.
Expand source code
def MSE_fit(fit):
    """
    Calculate the MSE of a fitted model.

    This is currently a placeholder: ``fit`` is not used and the result is
    always ``None``.

    Args:
        fit: A fitted model (unused for now).

    Returns:
        The model's MSE; currently ``None``.
    """
    MSE = None  # TODO: compute from ``fit``
    return MSE
def accuracy_metrics(fit, MSE)
-
This function is used for model validation. It returns a dictionary of several regression model accuracy metrics. Its inputs are a fitted model and the MSE of the fitted model.
Expand source code
def accuracy_metrics(fit, MSE):
    """
    Collect regression accuracy metrics for model validation.

    This is currently a placeholder: neither argument is used and every
    metric in the returned dictionary is ``None``.

    Args:
        fit: A fitted model (unused for now).
        MSE: The MSE of the fitted model (unused for now).

    Returns:
        A dict with the keys ``'R2'``, ``'R2ad'``, ``'AIC'``, ``'BIC'``,
        ``'PRESS'`` and ``'Cp'``, in that order.
    """
    # TODO: the original scaffold reserved unused locals (sumObj, SSE, n, p,
    # pr) for intermediate quantities; reintroduce them once they are
    # actually computed from ``fit``.
    return {
        'R2': None,
        'R2ad': None,
        'AIC': None,
        'BIC': None,
        'PRESS': None,
        'Cp': None,
    }
def datafetch(file_name)
-
Load real-world datasets from scikit-learn.
Expand source code
def datafetch(file_name):
    """
    Fetch a real-world dataset (described by the original author as coming
    from scikit-learn).

    This is currently a placeholder: it only announces the name it was
    given on stdout; nothing is read from ``file_name`` yet.

    Args:
        file_name: Name reported in the "reading data from:" message.

    Returns:
        None, until the dataset fetching is implemented.
    """
    data = None  # TODO: replace with the actual fetch/read
    print("reading data from:", file_name)
    return data
def dataload()
-
Load toy datasets from scikit-learn.
Expand source code
def dataload():
    """
    Load a toy dataset from scikit-learn.

    This is currently a placeholder: no loading logic is implemented, so
    the function always returns ``None``.

    Returns:
        None, until the dataset loading is implemented.
    """
    data = None  # TODO: replace with an actual toy-dataset load
    return data
def descriptive_statistic(df)
-
Provides brief descriptive statistics on dataset. Takes dataframe as input.
Expand source code
def descriptive_statistic(df):
    """
    Print a brief descriptive-statistics report for a dataset.

    The report has five labelled sections (type, shape, head, tail,
    describe). This is currently a placeholder: ``df`` is not inspected and
    every section prints ``None``.

    Args:
        df: The dataframe to summarise (unused for now).

    Returns:
        None. The report is written to stdout only.
    """
    # Each ``None`` below is a slot for the corresponding summary of ``df``.
    print("Type : ", None, "\n\n")
    print("Shape : ", None)
    print("Head -- \n", None)
    print("\n\n Tail -- \n", None)
    print("Describe : ", None)
def model_selection(df)
-
Takes a dataframe as input. Performs forward/backward stepwise regression. Returns the best model for both methods.
Expand source code
def model_selection(df):
    """
    Select a regression model by forward and backward stepwise regression.

    This is currently a placeholder: ``df`` is not used and no model is
    fitted, so both entries of the result are ``None``.

    Args:
        df: The dataframe to fit models on (unused for now).

    Returns:
        A 2-tuple ``(forward_step, backward_step)`` holding the best model
        found by each method; currently ``(None, None)``.
    """
    # TODO: fit a null (starting) model and run both stepwise searches.
    forward_step = None
    backward_step = None
    return forward_step, backward_step