# importing toy & real world datasets from the scikit-learn library

try:
    from sklearn import datasets
except ImportError:
    # None of the stubs below touch scikit-learn yet, so keep the template
    # importable without it. Real loaders must check for None before use.
    datasets = None


def dataload():
    """
    Load a toy dataset from scikit-learn.

    Stub: no dataset is loaded yet, so this always returns None.
    """
    data = None  # TODO: replace with an actual toy-dataset loader call
    return data


def datafetch(file_name):
    """
    Load a real world dataset.

    Stub: only announces the source on stdout; nothing is read and the
    function always returns None.

    file_name -- name of the data source, echoed in the progress message.

    NOTE(review): the original description says the data comes from
    scikit-learn, yet the function takes a file name -- confirm the
    intended source.
    """
    data = None  # TODO: replace with the actual read
    print("reading data from:", file_name)
    return data


# standard descriptive statistic analysis of data

def descriptive_statistic(df):
    """
    Print brief descriptive statistics on a dataset.

    Takes a dataframe as input. Stub: df is not inspected yet, so every
    label (type, shape, head, tail, describe) is printed with a None
    placeholder. Nothing is returned.
    """
    print("Type : ", None, "\n\n")
    print("Shape : ", None)
    print("Head -- \n", None)
    print("\n\n Tail -- \n", None)
    print("Describe : ", None)


# model selection

def model_selection(df):
    """
    Perform forward/backward stepwise regression on a dataframe.

    Returns a (forward, backward) tuple meant to hold the best model found
    by each method. Stub: df is not used yet and both entries are None.
    """
    null_fit = None  # placeholder reserved by the template, unused for now
    forward_step = None
    backward_step = None
    return forward_step, backward_step


# model accuracy

def MSE_fit(fit):
    """
    Calculate the MSE of a fitted model.

    Takes a fitted model as input and outputs the model's MSE.
    Stub: fit is not used yet and the result is always None.
    """
    MSE = None
    return MSE


def accuracy_metrics(fit, MSE):
    """
    Collect regression model accuracy metrics for model validation.

    Inputs are a fitted model and the MSE of that fitted model. Returns a
    dictionary with the keys 'R2', 'R2ad', 'AIC', 'BIC', 'PRESS' and 'Cp',
    in that order. Stub: neither input is used yet and every value is None.

    NOTE(review): the keys presumably stand for R-squared, adjusted
    R-squared, the Akaike and Bayesian information criteria, the PRESS
    statistic and Mallows' Cp -- confirm when implementing.
    """
    # Intermediate quantities the template reserves for the real
    # implementation; all of them are unset and unused for now.
    sumObj = None
    SSE = None
    n = None
    p = None
    pr = None
    return {
        'R2': None,
        'R2ad': None,
        'AIC': None,
        'BIC': None,
        'PRESS': None,
        'Cp': None,
    }


# test code

if __name__ == "__main__":
    # Guarded so that importing this module does not run the smoke test.
    file_name = 'data.csv'

    a = datafetch(file_name)
    print(a)

    # descriptive_statistic() has no return value, so this prints None.
    b = descriptive_statistic(a)
    print(b)

    c = model_selection(a)
    print(c)

    # NOTE(review): c is the (forward, backward) tuple, while MSE_fit and
    # accuracy_metrics document a single fitted model -- confirm which
    # model should be evaluated once the stubs are implemented.
    d = MSE_fit(c)
    print(d)

    print(accuracy_metrics(c, d))