Skip to content
Snippets Groups Projects
Commit 07138663 authored by shawk masboob
Browse files

added new functions

parent 818a46ad
No related branches found
No related tags found
No related merge requests found
from Topological_ML import TDA_Prediction as tdap
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import kmapper as km
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from sklearn import ensemble
def numpy_to_pandas(sklearn_data):
"""
Converts scikit-learn numpy data into pandas dataframe.
......@@ -8,16 +19,17 @@ def numpy_to_pandas(sklearn_data):
df['response'] = pd.Series(sklearn_data.target)
return df
def data_summary(df, n):
    """
    Summarise a dataframe as a dictionary.

    Input:  df = dataframe
            n  = number of leading rows to include
    Output: dictionary with keys
            'head'  -> the first n rows (df.head(n))
            'shape' -> the dataframe's shape (df.shape)
    """
    # TODO: a per-column missing-value count (df.isna().sum()) was sketched
    # for this summary but is not enabled yet.
    return {'head': df.head(n), 'shape': df.shape}


def descriptive_statistic(df, n):
    """
    Print brief descriptive statistics on a dataset.

    Input:  df = dataframe
            n  = the first n rows of the dataframe
    Output: prints the shape, head, and descriptive statistics of the
            dataframe; returns None
    """
    print("Shape : ", df.shape)
    print("Head -- \n", df.head(n))
    print("Describe : ", df.describe())
def model_selection(df):
"""
......@@ -38,29 +50,47 @@ def MSE_fit(fit):
MSE = None
return MSE
def accuracy_metrics(fit, MSE, n, k, X=None, y=None):
    """
    Model-validation helper: return a dictionary of regression accuracy
    metrics for a fitted model.

    Input:  fit = fitted model exposing predict(X)
            MSE = MSE of the fitted model (accepted for interface
                  compatibility; not used by the metrics computed so far)
            n   = number of observations
            k   = number of estimated parameters
            X   = predictors the residuals are computed on
            y   = observed responses matching X
    Output: dictionary with keys 'R2', 'R2ad', 'AIC', 'BIC', 'PRESS', 'Cp'.
            Only 'AIC' and 'BIC' are computed; the others are None
            placeholders until they are implemented.
    Raises: ValueError if X or y is not supplied.
    """
    if X is None or y is None:
        raise ValueError("accuracy_metrics needs X and y to compute residuals")

    # Flatten both sides so an (n, 1) response cannot broadcast against an
    # (n,) prediction and silently produce an (n, n) residual matrix.
    y_hat = np.asarray(fit.predict(X)).ravel()
    resid = np.asarray(y).ravel() - y_hat
    SSE = float(np.sum(resid ** 2))

    d = dict.fromkeys(['R2', 'R2ad', 'AIC', 'BIC', 'PRESS', 'Cp'])

    # Least-squares forms of both criteria share the n*ln(SSE/n) term and
    # differ only in the complexity penalty (2k for AIC, k*ln(n) for BIC).
    log_fit = n * np.log(SSE / n)
    d['AIC'] = log_fit + 2 * k
    d['BIC'] = log_fit + k * np.log(n)
    return d
def linear_regression(x, y):
    """
    Fit an ordinary least squares linear regression and score it.

    input:  x = independent variables
            y = dependent variable
    output: R^2 of the fitted model, evaluated on the same x and y
    """
    regressor = LinearRegression()
    regressor.fit(x, y)
    r_squared = regressor.score(x, y)
    return r_squared
def mysqrt(n):
    """
    Return n raised to the power 1/2.

    A negative n is first replaced by 1.5 * abs(n), so the result is always
    the root of a non-negative value.
    """
    radicand = 1.5 * abs(n) if n < 0 else n
    return radicand ** (1 / 2)
\ No newline at end of file
def lens_1d(X, rs, v):
    """
    Build a two-column lens for X.

    input:  X  = data array (must expose .shape; one row per sample)
            rs = random_state passed to the IsolationForest
            v  = verbose level passed to KeplerMapper
    output: array whose first column is the IsolationForest
            decision_function value of each row and whose remaining
            column(s) are KeplerMapper's "l2norm" projection of X
    """
    n_rows = X.shape[0]

    # Column 1: isolation-forest decision scores, reshaped to a column vector
    forest = sklearn.ensemble.IsolationForest(random_state=rs)
    forest.fit(X)
    forest_scores = forest.decision_function(X).reshape((n_rows, 1))

    # Column 2: L2-norm projection computed by KeplerMapper
    l2_projection = km.KeplerMapper(verbose=v).fit_transform(X, projection="l2norm")

    return np.c_[forest_scores, l2_projection]
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment