Forked from
Masboob, Shawk / Topological_Machine_Learning
40 commits behind the upstream repository.
-
shawk masboob authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
tda_function.py 1.41 KiB
import sklearn
from sklearn.linear_model import LinearRegression
import kmapper as km
import pandas as pd
import numpy as np
def numpy_to_pandas(sklearn_data) -> pd.DataFrame:
    """
    Convert a scikit-learn style dataset object into a pandas DataFrame.

    Input: sklearn_data = object exposing the attributes ``data``
               (feature matrix), ``feature_names`` (one label per feature
               column) and ``target`` (one value per row)
    Output: pandas DataFrame with one column per feature name plus a
            trailing ``target`` column
    """
    frame = pd.DataFrame(
        data=sklearn_data.data,
        columns=sklearn_data.feature_names,
    )
    # The target is wrapped in a Series, so it is aligned to the frame by
    # index label rather than by position.
    frame['target'] = pd.Series(sklearn_data.target)
    return frame
def linear_regression(feature, predictor) -> float:
    """
    Fit an ordinary least squares linear regression and score it.

    The model is scored on the same data it was fitted on, so the result
    is an in-sample goodness of fit, not a held-out estimate.

    input: feature = independent variables (the X passed to ``fit``)
           predictor = dependent variable (the y passed to ``fit``);
               despite its name this is the response being predicted
    output: R^2 of the fitted model on (feature, predictor)
    """
    ols = LinearRegression()
    ols.fit(feature, predictor)
    return ols.score(feature, predictor)
def lens_1d(features, random_num, verbosity) -> np.ndarray:
    """
    Build a lens for KeplerMapper from two projections of ``features``.

    input: features = data to project; must expose ``.shape`` and be
               accepted by both IsolationForest and KeplerMapper
           random_num = value passed as ``random_state`` to IsolationForest
           verbosity = value passed as ``verbose`` to KeplerMapper
    output: numpy array with one row per sample; the first column is the
            IsolationForest decision-function score, followed by the
            column(s) of KeplerMapper's "l2norm" projection
    """
    # Import the submodule explicitly: ``import sklearn`` alone does not
    # guarantee that ``sklearn.ensemble`` is loaded as an attribute.
    from sklearn.ensemble import IsolationForest

    forest = IsolationForest(random_state=random_num)
    forest.fit(features)
    # Reshape the per-sample scores into a single column so they can be
    # stacked next to the second lens.
    lens1 = forest.decision_function(features).reshape((features.shape[0], 1))

    mapper = km.KeplerMapper(verbose=verbosity)
    lens2 = mapper.fit_transform(features, projection="l2norm")

    return np.c_[lens1, lens2]
def county_crosstab(data, county, year, index, columns) -> pd.DataFrame:
    """
    Cross-tabulate two columns for a single county in a single year.

    input: data = pandas DataFrame containing ``year`` and ``county``
               columns plus the two columns named by ``index``/``columns``
           county = value to match in the ``county`` column
           year = value to match in the ``year`` column
           index = name of the column whose values become the rows
           columns = name of the column whose values become the columns
    output: pandas DataFrame of counts (``pd.crosstab`` result); empty if
            no row matches both filters
    """
    # Bracket access is used instead of ``data.year`` so the lookup can
    # never be shadowed by a DataFrame attribute or method.
    selected = data[(data["year"] == year) & (data["county"] == county)]
    return pd.crosstab(index=selected[index], columns=selected[columns])