"""Helper functions for a topological data analysis workflow.

Provides a scikit-learn dataset -> pandas conversion, an ordinary least
squares scoring helper, a two-column lens builder for KeplerMapper, and a
filtered cross-tabulation helper.
"""
import sklearn
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import LinearRegression
import kmapper as km
import pandas as pd
import numpy as np


def numpy_to_pandas(sklearn_data) -> pd.DataFrame:
    """Convert a scikit-learn style dataset object into a pandas DataFrame.

    Args:
        sklearn_data: Object exposing ``data`` (2-D feature values),
            ``feature_names`` (column labels) and ``target`` (one value per
            row), such as the objects returned by ``sklearn.datasets``
            loaders.

    Returns:
        A DataFrame with one column per feature name plus a ``'target'``
        column holding ``sklearn_data.target``.
    """
    data = pd.DataFrame(
        data=sklearn_data.data,
        columns=sklearn_data.feature_names,
    )
    data['target'] = pd.Series(sklearn_data.target)
    return data


def linear_regression(feature, predictor) -> float:
    """Fit an ordinary least squares regression and return its R^2.

    Args:
        feature: Independent variables (the ``X`` passed to
            ``LinearRegression.fit``).
        predictor: Dependent variable (the ``y`` passed to
            ``LinearRegression.fit``), despite the parameter's name.

    Returns:
        The value of ``model.score(feature, predictor)``, i.e. R^2 computed
        on the same data the model was fitted on (an in-sample score).
    """
    model = LinearRegression()
    model.fit(feature, predictor)
    return model.score(feature, predictor)


def lens_1d(features, random_num, verbosity) -> np.ndarray:
    """Build a lens for KeplerMapper from two projections of ``features``.

    The first column is the IsolationForest decision-function score of each
    row; the remaining column(s) are KeplerMapper's ``"l2norm"`` projection
    of the same data.

    Args:
        features: Array-like of samples accepted by both
            ``IsolationForest.fit`` and ``KeplerMapper.fit_transform``.
        random_num: Seed forwarded as ``random_state`` to IsolationForest so
            the scores are reproducible.
        verbosity: Forwarded as ``verbose`` to ``km.KeplerMapper``.

    Returns:
        The column-wise concatenation (``np.c_``) of the two lenses, one row
        per sample.
    """
    # IsolationForest is imported explicitly at module level: a bare
    # ``import sklearn`` does not guarantee that the ``sklearn.ensemble``
    # submodule is loaded, so ``sklearn.ensemble.IsolationForest`` could
    # raise AttributeError.
    model = IsolationForest(random_state=random_num)
    model.fit(features)
    # decision_function returns a 1-D array; make it a column vector so it
    # can be stacked next to the mapper projection.
    lens1 = model.decision_function(features).reshape(-1, 1)

    mapper = km.KeplerMapper(verbose=verbosity)
    lens2 = mapper.fit_transform(features, projection="l2norm")

    return np.c_[lens1, lens2]


def county_crosstab(data, county, year, index, columns) -> pd.DataFrame:
    """Cross-tabulate two columns for a single county and year.

    Args:
        data: DataFrame containing ``'year'`` and ``'county'`` columns as
            well as the columns named by ``index`` and ``columns``.
        county: Value the ``'county'`` column must equal.
        year: Value the ``'year'`` column must equal.
        index: Name of the column whose values become the crosstab rows.
        columns: Name of the column whose values become the crosstab columns.

    Returns:
        ``pd.crosstab`` frequency table computed on the rows matching both
        ``year`` and ``county``.
    """
    selected = data[(data["year"] == year) & (data["county"] == county)]
    return pd.crosstab(index=selected[index], columns=selected[columns])