Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
tda_function.py 1.41 KiB
import sklearn
from sklearn.linear_model import LinearRegression
import kmapper as km
import pandas as pd
import numpy as np

def numpy_to_pandas(sklearn_data):
    """
    Builds a pandas dataframe from a scikit-learn style dataset object.
    Input: object exposing .data (feature values), .feature_names
           (column labels) and .target (target values)
    Output: pandas dataframe with one column per feature name plus a
            'target' column
    """
    frame = pd.DataFrame(sklearn_data.data, columns=sklearn_data.feature_names)
    # Wrap the target in a Series so it is aligned on the frame's index.
    target_column = pd.Series(sklearn_data.target)
    frame['target'] = target_column
    return frame

def linear_regression(feature, predictor):
    """
    Fits an ordinary least squares LinearRegression and scores it.
    input: feature = independent variables passed to fit/score as X
           predictor = dependent variable passed to fit/score as y
    output: value of model.score on the same data used for fitting (R^2)
    """
    # fit() returns the fitted estimator, so the calls can be chained.
    fitted = LinearRegression().fit(feature, predictor)
    r_squared = fitted.score(feature, predictor)
    return r_squared

def lens_1d(features, random_num, verbosity):
    """
    Builds a lens from an IsolationForest score and an l2norm projection.
    input: features = sample matrix with a .shape attribute, one row per sample
           random_num = value passed to IsolationForest as random_state
           verbosity = value passed to km.KeplerMapper as verbose
    output: array whose first column is the IsolationForest decision
            function of each row, followed by the output of
            KeplerMapper.fit_transform with projection="l2norm"
    """
    # Import the submodule explicitly: a bare `import sklearn` does not
    # guarantee that `sklearn.ensemble` is available as an attribute.
    from sklearn.ensemble import IsolationForest

    model = IsolationForest(random_state=random_num)
    model.fit(features)
    # One score per row, reshaped into a column so it can be stacked below.
    lens1 = model.decision_function(features).reshape((features.shape[0], 1))
    mapper = km.KeplerMapper(verbose=verbosity)
    lens2 = mapper.fit_transform(features, projection="l2norm")
    # Column-wise concatenation of the two lenses.
    lens = np.c_[lens1, lens2]
    return lens

def county_crosstab(data, county, year, index, columns):
    """
    Cross-tabulates two columns for a single county and year.
    input: data = dataframe with 'year' and 'county' columns
           county = value to match in the 'county' column
           year = value to match in the 'year' column
           index = name of the column used for the crosstab rows
           columns = name of the column used for the crosstab columns
    output: pandas crosstab (frequency table) of the matching rows
    """
    year_mask = data.year == year
    county_mask = data.county == county
    # Keep only rows that match both the requested year and county.
    selected = data[year_mask & county_mask]
    return pd.crosstab(index=selected[index], columns=selected[columns])