Skip to content
Snippets Groups Projects
Commit 5a63479a authored by shawk masboob's avatar shawk masboob
Browse files

made changes to prediction functions

parent fe6863e6
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
<h1><center>Prediction using Topological Data Analysis</center></h1>
%% Cell type:markdown id: tags:
TODO: Give a brief overview of the notebook's purpose.
TODO: Consider adding an illustrative picture.
%% Cell type:code id: tags:
``` python
from Topological_ML import TDA_Prediction as tdap
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np
```
%% Cell type:code id: tags:
``` python
# Load the California housing dataset through scikit-learn's fetch_california_housing.
cal_housing = fetch_california_housing()
def numpy_to_pandas(sklearn_data):
    """Build a pandas DataFrame from a scikit-learn dataset object.

    The feature matrix ``sklearn_data.data`` supplies the columns, labelled
    with ``sklearn_data.feature_names``; ``sklearn_data.target`` is added
    as a ``'response'`` column.
    """
    frame = pd.DataFrame(sklearn_data.data, columns=sklearn_data.feature_names)
    target_column = pd.Series(sklearn_data.target)
    frame['response'] = target_column
    return frame
```
%% Cell type:code id: tags:
``` python
# Convert the loaded dataset into a DataFrame with the target stored in a 'response' column.
df = numpy_to_pandas(cal_housing)
```
%% Cell type:code id: tags:
``` python
def descriptive_statistic(df, n):
    """Print a quick summary of a dataframe.

    Writes, in order, the shape of ``df``, its first ``n`` rows and the
    result of ``df.describe()`` to standard output. Returns nothing.
    """
    # Each section is evaluated lazily so it is computed right before it
    # is printed, one section at a time.
    sections = (
        ("Shape : ", lambda: df.shape),
        ("Head -- \n", lambda: df.head(n)),
        ("Describe : ", lambda: df.describe()),
    )
    for label, compute in sections:
        print(label, compute())
# Print the shape, the first 5 rows and the summary statistics of the housing DataFrame.
descriptive_statistic(df, 5)
```
%% Output
Shape : (20640, 9)
Head --
MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \
0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88
1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86
2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85
3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85
4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85
Longitude target
0 -122.23 4.526
1 -122.22 3.585
2 -122.24 3.521
3 -122.25 3.413
4 -122.25 3.422
Describe : MedInc HouseAge AveRooms AveBedrms Population \
count 20640.000000 20640.000000 20640.000000 20640.000000 20640.000000
mean 3.870671 28.639486 5.429000 1.096675 1425.476744
std 1.899822 12.585558 2.474173 0.473911 1132.462122
min 0.499900 1.000000 0.846154 0.333333 3.000000
25% 2.563400 18.000000 4.440716 1.006079 787.000000
50% 3.534800 29.000000 5.229129 1.048780 1166.000000
75% 4.743250 37.000000 6.052381 1.099526 1725.000000
max 15.000100 52.000000 141.909091 34.066667 35682.000000
AveOccup Latitude Longitude target
count 20640.000000 20640.000000 20640.000000 20640.000000
mean 3.070655 35.631861 -119.569704 2.068558
std 10.386050 2.135952 2.003532 1.153956
min 0.692308 32.540000 -124.350000 0.149990
25% 2.429741 33.930000 -121.800000 1.196000
50% 2.818116 34.260000 -118.490000 1.797000
75% 3.282261 37.710000 -118.010000 2.647250
max 1243.333333 41.950000 -114.310000 5.000010
None
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
def test_srt_rt1():
    """Check that tdap.mysqrt(-4) evaluates to 2."""
    result = tdap.mysqrt(-4)
    assert result == 2
def test_srt_rt2():
    """Check that tdap.mysqrt(4) evaluates to 2."""
    result = tdap.mysqrt(4)
    assert result == 2
```
def numpy_to_pandas(sklearn_data):
    """Convert a scikit-learn dataset object into a pandas DataFrame.

    Args:
        sklearn_data: dataset object exposing ``data`` (feature matrix),
            ``feature_names`` (column labels) and ``target`` attributes,
            as returned by the scikit-learn dataset loaders.

    Returns:
        A DataFrame with one column per feature plus a ``'response'``
        column holding the target values.
    """
    df = pd.DataFrame(data=sklearn_data.data, columns=sklearn_data.feature_names)
    df['response'] = pd.Series(sklearn_data.target)
    return df
def datafetch(file_name):
    """Placeholder loader for real-world datasets.

    Currently only announces which file would be read and returns None;
    no data is loaded yet.
    """
    print("reading data from:", file_name)
    return None
def descriptive_statistic(df, n=5):
    """Print brief descriptive statistics for a dataframe.

    Args:
        df: pandas DataFrame to summarise.
        n: number of leading rows to display. Defaults to 5 so the
            earlier one-argument call form ``descriptive_statistic(df)``
            keeps working alongside the two-argument form.

    Returns:
        None. The shape, the first ``n`` rows and the ``describe()``
        summary are written to standard output.
    """
    print("Shape : ", df.shape)
    print("Head -- \n", df.head(n))
    print("Describe : ", df.describe())
def model_selection(df):
"""
......@@ -61,4 +56,11 @@ def accuracy_metrics(fit, MSE):
d['BIC'] = None
d['PRESS'] = None
d['Cp']= None
return d
\ No newline at end of file
return None
def mysqrt(n):
    """Return the square root of the magnitude of ``n``.

    Negative inputs are replaced by their absolute value before the root
    is taken, so ``mysqrt(-4)`` equals ``mysqrt(4)``.

    Args:
        n: a real number.

    Returns:
        ``abs(n) ** 0.5`` as a float.
    """
    if n < 0:
        # Use the plain magnitude; scaling it would make mysqrt(-4) differ
        # from the value 2 that the accompanying test expects.
        n = abs(n)
    return n ** (1 / 2)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment