Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Topological_Machine_Learning
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Colbry, Dirk
Topological_Machine_Learning
Commits
5a63479a
Commit
5a63479a
authored
5 years ago
by
shawk masboob
Browse files
Options
Downloads
Patches
Plain Diff
made changes to prediction functions
parent
fe6863e6
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
TDA_Prediction.ipynb
+160
-0
160 additions, 0 deletions
TDA_Prediction.ipynb
Topological_ML/TDA_Prediction.py
+22
-20
22 additions, 20 deletions
Topological_ML/TDA_Prediction.py
with
182 additions
and
20 deletions
TDA_Prediction.ipynb
0 → 100644
+
160
−
0
View file @
5a63479a
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h1><center>Prediction using Topological Data Analysis</center></h1>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Give a brief overview of the notebook's purpose.\n",
"Also maybe add a cool picture."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from Topological_ML import TDA_Prediction as tdap\n",
"from sklearn.datasets import fetch_california_housing\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"cal_housing = fetch_california_housing()\n",
"\n",
"def numpy_to_pandas(sklearn_data):\n",
" df = pd.DataFrame(data = sklearn_data.data, columns = sklearn_data.feature_names)\n",
" df['response'] = pd.Series(sklearn_data.target)\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"df = numpy_to_pandas(cal_housing)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape : (20640, 9)\n",
"Head -- \n",
" MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
"0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n",
"1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n",
"2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n",
"3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n",
"4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n",
"\n",
" Longitude target \n",
"0 -122.23 4.526 \n",
"1 -122.22 3.585 \n",
"2 -122.24 3.521 \n",
"3 -122.25 3.413 \n",
"4 -122.25 3.422 \n",
"Describe : MedInc HouseAge AveRooms AveBedrms Population \\\n",
"count 20640.000000 20640.000000 20640.000000 20640.000000 20640.000000 \n",
"mean 3.870671 28.639486 5.429000 1.096675 1425.476744 \n",
"std 1.899822 12.585558 2.474173 0.473911 1132.462122 \n",
"min 0.499900 1.000000 0.846154 0.333333 3.000000 \n",
"25% 2.563400 18.000000 4.440716 1.006079 787.000000 \n",
"50% 3.534800 29.000000 5.229129 1.048780 1166.000000 \n",
"75% 4.743250 37.000000 6.052381 1.099526 1725.000000 \n",
"max 15.000100 52.000000 141.909091 34.066667 35682.000000 \n",
"\n",
" AveOccup Latitude Longitude target \n",
"count 20640.000000 20640.000000 20640.000000 20640.000000 \n",
"mean 3.070655 35.631861 -119.569704 2.068558 \n",
"std 10.386050 2.135952 2.003532 1.153956 \n",
"min 0.692308 32.540000 -124.350000 0.149990 \n",
"25% 2.429741 33.930000 -121.800000 1.196000 \n",
"50% 2.818116 34.260000 -118.490000 1.797000 \n",
"75% 3.282261 37.710000 -118.010000 2.647250 \n",
"max 1243.333333 41.950000 -114.310000 5.000010 \n",
"None\n"
]
}
],
"source": [
"def descriptive_statistic(df, n):\n",
" \"\"\"\n",
" Provides brief descriptive statistics on dataset. \n",
" Takes dataframe as input.\n",
" \"\"\"\n",
" print(\"Shape : \", df.shape)\n",
" print(\"Head -- \\n\", df.head(n))\n",
" print(\"Describe : \", df.describe())\n",
" \n",
"descriptive_statistic(df, 5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def test_srt_rt1():\n",
" assert tdap.mysqrt(-4) == 2\n",
" return\n",
"\n",
"def test_srt_rt2():\n",
" assert tdap.mysqrt(4) == 2\n",
" return\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:markdown id: tags:
<h1><center>
Prediction using Topological Data Analysis
</center></h1>
%% Cell type:markdown id: tags:
Give a brief overview of the notebook's purpose.
Also maybe add a cool picture.
%% Cell type:code id: tags:
```
python
from
Topological_ML
import
TDA_Prediction
as
tdap
from
sklearn.datasets
import
fetch_california_housing
import
pandas
as
pd
import
numpy
as
np
```
%% Cell type:code id: tags:
```
python
cal_housing
=
fetch_california_housing
()
def numpy_to_pandas(sklearn_data):
    """
    Wrap a scikit-learn style dataset object in a pandas dataframe.

    Input: sklearn_data = object exposing ``data``, ``feature_names`` and
           ``target`` attributes
    Output: dataframe with one column per feature name plus a ``response``
            column built from ``target``
    """
    frame = pd.DataFrame(
        sklearn_data.data,
        columns=sklearn_data.feature_names,
    )
    response = pd.Series(sklearn_data.target)
    frame['response'] = response
    return frame
```
%% Cell type:code id: tags:
```
python
df
=
numpy_to_pandas
(
cal_housing
)
```
%% Cell type:code id: tags:
```
python
def descriptive_statistic(df, n):
    """
    Print a short summary of a dataframe.

    Input: df = dataframe to summarise
           n = number of leading rows to show
    Output: nothing is returned; shape, the first n rows and the
            ``describe()`` table are written to standard output
    """
    report = (
        ("Shape : ", df.shape),
        ("Head -- \n", df.head(n)),
        ("Describe : ", df.describe()),
    )
    for label, value in report:
        print(label, value)
descriptive_statistic
(
df
,
5
)
```
%% Output
Shape : (20640, 9)
Head --
MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \
0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88
1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86
2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85
3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85
4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85
Longitude target
0 -122.23 4.526
1 -122.22 3.585
2 -122.24 3.521
3 -122.25 3.413
4 -122.25 3.422
Describe : MedInc HouseAge AveRooms AveBedrms Population \
count 20640.000000 20640.000000 20640.000000 20640.000000 20640.000000
mean 3.870671 28.639486 5.429000 1.096675 1425.476744
std 1.899822 12.585558 2.474173 0.473911 1132.462122
min 0.499900 1.000000 0.846154 0.333333 3.000000
25% 2.563400 18.000000 4.440716 1.006079 787.000000
50% 3.534800 29.000000 5.229129 1.048780 1166.000000
75% 4.743250 37.000000 6.052381 1.099526 1725.000000
max 15.000100 52.000000 141.909091 34.066667 35682.000000
AveOccup Latitude Longitude target
count 20640.000000 20640.000000 20640.000000 20640.000000
mean 3.070655 35.631861 -119.569704 2.068558
std 10.386050 2.135952 2.003532 1.153956
min 0.692308 32.540000 -124.350000 0.149990
25% 2.429741 33.930000 -121.800000 1.196000
50% 2.818116 34.260000 -118.490000 1.797000
75% 3.282261 37.710000 -118.010000 2.647250
max 1243.333333 41.950000 -114.310000 5.000010
None
%% Cell type:code id: tags:
```
python
``
`
%%
Cell
type
:
code
id
:
tags
:
```
python
def test_srt_rt1():
    """Check that mysqrt of -4 compares equal to 2."""
    result = tdap.mysqrt(-4)
    assert result == 2
def test_srt_rt2():
    """Check that mysqrt of 4 compares equal to 2."""
    result = tdap.mysqrt(4)
    assert result == 2
```
This diff is collapsed.
Click to expand it.
Topological_ML/TDA_Prediction.py
+
22
−
20
View file @
5a63479a
def
dataload
(
):
def numpy_to_pandas(sklearn_data):
    """
    Converts scikit-learn numpy data into pandas dataframe.

    Input: sklearn_data = dataset object exposing ``data``,
           ``feature_names`` and ``target`` attributes (presumably the
           result of a ``sklearn.datasets`` loader -- confirm with callers)
    Output: pandas dataframe with one column per feature name plus a
            ``response`` column holding the target values
    """
    # The leftover stub lines (``data = None`` / ``return data``) used to
    # return before the frame was built; they are gone so the conversion
    # below is actually reached.
    df = pd.DataFrame(data=sklearn_data.data, columns=sklearn_data.feature_names)
    df['response'] = pd.Series(sklearn_data.target)
    return df
def datafetch(file_name):
    """
    Placeholder for loading real world datasets from scikit-learn.

    Input: file_name = identifier of the data to read; currently only echoed
    Output: None, because no loading logic exists yet
    """
    print("reading data from:", file_name)
    # Nothing is loaded so far; callers always receive None.
    return None
def descriptive_statistic(df, n=5):
    """
    Provides brief descriptive statistics on dataset.

    Input: df = dataframe
           n = the first n rows of the dataframe (defaults to 5 so the
               older one-argument call form keeps working)
    Output: prints shape, head, and descriptive statistics of dataframe;
            nothing is returned
    """
    # Only the real values are reported; the earlier placeholder prints of
    # ``None`` (Type / Shape / Head / Tail / Describe) were dropped because
    # they duplicated the labels without showing any data.
    print("Shape : ", df.shape)
    print("Head -- \n", df.head(n))
    print("Describe : ", df.describe())
def
model_selection
(
df
):
"""
...
...
@@ -61,4 +56,11 @@ def accuracy_metrics(fit, MSE):
d
[
'
BIC
'
]
=
None
d
[
'
PRESS
'
]
=
None
d
[
'
Cp
'
]
=
None
return
d
\ No newline at end of file
return
None
def mysqrt(n):
    """
    Returns the square root of the magnitude of n.

    Input: n = real number; a negative value is replaced by its absolute
           value before the root is taken
    Output: non-negative float, e.g. mysqrt(4) and mysqrt(-4) both give 2.0
    """
    # Negative input was previously scaled by 1.5 on top of abs(), which
    # made mysqrt(-4) return sqrt(6); only the sign is dropped now.
    if n < 0:
        n = abs(n)
    sqrt1 = n ** (1 / 2)
    return sqrt1
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment