Added compatability with Google Colab

fb035e87 · buckl113 · Colbry, Dirk · fcdb57bc · fb035e87
Commit fb035e87 authored 3 years ago by buckl113 Committed by Colbry, Dirk 2 years ago
--- a/Auto-SKLearn_AutoML/Classification.ipynb
+++ b/Auto-SKLearn_AutoML/Classification.ipynb
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Classification.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Classification Using Auto-SKLearn"
+      ],
+      "metadata": {
+        "id": "-I9i52jCjML_"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github.com/mcint170/DataTools_Tutorial_Demo/blob/main/Auto-SKLearn_AutoML/Classification.ipynb)"
+      ],
+      "metadata": {
+        "id": "-ZrgwiL9kR_L"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install auto-sklearn"
+      ],
+      "metadata": {
+        "id": "XAjlAHVRenet"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "After running this cell, Click Runtime -> Restart runtime. Then you can run the following cells."
+      ],
+      "metadata": {
+        "id": "yqIcMA8hgZ8W"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# imports\n",
+        "from pprint import pprint\n",
+        "\n",
+        "import sklearn.datasets\n",
+        "import sklearn.metrics\n",
+        "import pickle\n",
+        "\n",
+        "import autosklearn.classification"
+      ],
+      "metadata": {
+        "id": "BXuKNodQe7QZ"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# split the dataset\n",
+        "X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\n",
+        "X_train, X_test, y_train, y_test = \\\n",
+        "    sklearn.model_selection.train_test_split(X, y, random_state=1)"
+      ],
+      "metadata": {
+        "id": "ExulDsEAfAoO"
+      },
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Fit the classifier\n",
+        "automl = autosklearn.classification.AutoSklearnClassifier(\n",
+        "    time_left_for_this_task=120,\n",
+        "    per_run_time_limit=30,\n",
+        "    tmp_folder='/tmp/autosklearn_classification_example_tmp',\n",
+        ")\n",
+        "automl.fit(X_train, y_train, dataset_name='breast_cancer')"
+      ],
+      "metadata": {
+        "id": "-0zi5I38fNMM",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "e732438b-610c-4d82-bd38-b1a5497541c6"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "AutoSklearnClassifier(per_run_time_limit=30, time_left_for_this_task=120,\n",
+              "                      tmp_folder='/tmp/autosklearn_classification_example_tmp')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Different Models run by autosklearn\n",
+        "print(automl.leaderboard())"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SxtOkluYiVHe",
+        "outputId": "29e44357-b2cb-404d-a024-cda5bd61b65a"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "          rank  ensemble_weight                 type      cost  duration\n",
+            "model_id                                                                \n",
+            "7            1             0.10          extra_trees  0.014184  1.502508\n",
+            "2            2             0.02        random_forest  0.028369  2.024807\n",
+            "36           3             0.06  k_nearest_neighbors  0.028369  0.853534\n",
+            "26           4             0.04          extra_trees  0.028369  2.240347\n",
+            "19           5             0.02          extra_trees  0.028369  2.791073\n",
+            "22           6             0.02    gradient_boosting  0.028369  1.149980\n",
+            "3            7             0.14                  mlp  0.028369  1.667622\n",
+            "12           8             0.04    gradient_boosting  0.035461  1.240657\n",
+            "17           9             0.02    gradient_boosting  0.035461  1.510491\n",
+            "8           10             0.02        random_forest  0.035461  1.958862\n",
+            "37          11             0.06    gradient_boosting  0.035461  1.585859\n",
+            "5           12             0.04        random_forest  0.035461  2.075770\n",
+            "27          13             0.10          extra_trees  0.042553  1.910083\n",
+            "34          14             0.08        random_forest  0.042553  1.884860\n",
+            "9           15             0.04          extra_trees  0.042553  1.799630\n",
+            "23          16             0.02                  mlp  0.049645  2.405247\n",
+            "35          17             0.06          extra_trees  0.056738  1.586217\n",
+            "32          18             0.02          extra_trees  0.063830  1.650489\n",
+            "38          19             0.02          extra_trees  0.063830  2.128083\n",
+            "20          20             0.02   passive_aggressive  0.078014  0.774718\n",
+            "30          21             0.04             adaboost  0.078014  3.121010\n",
+            "29          22             0.02          gaussian_nb  0.141844  1.951357\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Show the different models\n",
+        "pprint(automl.show_models(), indent=4)"
+      ],
+      "metadata": {
+        "id": "25xOtCJ7icgh"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Predict the test labels\n",
+        "predictions = automl.predict(X_test)\n",
+        "print(\"Accuracy score:\", sklearn.metrics.accuracy_score(y_test, predictions))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XvbhWaZpidYt",
+        "outputId": "7a153d86-4d3b-474a-f867-8adf7e07318b"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Accuracy score: 0.9440559440559441\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Export the model with the highest rank\n",
+        "clf = automl.show_models()[7]['sklearn_classifier']\n",
+        "pickle.dump(clf,open('model.pickle','wb'))"
+      ],
+      "metadata": {
+        "id": "iCFcuh9EikR_"
+      },
+      "execution_count": 10,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
+%% Cell type:markdown id: tags:
+
+# Classification Using Auto-SKLearn
+
+%% Cell type:markdown id: tags:
+
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github.com/mcint170/DataTools_Tutorial_Demo/blob/main/Auto-SKLearn_AutoML/Classification.ipynb)
+
+%% Cell type:code id: tags:
+
+``` 
+!pip install auto-sklearn
+```
+
+%% Cell type:markdown id: tags:
+
+After running this cell, Click Runtime -> Restart runtime. Then you can run the following cells.
+
+%% Cell type:code id: tags:
+
+``` 
+# imports
+from pprint import pprint
+
+import sklearn.datasets
+import sklearn.metrics
+import pickle
+
+import autosklearn.classification
+```
+
+%% Cell type:code id: tags:
+
+``` 
+# split the dataset
+X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
+X_train, X_test, y_train, y_test = \
+    sklearn.model_selection.train_test_split(X, y, random_state=1)
+```
+
+%% Cell type:code id: tags:
+
+``` 
+# Fit the classifier
+automl = autosklearn.classification.AutoSklearnClassifier(
+    time_left_for_this_task=120,
+    per_run_time_limit=30,
+    tmp_folder='/tmp/autosklearn_classification_example_tmp',
+)
+automl.fit(X_train, y_train, dataset_name='breast_cancer')
+```
+
+%% Output
+
+    AutoSklearnClassifier(per_run_time_limit=30, time_left_for_this_task=120,
+                          tmp_folder='/tmp/autosklearn_classification_example_tmp')
+
+%% Cell type:code id: tags:
+
+``` 
+# Different Models run by autosklearn
+print(automl.leaderboard())
+```
+
+%% Output
+
+              rank  ensemble_weight                 type      cost  duration
+    model_id
+    7            1             0.10          extra_trees  0.014184  1.502508
+    2            2             0.02        random_forest  0.028369  2.024807
+    36           3             0.06  k_nearest_neighbors  0.028369  0.853534
+    26           4             0.04          extra_trees  0.028369  2.240347
+    19           5             0.02          extra_trees  0.028369  2.791073
+    22           6             0.02    gradient_boosting  0.028369  1.149980
+    3            7             0.14                  mlp  0.028369  1.667622
+    12           8             0.04    gradient_boosting  0.035461  1.240657
+    17           9             0.02    gradient_boosting  0.035461  1.510491
+    8           10             0.02        random_forest  0.035461  1.958862
+    37          11             0.06    gradient_boosting  0.035461  1.585859
+    5           12             0.04        random_forest  0.035461  2.075770
+    27          13             0.10          extra_trees  0.042553  1.910083
+    34          14             0.08        random_forest  0.042553  1.884860
+    9           15             0.04          extra_trees  0.042553  1.799630
+    23          16             0.02                  mlp  0.049645  2.405247
+    35          17             0.06          extra_trees  0.056738  1.586217
+    32          18             0.02          extra_trees  0.063830  1.650489
+    38          19             0.02          extra_trees  0.063830  2.128083
+    20          20             0.02   passive_aggressive  0.078014  0.774718
+    30          21             0.04             adaboost  0.078014  3.121010
+    29          22             0.02          gaussian_nb  0.141844  1.951357
+
+%% Cell type:code id: tags:
+
+``` 
+# Show the different models
+pprint(automl.show_models(), indent=4)
+```
+
+%% Cell type:code id: tags:
+
+``` 
+# Predict the test labels
+predictions = automl.predict(X_test)
+print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))
+```
+
+%% Output
+
+    Accuracy score: 0.9440559440559441
+
+%% Cell type:code id: tags:
+
+``` 
+# Export the model with the highest rank
+clf = automl.show_models()[7]['sklearn_classifier']
+pickle.dump(clf,open('model.pickle','wb'))
+```