From fb035e879079cc5a34a1ca55a50192ce7171273d Mon Sep 17 00:00:00 2001
From: buckl113 <39534448+buckl113@users.noreply.github.com>
Date: Fri, 4 Mar 2022 15:18:00 -0500
Subject: [PATCH] Added compatability with Google Colab

---
 Auto-SKLearn_AutoML/Classification.ipynb | 219 +++++++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 Auto-SKLearn_AutoML/Classification.ipynb

diff --git a/Auto-SKLearn_AutoML/Classification.ipynb b/Auto-SKLearn_AutoML/Classification.ipynb
new file mode 100644
index 0000000..17ec558
--- /dev/null
+++ b/Auto-SKLearn_AutoML/Classification.ipynb
@@ -0,0 +1,219 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Classification.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Classification Using Auto-SKLearn"
+      ],
+      "metadata": {
+        "id": "-I9i52jCjML_"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github.com/mcint170/DataTools_Tutorial_Demo/blob/main/Auto-SKLearn_AutoML/Classification.ipynb)"
+      ],
+      "metadata": {
+        "id": "-ZrgwiL9kR_L"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install auto-sklearn"
+      ],
+      "metadata": {
+        "id": "XAjlAHVRenet"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "After running this cell, Click Runtime -> Restart runtime. Then you can run the following cells."
+      ],
+      "metadata": {
+        "id": "yqIcMA8hgZ8W"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# imports\n",
+        "from pprint import pprint\n",
+        "\n",
+        "import sklearn.datasets\n",
+        "import sklearn.metrics\n",
+        "import pickle\n",
+        "\n",
+        "import autosklearn.classification"
+      ],
+      "metadata": {
+        "id": "BXuKNodQe7QZ"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# split the dataset\n",
+        "X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\n",
+        "X_train, X_test, y_train, y_test = \\\n",
+        "    sklearn.model_selection.train_test_split(X, y, random_state=1)"
+      ],
+      "metadata": {
+        "id": "ExulDsEAfAoO"
+      },
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Fit the classifier\n",
+        "automl = autosklearn.classification.AutoSklearnClassifier(\n",
+        "    time_left_for_this_task=120,\n",
+        "    per_run_time_limit=30,\n",
+        "    tmp_folder='/tmp/autosklearn_classification_example_tmp',\n",
+        ")\n",
+        "automl.fit(X_train, y_train, dataset_name='breast_cancer')"
+      ],
+      "metadata": {
+        "id": "-0zi5I38fNMM",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "e732438b-610c-4d82-bd38-b1a5497541c6"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "AutoSklearnClassifier(per_run_time_limit=30, time_left_for_this_task=120,\n",
+              "                      tmp_folder='/tmp/autosklearn_classification_example_tmp')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Different Models run by autosklearn\n",
+        "print(automl.leaderboard())"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SxtOkluYiVHe",
+        "outputId": "29e44357-b2cb-404d-a024-cda5bd61b65a"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "          rank  ensemble_weight                 type      cost  duration\n",
+            "model_id                                                                \n",
+            "7            1             0.10          extra_trees  0.014184  1.502508\n",
+            "2            2             0.02        random_forest  0.028369  2.024807\n",
+            "36           3             0.06  k_nearest_neighbors  0.028369  0.853534\n",
+            "26           4             0.04          extra_trees  0.028369  2.240347\n",
+            "19           5             0.02          extra_trees  0.028369  2.791073\n",
+            "22           6             0.02    gradient_boosting  0.028369  1.149980\n",
+            "3            7             0.14                  mlp  0.028369  1.667622\n",
+            "12           8             0.04    gradient_boosting  0.035461  1.240657\n",
+            "17           9             0.02    gradient_boosting  0.035461  1.510491\n",
+            "8           10             0.02        random_forest  0.035461  1.958862\n",
+            "37          11             0.06    gradient_boosting  0.035461  1.585859\n",
+            "5           12             0.04        random_forest  0.035461  2.075770\n",
+            "27          13             0.10          extra_trees  0.042553  1.910083\n",
+            "34          14             0.08        random_forest  0.042553  1.884860\n",
+            "9           15             0.04          extra_trees  0.042553  1.799630\n",
+            "23          16             0.02                  mlp  0.049645  2.405247\n",
+            "35          17             0.06          extra_trees  0.056738  1.586217\n",
+            "32          18             0.02          extra_trees  0.063830  1.650489\n",
+            "38          19             0.02          extra_trees  0.063830  2.128083\n",
+            "20          20             0.02   passive_aggressive  0.078014  0.774718\n",
+            "30          21             0.04             adaboost  0.078014  3.121010\n",
+            "29          22             0.02          gaussian_nb  0.141844  1.951357\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Show the different models\n",
+        "pprint(automl.show_models(), indent=4)"
+      ],
+      "metadata": {
+        "id": "25xOtCJ7icgh"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Predict the test labels\n",
+        "predictions = automl.predict(X_test)\n",
+        "print(\"Accuracy score:\", sklearn.metrics.accuracy_score(y_test, predictions))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XvbhWaZpidYt",
+        "outputId": "7a153d86-4d3b-474a-f867-8adf7e07318b"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Accuracy score: 0.9440559440559441\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Export the model with the highest rank\n",
+        "clf = automl.show_models()[7]['sklearn_classifier']\n",
+        "pickle.dump(clf,open('model.pickle','wb'))"
+      ],
+      "metadata": {
+        "id": "iCFcuh9EikR_"
+      },
+      "execution_count": 10,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
-- 
GitLab