From fb035e879079cc5a34a1ca55a50192ce7171273d Mon Sep 17 00:00:00 2001
From: buckl113 <39534448+buckl113@users.noreply.github.com>
Date: Fri, 4 Mar 2022 15:18:00 -0500
Subject: [PATCH] Added compatibility with Google Colab

---
 Auto-SKLearn_AutoML/Classification.ipynb | 222 +++++++++++++++++++++++
 1 file changed, 222 insertions(+)
 create mode 100644 Auto-SKLearn_AutoML/Classification.ipynb

diff --git a/Auto-SKLearn_AutoML/Classification.ipynb b/Auto-SKLearn_AutoML/Classification.ipynb
new file mode 100644
index 0000000..17ec558
--- /dev/null
+++ b/Auto-SKLearn_AutoML/Classification.ipynb
@@ -0,0 +1,222 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Classification.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Classification Using Auto-SKLearn"
+      ],
+      "metadata": {
+        "id": "-I9i52jCjML_"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mcint170/DataTools_Tutorial_Demo/blob/main/Auto-SKLearn_AutoML/Classification.ipynb)"
+      ],
+      "metadata": {
+        "id": "-ZrgwiL9kR_L"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%pip install auto-sklearn"
+      ],
+      "metadata": {
+        "id": "XAjlAHVRenet"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "After running the cell above, click Runtime -> Restart runtime. Then you can run the following cells."
+      ],
+      "metadata": {
+        "id": "yqIcMA8hgZ8W"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# imports (standard library first, then third-party)\n",
+        "import pickle\n",
+        "from pprint import pprint\n",
+        "\n",
+        "import autosklearn.classification\n",
+        "import sklearn.datasets\n",
+        "import sklearn.metrics\n",
+        "import sklearn.model_selection"
+      ],
+      "metadata": {
+        "id": "BXuKNodQe7QZ"
+      },
+      "execution_count": 4,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load the breast cancer dataset and split it into train/test sets (fixed random_state for repeatable splits)\n",
+        "X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\n",
+        "X_train, X_test, y_train, y_test = \\\n",
+        "    sklearn.model_selection.train_test_split(X, y, random_state=1)"
+      ],
+      "metadata": {
+        "id": "ExulDsEAfAoO"
+      },
+      "execution_count": 5,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Fit the classifier (120 s total search budget, at most 30 s per candidate model)\n",
+        "automl = autosklearn.classification.AutoSklearnClassifier(\n",
+        "    time_left_for_this_task=120,\n",
+        "    per_run_time_limit=30,\n",
+        "    tmp_folder='/tmp/autosklearn_classification_example_tmp',\n",
+        ")\n",
+        "automl.fit(X_train, y_train, dataset_name='breast_cancer')"
+      ],
+      "metadata": {
+        "id": "-0zi5I38fNMM",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "e732438b-610c-4d82-bd38-b1a5497541c6"
+      },
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "AutoSklearnClassifier(per_run_time_limit=30, time_left_for_this_task=120,\n",
+              " tmp_folder='/tmp/autosklearn_classification_example_tmp')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Different Models run by autosklearn\n",
+        "print(automl.leaderboard())"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SxtOkluYiVHe",
+        "outputId": "29e44357-b2cb-404d-a024-cda5bd61b65a"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            " rank ensemble_weight type cost duration\n",
+            "model_id \n",
+            "7 1 0.10 extra_trees 0.014184 1.502508\n",
+            "2 2 0.02 random_forest 0.028369 2.024807\n",
+            "36 3 0.06 k_nearest_neighbors 0.028369 0.853534\n",
+            "26 4 0.04 extra_trees 0.028369 2.240347\n",
+            "19 5 0.02 extra_trees 0.028369 2.791073\n",
+            "22 6 0.02 gradient_boosting 0.028369 1.149980\n",
+            "3 7 0.14 mlp 0.028369 1.667622\n",
+            "12 8 0.04 gradient_boosting 0.035461 1.240657\n",
+            "17 9 0.02 gradient_boosting 0.035461 1.510491\n",
+            "8 10 0.02 random_forest 0.035461 1.958862\n",
+            "37 11 0.06 gradient_boosting 0.035461 1.585859\n",
+            "5 12 0.04 random_forest 0.035461 2.075770\n",
+            "27 13 0.10 extra_trees 0.042553 1.910083\n",
+            "34 14 0.08 random_forest 0.042553 1.884860\n",
+            "9 15 0.04 extra_trees 0.042553 1.799630\n",
+            "23 16 0.02 mlp 0.049645 2.405247\n",
+            "35 17 0.06 extra_trees 0.056738 1.586217\n",
+            "32 18 0.02 extra_trees 0.063830 1.650489\n",
+            "38 19 0.02 extra_trees 0.063830 2.128083\n",
+            "20 20 0.02 passive_aggressive 0.078014 0.774718\n",
+            "30 21 0.04 adaboost 0.078014 3.121010\n",
+            "29 22 0.02 gaussian_nb 0.141844 1.951357\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Show the different models\n",
+        "pprint(automl.show_models(), indent=4)"
+      ],
+      "metadata": {
+        "id": "25xOtCJ7icgh"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Predict the test labels\n",
+        "predictions = automl.predict(X_test)\n",
+        "print(\"Accuracy score:\", sklearn.metrics.accuracy_score(y_test, predictions))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XvbhWaZpidYt",
+        "outputId": "7a153d86-4d3b-474a-f867-8adf7e07318b"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Accuracy score: 0.9440559440559441\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Export the model with the highest rank.\n",
+        "# Its id is read from the first leaderboard row instead of being hard-coded: model ids change between runs.\n",
+        "best_model_id = int(automl.leaderboard().index[0])\n",
+        "clf = automl.show_models()[best_model_id]['sklearn_classifier']\n",
+        "with open('model.pickle', 'wb') as model_file:\n",
+        "    pickle.dump(clf, model_file)"
+      ],
+      "metadata": {
+        "id": "iCFcuh9EikR_"
+      },
+      "execution_count": 10,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
-- 
GitLab