diff --git a/Classification.ipynb b/Classification.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8cc48163f8a88f574ac1eea8f8d957d869cd36a7 --- /dev/null +++ b/Classification.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8459055e", + "metadata": {}, + "source": [ + "# Classification\n", + "\n", + "**_NOTE_** autosklearn only will run in linux (feb 26, 2022)\n", + "\n", + "Example coming from [here](https://automl.github.io/auto-sklearn/master/examples/20_basic/example_classification.html#sphx-glr-examples-20-basic-example-classification-py)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c69433ce", + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "from pprint import pprint\n", + "\n", + "import sklearn.datasets\n", + "import sklearn.metrics\n", + "import pickle\n", + "\n", + "import autosklearn.classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b1e1930", + "metadata": {}, + "outputs": [], + "source": [ + "# split the dataset\n", + "X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\n", + "X_train, X_test, y_train, y_test = \\\n", + " sklearn.model_selection.train_test_split(X, y, random_state=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15e5f821", + "metadata": {}, + "outputs": [], + "source": [ + "# Fit the classifier\n", + "automl = autosklearn.classification.AutoSklearnClassifier(\n", + " time_left_for_this_task=120,\n", + " per_run_time_limit=30,\n", + " tmp_folder='/tmp/autosklearn_classification_example_tmp',\n", + ")\n", + "automl.fit(X_train, y_train, dataset_name='breast_cancer')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d4e4d9f", + "metadata": {}, + "outputs": [], + "source": [ + "# Different Models run by autosklearn\n", + "print(automl.leaderboard())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72e580e7", + "metadata": {}, + "outputs": [], + "source": [ + "# Show the different models\n", + "pprint(automl.show_models(), indent=4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "027039cd", + "metadata": {}, + "outputs": [], + "source": [ + "# Predict the test labels\n", + "predictions = automl.predict(X_test)\n", + "print(\"Accuracy score:\", sklearn.metrics.accuracy_score(y_test, predictions))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acd372ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Export the model with the highest rank\n", + "clf = automl.show_models()[7]['sklearn_classifier']\n", + "pickle.dump(clf,open('model.pickle','wb'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3324782", + "metadata": {}, + "outputs": [], + "source": [ + "clf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "021b7159", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}