diff --git a/Auto-SKLearn_AutoML/Classification.ipynb b/Auto-SKLearn_AutoML/Classification.ipynb index 8908132d3548381bafbf08857dfba18dca59a159..a7f8b9eab53c7b7ce9f8a185e3c32d0a99d648ad 100644 --- a/Auto-SKLearn_AutoML/Classification.ipynb +++ b/Auto-SKLearn_AutoML/Classification.ipynb @@ -7,98 +7,49 @@ "source": [ "# Classification\n", "\n", - "**_NOTE_** autosklearn only will run in linux (feb 26, 2022)\n", + "**_NOTE_** \n", + "\n", + "The module `autosklearn` will only run in Linux environments, such as Google Collab or Jupyter Hub. Attempting to run this notebook will fail if you are not in a Linux environment.\n", "\n", "Example coming from [here](https://automl.github.io/auto-sklearn/master/examples/20_basic/example_classification.html#sphx-glr-examples-20-basic-example-classification-py)" ] }, - { - "cell_type": "markdown", - "id": "c5dad4c0", - "metadata": {}, - "source": [ - "**Classification doesn't work with current version of scipy/github and requires different packages/updates to run notebook**\n", - "- Note from professor Colbry: Notebook can't be fixed in classtime, write note of what needs to be fixed and push to gitlab as is." - ] - }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "c69433ce", "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'autosklearn'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m<ipython-input-1-13292483524a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpickle\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mautosklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclassification\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'autosklearn'" - ] - } - ], + "outputs": [], "source": [ - "# import\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.datasets import fetch_openml\n", - "import autosklearn.classification\n", "import pandas as pd\n", "\n", "import sklearn.datasets\n", "import sklearn.metrics\n", - "# Fixed import model_selection\n", "import sklearn.model_selection\n", "import pickle\n", "\n", - "import autosklearn.classification #cannot import classification from autosklearn" + "import autosklearn.classification " ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "642c1f1a", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ERROR: Invalid requirement: '#loading'\n" - ] - } - ], + "outputs": [], "source": [ "pip install auto-sklearn #loading infinitely" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "2b1e1930", "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "module 'sklearn' has no attribute 'model_selection'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m<ipython-input-3-935236cbcd87>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdatasets\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mload_breast_cancer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mreturn_X_y\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel_selection\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrandom_state\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m: module 'sklearn' has no attribute 'model_selection'" - ] - } - ], + "outputs": [], "source": [ "# split the dataset\n", "X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\n", @@ -108,22 +59,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "15e5f821", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'autosklearn' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m<ipython-input-6-6c1473e893d3>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Fit the classifier\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m automl = autosklearn.classification.AutoSklearnClassifier(\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mtime_left_for_this_task\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m120\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mper_run_time_limit\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m30\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mtmp_folder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'/tmp/autosklearn_classification_example_tmp'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mNameError\u001b[0m: name 'autosklearn' is not defined" - ] - } - ], + "outputs": [], "source": [ "# Fit the classifier\n", "automl = autosklearn.classification.AutoSklearnClassifier(\n", @@ -136,22 +75,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "2d4e4d9f", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'automl' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-11-6dfffdcd8374>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Different Models run by autosklearn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleaderboard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" - ] - } - ], + "outputs": [], "source": [ "# Different Models run by autosklearn\n", "print(automl.leaderboard())" @@ -159,22 +86,10 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "72e580e7", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'automl' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-12-ab76765f6a20>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Show the different models\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow_models\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" - ] - } - ], + "outputs": [], "source": [ "# Show the different models\n", "pprint(automl.show_models(), indent=4)" @@ -182,22 +97,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "027039cd", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'automl' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-13-596897413c8d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Predict the test labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Accuracy score:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" - ] - } - ], + "outputs": [], "source": [ "# Predict the test labels\n", "predictions = automl.predict(X_test)\n", @@ -206,22 +109,10 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "acd372ea", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'automl' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-14-14e40d77d77d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Export the model with the highest rank\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow_models\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sklearn_classifier'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'model.pickle'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" - ] - } - ], + "outputs": [], "source": [ "# Export the model with the highest rank\n", "clf = automl.show_models()[7]['sklearn_classifier']\n", @@ -230,22 +121,10 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "a3324782", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'clf' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-15-b9c89d294f77>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'clf' is not defined" - ] - } - ], + "outputs": [], "source": [ "clf" ] @@ -261,7 +140,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -275,7 +154,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.9.12" } }, "nbformat": 4,