diff --git a/GAMA_AutoML_Tutorial.ipynb b/GAMA_AutoML_Tutorial.ipynb index 9b90d1f2eb218cbc0794e17ac87836c9cc771e95..1cbb73063d9e8e2debac5f648ac4a6a201145848 100644 --- a/GAMA_AutoML_Tutorial.ipynb +++ b/GAMA_AutoML_Tutorial.ipynb @@ -26,6 +26,20 @@ "Image source: https://github.com/openml-labs/gama/raw/master/images/logos/Logo-With-Grey-Name-Transparent.png" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "481fae1c", + "metadata": {}, + "source": [ + "## View Documentation At:\n", + "* https://openml-labs.github.io/gama/master/\n", + "* https://github.com/openml-labs/gama\n", + "* https://openml-labs.github.io/gama/master/index.html\n", + "* https://openml-labs.github.io/gama/master/user_guide/index.html#dashboard\n", + "* https://openml-labs.github.io/gama/master/api/index.html" + ] + }, { "cell_type": "markdown", "id": "25258b82", @@ -44,12 +58,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "bded45a9", "metadata": {}, "outputs": [], "source": [ - "# pip install gama" + "# pip install gama\n", + "# *or*\n", + "# pip3 install gama " ] }, { @@ -88,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "00a14ad0", "metadata": {}, "outputs": [], @@ -170,21 +186,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Starting `fit` which will take roughly 3 minutes...\n" - ] - }, - { - "ename": "IndexError", - "evalue": "list index out of range", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_25136/719449767.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mautoml\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mGamaClassifier\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmax_total_time\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m180\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstore\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"nothing\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Starting `fit` which will take roughly 3 minutes...\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 13\u001b[1;33m \u001b[0mautoml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 14\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[0mlabel_predictions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mautoml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\REPOS\\DataTools_Tutorial_Demo\\envs_gama\\lib\\site-packages\\gama\\GamaClassifier.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, *args, **kwargs)\u001b[0m\n\u001b[0;32m 132\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_label_encoder\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 133\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_evaluation_library\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdetermine_sample_indices\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstratify\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 134\u001b[1;33m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 135\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_encode_labels\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\REPOS\\DataTools_Tutorial_Demo\\envs_gama\\lib\\site-packages\\gama\\gama.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, warm_start)\u001b[0m\n\u001b[0;32m 537\u001b[0m )\n\u001b[0;32m 538\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_post_processing\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdynamic_defaults\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 539\u001b[1;33m self.model = self._post_processing.post_process(\n\u001b[0m\u001b[0;32m 540\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_x\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 541\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_y\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\REPOS\\DataTools_Tutorial_Demo\\envs_gama\\lib\\site-packages\\gama\\postprocessing\\best_fit.py\u001b[0m in \u001b[0;36mpost_process\u001b[1;34m(self, x, y, timeout, selection)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mList\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mIndividual\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 25\u001b[0m ) -> object:\n\u001b[1;32m---> 26\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_selected_individual\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mselection\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 27\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_selected_individual\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpipeline\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mIndexError\u001b[0m: list index out of range" + "Starting `fit` which will take roughly 3 minutes...\n", + "accuracy: 0.951048951048951\n", + "log loss: 0.13989498044372267\n" ] } ], @@ -222,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "35f68b4c", "metadata": {}, "outputs": [ @@ -230,20 +234,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Starting `fit` which will take roughly 3 minutes...\n" - ] - }, - { - "ename": "IndexError", - "evalue": "list index out of range", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mIndexError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_25136/3966760049.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[0mautoml\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mGamaRegressor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmax_total_time\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m180\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstore\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"nothing\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Starting `fit` which will take roughly 3 minutes...\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 11\u001b[1;33m \u001b[0mautoml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[0mpredictions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mautoml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\REPOS\\DataTools_Tutorial_Demo\\envs_gama\\lib\\site-packages\\gama\\gama.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, warm_start)\u001b[0m\n\u001b[0;32m 537\u001b[0m )\n\u001b[0;32m 538\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_post_processing\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdynamic_defaults\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 539\u001b[1;33m self.model = self._post_processing.post_process(\n\u001b[0m\u001b[0;32m 540\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_x\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 541\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_y\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m~\\REPOS\\DataTools_Tutorial_Demo\\envs_gama\\lib\\site-packages\\gama\\postprocessing\\best_fit.py\u001b[0m in \u001b[0;36mpost_process\u001b[1;34m(self, x, y, timeout, selection)\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSeries\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mList\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mIndividual\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 25\u001b[0m ) -> object:\n\u001b[1;32m---> 26\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_selected_individual\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mselection\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 27\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_selected_individual\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpipeline\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mIndexError\u001b[0m: list index out of range" + "Starting `fit` which will take roughly 3 minutes...\n", + "MSE: 17.566521076771657\n" ] } ], @@ -282,25 +274,6 @@ "\n" ] }, - { - "cell_type": "markdown", - "id": "ffdabb07", - "metadata": {}, - "source": [ - "## References" - ] - }, - { - "cell_type": "markdown", - "id": "500eb430", - "metadata": {}, - "source": [ - "* https://github.com/openml-labs/gama\n", - "* https://openml-labs.github.io/gama/master/index.html\n", - "* https://openml-labs.github.io/gama/master/user_guide/index.html#dashboard\n", - "* https://openml-labs.github.io/gama/master/api/index.html" - ] - }, { "cell_type": "markdown", "id": "e0a28b97", @@ -312,7 +285,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -326,7 +299,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.0" + }, + "vscode": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + } } }, "nbformat": 4,