diff --git a/AudioDataTutorial.ipynb b/AudioDataTutorial.ipynb index 7e3b68a16868924111ac72c76de34eacb3096fee..d7ca78bc27e37e670ddb44b85d01d12272c9f0b0 100644 --- a/AudioDataTutorial.ipynb +++ b/AudioDataTutorial.ipynb @@ -47,6 +47,9 @@ "\n", "jtplot.style()\n", "\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", "%matplotlib inline" ] }, @@ -72,7 +75,7 @@ "source": [ "import requests\n", "\n", - "url = 'http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/Microsoft/6_Channel_ID.wav'\n", + "url = 'http://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/Microsoft/6_Channel_ID.wav'\n", "file='6_Channel_ID.wav'\n", "r = requests.get(url, allow_redirects=True)\n", "open(file, 'wb').write(r.content)" @@ -95,9 +98,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "Fs, data = read('6_Channel_ID.wav')\n", @@ -264,6 +265,7 @@ "plt.xlabel('Sample Index')\n", "plt.ylabel('Amplitude')\n", "plt.title('Waveform of Test Audio')\n", + "plt.legend(['FL', 'FR', 'FC', 'LF', 'BF', 'BR'], bbox_to_anchor=(1.05, 1.0), loc='upper left')\n", "plt.show()" ] }, @@ -299,7 +301,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.2" + "version": "3.9.15" } }, "nbformat": 4, diff --git a/Auto-SKLearn_AutoML/Classification.ipynb b/Auto-SKLearn_AutoML/Classification.ipynb index d4dfef7e13a64b026fb5f08334a570d8dfaba3fc..57391f91c37791f41850721ac11e14e56af26d18 100644 --- a/Auto-SKLearn_AutoML/Classification.ipynb +++ b/Auto-SKLearn_AutoML/Classification.ipynb @@ -1,226 +1,4 @@ { -<<<<<<< HEAD - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Classification.ipynb", - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# 
Classification Using Auto-SKLearn", - "\n", - "**_NOTE_** autosklearn only will run in linux (feb 26, 2022)\n", - "\n" - ], - "metadata": { - "id": "-I9i52jCjML_" - } - }, - { - "cell_type": "markdown", - "source": [ - "[](https://colab.research.google.com/github/mcint170/DataTools_Tutorial_Demo/blob/main/Auto-SKLearn_AutoML/Classification.ipynb)" - ], - "metadata": { - "id": "-ZrgwiL9kR_L" - } - }, - { - "cell_type": "code", - "source": [ - "!pip install auto-sklearn" - ], - "metadata": { - "id": "XAjlAHVRenet" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "If running on Google Colab: After running this cell, Click Runtime -> Restart runtime. Then you can run the following cells." - ], - "metadata": { - "id": "yqIcMA8hgZ8W" - } - }, - { - "cell_type": "code", - "source": [ - "# imports\n", - "from pprint import pprint\n", - "\n", - "import sklearn.datasets\n", - "import sklearn.metrics\n", - "import pickle\n", - "\n", - "import autosklearn.classification" - ], - "metadata": { - "id": "BXuKNodQe7QZ" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# split the dataset\n", - "X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)\n", - "X_train, X_test, y_train, y_test = \\\n", - " sklearn.model_selection.train_test_split(X, y, random_state=1)" - ], - "metadata": { - "id": "ExulDsEAfAoO" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Fit the classifier\n", - "automl = autosklearn.classification.AutoSklearnClassifier(\n", - " time_left_for_this_task=120,\n", - " per_run_time_limit=30,\n", - " tmp_folder='/tmp/autosklearn_classification_example_tmp',\n", - ")\n", - "automl.fit(X_train, y_train, dataset_name='breast_cancer')" - ], - "metadata": { - "id": "-0zi5I38fNMM", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "e732438b-610c-4d82-bd38-b1a5497541c6" - }, - "execution_count": 6, - "outputs": 
[ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "AutoSklearnClassifier(per_run_time_limit=30, time_left_for_this_task=120,\n", - " tmp_folder='/tmp/autosklearn_classification_example_tmp')" - ] - }, - "metadata": {}, - "execution_count": 6 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Different Models run by autosklearn\n", - "print(automl.leaderboard())" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "SxtOkluYiVHe", - "outputId": "29e44357-b2cb-404d-a024-cda5bd61b65a" - }, - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " rank ensemble_weight type cost duration\n", - "model_id \n", - "7 1 0.10 extra_trees 0.014184 1.502508\n", - "2 2 0.02 random_forest 0.028369 2.024807\n", - "36 3 0.06 k_nearest_neighbors 0.028369 0.853534\n", - "26 4 0.04 extra_trees 0.028369 2.240347\n", - "19 5 0.02 extra_trees 0.028369 2.791073\n", - "22 6 0.02 gradient_boosting 0.028369 1.149980\n", - "3 7 0.14 mlp 0.028369 1.667622\n", - "12 8 0.04 gradient_boosting 0.035461 1.240657\n", - "17 9 0.02 gradient_boosting 0.035461 1.510491\n", - "8 10 0.02 random_forest 0.035461 1.958862\n", - "37 11 0.06 gradient_boosting 0.035461 1.585859\n", - "5 12 0.04 random_forest 0.035461 2.075770\n", - "27 13 0.10 extra_trees 0.042553 1.910083\n", - "34 14 0.08 random_forest 0.042553 1.884860\n", - "9 15 0.04 extra_trees 0.042553 1.799630\n", - "23 16 0.02 mlp 0.049645 2.405247\n", - "35 17 0.06 extra_trees 0.056738 1.586217\n", - "32 18 0.02 extra_trees 0.063830 1.650489\n", - "38 19 0.02 extra_trees 0.063830 2.128083\n", - "20 20 0.02 passive_aggressive 0.078014 0.774718\n", - "30 21 0.04 adaboost 0.078014 3.121010\n", - "29 22 0.02 gaussian_nb 0.141844 1.951357\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Show the different models\n", - "pprint(automl.show_models(), indent=4)" - ], - "metadata": { - "id": "25xOtCJ7icgh" - }, - "execution_count": 
null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Predict the test labels\n", - "predictions = automl.predict(X_test)\n", - "print(\"Accuracy score:\", sklearn.metrics.accuracy_score(y_test, predictions))" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "XvbhWaZpidYt", - "outputId": "7a153d86-4d3b-474a-f867-8adf7e07318b" - }, - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Accuracy score: 0.9440559440559441\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Export the model with the highest rank\n", - "clf = automl.show_models()[7]['sklearn_classifier']\n", - "pickle.dump(clf,open('model.pickle','wb'))" - ], - "metadata": { - "id": "iCFcuh9EikR_" - }, - "execution_count": 10, - "outputs": [] - } - ] -======= "cells": [ { "cell_type": "markdown", @@ -234,26 +12,56 @@ "Example coming from [here](https://automl.github.io/auto-sklearn/master/examples/20_basic/example_classification.html#sphx-glr-examples-20-basic-example-classification-py)" ] }, + { + "cell_type": "markdown", + "id": "c5dad4c0", + "metadata": {}, + "source": [ + "**Classification doesn't work with current version of scipy/github and requires different packages/updates to run notebook**\n", + "- Note from professor Colbry: Notebook can't be fixed in classtime, write note of what needs to be fixed and push to gitlab as is." 
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "c69433ce", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'apply' from 'dask.compatibility' (/home/weinbren/.local/lib/python3.8/site-packages/dask/compatibility.py)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-23-910f3a285265>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mautosklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassification\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/autosklearn/classification.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mautosklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mestimators\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mAutoSklearnClassifier\u001b[0m \u001b[0;31m# noqa (imported but unused)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/autosklearn/estimators.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0;32mimport\u001b[0m 
\u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdistributed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 20\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mjoblib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/dask/distributed.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mdistributed\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mImportError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmsg\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"No module named 'distributed'\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/distributed/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_version\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mget_versions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mactor\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mActor\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mActorFuture\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m from .client import (\n\u001b[1;32m 9\u001b[0m \u001b[0mClient\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/distributed/actor.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mqueue\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mQueue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mFuture\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault_client\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mto_serialize\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0miscoroutinefunction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msync\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthread_state\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/anaconda3/lib/python3.8/site-packages/distributed/client.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdask\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcollections_to_dsk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnormalize_token\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mtokenize\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompatibility\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mapply\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mflatten\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhighlevelgraph\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mHighLevelGraph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'apply' from 'dask.compatibility' (/home/weinbren/.local/lib/python3.8/site-packages/dask/compatibility.py)" + ] + } + ], "source": [ "# imports\n", "from pprint import pprint\n", "\n", "import sklearn.datasets\n", "import sklearn.metrics\n", + "# Fixed import model_selection\n", + "import sklearn.model_selection\n", "import pickle\n", "\n", - "import autosklearn.classification" + "# This does not work on current version of github, needs update. 
\n", + "import autosklearn.classification\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "2b1e1930", "metadata": {}, "outputs": [], @@ -266,10 +74,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "15e5f821", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "AttributeError", + "evalue": "module 'autosklearn' has no attribute 'classification'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-22-6c1473e893d3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Fit the classifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m automl = autosklearn.classification.AutoSklearnClassifier(\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mtime_left_for_this_task\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m120\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mper_run_time_limit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mtmp_folder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'/tmp/autosklearn_classification_example_tmp'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: module 'autosklearn' has no attribute 'classification'" + ] + } + ], "source": [ "# Fit the classifier\n", "automl = autosklearn.classification.AutoSklearnClassifier(\n", @@ -282,10 +102,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "2d4e4d9f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + 
"evalue": "name 'automl' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-11-6dfffdcd8374>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Different Models run by autosklearn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleaderboard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" + ] + } + ], "source": [ "# Different Models run by autosklearn\n", "print(automl.leaderboard())" @@ -293,10 +125,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "72e580e7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'automl' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-12-ab76765f6a20>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Show the different models\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow_models\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" + ] + } + ], "source": [ "# Show the different models\n", "pprint(automl.show_models(), indent=4)" @@ -304,10 +148,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "027039cd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'automl' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-13-596897413c8d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Predict the test labels\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mpredictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Accuracy score:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maccuracy_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" + ] + } + ], "source": [ "# Predict the test labels\n", "predictions = automl.predict(X_test)\n", @@ -316,10 +172,22 @@ }, { "cell_type": "code", - "execution_count": null, + 
"execution_count": 14, "id": "acd372ea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'automl' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-14-14e40d77d77d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Export the model with the highest rank\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mclf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mautoml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow_models\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m7\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sklearn_classifier'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdump\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'model.pickle'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'automl' is not defined" + ] + } + ], "source": [ "# Export the model with the highest rank\n", "clf = automl.show_models()[7]['sklearn_classifier']\n", @@ -328,10 +196,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "a3324782", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'clf' is not defined", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-15-b9c89d294f77>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mclf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'clf' is not defined" + ] + } + ], "source": [ "clf" ] @@ -347,7 +227,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -361,10 +241,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 ->>>>>>> 7e6d5ac (Adding minor comments and updates to AutoML tutorial) } diff --git a/GUI_Tutorial.ipynb b/GUI_Tutorial.ipynb index 63e37c787ee14a4bb4fb6bf69aed06ac0022bb04..cf8fe234bf93fdbe7bb55888bfe4a06bda199941 100644 --- a/GUI_Tutorial.ipynb +++ b/GUI_Tutorial.ipynb @@ -2,7 +2,6 @@ "cells": [ { "cell_type": "markdown", - "id": "7227f3fa", "metadata": {}, "source": [ "## Gradio: How you can build a GUI within a Jupyter Notebook\n", @@ -12,7 +11,17 @@ }, { "cell_type": "markdown", - "id": "ba81c68d", + "metadata": {}, + "source": [ + "## GUI Background\n", + "A graphical user interface (GUI) is an interface through which a user interacts with electronic devices such as computers and smartphones through the use of icons, menus and other visual indicators or representations (graphics). GUIs graphically display information and related user controls, unlike text-based interfaces, where data and commands are strictly in text. 
GUI representations are manipulated by a pointing device such as a mouse, trackball, stylus, or by a finger on a touch screen.\n", + "\n", + "Source: \n", + "https://www.techopedia.com/definition/5435/graphical-user-interface-gui" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Getting Started\n", @@ -24,7 +33,9 @@ "cell_type": "code", "execution_count": 1, "id": "cdfdbb3e", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", @@ -38,6 +49,14 @@ "!python --version" ] }, + { + "cell_type": "markdown", + "id": "b8de36df", + "metadata": {}, + "source": [ + "On your terminal," + ] + }, { "cell_type": "code", "execution_count": 2, @@ -50,7 +69,6 @@ }, { "cell_type": "markdown", - "id": "f76c9232", "metadata": {}, "source": [ "Next, import the library as follows:" @@ -70,7 +88,6 @@ }, { "cell_type": "markdown", - "id": "c3d4eb50", "metadata": {}, "source": [ "Gradio can be used with a wide range of media-text, pictures, video, and sound. It is most useful for demonstrating machine learning algorithms.\n", @@ -126,7 +143,6 @@ }, { "cell_type": "markdown", - "id": "8469e7e7", "metadata": {}, "source": [ "### The Interface\n", @@ -213,7 +229,6 @@ }, { "cell_type": "markdown", - "id": "94a5d6f2", "metadata": {}, "source": [ "Gradio can load in data, similar to pandas frames, by using the command `gradio.inputs.Dataframe(data_name)`. It can only take in strings, numbers, bools, and dates as data types. Gradio does not contain a library of datasets, so data must be input by the user. 
It can also work with time series, images, audio, video, and generic file uploads.\n", @@ -236,7 +251,6 @@ }, { "cell_type": "markdown", - "id": "17bf993f", "metadata": {}, "source": [ "An example of a question could be, what is the conclusion of the sentence: Today will be a good day.\n", @@ -248,7 +262,6 @@ }, { "cell_type": "markdown", - "id": "1196a4c1", "metadata": {}, "source": [ "How this data or tool could be used in some of the team projects (maybe not your own)\n", @@ -268,7 +281,6 @@ }, { "cell_type": "markdown", - "id": "500e9ab6", "metadata": {}, "source": [ "Sources:<br>\n", @@ -280,7 +292,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/GoogleSheetsTutorial.ipynb b/GoogleSheetsTutorial.ipynb index 02b4037fac8a80a14efdca8d123d7d952bcf362a..e1623d9304d4c8cc250125e28345be3bd6dea8cd 100644 --- a/GoogleSheetsTutorial.ipynb +++ b/GoogleSheetsTutorial.ipynb @@ -34,19 +34,15 @@ " 1. This can be found at https://console.developers.google.com/projectselector2/apis/credentials?pli=1&supportedpurview=project \n", " (you must use your personal email not your MSU email)\n", "2. Inside that project enable the Google sheets API and Google Drive for your new project\n", - " 1. Use the search bar to find and click on the Google Sheets API.\n", - " 2. Enable the API by selecting the blue button.\n", - " 3. Repeat the first two steps for the Google Drive API.\n", - "3. Create a Google service account through the Google developer portal\n", - "4. Go to “APIs & Services > Credentials†- 4.1 At the top of the screen select “Create credentials > Service account keyâ€.\n", - "5. Once you open the "Create service account" screen, fill out the name and ID fields and click "Create and Continue" to move to the next section. After filling out "1. Service account details" under "2. 
Grant this service account access to project" navigate to the "Role" drop down and choose "Basic" -> 'Editor." After you select the role, hit "Continue." You can skip the next section, "Grant users access to this service account" and click done. \n", - "6. Click “Create†and “Doneâ€.\n", - "7. Press “Manage service accounts†above Service Accounts.\n", - "8. Press on â‹® near recently created service account and select “Manage keys†and then click on “ADD KEY > Create new keyâ€.\n", - "9. Select JSON key type and press “Createâ€.\n", - "10. In your python file import gspread\n", - "11. Copy Json key into python file as a dictionary\n", + " 1. This is found by using the search bar to search for Google API and Google Drive\n", + "3. Go to “APIs & Services > Credentials†and choose “Create credentials > Service account keyâ€.\n", + "4. Fill out the form (making sure to add editor privledges to the service account)\n", + "5. Click “Create†and “Doneâ€.\n", + "6. Press “Manage service accounts†above Service Accounts.\n", + "7. Press on â‹® near recently created service account and select “Manage keys†and then click on “ADD KEY > Create new keyâ€.\n", + "8. Select JSON key type and press “Createâ€.\n", + "9. In your python file import gspread\n", + "10. Copy Json key into python file as a dictionary\n", "\n", "\n", "for more information/ documentation on gspread head here\n", diff --git a/GoogleSheetsTutorial1.ipynb b/GoogleSheetsTutorial1.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c9d2d6e51b5d8a1175337a99648a179f9a862239 --- /dev/null +++ b/GoogleSheetsTutorial1.ipynb @@ -0,0 +1,74 @@ +# Google Sheets API Tutorial + +--- + + + +--- + + + +### How to Get Started + +1. Create a project inside Google Developer Console + 1. This can be found at https://console.developers.google.com/projectselector2/apis/credentials?pli=1&supportedpurview=project + (you must use your personal email not your MSU email) +2. 
Inside that project enable the Google Sheets API and Google Drive for your new project + 1. This is found by using the search bar to search for Google API and Google Drive +3. Create a Google service account through the Google developer portal +4. Go to “APIs & Services > Credentials” and choose “Create credentials > Service account key”. +5. Fill out the form (making sure to add editor privileges to the service account) +6. Click “Create” and “Done”. +7. Press “Manage service accounts” above Service Accounts. +8. Press on ⋮ near recently created service account and select “Manage keys” and then click on “ADD KEY > Create new key”. +9. Select JSON key type and press “Create”. +10. In your python file import gspread +11. Copy JSON key into python file as a dictionary + + +For more information/documentation on gspread, head here +https://docs.gspread.org/en/latest/oauth2.html + +!pip install gspread + +import gspread +import json + +## Testing out the API + +credentials = { + "type": "service_account", + "project_id": "d2l-instructor-api", + "private_key_id": "deb126d3a82a7dc84fea187d04cd37e871b5e6fc", + "private_key": "-----BEGIN PRIVATE
KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDUsc6loi+P4gzn\nkGdemmh3Mg1GIOIHnfbqDI8rZebakwuHDMqGp9cmEk1rP4NehR0PXRoE5j7DZs7z\n3gzTMbV75q1JmL6gRyCl0FQvwTCDEN5lst1NodjH6Pg1j61fpszLsNHe4EVyvaXH\nrTbwJvLARQlP+TQyGaTZiK8IXF3H0tOA/opOtuQEZQt9oIeae1TTM2mcUAg6MhAL\nL2/PkleLXqYdKJT6OhZyJgWwljyxg/0llxqz+yiEmJrkbn6r9taP2GzAFgqVpacN\nFF9idd+SCzgQ2eC6evAkdUibC6xH+TLZDo/uBwWS1AfBA38++Kn3y1caRvBNQPQ9\nDm6RRTrrAgMBAAECggEAIcojna2v56hOS4Vl3qIiIXmKoU79CZ+/R7x6DDdyntvI\nc4qFLDJC3FIBCZ479QqprLSqOwgHBYzyKMzoda7KeaYSdoQ1GIpkNeNsVG/ZEKFf\n64EoZjplkZDByeSK8wyxMVTkaodvkQRu71NlzG4rl38ANTvOXn0zcrmjsOzXmzRp\nb96Y5v71fypDzmDPnWB3+hbNEynYcJMWOxBOqt7T4TMxIpixdEcVyY/HMmsrtEHO\ng2InK+XdVbdPOal/woZhP/lc88jHAm9uD5yYZArwtOka4PuZvbVxT7aQItu3w7wh\n72u90TEo3KzuJBYnZb2RwitW6B1AQtea8WiTeVeftQKBgQDqu257OzPOjsLRCoim\nPZcF7zwVgZEs68fuWf5xjfT4AeDfYqvz1t6VfRUPunNOFZEsnnEcrcIIwa1gQDpR\nLlPA1ncoVu75yfS2XUQE6anbcFWmbZQU3m4BwE6Hjk0ftdP14e8Br7XdBN2TOg1E\nqtM7yF812COF8PEeCZPn6jgkfwKBgQDn9ze+Y0Rrxfgdl4FpvJZH963UlCOwuLVs\n6oFqXcGMbzeRmTxLDjkQ/cf6BqaTAfPNwc/PLrs26MGtLIcN24Lnie8q4nPrzCjr\nGq7A6km0ceurCQTj6VkkTy5E/fIh0og9A2BKXo1/G+pcv//LRUF8Tie8aTenlTl6\nomIZupWDlQKBgQDYNwPCcTr8RhX/VbOfZVYKOl5e9PUTqq+DxtOQJ5GBLMHFIm9/\ncOmgSxIuZbE2OLl7nwpTv13ekQAxi5fsFT9CfopN1x3TaoqFvI0d9VnnbqcGYSMo\nweYUqN3tEU/LKMJwV8e/Bun/By9OIQf9u1hNVfkVcNzv0ItY/ruFwQIr4wKBgEzA\n01QO2pPilH+OIwcOnJdBj+YYAls2MunQCnRcg29pWXS2xGl9UkqZ/nJvgux/p+E8\nMPu31cCMcZFQe3uNV5ovXNDOz1aFXkC0uoAgxbSMQVN9j32uN425GXFAqb6hP+0Y\nUeMpuB4H6Zs4+HWU+98aqTFBi2XLDZLTaixjPZelAoGBAM+mmJ9Cvnng2z5o8qTq\nMHf5ZRJl86PWQ/RxK/2r/Z8oGzm64O/5QD92YUlVfxA18rj/6pvz9rgX/tO9P7ec\n9xU+xVomRGMehiPdzUwwb0ZVqVnKgTIhACr2HNj2WuBcdcb6sqoYgBmAN1bjLu5n\n3fdTrl8aAH8TUW3uu2Rdeqql\n-----END PRIVATE KEY-----\n", + "client_email": "d2lapi@d2l-instructor-api.iam.gserviceaccount.com", + "client_id": "109123177444430472879", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": 
"https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/d2lapi%40d2l-instructor-api.iam.gserviceaccount.com" +} + +gc = gspread.service_account_from_dict(credentials) +sheet_id = "1vt5SnUuAojBLPug41dBs3_LKpyVM2rxrJw6MwAQ1_c0" +gsheet = gc.open_by_key(sheet_id) +worksheet_list = gsheet.worksheets() + + + +for sheet in worksheet_list: + sheet_name = sheet.title + # if not check_date(sheet_name): # if attendance from different month + # continue + sheet_data = gsheet.worksheet(sheet_name).get_all_records() + print(sheet_data) + +import pandas as pd + +dataframe = pd.DataFrame(gsheet.worksheet(sheet_name).get_all_records()) +dataframe.loc[1, "Hobby"] = "Watching K-Dramas" +gsheet.worksheet(sheet_name).update([dataframe.columns.values.tolist()] + dataframe.values.tolist()) + + + any other questions refer to https://docs.gspread.org/en/latest/user-guide.html + diff --git a/Video-Image-Data-Tutorial/Ford_Video_Analysis.ipynb b/Video-Image-Data-Tutorial/Ford_Video_Analysis.ipynb index b106c899cf2afe2a557fd6c4dd7f3faafb7a12f9..6ad91ea7e6b75cd579fa491fc6a08f05727d1d83 100644 --- a/Video-Image-Data-Tutorial/Ford_Video_Analysis.ipynb +++ b/Video-Image-Data-Tutorial/Ford_Video_Analysis.ipynb @@ -1,4 +1,237 @@ { +<<<<<<< HEAD + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "OqSmFS3lZj9Y" + }, + "source": [ + "# <center> Video Image Data </center>\n", + "#### CMSE 495 Ford Group\n", + "\n", + "This tutorial teaches the user how to input a video file, such a mp4 and convert each frame of the video into a jpeg image using python, primarily in a Jupyter notebook." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uey1neRTkRwd" + }, + "source": [ + "[](https://colab.research.google.com/github/pathakis/DataTools_Tutorial_Demo/blob/main/Video-Image-Data-Tutorial/Ford_Video_Analysis.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GQIJxk_kdjT1" + }, + "source": [ + "<b> Environment Setup (Makefile):</b>\n", + "- Use the command 'make innit' to automatically set up the environment for you.\n", + "\n", + "<b> Environment Setup (Manual):</b>\n", + "- Set up a new environment using pip/conda (Conda Recommended). Use command \n", + "\n", + " <code> conda create -n envs python=3.10 </code>\n", + "\n", + "- Activate your new environment. Use command \n", + "\n", + " <code> conda activate envs</code>\n", + "\n", + "- Install the requisite packages. Use command \n", + "\n", + " <code> pip install opencv-python</code> or,\n", + "\n", + " <code> conda install -c conda-forge opencv</code>\n", + "\n", + "<b> Usage Instructions:</b>\n", + "\n", + "- The example call shows the format in which this function may be used.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f0aUW4PLdobE" + }, + "source": [ + "This process uses 2 packages called [os](https://docs.python.org/3/library/os.html) and [cv2](https://pypi.org/project/opencv-python/). The os package provides miscellaneous operating system interfaces, such as opening and reading files." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JGkN_k3BgXV8" + }, + "outputs": [], + "source": [ + "# !pip install opencv-python" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PCbpVR-HZzmt" + }, + "outputs": [], + "source": [ + "import cv2\n", + "import os\n", + "import glob\n", + "import urllib.request\n", + "\n", + "\n", + "def video_to_frames(file_path, directory_path, greyscale = False):\n", + "\n", + " '''This function will change a video file to a frames'''\n", + " \n", + " #opening the video\n", + " vidcap = cv2.VideoCapture(file_path) \n", + " \n", + " dirname = directory_path\n", + " os.makedirs(dirname, exist_ok=True)\n", + " \n", + " #capturing a frame as well as a boolean value representing whether an image was properly opened\n", + " success,image = vidcap.read()\n", + " \n", + " count = 0\n", + " \n", + " while success:\n", + " \n", + " #this is specifically for foam_segmented.avi\n", + " if greyscale:\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", + " \n", + " #writing the image to a the directory path that was specified, \n", + " #if the path specified does not exist then it will be created\n", + " #this finctionality was added so that the images could be stored in a separate folder\n", + " #example of output file names: 1.jpg, 2.jpg, 3.jpg, and so on\n", + " cv2.imwrite(os.path.join(dirname, str(count)+\".jpg\"), image)\n", + " success,image = vidcap.read()\n", + " count += 1\n", + " #All the frames will be added in order\n", + " cv2.waitKey(1) \n", + " \n", + " #releasing the threads\n", + " vidcap.release()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "hBfLcvMhh6v7" + }, + "source": [ + "DEMO FOR THE **avi_to_frames** \n", + "\n", + "1. 
To download a sample avi file that you want to work with, use the following code `urllib.request.urlretrieve('https://file-examples.com/wp-content/uploads/2018/04/file_example_AVI_480_750kB.avi', 'testing.mp4')` \n", + "\n", + "2. After the video has been downloaded, run `video_to_frames(./testing.mp4, path_to_where_you_want_the_frames, False)`; this will create a folder with frames from the video." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-N-hJD11jJjo" + }, + "outputs": [], + "source": [ + "# Making a Video From Frames\n", + "def frames_to_video(directory_path, fps, width, height):\n", + " fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n", + " video = cv2.VideoWriter('video.avi', fourcc, fps, (width, height))\n", + " num_frames = len([name for name in os.listdir(directory_path) if os.path.isfile(name)])\n", + "\n", + " for j in range(num_frames):\n", + " img = cv2.imread(str(j) + '.jpg')\n", + " video.write(img)\n", + "\n", + " cv2.destroyAllWindows()\n", + " video.release()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sIhvc4DplLkE" + }, + "source": [ + "DEMO FOR THE **frames_to_video** \n", + "\n", + "1. We will be working with the frames that we created using `video_to_frames`. If you have not created those frames, feel free to look at the steps above.\n", + "\n", + "2. Run the following command (**make the necessary changes in the function call**): `frames_to_video(where_the_frames_are, fps, width, height)`\n", + "\n", + "3. The video will show up in the current directory." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bnfzxPNJeZVS" + }, + "source": [ + "**The code below will put the image arrays into a list.** This snippet of code utilizes glob, but packages like os can also be used." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mLbqPW7SeJd-" + }, + "outputs": [], + "source": [ + "path = glob.glob(\"./*.jpg\")\n", + "images = []\n", + "for img in path:\n", + " n = cv2.imread(img)\n", + " images.append(n)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GIq-_h4wdxWM" + }, + "source": [ + "<b> References:</b>\n", + "- [Managing Environments](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html)\n", + "- [Open CV in python](https://pypi.org/project/opencv-python/)\n", + "- [Colab Button](https://www.youtube.com/watch?v=RoGZIbwzG5w)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Ford_Video_Analysis.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.15" + }, + "vscode": { + "interpreter": { + "hash": "4f3567101b31d35f97cf2856951ebbba1e09a3f852422478e736adda2bb3beee" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +======= "cells": [ { "cell_type": "markdown", @@ -72,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "id": "PCbpVR-HZzmt" }, @@ -129,12 +362,12 @@ "\n", "1. To download a sample avi file that you want to work with, use the following code `urllib.request.urlretrieve('https://www.engr.colostate.edu/me/facil/dynamics/files/drop.avi', 'testing.mp4')` \n", "\n", - "2. After the video has been downloaded `video_to_frames(./testing.mp4, path_to_where_you_want_the_frames, False)` this will create a folder with frames from the video." + "2. After the video has been downloaded `avi_frames(./testing.mp4, path_to_where_you_want_the_frames, False)` this will create a folder with frames from the video." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "-N-hJD11jJjo" }, @@ -180,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "mLbqPW7SeJd-" }, @@ -204,6 +437,22 @@ "- [Open CV in python](https://pypi.org/project/opencv-python/)\n", "- [Colab Button](https://www.youtube.com/watch?v=RoGZIbwzG5w)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Comments:**\n", + "\n", + "The videos is not working, it only downloaded a unavailable video" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -213,7 +462,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -227,9 +476,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15" + "version": "3.8.8" } }, "nbformat": 4, - "nbformat_minor": 4 + "nbformat_minor": 1 +>>>>>>> 5c328fa (this is a testing comment) } diff --git a/Zotero_Instructions.ipynb b/Zotero_Instructions.ipynb index 82998428d0b09eeddeda3468a828ae0e7eb53c45..eefc757057c95913fbc15acf9109548874edc13e 100644 --- a/Zotero_Instructions.ipynb +++ b/Zotero_Instructions.ipynb @@ -82,12 +82,35 @@ "### Managing Existing Citations\n", "\n", "To manage existing citations, you can click on the citation you want to edit and the same information will pop up just like when creating a new citation.\n", + "\n", + "For online storage to access your citations anywhere you can store up to 300MB for free, but any more than that you need to pay.\n", + "\n", + "Here is the fee chart:\n", + "| Amount | Price |\n", + "| ---: | ---: |\n", + "| 300 MB | Free |\n", + "| 2 GB | \\$20 per year |\n", + "| 6 GB | \\$50 per year |\n", + "| Unlimited | \\$120 per year |\n", + "\n", + "\n", "___" ] }, { "cell_type": "markdown", - 
"id": "9ff6d48d", + "id": "a1b8ab6d", + "metadata": {}, + "source": [ + "### Creating New Citations\n", + "There is another setup guide for the broswer collector.\n", + "link:\n", + "https://guides.library.illinoisstate.edu/zotero/connector" + ] + }, + { + "cell_type": "markdown", + "id": "9d845196-f8a8-403d-8161-4287c11b17c0", "metadata": {}, "source": [ "## What Data is Available on Zotero?\n", @@ -128,6 +151,29 @@ "\n", "https://fordham.libguides.com/BibliographicManagement/Zotero#:~:text=Zotero%20is%20a%20free*%20citation,ease%20the%20collection%20of%20citations.\n" ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "412467a4", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "1bacd6c7", + "metadata": {}, + "source": [ + "Above is an image of the references for this notebook in zotero. \n", + "\n", + "Click on the + sign to create a document and then the paperclip to link a URL to it to reference it." + ] } ], "metadata": { @@ -146,7 +192,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/social_media_scraper/requirements.txt b/social_media_scraper/requirements.txt deleted file mode 100644 index 61bb11cd54f64cb59ab521fdc8204b380ce4a151..0000000000000000000000000000000000000000 --- a/social_media_scraper/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -#### Requirements for YouTube scraper -pandas -matplotlib -pytchat - diff --git a/social_media_scraper/yt_scraper.ipynb b/social_media_scraper/yt_scraper.ipynb index 1740c12a3bb06ed4913755af64ac4175b573605f..83f2013ce00c6e032d100b8c5a5590471f3d1674 100644 --- a/social_media_scraper/yt_scraper.ipynb +++ b/social_media_scraper/yt_scraper.ipynb @@ -15,7 +15,11 @@ "source": [ "### 1. 
Getting Project Dependecies\n", "\n", - "run **pip install -r requirements.txt** in your terminal\n", + "# if needed\n", + "#!pip install pandas\n", + "#!pip install pytchat\n", + "#!pip install matplotlib\n", + "#!pip install time\n", "\n", "This installs pandas, numpy and [pytchat](https://pypi.org/project/pytchat/)" ] @@ -46,8 +50,10 @@ } ], "source": [ - "#!pip install -r requirements.txt\n", - "!pip install pytchat" + "!pip install pandas\n", + "!pip install matplotlib\n", + "!pip install pytchat" + ] }, { diff --git a/tpot_tutorial.ipynb b/tpot_tutorial.ipynb index f672e306cbbc798def72140618ccaecb6626f401..d6337788119eb0551e1b395808b619fd8931d015 100644 --- a/tpot_tutorial.ipynb +++ b/tpot_tutorial.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "id": "eaee9fda", "metadata": {}, "outputs": [ @@ -18,33 +18,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: torch in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (1.10.2)\n", - "Requirement already satisfied: typing-extensions in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from torch) (3.10.0.2)\n", + "Requirement already satisfied: torch in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (1.13.1)\n", + "Requirement already satisfied: typing-extensions in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from torch) (3.10.0.2)\n", "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: xgboost in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (1.5.2)\n", - "Requirement already satisfied: numpy in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from xgboost) (1.21.2)\n", - "Requirement already satisfied: scipy in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from xgboost) (1.8.0)\n", + "Requirement already satisfied: xgboost in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages 
(1.7.3)\n", + "Requirement already satisfied: numpy in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from xgboost) (1.21.2)\n", + "Requirement already satisfied: scipy in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from xgboost) (1.7.3)\n", "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: tpot in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (0.11.7)\n", - "Requirement already satisfied: xgboost>=1.1.0 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.5.2)\n", - "Requirement already satisfied: joblib>=0.13.2 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.1.0)\n", - "Requirement already satisfied: numpy>=1.16.3 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.21.2)\n", - "Requirement already satisfied: stopit>=1.1.1 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.1.2)\n", - "Requirement already satisfied: tqdm>=4.36.1 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (4.62.3)\n", - "Requirement already satisfied: scipy>=1.3.1 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.8.0)\n", - "Requirement already satisfied: scikit-learn>=0.22.0 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.0.1)\n", - "Requirement already satisfied: update-checker>=0.16 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (0.18.0)\n", - "Requirement already satisfied: deap>=1.2 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.3.1)\n", - "Requirement already satisfied: pandas>=0.24.2 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.3.5)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from 
pandas>=0.24.2->tpot) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from pandas>=0.24.2->tpot) (2021.3)\n", - "Requirement already satisfied: six>=1.5 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas>=0.24.2->tpot) (1.16.0)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from scikit-learn>=0.22.0->tpot) (2.2.0)\n", - "Requirement already satisfied: requests>=2.3.0 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from update-checker>=0.16->tpot) (2.26.0)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (1.26.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (2021.10.8)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/kaitlynarnold/opt/anaconda3/lib/python3.8/site-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (3.3)\n", + "Requirement already satisfied: tpot in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (0.11.7)\n", + "Requirement already satisfied: tqdm>=4.36.1 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (4.62.3)\n", + "Requirement already satisfied: xgboost>=1.1.0 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.7.3)\n", + "Requirement already satisfied: deap>=1.2 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.3.3)\n", + "Requirement already satisfied: pandas>=0.24.2 in 
/Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.3.5)\n", + "Requirement already satisfied: update-checker>=0.16 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (0.18.0)\n", + "Requirement already satisfied: stopit>=1.1.1 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.1.2)\n", + "Requirement already satisfied: scipy>=1.3.1 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.7.3)\n", + "Requirement already satisfied: scikit-learn>=0.22.0 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.0.1)\n", + "Requirement already satisfied: joblib>=0.13.2 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.1.0)\n", + "Requirement already satisfied: numpy>=1.16.3 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from tpot) (1.21.2)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from pandas>=0.24.2->tpot) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from pandas>=0.24.2->tpot) (2021.3)\n", + "Requirement already satisfied: six>=1.5 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from python-dateutil>=2.7.3->pandas>=0.24.2->tpot) (1.16.0)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from scikit-learn>=0.22.0->tpot) (2.2.0)\n", + "Requirement already satisfied: requests>=2.3.0 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from update-checker>=0.16->tpot) (2.27.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (3.3)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from 
requests>=2.3.0->update-checker>=0.16->tpot) (2.0.4)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (1.26.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/izaanys/opt/anaconda3/lib/python3.8/site-packages (from requests>=2.3.0->update-checker>=0.16->tpot) (2022.9.24)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "id": "022ab1ce", "metadata": {}, "outputs": [], @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "2898aaa9", "metadata": {}, "outputs": [ @@ -137,7 +137,7 @@ " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -150,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "1d1ac27a", "metadata": {}, "outputs": [ @@ -160,7 +160,7 @@ "((112, 4), (38, 4), (112,), (38,))" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -173,21 +173,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "id": "015515d0", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: xgboost.XGBClassifier is not available and will not be used by TPOT.\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "49da94ec3a20499782a4a98ab6224166", + "model_id": "", "version_major": 2, "version_minor": 0 }, @@ -203,14 +196,14 @@ "output_type": "stream", "text": [ "\n", - "2.21 minutes have elapsed. TPOT will close down.\n", + "2.02 minutes have elapsed. 
TPOT will close down.\n", "TPOT closed during evaluation in one generation.\n", "WARNING: TPOT may not provide a good pipeline if TPOT is stopped/interrupted in a early generation.\n", "\n", "\n", "TPOT closed prematurely. Will use the current best pipeline.\n", "\n", - "Best pipeline: MLPClassifier(input_matrix, alpha=0.0001, learning_rate_init=0.01)\n", + "Best pipeline: LogisticRegression(input_matrix, C=25.0, dual=False, penalty=l2)\n", "0.9736842105263158\n" ] } @@ -220,14 +213,22 @@ "# Will report the score of the best found pipeline\n", "# Change max_time_mins to a higher time to allow TPOT to run without interruption. #issue number 25\n", "# It is currently at 2 mins for sake of not taking to long\n", - "tpot = TPOTClassifier(verbosity=2, max_time_mins=2)\n", + "tpot = TPOTClassifier(verbosity=2, max_time_mins=3)\n", "tpot.fit(X_train, y_train)\n", "print(tpot.score(X_test, y_test))" ] }, + { + "cell_type": "markdown", + "id": "22bb780f", + "metadata": {}, + "source": [ + "Issued warning of TPOT closed prematurely. 
I am increasing the max_time to 4 so tpot can completely run and the results are more accurate" + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "id": "fedcae2c", "metadata": {}, "outputs": [], @@ -246,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "id": "f2ac0eda", "metadata": {}, "outputs": [ @@ -388,7 +389,7 @@ "4 0 373450 8.0500 NaN S " ] }, - "execution_count": 12, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -401,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "id": "30eeb3aa", "metadata": {}, "outputs": [], @@ -412,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "id": "bcc561a3", "metadata": {}, "outputs": [ @@ -436,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "id": "5be7251f", "metadata": {}, "outputs": [ @@ -457,7 +458,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 11, "id": "8bb50fa8", "metadata": {}, "outputs": [], @@ -469,7 +470,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "id": "11c06d01", "metadata": {}, "outputs": [ @@ -491,7 +492,7 @@ "dtype: bool" ] }, - "execution_count": 17, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -504,7 +505,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "id": "aa017a2a", "metadata": {}, "outputs": [], @@ -517,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, "id": "34567d07", "metadata": {}, "outputs": [ @@ -533,7 +534,7 @@ " [1, 0, 0, ..., 0, 0, 0]])" ] }, - "execution_count": 19, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -544,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "id": "f31db644", "metadata": {}, "outputs": [], @@ -555,7 +556,7 @@ }, { "cell_type": 
"code", - "execution_count": 21, + "execution_count": 16, "id": "e8ccd33c", "metadata": {}, "outputs": [], @@ -566,7 +567,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 17, "id": "594776f6", "metadata": {}, "outputs": [], @@ -578,7 +579,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 18, "id": "e8d47aef", "metadata": {}, "outputs": [ @@ -588,7 +589,7 @@ "False" ] }, - "execution_count": 23, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -600,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "id": "6fb6b7df", "metadata": {}, "outputs": [ @@ -610,7 +611,7 @@ "156" ] }, - "execution_count": 24, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -621,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "id": "09fe6803", "metadata": {}, "outputs": [], @@ -632,7 +633,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "id": "a14bb5fd", "metadata": {}, "outputs": [ @@ -642,7 +643,7 @@ "(668, 223)" ] }, - "execution_count": 26, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -655,21 +656,14 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "227863a0", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: xgboost.XGBClassifier is not available and will not be used by TPOT.\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", + "model_id": "986af026de044e8694da6328fcffcb1c", "version_major": 2, "version_minor": 0 }, @@ -679,56 +673,36 @@ }, "metadata": {}, "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Generation 1 - Current best internal CV score: 0.8068791381438671\n", - "\n", - "2.01 minutes have elapsed. 
TPOT will close down.\n", - "TPOT closed during evaluation in one generation.\n", - "WARNING: TPOT may not provide a good pipeline if TPOT is stopped/interrupted in a early generation.\n", - "\n", - "\n", - "TPOT closed prematurely. Will use the current best pipeline.\n", - "\n", - "Best pipeline: DecisionTreeClassifier(input_matrix, criterion=gini, max_depth=3, min_samples_leaf=13, min_samples_split=20)\n" - ] - }, - { - "data": { - "text/plain": [ - "TPOTClassifier(max_eval_time_mins=0.04, max_time_mins=2, population_size=40,\n", - " verbosity=2)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "# create the classifier and fit the model, reports the best pipeline\n", "# Parameters within the TPOT Classifier can be changed to allow for longer run time across more models\n", - "tpot = TPOTClassifier(verbosity=2, max_time_mins=2, max_eval_time_mins=0.04, population_size=40)\n", + "tpot = TPOTClassifier(verbosity=2, max_time_mins=4, max_eval_time_mins=0.04, population_size=40)\n", "tpot.fit(titanic_new[training_indices], titanic_class[training_indices])" ] }, + { + "cell_type": "markdown", + "id": "910d6e80", + "metadata": {}, + "source": [ + "Issued warning of TPOT closed prematurely. 
I am increasing the max_time so tpot can completely run and the results are more accurate" + ] + }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 23, "id": "ca18f35b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.757847533632287" + "0.7533632286995515" ] }, - "execution_count": 28, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -740,7 +714,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 24, "id": "4d710e07", "metadata": {}, "outputs": [], @@ -751,7 +725,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 25, "id": "d7ff45ed", "metadata": {}, "outputs": [ @@ -873,7 +847,7 @@ "max 1309.000000 3.000000 76.000000 8.000000 9.000000 512.329200" ] }, - "execution_count": 30, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -886,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 26, "id": "8264d2ed", "metadata": {}, "outputs": [], @@ -899,7 +873,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 27, "id": "fe8198e3", "metadata": {}, "outputs": [], @@ -911,7 +885,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 28, "id": "13204313", "metadata": {}, "outputs": [ @@ -932,7 +906,7 @@ "dtype: bool" ] }, - "execution_count": 33, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -945,7 +919,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 29, "id": "82e8d3fb", "metadata": {}, "outputs": [], @@ -960,7 +934,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 30, "id": "185ba8c1", "metadata": {}, "outputs": [], @@ -971,7 +945,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 31, "id": "359c8b6b", "metadata": {}, "outputs": [ @@ -981,7 +955,7 @@ "False" ] }, - "execution_count": 36, + "execution_count": 31, "metadata": {}, "output_type": 
"execute_result" } @@ -992,7 +966,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 32, "id": "e73a0c32", "metadata": {}, "outputs": [], @@ -1002,7 +976,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 33, "id": "d868e452", "metadata": {}, "outputs": [], @@ -1013,7 +987,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 34, "id": "1666f357", "metadata": {}, "outputs": [ @@ -1023,7 +997,7 @@ "array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0])" ] }, - "execution_count": 40, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1034,10 +1008,26 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 35, "id": "3d91d737", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'data/submission.csv'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/tk/mrjkyqms0651_r4m4qvb9xz00000gn/T/ipykernel_43658/2451894762.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m#save as csv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mfinal\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'PassengerId'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtitanic_sub\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'PassengerId'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Survived'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0msubmission\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m 
\u001b[0mfinal\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/submission.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001b[0m\n\u001b[1;32m 3464\u001b[0m )\n\u001b[1;32m 3465\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3466\u001b[0;31m return DataFrameRenderer(formatter).to_csv(\n\u001b[0m\u001b[1;32m 3467\u001b[0m \u001b[0mpath_or_buf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3468\u001b[0m \u001b[0mline_terminator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mline_terminator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/formats/format.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001b[0m\n\u001b[1;32m 1103\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfmt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1104\u001b[0m )\n\u001b[0;32m-> 1105\u001b[0;31m \u001b[0mcsv_formatter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1106\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1107\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcreated_buffer\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/formats/csvs.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 235\u001b[0m \"\"\"\n\u001b[1;32m 236\u001b[0m \u001b[0;31m# apply compression and byte/text conversion\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 237\u001b[0;31m with get_handle(\n\u001b[0m\u001b[1;32m 238\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/opt/anaconda3/lib/python3.8/site-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m \u001b[0;31m# Encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 702\u001b[0;31m handle = open(\n\u001b[0m\u001b[1;32m 703\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 704\u001b[0m 
\u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'data/submission.csv'" + ] + } + ], "source": [ "#create a data frame with passenger id and what class they belong to (if they survived or not)\n", "#save as csv\n", @@ -1047,21 +1037,10 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "240feb73", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(418, 2)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "final.shape" ] @@ -1093,7 +1072,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.1" } }, "nbformat": 4,