From edf45fe25d207956d34ffce05a20f708311f7e68 Mon Sep 17 00:00:00 2001 From: ande2472 <ande2472@msu.edu> Date: Tue, 28 Jun 2022 03:15:47 -0400 Subject: [PATCH] Upload New File --- Pandas.ipynb | 381 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 Pandas.ipynb diff --git a/Pandas.ipynb b/Pandas.ipynb new file mode 100644 index 0000000..415d787 --- /dev/null +++ b/Pandas.ipynb @@ -0,0 +1,381 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 32, + "id": "fc73d74c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('answercheck.py', <http.client.HTTPMessage at 0x7fc9a8612220>)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "##ANSWER##\n", + "# Install answercheck in the current directory\n", + "from urllib.request import urlretrieve\n", + "urlretrieve('https://raw.githubusercontent.com/colbrydi/jupytercheck/master/answercheck.py', filename='answercheck.py')\n", + "##ANSWER##" + ] + }, + { + "cell_type": "markdown", + "id": "3c2a4f39", + "metadata": {}, + "source": [ + "# Pandas\n", + "Pandas is a Python library used to analyze data." + ] + }, + { + "cell_type": "markdown", + "id": "2b78245b", + "metadata": {}, + "source": [ + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "71e867d9", + "metadata": {}, + "source": [ + "## Description\n", + "Pandas takes tabular data and stores it in objects known as ‘DataFrames’. They are useful for exploring, cleaning, and processing data.\n" + ] + }, + { + "cell_type": "markdown", + "id": "58ec20e8", + "metadata": {}, + "source": [ + "## Self Assessment\n", + "\n", + "Questions that test for the learning goals and allow students to evaluate if they truly understand the topics."
+ ] + }, + { + "cell_type": "markdown", + "id": "5d237ca9", + "metadata": {}, + "source": [ + "## Training Materials\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "21f9b14e", + "metadata": {}, + "source": [ + "For a general understanding of how to use pandas, along with practice exercises, check out the following website: https://www.w3schools.com/python/pandas/default.asp\n", + "\n", + "For a complete directory of everything you can do with pandas:\n", + "\n", + "https://pandas.pydata.org/docs/index.html\n", + "\n", + "Pandas Cheat Sheet:\n", + "\n", + "Data Analysis:\n", + "\thttps://drive.google.com/file/d/1UHK8wtWbADvHKXFC937IS6MTnlSZC_zB/view \n", + "\n", + "Wrangling Data:\n", + "https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "1b047470", + "metadata": {}, + "source": [ + "Go to https://people.sc.fsu.edu/~jburkardt/data/csv/csv.html and download the “grades.csv” file. We will use it to solve the following problems:\n", + "\n", + "✅ **<span style=\"color:red\">Question:</span>** Create a dataframe variable to store the grades.csv table.\n", + "\n", + "\t\n", + "\n", + "If you need help: https://www.w3schools.com/python/pandas/pandas_csv.asp\n" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "ee42a54e", + "metadata": {}, + "outputs": [], + "source": [ + "##ANSWER##\n", + "import pandas as pd\n", + "\n", + "# Expects grades.csv in the notebook's working directory.\n", + "df = pd.read_csv('grades.csv')\n", + "df\n", + "##ANSWER##" + ] + }, + { + "cell_type": "markdown", + "id": "1e046af0", + "metadata": {}, + "source": [ + "✅ **<span style=\"color:red\">Question:</span>** Remove the 9th row (Andrew Airpump’s data) from the dataframe.\n", + "\n", + "If you need help, refer to the Wrangling Data cheatsheet: https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "180aff55", + "metadata": {}, + "outputs": [], + "source": [ + "##ANSWER##\n", + "# Rows are labelled from 0, so the 9th row carries label 8.\n", + "df_1 = df.drop([8])\n", + "df_1\n", + "##ANSWER##" + ] + }, + { + "cell_type": "markdown", + "id": "43219d26", + "metadata": {}, + "source": [ + "✅ **<span style=\"color:red\">Question:</span>** Clean up column headers to remove unnecessary quotation marks.\n", + "\n", + "\n", + "If you need help: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rename.html\n" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "4c9f8f12", + "metadata": {}, + "outputs": [], + "source": [ + "##ANSWER##\n", + "# Strip leading/trailing spaces and double quotes from every column label.\n", + "df_2 = df_1.rename(columns=lambda label: label.strip(' \"'))\n", + "df_2\n", + "##ANSWER##" + ] + }, + { + "cell_type": "markdown", + "id": "f4e4a841", + "metadata": {}, + "source": [ + "✅ **<span style=\"color:red\">Question:</span>** Sort the data by student’s SSN.
Store this data in a new dataframe called df_3.\n", + "\n", + "\n", + "If you need help: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html?highlight=sort_values#pandas.DataFrame.sort_values\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8c5166c", + "metadata": {}, + "outputs": [], + "source": [ + "##ANSWER##\n", + "# Order the rows by the SSN column, as the question asks.\n", + "df_3 = df_2.sort_values(by='SSN')\n", + "df_3\n", + "##ANSWER##" + ] + }, + { + "cell_type": "markdown", + "id": "11a12621", + "metadata": {}, + "source": [ + "✅ **<span style=\"color:red\">Question:</span>** Sort the data by the overall letter grade each student received in the class and the grade they received on the final.\n", + "\n", + "\n", + "If you need help: Same as above\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "333f476e", + "metadata": {}, + "source": [ + "✅ **<span style=\"color:red\">Question:</span>** Create a line graph plotting Test 1 scores against Test 2 scores.\n", + "\n", + "\n", + "\n", + "If you need help: https://pandas.pydata.org/docs/user_guide/visualization.html\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "f66f2294", + "metadata": {}, + "source": [ + "✅ **<span
style=\"color:red\">Question:</span>** Perform a correlation test comparing Test 3 and Test 4 scores.\n", + "\n", + "\n", + "If you need help: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.corr.html\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "dd9c7fa5", + "metadata": {}, + "source": [ + "✅ **<span style=\"color:red\">Question:</span>** Example answercheck question: What is $x = 2+2$?\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "64684dfd", + "metadata": {}, + "outputs": [], + "source": [ + "#Put your answer here" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "619a2259", + "metadata": {}, + "outputs": [], + "source": [ + "##ANSWER##\n", + "x = 4\n", + "##ANSWER##" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "9cc2b34a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "CheckWarning: passed variable is <class 'int'> and not a numpy.matrix.\n", + " Trying to convert to a array matrix using ```A = np.matrix(A)```.\n", + "\n", + "\n", + "CheckWarning: passed matrix is int64 and not <class 'numpy.float64'>...\n", + " Trying to convert to float using ```A = A.astype(float)```.\n", + "\n", + "Testing [[4.]]\n", + "Answer seems to be correct\n", + "\n" + ] + } + ], + "source": [ + "from answercheck import checkanswer\n", + "checkanswer.vector(x,'2cab95d1b144d663bad1ce5c51020ae0')" + ] + }, + { + "cell_type": "markdown", + "id": "44b461a0", + "metadata": {}, + "source": [ + "---\n", + "\n", + "Written by <<YOUR NAME HERE>>, Michigan State University \n", + "As part of the Data Science Bridge Project \n", + " \n", + "<a rel=\"license\"
href=\"http://creativecommons.org/licenses/by-nc/4.0/\">Creative Commons Attribution-NonCommercial 4.0 International License</a>." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} -- GitLab