From d0822726b8008f8d961e92187f87e03b25f87d6d Mon Sep 17 00:00:00 2001
From: PengnanZ <57605178+PengnanZ@users.noreply.github.com>
Date: Fri, 25 Feb 2022 15:22:56 -0500
Subject: [PATCH] Add files via upload

---
 .../Censusdata_tutorial.ipynb                 | 541 ++++++++++++++++++
 censusdata_package_tutorial/Help_Functions.py |  29 +
 censusdata_package_tutorial/README.md         |   3 +
 3 files changed, 573 insertions(+)
 create mode 100644 censusdata_package_tutorial/Censusdata_tutorial.ipynb
 create mode 100644 censusdata_package_tutorial/Help_Functions.py
 create mode 100644 censusdata_package_tutorial/README.md

diff --git a/censusdata_package_tutorial/Censusdata_tutorial.ipynb b/censusdata_package_tutorial/Censusdata_tutorial.ipynb
new file mode 100644
index 0000000..1d52b95
--- /dev/null
+++ b/censusdata_package_tutorial/Censusdata_tutorial.ipynb
@@ -0,0 +1,541 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7a42b048",
+   "metadata": {},
+   "source": [
+    "# U.S. Census Data Tutorial"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "319735dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!pip install censusdata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7baf9942",
+   "metadata": {},
+   "source": [
+    "If censusdata package was not in your enviroment, make sure to uncommond above line to pip it.\n",
+    "\n",
+    "Reference of the [CensusData library](https://jtleider.github.io/censusdata/index.html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "83acab08",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import re\n",
+    "import numpy as np\n",
+    "import censusdata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "04dc2a46",
+   "metadata": {},
+   "source": [
+    "### Main Methods\n",
+    "[CensusData API Documentation](https://jtleider.github.io/censusdata/api.html)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "333e3815",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Search for ACS 2015-2019 5-year estimate variables where the concept \n",
+    "# includes the text 'population'.\n",
+    "sample = censusdata.search('acs5', 2019, 'concept', \n",
+    "                           lambda value: re.search('population', value, re.IGNORECASE))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cb0b1ea5",
+   "metadata": {},
+   "source": [
+    "**Parameters:**\t\n",
+    "* src (str) â€“ Census data source: ```â€˜acs1â€™``` for **ACS 1-year estimates**, ```â€˜acs5â€™``` for **ACS 5-year estimates**, ```â€˜acs3â€™``` for **ACS 3-year estimates**, ```â€˜acsseâ€™``` for **ACS 1-year supplemental estimates**, ```â€˜sf1â€™``` for **SF1 data**.\n",
+    "* year (int) â€“ Year of data.\n",
+    "* field (str) â€“ Field in which to search.\n",
+    "* criterion (str or function) â€“ Search criterion. Either string to search for, or a function which will be passed the value of field and return True if a match and False otherwise.\n",
+    "* tabletype (str, optional) â€“ Type of table from which variables are drawn (only applicable to ACS data). Options are ```â€˜detailâ€™``` (detail tables), ```â€˜subjectâ€™``` (subject tables), ```â€˜profileâ€™``` (data profile tables), ```â€˜cprofileâ€™``` (comparison profile tables).\n",
+    "\n",
+    "**Returns:**\t\n",
+    "List of 3-tuples containing variable names, concepts, and labels matching the search criterion.\n",
+    "\n",
+    "**Return type:**\t\n",
+    "list"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "4984d1b7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10765\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(len(sample))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ede1fed8",
+   "metadata": {},
+   "source": [
+    "This would be the sample amount we get based on what we use to search. In this case, there are 10765 samples which are ACS 5-year estimates for 2019 include the text 'population'."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "dbffaeca",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "('B01003_001E', 'TOTAL POPULATION', 'Estimate!!Total')\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(sample[0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07ae53a2",
+   "metadata": {},
+   "source": [
+    "Let's use the first sample file as an example. Based on the result from above, the first sample is called: 'B01003_001E', which is a total population table under the parent table B01003. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a97df25b",
+   "metadata": {},
+   "source": [
+    "After you know the parent table you're interested in you can use the ```printtable``` function to get a clean readout of all the subtables in order to check if there are other subtables we might interested about."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "fe3d19e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Variable     | Table                          | Label                                                    | Type \n",
+      "-------------------------------------------------------------------------------------------------------------------\n",
+      "B01003_001E  | TOTAL POPULATION               | !! Estimate Total                                        | int  \n",
+      "-------------------------------------------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "censusdata.printtable(censusdata.censustable('acs5', 2019, 'B01003'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "60ac12ee",
+   "metadata": {},
+   "source": [
+    "### Data download"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d74c7914",
+   "metadata": {},
+   "source": [
+    "If you want download data based on some state, county etc. Start at **step 1**, if not start at **step 3**.\n",
+    "\n",
+    "**Step 1** If you want to download the data for some States, you need to find the geography code for it. And function ```geographies``` is build for that"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "501fec9c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Summary level: 040, state:26\n"
+     ]
+    }
+   ],
+   "source": [
+    "states = censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2019)\n",
+    "print(states['Michigan'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6de55a1a",
+   "metadata": {},
+   "source": [
+    "**Step 2** Also if you want it be county level you need do almost the same thing but by adding county after state. For example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "bb5ad44b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Summary level: 050, state:26> county:163\n"
+     ]
+    }
+   ],
+   "source": [
+    "counties = censusdata.geographies(censusdata.censusgeo([('state', '26'), ('county', '*')]), 'acs5', 2019)\n",
+    "print(counties['Wayne County, Michigan'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e610da56",
+   "metadata": {},
+   "source": [
+    "**Step 3** Now, is time to download what you want. Example based on Michigan, Wayne County. If you don't have state and county code, leave that as ```'*'```."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "d7dac8c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = censusdata.download('acs5', 2019, censusdata.censusgeo([('state', '26'),\n",
+    "                                                               ('county', '163'),\n",
+    "                                                               ('block group', '*')]),\n",
+    "                          ['B01003_001E'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d19c7015",
+   "metadata": {},
+   "source": [
+    "And this is the length of the data we get."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "44f953f0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1822"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "57c5df9b",
+   "metadata": {},
+   "source": [
+    "### Extra (data formating, slice)\n",
+    "\n",
+    "This part are some extra step if you need, such as change the column name by using pandas, and slice it based on Census Tract by using ```census_cut``` in ```Help_Functions```."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "1af89c80",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "column_name = ['TOTAL POPULATION']\n",
+    "data.columns = column_name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "02d0bfed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_indices = []\n",
+    "for index in data.index.tolist():\n",
+    "    new_indices.append(index)\n",
+    "\n",
+    "data.index = new_indices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "9a537ede",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>TOTAL POPULATION</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Block Group 0, Census Tract 9901, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:990100&gt; block group:0</th>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 3, Census Tract 5104, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:510400&gt; block group:3</th>\n",
+       "      <td>238</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 5, Census Tract 5528, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:552800&gt; block group:5</th>\n",
+       "      <td>1546</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 3, Census Tract 5014, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:501400&gt; block group:3</th>\n",
+       "      <td>757</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 2, Census Tract 5044, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:504400&gt; block group:2</th>\n",
+       "      <td>427</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                    TOTAL POPULATION\n",
+       "Block Group 0, Census Tract 9901, Wayne County,...                 0\n",
+       "Block Group 3, Census Tract 5104, Wayne County,...               238\n",
+       "Block Group 5, Census Tract 5528, Wayne County,...              1546\n",
+       "Block Group 3, Census Tract 5014, Wayne County,...               757\n",
+       "Block Group 2, Census Tract 5044, Wayne County,...               427"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "54f9165e",
+   "metadata": {},
+   "source": [
+    "### ```census_cut``` usage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "6f39eed2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from Help_Functions import census_cut\n",
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "229c79aa",
+   "metadata": {},
+   "source": [
+    "For example, we want the data for some areas based on Census Tracts are 5303, 5304, 5316, 5317"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "58749d91",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Tracts = ['Census Tract 5303', 'Census Tract 5304','Census Tract 5316', 'Census Tract 5317']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "336301fe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>TOTAL POPULATION</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>Block Group 2, Census Tract 5304, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:530400&gt; block group:2</th>\n",
+       "      <td>647</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 1, Census Tract 5304, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:530400&gt; block group:1</th>\n",
+       "      <td>398</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 3, Census Tract 5303, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:530300&gt; block group:3</th>\n",
+       "      <td>702</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 1, Census Tract 5303, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:530300&gt; block group:1</th>\n",
+       "      <td>281</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 2, Census Tract 5317, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:531700&gt; block group:2</th>\n",
+       "      <td>905</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 1, Census Tract 5317, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:531700&gt; block group:1</th>\n",
+       "      <td>648</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 2, Census Tract 5316, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:531600&gt; block group:2</th>\n",
+       "      <td>1094</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 1, Census Tract 5316, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:531600&gt; block group:1</th>\n",
+       "      <td>761</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Block Group 2, Census Tract 5303, Wayne County, Michigan: Summary level: 150, state:26&gt; county:163&gt; tract:530300&gt; block group:2</th>\n",
+       "      <td>329</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                    TOTAL POPULATION\n",
+       "Block Group 2, Census Tract 5304, Wayne County,...               647\n",
+       "Block Group 1, Census Tract 5304, Wayne County,...               398\n",
+       "Block Group 3, Census Tract 5303, Wayne County,...               702\n",
+       "Block Group 1, Census Tract 5303, Wayne County,...               281\n",
+       "Block Group 2, Census Tract 5317, Wayne County,...               905\n",
+       "Block Group 1, Census Tract 5317, Wayne County,...               648\n",
+       "Block Group 2, Census Tract 5316, Wayne County,...              1094\n",
+       "Block Group 1, Census Tract 5316, Wayne County,...               761\n",
+       "Block Group 2, Census Tract 5303, Wayne County,...               329"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = census_cut(Tracts, data)\n",
+    "df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/censusdata_package_tutorial/Help_Functions.py b/censusdata_package_tutorial/Help_Functions.py
new file mode 100644
index 0000000..baefe39
--- /dev/null
+++ b/censusdata_package_tutorial/Help_Functions.py
@@ -0,0 +1,29 @@
+# +
+import pandas as pd
+import numpy as np
+import re
+
+def census_cut(tracts, data):
+    '''
+    This function is use to cut based on Census Tract for data download by using censusdata package.
+        
+        Parameters:
+            tracts: A list of string which are the Census Tract. Such as 'Census Tract 0000'.
+            data: Data download by using censusdata package.
+        Return:
+            result: A new set of data which only include the data based on Census Track.
+    '''
+    mask = []
+    for i in range(len(data.index)):
+        string = str(data.index[i])
+        check = True
+        for tract in tracts:
+            match = re.search(tract, string)
+            if match:
+                mask.append(True)
+                check = False
+        if check:
+            mask.append(False)
+    len(mask), len(data.index)
+    result = data[mask]
+    return result
diff --git a/censusdata_package_tutorial/README.md b/censusdata_package_tutorial/README.md
new file mode 100644
index 0000000..8a32031
--- /dev/null
+++ b/censusdata_package_tutorial/README.md
@@ -0,0 +1,3 @@
+# Censusdata_tutorial
+
+This is the Tutorial for how to use censusdata package. There also include an extra ```Help_Functions.py``` file which has a help function use for help slice the data download by using cens
-- 
GitLab