From d0822726b8008f8d961e92187f87e03b25f87d6d Mon Sep 17 00:00:00 2001 From: PengnanZ <57605178+PengnanZ@users.noreply.github.com> Date: Fri, 25 Feb 2022 15:22:56 -0500 Subject: [PATCH] Add files via upload --- .../Censusdata_tutorial.ipynb | 541 ++++++++++++++++++ censusdata_package_tutorial/Help_Functions.py | 29 + censusdata_package_tutorial/README.md | 3 + 3 files changed, 573 insertions(+) create mode 100644 censusdata_package_tutorial/Censusdata_tutorial.ipynb create mode 100644 censusdata_package_tutorial/Help_Functions.py create mode 100644 censusdata_package_tutorial/README.md diff --git a/censusdata_package_tutorial/Censusdata_tutorial.ipynb b/censusdata_package_tutorial/Censusdata_tutorial.ipynb new file mode 100644 index 0000000..1d52b95 --- /dev/null +++ b/censusdata_package_tutorial/Censusdata_tutorial.ipynb @@ -0,0 +1,541 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7a42b048", + "metadata": {}, + "source": [ + "# U.S. Census Data Tutorial" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "319735dc", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install censusdata" + ] + }, + { + "cell_type": "markdown", + "id": "7baf9942", + "metadata": {}, + "source": [ + "If censusdata package was not in your enviroment, make sure to uncommond above line to pip it.\n", + "\n", + "Reference of the [CensusData library](https://jtleider.github.io/censusdata/index.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "83acab08", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import re\n", + "import numpy as np\n", + "import censusdata" + ] + }, + { + "cell_type": "markdown", + "id": "04dc2a46", + "metadata": {}, + "source": [ + "### Main Methods\n", + "[CensusData API Documentation](https://jtleider.github.io/censusdata/api.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "333e3815", + "metadata": {}, + "outputs": [], + "source": [ + "# Search for ACS 2015-2019 5-year estimate variables where the concept \n", + "# includes the text 'population'.\n", + "sample = censusdata.search('acs5', 2019, 'concept', \n", + " lambda value: re.search('population', value, re.IGNORECASE))" + ] + }, + { + "cell_type": "markdown", + "id": "cb0b1ea5", + "metadata": {}, + "source": [ + "**Parameters:**\t\n", + "* src (str) – Census data source: ```‘acs1’``` for **ACS 1-year estimates**, ```‘acs5’``` for **ACS 5-year estimates**, ```‘acs3’``` for **ACS 3-year estimates**, ```‘acsse’``` for **ACS 1-year supplemental estimates**, ```‘sf1’``` for **SF1 data**.\n", + "* year (int) – Year of data.\n", + "* field (str) – Field in which to search.\n", + "* criterion (str or function) – Search criterion. Either string to search for, or a function which will be passed the value of field and return True if a match and False otherwise.\n", + "* tabletype (str, optional) – Type of table from which variables are drawn (only applicable to ACS data). Options are ```‘detail’``` (detail tables), ```‘subject’``` (subject tables), ```‘profile’``` (data profile tables), ```‘cprofile’``` (comparison profile tables).\n", + "\n", + "**Returns:**\t\n", + "List of 3-tuples containing variable names, concepts, and labels matching the search criterion.\n", + "\n", + "**Return type:**\t\n", + "list" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4984d1b7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10765\n" + ] + } + ], + "source": [ + "print(len(sample))" + ] + }, + { + "cell_type": "markdown", + "id": "ede1fed8", + "metadata": {}, + "source": [ + "This would be the sample amount we get based on what we use to search. In this case, there are 10765 samples which are ACS 5-year estimates for 2019 include the text 'population'." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dbffaeca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('B01003_001E', 'TOTAL POPULATION', 'Estimate!!Total')\n" + ] + } + ], + "source": [ + "print(sample[0])" + ] + }, + { + "cell_type": "markdown", + "id": "07ae53a2", + "metadata": {}, + "source": [ + "Let's use the first sample file as an example. Based on the result from above, the first sample is called: 'B01003_001E', which is a total population table under the parent table B01003. " + ] + }, + { + "cell_type": "markdown", + "id": "a97df25b", + "metadata": {}, + "source": [ + "After you know the parent table you're interested in you can use the ```printtable``` function to get a clean readout of all the subtables in order to check if there are other subtables we might interested about." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fe3d19e4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variable | Table | Label | Type \n", + "-------------------------------------------------------------------------------------------------------------------\n", + "B01003_001E | TOTAL POPULATION | !! Estimate Total | int \n", + "-------------------------------------------------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "censusdata.printtable(censusdata.censustable('acs5', 2019, 'B01003'))" + ] + }, + { + "cell_type": "markdown", + "id": "60ac12ee", + "metadata": {}, + "source": [ + "### Data download" + ] + }, + { + "cell_type": "markdown", + "id": "d74c7914", + "metadata": {}, + "source": [ + "If you want download data based on some state, county etc. Start at **step 1**, if not start at **step 3**.\n", + "\n", + "**Step 1** If you want to download the data for some States, you need to find the geography code for it. And function ```geographies``` is build for that" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "501fec9c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summary level: 040, state:26\n" + ] + } + ], + "source": [ + "states = censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2019)\n", + "print(states['Michigan'])" + ] + }, + { + "cell_type": "markdown", + "id": "6de55a1a", + "metadata": {}, + "source": [ + "**Step 2** Also if you want it be county level you need do almost the same thing but by adding county after state. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "bb5ad44b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summary level: 050, state:26> county:163\n" + ] + } + ], + "source": [ + "counties = censusdata.geographies(censusdata.censusgeo([('state', '26'), ('county', '*')]), 'acs5', 2019)\n", + "print(counties['Wayne County, Michigan'])" + ] + }, + { + "cell_type": "markdown", + "id": "e610da56", + "metadata": {}, + "source": [ + "**Step 3** Now, is time to download what you want. Example based on Michigan, Wayne County. If you don't have state and county code, leave that as ```'*'```." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d7dac8c6", + "metadata": {}, + "outputs": [], + "source": [ + "data = censusdata.download('acs5', 2019, censusdata.censusgeo([('state', '26'),\n", + " ('county', '163'),\n", + " ('block group', '*')]),\n", + " ['B01003_001E'])" + ] + }, + { + "cell_type": "markdown", + "id": "d19c7015", + "metadata": {}, + "source": [ + "And this is the length of the data we get." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "44f953f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1822" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(data)" + ] + }, + { + "cell_type": "markdown", + "id": "57c5df9b", + "metadata": {}, + "source": [ + "### Extra (data formating, slice)\n", + "\n", + "This part are some extra step if you need, such as change the column name by using pandas, and slice it based on Census Tract by using ```census_cut``` in ```Help_Functions```." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1af89c80", + "metadata": {}, + "outputs": [], + "source": [ + "column_name = ['TOTAL POPULATION']\n", + "data.columns = column_name" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "02d0bfed", + "metadata": {}, + "outputs": [], + "source": [ + "new_indices = []\n", + "for index in data.index.tolist():\n", + " new_indices.append(index)\n", + "\n", + "data.index = new_indices" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9a537ede", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>TOTAL POPULATION</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Block Group 0, Census Tract 9901, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:990100> block group:0</th>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 3, Census Tract 5104, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:510400> block group:3</th>\n", + " <td>238</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 5, Census Tract 5528, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:552800> block group:5</th>\n", + " <td>1546</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 3, Census Tract 5014, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:501400> block group:3</th>\n", + " <td>757</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 2, Census Tract 5044, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:504400> block group:2</th>\n", + " <td>427</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " TOTAL POPULATION\n", + "Block Group 0, Census Tract 9901, Wayne County,... 0\n", + "Block Group 3, Census Tract 5104, Wayne County,... 238\n", + "Block Group 5, Census Tract 5528, Wayne County,... 1546\n", + "Block Group 3, Census Tract 5014, Wayne County,... 757\n", + "Block Group 2, Census Tract 5044, Wayne County,... 427" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "markdown", + "id": "54f9165e", + "metadata": {}, + "source": [ + "### ```census_cut``` usage" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6f39eed2", + "metadata": {}, + "outputs": [], + "source": [ + "from Help_Functions import census_cut\n", + "import re" + ] + }, + { + "cell_type": "markdown", + "id": "229c79aa", + "metadata": {}, + "source": [ + "For example, we want the data for some areas based on Census Tracts are 5303, 5304, 5316, 5317" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "58749d91", + "metadata": {}, + "outputs": [], + "source": [ + "Tracts = ['Census Tract 5303', 'Census Tract 5304','Census Tract 5316', 'Census Tract 5317']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "336301fe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>TOTAL POPULATION</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>Block Group 2, Census Tract 5304, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:530400> block group:2</th>\n", + " <td>647</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 1, Census Tract 5304, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:530400> block group:1</th>\n", + " <td>398</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 3, Census Tract 5303, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:530300> block group:3</th>\n", + " <td>702</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 1, Census Tract 5303, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:530300> block group:1</th>\n", + " <td>281</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 2, Census Tract 5317, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:531700> block group:2</th>\n", + " <td>905</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 1, Census Tract 5317, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:531700> block group:1</th>\n", + " <td>648</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 2, Census Tract 5316, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:531600> block group:2</th>\n", + " <td>1094</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 1, Census Tract 5316, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:531600> block group:1</th>\n", + " <td>761</td>\n", + " </tr>\n", + " <tr>\n", + " <th>Block Group 2, Census Tract 5303, Wayne County, Michigan: Summary level: 150, state:26> county:163> tract:530300> block group:2</th>\n", + " <td>329</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " TOTAL POPULATION\n", + "Block Group 2, Census Tract 5304, Wayne County,... 647\n", + "Block Group 1, Census Tract 5304, Wayne County,... 398\n", + "Block Group 3, Census Tract 5303, Wayne County,... 702\n", + "Block Group 1, Census Tract 5303, Wayne County,... 281\n", + "Block Group 2, Census Tract 5317, Wayne County,... 905\n", + "Block Group 1, Census Tract 5317, Wayne County,... 648\n", + "Block Group 2, Census Tract 5316, Wayne County,... 1094\n", + "Block Group 1, Census Tract 5316, Wayne County,... 761\n", + "Block Group 2, Census Tract 5303, Wayne County,... 329" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = census_cut(Tracts, data)\n", + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/censusdata_package_tutorial/Help_Functions.py b/censusdata_package_tutorial/Help_Functions.py new file mode 100644 index 0000000..baefe39 --- /dev/null +++ b/censusdata_package_tutorial/Help_Functions.py @@ -0,0 +1,29 @@ +# + +import pandas as pd +import numpy as np +import re + +def census_cut(tracts, data): + ''' + This function is use to cut based on Census Tract for data download by using censusdata package. + + Parameters: + tracts: A list of string which are the Census Tract. Such as 'Census Tract 0000'. + data: Data download by using censusdata package. + Return: + result: A new set of data which only include the data based on Census Track. + ''' + mask = [] + for i in range(len(data.index)): + string = str(data.index[i]) + check = True + for tract in tracts: + match = re.search(tract, string) + if match: + mask.append(True) + check = False + if check: + mask.append(False) + len(mask), len(data.index) + result = data[mask] + return result diff --git a/censusdata_package_tutorial/README.md b/censusdata_package_tutorial/README.md new file mode 100644 index 0000000..8a32031 --- /dev/null +++ b/censusdata_package_tutorial/README.md @@ -0,0 +1,3 @@ +# Censusdata_tutorial + +This is the Tutorial for how to use censusdata package. There also include an extra ```Help_Functions.py``` file which has a help function use for help slice the data download by using cens -- GitLab