From fabad5650997136b89dc898c6ae503bbf18d588b Mon Sep 17 00:00:00 2001 From: Nunsi Shiaki <89302881+Omi-1@users.noreply.github.com> Date: Fri, 30 Jun 2023 12:47:24 +0100 Subject: [PATCH] Data Analysis and Visualisation. --- Smart_Data_Science_Project.ipynb | 1496 ++++++++++++++++++++++++++++++ 1 file changed, 1496 insertions(+) create mode 100644 Smart_Data_Science_Project.ipynb diff --git a/Smart_Data_Science_Project.ipynb b/Smart_Data_Science_Project.ipynb new file mode 100644 index 0000000..73d7718 --- /dev/null +++ b/Smart_Data_Science_Project.ipynb @@ -0,0 +1,1496 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "id": "5d48d0a7", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7a5557af", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('911.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c99f1c8a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
latlngdescziptitletimeStamptwpaddre
040.297876-75.581294REINDEER CT & DEAD END; NEW HANOVER; Station ...19525.0EMS: BACK PAINS/INJURY2015-12-10 17:10:52NEW HANOVERREINDEER CT & DEAD END1
140.258061-75.264680BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...19446.0EMS: DIABETIC EMERGENCY2015-12-10 17:29:21HATFIELD TOWNSHIPBRIAR PATH & WHITEMARSH LN1
240.121182-75.351975HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...19401.0Fire: GAS-ODOR/LEAK2015-12-10 14:39:21NORRISTOWNHAWS AVE1
340.116153-75.343513AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;...19401.0EMS: CARDIAC EMERGENCY2015-12-10 16:47:36NORRISTOWNAIRY ST & SWEDE ST1
440.251492-75.603350CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S...NaNEMS: DIZZINESS2015-12-10 16:56:52LOWER POTTSGROVECHERRYWOOD CT & DEAD END1
\n", + "
" + ], + "text/plain": [ + " lat lng desc \\\n", + "0 40.297876 -75.581294 REINDEER CT & DEAD END; NEW HANOVER; Station ... \n", + "1 40.258061 -75.264680 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... \n", + "2 40.121182 -75.351975 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... \n", + "3 40.116153 -75.343513 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... \n", + "4 40.251492 -75.603350 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... \n", + "\n", + " zip title timeStamp twp \\\n", + "0 19525.0 EMS: BACK PAINS/INJURY 2015-12-10 17:10:52 NEW HANOVER \n", + "1 19446.0 EMS: DIABETIC EMERGENCY 2015-12-10 17:29:21 HATFIELD TOWNSHIP \n", + "2 19401.0 Fire: GAS-ODOR/LEAK 2015-12-10 14:39:21 NORRISTOWN \n", + "3 19401.0 EMS: CARDIAC EMERGENCY 2015-12-10 16:47:36 NORRISTOWN \n", + "4 NaN EMS: DIZZINESS 2015-12-10 16:56:52 LOWER POTTSGROVE \n", + "\n", + " addr e \n", + "0 REINDEER CT & DEAD END 1 \n", + "1 BRIAR PATH & WHITEMARSH LN 1 \n", + "2 HAWS AVE 1 \n", + "3 AIRY ST & SWEDE ST 1 \n", + "4 CHERRYWOOD CT & DEAD END 1 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "52a5db0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "663522" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7d2c899f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "21999814", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([19525., 19446., 19401., nan, 19044., 19426., 19438., 19462.,\n", + " 19428., 19040., 19027., 18936., 18974., 19031., 19403., 
19422.,\n", + " 19085., 18964., 19038., 19406., 19468., 19010., 19095., 19464.,\n", + " 19444., 19041., 19440., 19405., 19002., 19096., 19454., 19465.,\n", + " 19004., 19066., 19072., 18041., 19046., 19090., 19012., 19025.,\n", + " 19473., 18073., 18969., 18074., 19460., 19001., 18054., 19009.,\n", + " 19006., 19035., 19150., 19075., 19034., 19151., 19453., 19003.,\n", + " 18914., 19512., 18976., 19120., 18915., 18076., 19477., 19087.,\n", + " 18966., 19131., 19128., 19083., 19053., 19475., 18960., 19504.,\n", + " 18070., 19492., 18932., 19118., 18092., 19490., 19518., 18056.,\n", + " 19119., 19107., 17752., 19111., 18927., 19435., 18951., 19472.,\n", + " 19503., 19126., 19505., 19423., 19138., 36107., 18036., 19116.,\n", + " 19139., 19129., 19115., 19355., 77316., 19457., 19082., 19127.,\n", + " 19443., 17555., 19520., 19063., 19020., 19404., 19382., 19474.,\n", + " 19057., 19073., 19121., 18958., 19026., 19018., 19047., 19064.,\n", + " 19602., 19486., 19348., 18051., 18049., 19333., 19144., 18101.,\n", + " 19607., 19450., 19380., 17506., 8361., 18940., 18104., 7203.,\n", + " 19030., 8033., 19104., 17545., 8832., 19021., 19106., 8065.,\n", + " 15301., 18911., 18902., 18944., 3366., 19545., 19390., 19140.,\n", + " 18901., 19601., 19341., 19301., 19425., 23005., 19054., 18040.,\n", + " 18102., 17603., 18080., 17901., 19153., 21701., 18103., 19134.,\n", + " 19135., 8502., 19122., 19320., 3103., 19610., 19102., 17331.,\n", + " 19050., 19023., 17810., 8077., 8628., 19605., 19437., 19312.,\n", + " 19147., 19456., 19604., 17507., 1104., 18042., 18011., 15090.,\n", + " 19543., 19124., 19609., 19445., 19310., 19070., 7081., 7726.,\n", + " 17566., 19008., 19365., 19103., 18938.])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['zip'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "592ecdd4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "204" + ] + }, + 
"execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['zip'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a0fdabfc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "lat 25949\n", + "lng 25980\n", + "desc 663282\n", + "zip 204\n", + "title 148\n", + "timeStamp 640754\n", + "twp 68\n", + "addr 41292\n", + "e 1\n", + "dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "632698e5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 663522 entries, 0 to 663521\n", + "Data columns (total 9 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 lat 663522 non-null float64\n", + " 1 lng 663522 non-null float64\n", + " 2 desc 663522 non-null object \n", + " 3 zip 583323 non-null float64\n", + " 4 title 663522 non-null object \n", + " 5 timeStamp 663522 non-null object \n", + " 6 twp 663229 non-null object \n", + " 7 addr 663522 non-null object \n", + " 8 e 663522 non-null int64 \n", + "dtypes: float64(3), int64(1), object(5)\n", + "memory usage: 32.9+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "f542c323", + "metadata": {}, + "source": [ + "# What are the top 5 townships (twp) for 911 calls." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f371659c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LOWER MERION 55490\n", + "ABINGTON 39947\n", + "NORRISTOWN 37633\n", + "UPPER MERION 36010\n", + "CHELTENHAM 30574\n", + "Name: twp, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['twp'].value_counts().head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "effa7c47", + "metadata": {}, + "source": [ + "# What are the top 5 zip codes for 911 calls." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "08cfa750", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "19401.0 45606\n", + "19464.0 43910\n", + "19403.0 34888\n", + "19446.0 32270\n", + "19406.0 22464\n", + "Name: zip, dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['zip'].value_counts().head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "62b5c8ab", + "metadata": {}, + "source": [ + "# Take a look at the \"title\" column. How many unique title codes are there?" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "bad77a83", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "148" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['title'].nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "dca56d0c", + "metadata": {}, + "source": [ + "# Creating new features" + ] + }, + { + "cell_type": "markdown", + "id": "9a94d8ee", + "metadata": {}, + "source": [ + "# In the \"title\" column there are \"Reasons/Departments\" specified before the title code. These are EMS, Fire, and Traffic. Create a new column called \"Reason\" that contains this string value."
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "9f67f27c", + "metadata": {}, + "outputs": [], + "source": [ + "x =df['title'].iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "79311b3c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "' BACK PAINS/INJURY'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x.split(':')[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "90adf2bb", + "metadata": {}, + "outputs": [], + "source": [ + "df['Reason'] = df['title'].apply(lambda title: title.split(':')[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a75d1c8f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 EMS\n", + "1 EMS\n", + "2 Fire\n", + "3 EMS\n", + "4 EMS\n", + " ... \n", + "663517 Traffic\n", + "663518 EMS\n", + "663519 EMS\n", + "663520 Fire\n", + "663521 Traffic\n", + "Name: Reason, Length: 663522, dtype: object" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Reason']" + ] + }, + { + "cell_type": "markdown", + "id": "b473df23", + "metadata": {}, + "source": [ + "# What is the most common Reason for a 911 call based off of this new column? " + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "e56bd9a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EMS 332692\n", + "Traffic 230208\n", + "Fire 100622\n", + "Name: Reason, dtype: int64" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Reason'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "46f517e6", + "metadata": {}, + "source": [ + "# Now use seaborn to create a countplot of 911 calls by Reason." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "9665513c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=df['Reason'],data=df,palette='viridis')" + ] + }, + { + "cell_type": "markdown", + "id": "7074b0fc", + "metadata": {}, + "source": [ + "# What is the data type of the objects in the timeStamp column?" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "23ed680a", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 663522 entries, 0 to 663521\n", + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 lat 663522 non-null float64\n", + " 1 lng 663522 non-null float64\n", + " 2 desc 663522 non-null object \n", + " 3 zip 583323 non-null float64\n", + " 4 title 663522 non-null object \n", + " 5 timeStamp 663522 non-null object \n", + " 6 twp 663229 non-null object \n", + " 7 addr 663522 non-null object \n", + " 8 e 663522 non-null int64 \n", + " 9 Reason 663522 non-null object \n", + "dtypes: float64(3), int64(1), object(6)\n", + "memory usage: 35.4+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "f1bc5448", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "str" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df['timeStamp'].iloc[0])" + ] + }, + { + "cell_type": "markdown", + "id": "fb9c12fe", + "metadata": {}, + "source": [ + "# Convert the above column from strings to DateTime objects." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "036db288", + "metadata": {}, + "outputs": [], + "source": [ + "df['timeStamp']= pd.to_datetime(df['timeStamp'])" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "bd1cddf0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas._libs.tslibs.timestamps.Timestamp" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(df['timeStamp'].iloc[0])" + ] + }, + { + "cell_type": "markdown", + "id": "5fcc0ce8", + "metadata": {}, + "source": [ + "# Grab specific attributes from a Datetime object by calling them. " + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "e526f419", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "10" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time=df['timeStamp'].iloc[0]\n", + "time.day" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "24ef7230", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2015" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time.year" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "292e3718", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "17" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time.hour" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "66ab9642", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time.day_of_week" + ] + }, + { + "cell_type": "markdown", + "id": "d8e2cd70", + "metadata": {}, + "source": [ + "# Create 3 new columns called Hour,Month, and Day of Week. 
Create these columns based off of the timeStamp column." + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "2a189eab", + "metadata": {}, + "outputs": [], + "source": [ + "df['Hour']= df['timeStamp'].apply (lambda time: time.hour)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "c0acb9f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 17\n", + "1 17\n", + "2 14\n", + "3 16\n", + "4 16\n", + " ..\n", + "663517 15\n", + "663518 15\n", + "663519 15\n", + "663520 15\n", + "663521 15\n", + "Name: Hour, Length: 663522, dtype: int64" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " df['Hour']" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "0814bc93", + "metadata": {}, + "outputs": [], + "source": [ + "df['Month']= df['timeStamp'].apply (lambda time: time.month)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "62864a45", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 12\n", + "1 12\n", + "2 12\n", + "3 12\n", + "4 12\n", + " ..\n", + "663517 7\n", + "663518 7\n", + "663519 7\n", + "663520 7\n", + "663521 7\n", + "Name: Month, Length: 663522, dtype: int64" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Month']" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "b9d3d89a", + "metadata": {}, + "outputs": [], + "source": [ + "df['Day_of_week']= df['timeStamp'].apply (lambda time: time.day_of_week)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "43585281", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3\n", + "1 3\n", + "2 3\n", + "3 3\n", + "4 3\n", + " ..\n", + "663517 2\n", + "663518 2\n", + "663519 2\n", + "663520 2\n", + "663521 2\n", + "Name: Day_of_week, Length: 663522, dtype: int64" + ] + }, + "execution_count": 78, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "df['Day_of_week']" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "3abc7b92", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
latlngdescziptitletimeStamptwpaddreReasonHourMonthDay_of_week
040.297876-75.581294REINDEER CT & DEAD END; NEW HANOVER; Station ...19525.0EMS: BACK PAINS/INJURY2015-12-10 17:10:52NEW HANOVERREINDEER CT & DEAD END1EMS17123
140.258061-75.264680BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...19446.0EMS: DIABETIC EMERGENCY2015-12-10 17:29:21HATFIELD TOWNSHIPBRIAR PATH & WHITEMARSH LN1EMS17123
240.121182-75.351975HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...19401.0Fire: GAS-ODOR/LEAK2015-12-10 14:39:21NORRISTOWNHAWS AVE1Fire14123
340.116153-75.343513AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;...19401.0EMS: CARDIAC EMERGENCY2015-12-10 16:47:36NORRISTOWNAIRY ST & SWEDE ST1EMS16123
440.251492-75.603350CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S...NaNEMS: DIZZINESS2015-12-10 16:56:52LOWER POTTSGROVECHERRYWOOD CT & DEAD END1EMS16123
\n", + "
" + ], + "text/plain": [ + " lat lng desc \\\n", + "0 40.297876 -75.581294 REINDEER CT & DEAD END; NEW HANOVER; Station ... \n", + "1 40.258061 -75.264680 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... \n", + "2 40.121182 -75.351975 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... \n", + "3 40.116153 -75.343513 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... \n", + "4 40.251492 -75.603350 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... \n", + "\n", + " zip title timeStamp twp \\\n", + "0 19525.0 EMS: BACK PAINS/INJURY 2015-12-10 17:10:52 NEW HANOVER \n", + "1 19446.0 EMS: DIABETIC EMERGENCY 2015-12-10 17:29:21 HATFIELD TOWNSHIP \n", + "2 19401.0 Fire: GAS-ODOR/LEAK 2015-12-10 14:39:21 NORRISTOWN \n", + "3 19401.0 EMS: CARDIAC EMERGENCY 2015-12-10 16:47:36 NORRISTOWN \n", + "4 NaN EMS: DIZZINESS 2015-12-10 16:56:52 LOWER POTTSGROVE \n", + "\n", + " addr e Reason Hour Month Day_of_week \n", + "0 REINDEER CT & DEAD END 1 EMS 17 12 3 \n", + "1 BRIAR PATH & WHITEMARSH LN 1 EMS 17 12 3 \n", + "2 HAWS AVE 1 Fire 14 12 3 \n", + "3 AIRY ST & SWEDE ST 1 EMS 16 12 3 \n", + "4 CHERRYWOOD CT & DEAD END 1 EMS 16 12 3 " + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "1def2b99", + "metadata": {}, + "source": [ + "# Use the .map() with a dictionary to map the actual string names to the day of the week" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "c9d3ee43", + "metadata": {}, + "outputs": [], + "source": [ + "dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thur',4:'Fri',5:'Sat',6:'Sun'}" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "eb279e3a", + "metadata": {}, + "outputs": [], + "source": [ + "df['Day_of_week'] = df['Day_of_week'].map(dmap)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "6d1af1c4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
latlngdescziptitletimeStamptwpaddreReasonHourMonthDay_of_week
040.297876-75.581294REINDEER CT & DEAD END; NEW HANOVER; Station ...19525.0EMS: BACK PAINS/INJURY2015-12-10 17:10:52NEW HANOVERREINDEER CT & DEAD END1EMS1712Thur
140.258061-75.264680BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...19446.0EMS: DIABETIC EMERGENCY2015-12-10 17:29:21HATFIELD TOWNSHIPBRIAR PATH & WHITEMARSH LN1EMS1712Thur
240.121182-75.351975HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...19401.0Fire: GAS-ODOR/LEAK2015-12-10 14:39:21NORRISTOWNHAWS AVE1Fire1412Thur
340.116153-75.343513AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;...19401.0EMS: CARDIAC EMERGENCY2015-12-10 16:47:36NORRISTOWNAIRY ST & SWEDE ST1EMS1612Thur
440.251492-75.603350CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S...NaNEMS: DIZZINESS2015-12-10 16:56:52LOWER POTTSGROVECHERRYWOOD CT & DEAD END1EMS1612Thur
\n", + "
" + ], + "text/plain": [ + " lat lng desc \\\n", + "0 40.297876 -75.581294 REINDEER CT & DEAD END; NEW HANOVER; Station ... \n", + "1 40.258061 -75.264680 BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... \n", + "2 40.121182 -75.351975 HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... \n", + "3 40.116153 -75.343513 AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... \n", + "4 40.251492 -75.603350 CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... \n", + "\n", + " zip title timeStamp twp \\\n", + "0 19525.0 EMS: BACK PAINS/INJURY 2015-12-10 17:10:52 NEW HANOVER \n", + "1 19446.0 EMS: DIABETIC EMERGENCY 2015-12-10 17:29:21 HATFIELD TOWNSHIP \n", + "2 19401.0 Fire: GAS-ODOR/LEAK 2015-12-10 14:39:21 NORRISTOWN \n", + "3 19401.0 EMS: CARDIAC EMERGENCY 2015-12-10 16:47:36 NORRISTOWN \n", + "4 NaN EMS: DIZZINESS 2015-12-10 16:56:52 LOWER POTTSGROVE \n", + "\n", + " addr e Reason Hour Month Day_of_week \n", + "0 REINDEER CT & DEAD END 1 EMS 17 12 Thur \n", + "1 BRIAR PATH & WHITEMARSH LN 1 EMS 17 12 Thur \n", + "2 HAWS AVE 1 Fire 14 12 Thur \n", + "3 AIRY ST & SWEDE ST 1 EMS 16 12 Thur \n", + "4 CHERRYWOOD CT & DEAD END 1 EMS 16 12 Thur " + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "90542cdd", + "metadata": {}, + "source": [ + "# Use seaborn to create a countplot of the Day of week column with hue based on the Reason column." + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "fc334816", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=df['Day_of_week'],data=df,hue=df['Reason'],palette='viridis')\n", + "# To relocate the legend\n", + "plt.legend(bbox_to_anchor=(1.05,1), loc=2, borderaxespad=0.)" + ] + }, + { + "cell_type": "markdown", + "id": "c299f0c9", + "metadata": {}, + "source": [ + "# Do the above for the month column" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "bb4aae73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x=df['Month'],data=df,hue=df['Reason'],palette='viridis')\n", + "# To relocate the legend\n", + "plt.legend(bbox_to_anchor=(1.05,1), loc=2, borderaxespad=0.)" + ] + }, + { + "cell_type": "markdown", + "id": "7b63db7f", + "metadata": {}, + "source": [ + "# Create a heatmap with the dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "1fd11802", + "metadata": {}, + "outputs": [], + "source": [ + "Tc = df.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "7952e137", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(Tc,annot=True,cmap='coolwarm')" + ] + }, + { + "cell_type": "markdown", + "id": "3abb8e06", + "metadata": {}, + "source": [ + "# Creat a clustermap using the dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "b59e3c33", + "metadata": {}, + "outputs": [], + "source": [ + "Tcc =df.pivot_table(index='Reason',columns='Month',values='Hour')" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "debb3d55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.clustermap(Tcc)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "866b4b32", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c57a76c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}