From d5915f909804b8306867de52ca0cf6d38f546c2a Mon Sep 17 00:00:00 2001 From: John Doe Date: Mon, 23 Jul 2018 10:28:37 -0400 Subject: [PATCH] first commit assessment --- .ipynb_checkpoints/titanic-checkpoint.ipynb | 3578 +++++++++++++++++++ .pytest_cache/v/cache/nodeids | 1 + src/__init__.py | 0 src/alice.txt | 17 + assessment.py => src/assessment.py | 91 +- test/.pytest_cache/v/cache/nodeids | 1 + test/__init__.py | 0 test/alice.txt | 17 + testing.py => test/testing.py | 5 + titanic.ipynb | 3578 +++++++++++++++++++ 10 files changed, 7262 insertions(+), 26 deletions(-) create mode 100644 .ipynb_checkpoints/titanic-checkpoint.ipynb create mode 100644 .pytest_cache/v/cache/nodeids create mode 100644 src/__init__.py create mode 100644 src/alice.txt rename assessment.py => src/assessment.py (61%) create mode 100644 test/.pytest_cache/v/cache/nodeids create mode 100644 test/__init__.py create mode 100644 test/alice.txt rename testing.py => test/testing.py (96%) create mode 100644 titanic.ipynb diff --git a/.ipynb_checkpoints/titanic-checkpoint.ipynb b/.ipynb_checkpoints/titanic-checkpoint.ipynb new file mode 100644 index 0000000..b4fbf64 --- /dev/null +++ b/.ipynb_checkpoints/titanic-checkpoint.ipynb @@ -0,0 +1,3578 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.linear_model import LogisticRegression\n", + "import sklearn.linear_model as lm\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('train.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.6+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Survived 1.000000\n", + "Fare 0.257307\n", + "Parch 0.081629\n", + "PassengerId -0.005007\n", + "SibSp -0.035322\n", + "Age -0.077221\n", + "Pclass -0.338481\n", + "Name: Survived, dtype: float64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()['Survived'].sort_values(ascending = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 549\n", + "1 342\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Survived.value_counts() ##Base Model is 549/(549 + 342) = .616" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "male 577\n", + "female 314\n", + "Name: Sex, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Sex.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "347082 7\n", + "1601 7\n", + "CA. 2343 7\n", + "347088 6\n", + "CA 2144 6\n", + "3101295 6\n", + "S.O.C. 14879 5\n", + "382652 5\n", + "113781 4\n", + "LINE 4\n", + "4133 4\n", + "W./C. 6608 4\n", + "113760 4\n", + "PC 17757 4\n", + "17421 4\n", + "349909 4\n", + "347077 4\n", + "2666 4\n", + "19950 4\n", + "363291 3\n", + "PC 17582 3\n", + "29106 3\n", + "PC 17755 3\n", + "248727 3\n", + "110413 3\n", + "230080 3\n", + "13502 3\n", + "PC 17760 3\n", + "347742 3\n", + "PC 17572 3\n", + " ..\n", + "349205 1\n", + "349240 1\n", + "STON/O2. 3101283 1\n", + "693 1\n", + "PC 17482 1\n", + "113051 1\n", + "113796 1\n", + "323592 1\n", + "2669 1\n", + "F.C.C. 13528 1\n", + "350029 1\n", + "112059 1\n", + "A./5. 3235 1\n", + "19988 1\n", + "345770 1\n", + "27849 1\n", + "7267 1\n", + "349217 1\n", + "3101276 1\n", + "349246 1\n", + "PC 17474 1\n", + "6563 1\n", + "347060 1\n", + "65303 1\n", + "3460 1\n", + "367232 1\n", + "350025 1\n", + "17463 1\n", + "349249 1\n", + "342826 1\n", + "Name: Ticket, Length: 681, dtype: int64" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Embarked_CEmbarked_QEmbarked_SSex_femaleSex_male
000101
110010
200110
300110
400101
501001
600101
700101
800110
910010
1000110
1100110
1200101
1300101
1400110
1500110
1601001
1700101
1800110
1910010
2000101
2100101
2201010
2300101
2400110
2500110
2610001
2700101
2801010
2900101
..................
86100101
86200110
86300110
86400101
86500110
86610010
86700101
86800101
86900101
87000101
87100110
87200101
87300101
87410010
87510010
87600101
87700101
87800101
87910010
88000110
88100101
88200110
88300101
88400101
88501010
88600101
88700110
88800110
88910001
89001001
\n", + "

891 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Embarked_C Embarked_Q Embarked_S Sex_female Sex_male\n", + "0 0 0 1 0 1\n", + "1 1 0 0 1 0\n", + "2 0 0 1 1 0\n", + "3 0 0 1 1 0\n", + "4 0 0 1 0 1\n", + "5 0 1 0 0 1\n", + "6 0 0 1 0 1\n", + "7 0 0 1 0 1\n", + "8 0 0 1 1 0\n", + "9 1 0 0 1 0\n", + "10 0 0 1 1 0\n", + "11 0 0 1 1 0\n", + "12 0 0 1 0 1\n", + "13 0 0 1 0 1\n", + "14 0 0 1 1 0\n", + "15 0 0 1 1 0\n", + "16 0 1 0 0 1\n", + "17 0 0 1 0 1\n", + "18 0 0 1 1 0\n", + "19 1 0 0 1 0\n", + "20 0 0 1 0 1\n", + "21 0 0 1 0 1\n", + "22 0 1 0 1 0\n", + "23 0 0 1 0 1\n", + "24 0 0 1 1 0\n", + "25 0 0 1 1 0\n", + "26 1 0 0 0 1\n", + "27 0 0 1 0 1\n", + "28 0 1 0 1 0\n", + "29 0 0 1 0 1\n", + ".. ... ... ... ... ...\n", + "861 0 0 1 0 1\n", + "862 0 0 1 1 0\n", + "863 0 0 1 1 0\n", + "864 0 0 1 0 1\n", + "865 0 0 1 1 0\n", + "866 1 0 0 1 0\n", + "867 0 0 1 0 1\n", + "868 0 0 1 0 1\n", + "869 0 0 1 0 1\n", + "870 0 0 1 0 1\n", + "871 0 0 1 1 0\n", + "872 0 0 1 0 1\n", + "873 0 0 1 0 1\n", + "874 1 0 0 1 0\n", + "875 1 0 0 1 0\n", + "876 0 0 1 0 1\n", + "877 0 0 1 0 1\n", + "878 0 0 1 0 1\n", + "879 1 0 0 1 0\n", + "880 0 0 1 1 0\n", + "881 0 0 1 0 1\n", + "882 0 0 1 1 0\n", + "883 0 0 1 0 1\n", + "884 0 0 1 0 1\n", + "885 0 1 0 1 0\n", + "886 0 0 1 0 1\n", + "887 0 0 1 1 0\n", + "888 0 0 1 1 0\n", + "889 1 0 0 0 1\n", + "890 0 1 0 0 1\n", + "\n", + "[891 rows x 5 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(df[['Embarked', 'Sex']])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Braund,', 'Mr.', 'Owen', 'Harris']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Name'][0].split()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "titles = []\n", + "for i in range(len(df['Name'])):\n", + " nl = df['Name'][i].split()\n", + " if 'Mr.' in nl:\n", + " titles.append('Mr.')\n", + " elif 'Miss.' in nl:\n", + " titles.append('Miss.')\n", + " else:\n", + " titles.append('Master.')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "df['title'] = titles" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "dummies = pd.get_dummies(df[['Embarked', 'Sex', 'title']]) ##all categorical variables that could matter in some capacity" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "full = pd.concat([df, dummies], axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFare...EmbarkedtitleEmbarked_CEmbarked_QEmbarked_SSex_femaleSex_maletitle_Master.title_Miss.title_Mr.
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500...SMr.00101001
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833...CMaster.10010100
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250...SMiss.00110010
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000...SMaster.00110100
4503Allen, Mr. William Henrymale35.0003734508.0500...SMr.00101001
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare ... Embarked title Embarked_C \\\n", + "0 0 A/5 21171 7.2500 ... S Mr. 0 \n", + "1 0 PC 17599 71.2833 ... C Master. 1 \n", + "2 0 STON/O2. 3101282 7.9250 ... S Miss. 0 \n", + "3 0 113803 53.1000 ... S Master. 0 \n", + "4 0 373450 8.0500 ... S Mr. 0 \n", + "\n", + " Embarked_Q Embarked_S Sex_female Sex_male title_Master. title_Miss. \\\n", + "0 0 1 0 1 0 0 \n", + "1 0 0 1 0 1 0 \n", + "2 0 1 1 0 0 1 \n", + "3 0 1 1 0 1 0 \n", + "4 0 1 0 1 0 0 \n", + "\n", + " title_Mr. \n", + "0 1 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 1 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "full.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Survived 1.000000\n", + "Sex_female 0.543351\n", + "title_Master. 0.338476\n", + "title_Miss. 0.327093\n", + "Fare 0.257307\n", + "Embarked_C 0.168240\n", + "Parch 0.081629\n", + "Embarked_Q 0.003650\n", + "PassengerId -0.005007\n", + "SibSp -0.035322\n", + "Age -0.077221\n", + "Embarked_S -0.155660\n", + "Pclass -0.338481\n", + "Sex_male -0.543351\n", + "title_Mr. -0.549199\n", + "Name: Survived, dtype: float64" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "full.corr()['Survived'].sort_values(ascending = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 21 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "title 891 non-null object\n", + "Embarked_C 891 non-null uint8\n", + "Embarked_Q 891 non-null uint8\n", + "Embarked_S 891 non-null uint8\n", + "Sex_female 891 non-null uint8\n", + "Sex_male 891 non-null uint8\n", + "title_Master. 891 non-null uint8\n", + "title_Miss. 891 non-null uint8\n", + "title_Mr. 891 non-null uint8\n", + "dtypes: float64(2), int64(5), object(6), uint8(8)\n", + "memory usage: 97.5+ KB\n" + ] + } + ], + "source": [ + "full.info() #looks like age is missing values\n", + "age_median = full.Age.median()\n", + "full['Age'] = full.Age.fillna(age_median)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeSibSpParchFareEmbarked_QEmbarked_SSex_femaletitle_Master.title_Miss.
0322.0107.250001000
1138.01071.283300110
2326.0007.925001101
3135.01053.100001110
4335.0008.050001000
5328.0008.458310000
6154.00051.862501000
732.03121.075001010
8327.00211.133301110
9214.01030.070800110
1034.01116.700001101
11158.00026.550001101
12320.0008.050001000
13339.01531.275001000
14314.0007.854201101
15255.00016.000001110
1632.04129.125010010
17228.00013.000001000
18331.01018.000001110
19328.0007.225000110
20235.00026.000001000
21234.00013.000001000
22315.0008.029210101
23128.00035.500001000
2438.03121.075001101
25338.01531.387501110
26328.0007.225000000
27119.032263.000001000
28328.0007.879210101
29328.0007.895801000
.................................
861221.01011.500001000
862148.00025.929201110
863328.08269.550001101
864224.00013.000001000
865242.00013.000001110
866227.01013.858300101
867131.00050.495801000
868328.0009.500001000
86934.01111.133301010
870326.0007.895801000
871147.01152.554201110
872133.0005.000001000
873347.0009.000001000
874228.01024.000000110
875315.0007.225000101
876320.0009.845801000
877319.0007.895801000
878328.0007.895801000
879156.00183.158300110
880225.00126.000001110
881333.0007.895801000
882322.00010.516701101
883228.00010.500001000
884325.0007.050001000
885339.00529.125010110
886227.00013.000001010
887119.00030.000001101
888328.01223.450001101
889126.00030.000000000
890332.0007.750010000
\n", + "

891 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Embarked_Q Embarked_S Sex_female \\\n", + "0 3 22.0 1 0 7.2500 0 1 0 \n", + "1 1 38.0 1 0 71.2833 0 0 1 \n", + "2 3 26.0 0 0 7.9250 0 1 1 \n", + "3 1 35.0 1 0 53.1000 0 1 1 \n", + "4 3 35.0 0 0 8.0500 0 1 0 \n", + "5 3 28.0 0 0 8.4583 1 0 0 \n", + "6 1 54.0 0 0 51.8625 0 1 0 \n", + "7 3 2.0 3 1 21.0750 0 1 0 \n", + "8 3 27.0 0 2 11.1333 0 1 1 \n", + "9 2 14.0 1 0 30.0708 0 0 1 \n", + "10 3 4.0 1 1 16.7000 0 1 1 \n", + "11 1 58.0 0 0 26.5500 0 1 1 \n", + "12 3 20.0 0 0 8.0500 0 1 0 \n", + "13 3 39.0 1 5 31.2750 0 1 0 \n", + "14 3 14.0 0 0 7.8542 0 1 1 \n", + "15 2 55.0 0 0 16.0000 0 1 1 \n", + "16 3 2.0 4 1 29.1250 1 0 0 \n", + "17 2 28.0 0 0 13.0000 0 1 0 \n", + "18 3 31.0 1 0 18.0000 0 1 1 \n", + "19 3 28.0 0 0 7.2250 0 0 1 \n", + "20 2 35.0 0 0 26.0000 0 1 0 \n", + "21 2 34.0 0 0 13.0000 0 1 0 \n", + "22 3 15.0 0 0 8.0292 1 0 1 \n", + "23 1 28.0 0 0 35.5000 0 1 0 \n", + "24 3 8.0 3 1 21.0750 0 1 1 \n", + "25 3 38.0 1 5 31.3875 0 1 1 \n", + "26 3 28.0 0 0 7.2250 0 0 0 \n", + "27 1 19.0 3 2 263.0000 0 1 0 \n", + "28 3 28.0 0 0 7.8792 1 0 1 \n", + "29 3 28.0 0 0 7.8958 0 1 0 \n", + ".. ... ... ... ... ... ... ... ... \n", + "861 2 21.0 1 0 11.5000 0 1 0 \n", + "862 1 48.0 0 0 25.9292 0 1 1 \n", + "863 3 28.0 8 2 69.5500 0 1 1 \n", + "864 2 24.0 0 0 13.0000 0 1 0 \n", + "865 2 42.0 0 0 13.0000 0 1 1 \n", + "866 2 27.0 1 0 13.8583 0 0 1 \n", + "867 1 31.0 0 0 50.4958 0 1 0 \n", + "868 3 28.0 0 0 9.5000 0 1 0 \n", + "869 3 4.0 1 1 11.1333 0 1 0 \n", + "870 3 26.0 0 0 7.8958 0 1 0 \n", + "871 1 47.0 1 1 52.5542 0 1 1 \n", + "872 1 33.0 0 0 5.0000 0 1 0 \n", + "873 3 47.0 0 0 9.0000 0 1 0 \n", + "874 2 28.0 1 0 24.0000 0 0 1 \n", + "875 3 15.0 0 0 7.2250 0 0 1 \n", + "876 3 20.0 0 0 9.8458 0 1 0 \n", + "877 3 19.0 0 0 7.8958 0 1 0 \n", + "878 3 28.0 0 0 7.8958 0 1 0 \n", + "879 1 56.0 0 1 83.1583 0 0 1 \n", + "880 2 25.0 0 1 26.0000 0 1 1 \n", + "881 3 33.0 0 0 7.8958 0 1 0 \n", + "882 3 22.0 0 0 10.5167 0 1 1 \n", + "883 2 28.0 0 0 10.5000 0 1 0 \n", + "884 3 25.0 0 0 7.0500 0 1 0 \n", + "885 3 39.0 0 5 29.1250 1 0 1 \n", + "886 2 27.0 0 0 13.0000 0 1 0 \n", + "887 1 19.0 0 0 30.0000 0 1 1 \n", + "888 3 28.0 1 2 23.4500 0 1 1 \n", + "889 1 26.0 0 0 30.0000 0 0 0 \n", + "890 3 32.0 0 0 7.7500 1 0 0 \n", + "\n", + " title_Master. title_Miss. \n", + "0 0 0 \n", + "1 1 0 \n", + "2 0 1 \n", + "3 1 0 \n", + "4 0 0 \n", + "5 0 0 \n", + "6 0 0 \n", + "7 1 0 \n", + "8 1 0 \n", + "9 1 0 \n", + "10 0 1 \n", + "11 0 1 \n", + "12 0 0 \n", + "13 0 0 \n", + "14 0 1 \n", + "15 1 0 \n", + "16 1 0 \n", + "17 0 0 \n", + "18 1 0 \n", + "19 1 0 \n", + "20 0 0 \n", + "21 0 0 \n", + "22 0 1 \n", + "23 0 0 \n", + "24 0 1 \n", + "25 1 0 \n", + "26 0 0 \n", + "27 0 0 \n", + "28 0 1 \n", + "29 0 0 \n", + ".. ... ... \n", + "861 0 0 \n", + "862 1 0 \n", + "863 0 1 \n", + "864 0 0 \n", + "865 1 0 \n", + "866 0 1 \n", + "867 0 0 \n", + "868 0 0 \n", + "869 1 0 \n", + "870 0 0 \n", + "871 1 0 \n", + "872 0 0 \n", + "873 0 0 \n", + "874 1 0 \n", + "875 0 1 \n", + "876 0 0 \n", + "877 0 0 \n", + "878 0 0 \n", + "879 1 0 \n", + "880 1 0 \n", + "881 0 0 \n", + "882 0 1 \n", + "883 0 0 \n", + "884 0 0 \n", + "885 1 0 \n", + "886 1 0 \n", + "887 0 1 \n", + "888 0 1 \n", + "889 0 0 \n", + "890 0 0 \n", + "\n", + "[891 rows x 10 columns]" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = full.drop(columns = ['Survived','Sex_male', 'PassengerId', 'title_Mr.', 'Embarked_C', 'Sex', 'title', 'Cabin', 'Name', 'Embarked', 'Ticket'])\n", + "#Remove response variable, the string categorical variables, and one of the dummy categories classified from the initial string\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = full.Survived\n", + "type(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(891, 10)" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.head()\n", + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 10 columns):\n", + "Pclass 891 non-null int64\n", + "Age 891 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Fare 891 non-null float64\n", + "Embarked_Q 891 non-null uint8\n", + "Embarked_S 891 non-null uint8\n", + "Sex_female 891 non-null uint8\n", + "title_Master. 891 non-null uint8\n", + "title_Miss. 891 non-null uint8\n", + "dtypes: float64(2), int64(3), uint8(5)\n", + "memory usage: 39.2 KB\n" + ] + } + ], + "source": [ + "X.info() #looks like some age are missing" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PassengerId -0.005007\n", + "Survived 1.000000\n", + "Pclass -0.338481\n", + "Age -0.064910\n", + "SibSp -0.035322\n", + "Parch 0.081629\n", + "Fare 0.257307\n", + "Embarked_C 0.168240\n", + "Embarked_Q 0.003650\n", + "Embarked_S -0.155660\n", + "Sex_female 0.543351\n", + "Sex_male -0.543351\n", + "title_Master. 0.338476\n", + "title_Miss. 0.327093\n", + "title_Mr. -0.549199\n", + "Name: Survived, dtype: float64" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "full.corr()['Survived']" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = lm.LogisticRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeSibSpParchFareEmbarked_QEmbarked_SSex_femaletitle_Master.title_Miss.
709328.01115.245800010
439231.00010.500001000
840320.0007.925001000
72026.00133.000001101
39314.01011.241700101
290126.00078.850001101
300328.0007.750010101
333316.02018.000001000
208316.0007.750010101
136119.00226.283301101
137137.01053.100001000
696344.0008.050001000
485328.03125.466701101
244330.0007.225000000
344236.00013.000001000
853116.00139.400001101
621142.01052.554201000
653328.0007.829210101
886227.00013.000001010
110147.00052.000001000
294324.0007.895801000
447134.00026.550001000
192319.0107.854201101
682320.0009.225001000
538328.00014.500001000
819310.03227.900001010
30140.00027.720800010
673231.00013.000001000
6334.03227.900001010
396331.0007.854201101
.................................
456165.00026.550001000
500317.0008.662501000
430128.00026.550001000
44514.00281.858301010
650328.0007.895801000
17231.01111.133301101
450236.01227.750001000
314243.01126.250001000
332138.001153.462501000
801231.01126.250001110
90329.0008.050001000
834318.0008.300001000
181228.00015.050000000
581139.011110.883300110
795239.00013.000001000
69326.0208.662501000
131320.0007.050001000
334128.010133.650001110
597349.0000.000001000
135223.00015.045800000
16431.04139.687501010
28328.0007.879210101
783328.01223.450001000
19323.01126.000001010
86934.01111.133301010
715319.0007.650001000
525340.5007.750010000
38131.00215.741700101
140328.00215.245800110
173321.0007.925001000
\n", + "

295 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Embarked_Q Embarked_S Sex_female \\\n", + "709 3 28.0 1 1 15.2458 0 0 0 \n", + "439 2 31.0 0 0 10.5000 0 1 0 \n", + "840 3 20.0 0 0 7.9250 0 1 0 \n", + "720 2 6.0 0 1 33.0000 0 1 1 \n", + "39 3 14.0 1 0 11.2417 0 0 1 \n", + "290 1 26.0 0 0 78.8500 0 1 1 \n", + "300 3 28.0 0 0 7.7500 1 0 1 \n", + "333 3 16.0 2 0 18.0000 0 1 0 \n", + "208 3 16.0 0 0 7.7500 1 0 1 \n", + "136 1 19.0 0 2 26.2833 0 1 1 \n", + "137 1 37.0 1 0 53.1000 0 1 0 \n", + "696 3 44.0 0 0 8.0500 0 1 0 \n", + "485 3 28.0 3 1 25.4667 0 1 1 \n", + "244 3 30.0 0 0 7.2250 0 0 0 \n", + "344 2 36.0 0 0 13.0000 0 1 0 \n", + "853 1 16.0 0 1 39.4000 0 1 1 \n", + "621 1 42.0 1 0 52.5542 0 1 0 \n", + "653 3 28.0 0 0 7.8292 1 0 1 \n", + "886 2 27.0 0 0 13.0000 0 1 0 \n", + "110 1 47.0 0 0 52.0000 0 1 0 \n", + "294 3 24.0 0 0 7.8958 0 1 0 \n", + "447 1 34.0 0 0 26.5500 0 1 0 \n", + "192 3 19.0 1 0 7.8542 0 1 1 \n", + "682 3 20.0 0 0 9.2250 0 1 0 \n", + "538 3 28.0 0 0 14.5000 0 1 0 \n", + "819 3 10.0 3 2 27.9000 0 1 0 \n", + "30 1 40.0 0 0 27.7208 0 0 0 \n", + "673 2 31.0 0 0 13.0000 0 1 0 \n", + "63 3 4.0 3 2 27.9000 0 1 0 \n", + "396 3 31.0 0 0 7.8542 0 1 1 \n", + ".. ... ... ... ... ... ... ... ... \n", + "456 1 65.0 0 0 26.5500 0 1 0 \n", + "500 3 17.0 0 0 8.6625 0 1 0 \n", + "430 1 28.0 0 0 26.5500 0 1 0 \n", + "445 1 4.0 0 2 81.8583 0 1 0 \n", + "650 3 28.0 0 0 7.8958 0 1 0 \n", + "172 3 1.0 1 1 11.1333 0 1 1 \n", + "450 2 36.0 1 2 27.7500 0 1 0 \n", + "314 2 43.0 1 1 26.2500 0 1 0 \n", + "332 1 38.0 0 1 153.4625 0 1 0 \n", + "801 2 31.0 1 1 26.2500 0 1 1 \n", + "90 3 29.0 0 0 8.0500 0 1 0 \n", + "834 3 18.0 0 0 8.3000 0 1 0 \n", + "181 2 28.0 0 0 15.0500 0 0 0 \n", + "581 1 39.0 1 1 110.8833 0 0 1 \n", + "795 2 39.0 0 0 13.0000 0 1 0 \n", + "69 3 26.0 2 0 8.6625 0 1 0 \n", + "131 3 20.0 0 0 7.0500 0 1 0 \n", + "334 1 28.0 1 0 133.6500 0 1 1 \n", + "597 3 49.0 0 0 0.0000 0 1 0 \n", + "135 2 23.0 0 0 15.0458 0 0 0 \n", + "164 3 1.0 4 1 39.6875 0 1 0 \n", + "28 3 28.0 0 0 7.8792 1 0 1 \n", + "783 3 28.0 1 2 23.4500 0 1 0 \n", + "193 2 3.0 1 1 26.0000 0 1 0 \n", + "869 3 4.0 1 1 11.1333 0 1 0 \n", + "715 3 19.0 0 0 7.6500 0 1 0 \n", + "525 3 40.5 0 0 7.7500 1 0 0 \n", + "381 3 1.0 0 2 15.7417 0 0 1 \n", + "140 3 28.0 0 2 15.2458 0 0 1 \n", + "173 3 21.0 0 0 7.9250 0 1 0 \n", + "\n", + " title_Master. title_Miss. \n", + "709 1 0 \n", + "439 0 0 \n", + "840 0 0 \n", + "720 0 1 \n", + "39 0 1 \n", + "290 0 1 \n", + "300 0 1 \n", + "333 0 0 \n", + "208 0 1 \n", + "136 0 1 \n", + "137 0 0 \n", + "696 0 0 \n", + "485 0 1 \n", + "244 0 0 \n", + "344 0 0 \n", + "853 0 1 \n", + "621 0 0 \n", + "653 0 1 \n", + "886 1 0 \n", + "110 0 0 \n", + "294 0 0 \n", + "447 0 0 \n", + "192 0 1 \n", + "682 0 0 \n", + "538 0 0 \n", + "819 1 0 \n", + "30 1 0 \n", + "673 0 0 \n", + "63 1 0 \n", + "396 0 1 \n", + ".. ... ... \n", + "456 0 0 \n", + "500 0 0 \n", + "430 0 0 \n", + "445 1 0 \n", + "650 0 0 \n", + "172 0 1 \n", + "450 0 0 \n", + "314 0 0 \n", + "332 0 0 \n", + "801 1 0 \n", + "90 0 0 \n", + "834 0 0 \n", + "181 0 0 \n", + "581 1 0 \n", + "795 0 0 \n", + "69 0 0 \n", + "131 0 0 \n", + "334 1 0 \n", + "597 0 0 \n", + "135 0 0 \n", + "164 1 0 \n", + "28 0 1 \n", + "783 0 0 \n", + "193 1 0 \n", + "869 1 0 \n", + "715 0 0 \n", + "525 0 0 \n", + "381 0 1 \n", + "140 1 0 \n", + "173 0 0 \n", + "\n", + "[295 rows x 10 columns]" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n", + " 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,\n", + " 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", + " 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n", + " 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 0, 0, 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8101694915254237" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(X_test, y_test) ## does about 20% better than base model performance of 61.6%" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.68818243, -0.02576249, -0.36500498, -0.27925771, 0.00578152,\n", + " -0.144344 , -0.54206936, 1.77298728, 1.71801348, 0.60544203]])" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.11797972])" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.intercept_" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'LogisticRegression' object has no attribute 'summary'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msummary\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m: 'LogisticRegression' object has no attribute 'summary'" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/.pytest_cache/v/cache/nodeids b/.pytest_cache/v/cache/nodeids new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/.pytest_cache/v/cache/nodeids @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/alice.txt b/src/alice.txt new file mode 100644 index 0000000..84bf3cc --- /dev/null +++ b/src/alice.txt @@ -0,0 +1,17 @@ +Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?' +So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. +There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. +In another moment down went Alice after it, never once considering how in the world she was to get out again. +The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. +Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. +'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.) +Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) '—yes, that's about the right distance—but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.) +Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! The Antipathies, I think—' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) '—but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke—fancy CURTSEYING as you're falling through the air! Do you think you could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.' +Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to-night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over. +Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof. +There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again. +Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted! +Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only knew how to begin.' For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible. +There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, ('which certainly was not here before,' said Alice,) and round the neck of the bottle was a paper label, with the words 'DRINK ME' beautifully printed on it in large letters. +It was all very well to say 'Drink me,' but the wise little Alice was not going to do THAT in a hurry. 'No, I'll look first,' she said, 'and see whether it's marked "poison" or not'; for she had read several nice little histories about children who had got burnt, and eaten up by wild beasts and other unpleasant things, all because they WOULD not remember the simple rules their friends had taught them: such as, that a red-hot poker will burn you if you hold it too long; and that if you cut your finger VERY deeply with a knife, it usually bleeds; and she had never forgotten that, if you drink much from a bottle marked 'poison,' it is almost certain to disagree with you, sooner or later. +However, this bottle was NOT marked 'poison,' so Alice ventured to taste it, and finding it very nice, (it had, in fact, a sort of mixed flavour of cherry-tart, custard, pine-apple, roast turkey, toffee, and hot buttered toast,) she very soon finished it off. diff --git a/assessment.py b/src/assessment.py similarity index 61% rename from assessment.py rename to src/assessment.py index 281675d..9ba57d6 100644 --- a/assessment.py +++ b/src/assessment.py @@ -3,8 +3,12 @@ # PYTHON SECTION - -def count_characters(string): +class Assessment: + + def __init__(self): + pass + + def count_characters(self, string): ''' INPUT: STRING OUTPUT: DICT (with counts of each character in input string) @@ -14,10 +18,13 @@ def count_characters(string): Characters which with a count of 0 should not be included in the output dictionary. ''' - pass - - -def invert_dictionary(d): + letters = [] + for i in range(len(string)): + letters.append(string[i]) + dict_letters = {letter: letters.count(letter) for letter in letters} + return dict_letter + + def invert_dictionary(self, d): ''' INPUT: DICT OUTPUT: DICT (of sets of input keys indexing the same input values @@ -28,10 +35,23 @@ def invert_dictionary(d): the set of d's keys which shared the same value. e.g. {'a': 2, 'b': 4, 'c': 2} => {2: {'a', 'c'}, 4: {'b'}} ''' - pass + letters = [] + counts = [] + for letter in d: + letters.append(letter) + for letter in d: + counts.append(d[letter]) + inv_dict = {} + for count in counts: + if count not in inv_dict: + inv_dict.update({count : []}) + for letter in d: + if d[letter] in inv_dict: + inv_dict[d[letter]].append(letter) + return inv_dict -def word_count(filename): + def word_count(self, filename): ''' INPUT: STRING OUTPUT: INT, INT, INT (a tuple with line, word, @@ -44,10 +64,15 @@ def word_count(filename): 2. number of words (broken by whitespace) 3. number of characters ''' - pass + num_char = len(filename) + lines = 0 + for line in open(filename): + lines += 1 + num_words = len(filename.split()) + return num_char, num_words, lines -def matrix_multiplication(A, B): + def matrix_multiplication(self, A, B): ''' INPUT: LIST (of length n) OF LIST (of length n) OF INTEGERS, LIST (of length n) OF LIST (of length n) OF INTEGERS @@ -67,13 +92,23 @@ def matrix_multiplication(A, B): Please do not use numpy. Write your solution in straight python. ''' - pass + c = [] + for i in range(0,len(A)): + temp=[] + for j in range(0,len(B[0])): + s = 0 + for k in range(0,len(A[0])): + s += A[i][k]*B[k][j] + temp.append(s) + c.append(temp) + return c + # NumPy SECTION -def array_work(rows, cols, scalar, matrixA): + def array_work(self, rows, cols, scalar, matrixA): ''' INPUT: INT, INT, INT, NUMPY ARRAY OUTPUT: NUMPY ARRAY @@ -89,10 +124,12 @@ def array_work(rows, cols, scalar, matrixA): [5, 6], * [5, 5, 5]] [7, 8]] ''' - pass + arrayA = np.array(matrixA) + multiplied = arrayA * scalar + return multiplied.reshape(rows, cols) -def boolean_indexing(arr, minimum): + def boolean_indexing(self, arr, minimum): ''' INPUT: NUMPY ARRAY, INT OUTPUT: NUMPY ARRAY @@ -105,12 +142,17 @@ def boolean_indexing(arr, minimum): In [1]: boolean_indexing([[3, 4, 5], [6, 7, 8]], 7) Out[1]: array([7, 8]) ''' - pass + atleast = [] + for row in arr: + for num in row: + if num >= minimum: + atleast.append(num) + return np.array(atleast) # Pandas SECTION -def make_series(start, length, index): + def make_series(self, start, length, index): ''' INPUTS: INT, INT, LIST (of length "length") OUTPUT: PANDAS SERIES (of "length" sequential integers @@ -128,15 +170,12 @@ def make_series(start, length, index): c 7 dtype: int64 ''' - pass + data = [] + for i in range(len(index)): + data.append(start + i) + return pd.Series(data = data, index = index) + def data_frame_work(self,df, colA, colB, colC): + df[colC]=colA+colB + return df -def data_frame_work(df, colA, colB, colC): - ''' - INPUT: DATAFRAME, STR, STR, STR - OUTPUT: None - - Insert a column (colC) into the dataframe that is the sum of colA and colB. - Assume that df contains columns colA and colB and that these are numeric. - ''' - pass diff --git a/test/.pytest_cache/v/cache/nodeids b/test/.pytest_cache/v/cache/nodeids new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/.pytest_cache/v/cache/nodeids @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/alice.txt b/test/alice.txt new file mode 100644 index 0000000..84bf3cc --- /dev/null +++ b/test/alice.txt @@ -0,0 +1,17 @@ +Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?' +So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her. +There was nothing so VERY remarkable in that; nor did Alice think it so VERY much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!' (when she thought it over afterwards, it occurred to her that she ought to have wondered at this, but at the time it all seemed quite natural); but when the Rabbit actually TOOK A WATCH OUT OF ITS WAISTCOAT-POCKET, and looked at it, and then hurried on, Alice started to her feet, for it flashed across her mind that she had never before seen a rabbit with either a waistcoat-pocket, or a watch to take out of it, and burning with curiosity, she ran across the field after it, and fortunately was just in time to see it pop down a large rabbit-hole under the hedge. +In another moment down went Alice after it, never once considering how in the world she was to get out again. +The rabbit-hole went straight on like a tunnel for some way, and then dipped suddenly down, so suddenly that Alice had not a moment to think about stopping herself before she found herself falling down a very deep well. +Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. +'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.) +Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud. 'I must be getting somewhere near the centre of the earth. Let me see: that would be four thousand miles down, I think—' (for, you see, Alice had learnt several things of this sort in her lessons in the schoolroom, and though this was not a VERY good opportunity for showing off her knowledge, as there was no one to listen to her, still it was good practice to say it over) '—yes, that's about the right distance—but then I wonder what Latitude or Longitude I've got to?' (Alice had no idea what Latitude was, or Longitude either, but thought they were nice grand words to say.) +Presently she began again. 'I wonder if I shall fall right THROUGH the earth! How funny it'll seem to come out among the people that walk with their heads downward! The Antipathies, I think—' (she was rather glad there WAS no one listening, this time, as it didn't sound at all the right word) '—but I shall have to ask them what the name of the country is, you know. Please, Ma'am, is this New Zealand or Australia?' (and she tried to curtsey as she spoke—fancy CURTSEYING as you're falling through the air! Do you think you could manage it?) 'And what an ignorant little girl she'll think me for asking! No, it'll never do to ask: perhaps I shall see it written up somewhere.' +Down, down, down. There was nothing else to do, so Alice soon began talking again. 'Dinah'll miss me very much to-night, I should think!' (Dinah was the cat.) 'I hope they'll remember her saucer of milk at tea-time. Dinah my dear! I wish you were down here with me! There are no mice in the air, I'm afraid, but you might catch a bat, and that's very like a mouse, you know. But do cats eat bats, I wonder?' And here Alice began to get rather sleepy, and went on saying to herself, in a dreamy sort of way, 'Do cats eat bats? Do cats eat bats?' and sometimes, 'Do bats eat cats?' for, you see, as she couldn't answer either question, it didn't much matter which way she put it. She felt that she was dozing off, and had just begun to dream that she was walking hand in hand with Dinah, and saying to her very earnestly, 'Now, Dinah, tell me the truth: did you ever eat a bat?' when suddenly, thump! thump! down she came upon a heap of sticks and dry leaves, and the fall was over. +Alice was not a bit hurt, and she jumped up on to her feet in a moment: she looked up, but it was all dark overhead; before her was another long passage, and the White Rabbit was still in sight, hurrying down it. There was not a moment to be lost: away went Alice like the wind, and was just in time to hear it say, as it turned a corner, 'Oh my ears and whiskers, how late it's getting!' She was close behind it when she turned the corner, but the Rabbit was no longer to be seen: she found herself in a long, low hall, which was lit up by a row of lamps hanging from the roof. +There were doors all round the hall, but they were all locked; and when Alice had been all the way down one side and up the other, trying every door, she walked sadly down the middle, wondering how she was ever to get out again. +Suddenly she came upon a little three-legged table, all made of solid glass; there was nothing on it except a tiny golden key, and Alice's first thought was that it might belong to one of the doors of the hall; but, alas! either the locks were too large, or the key was too small, but at any rate it would not open any of them. However, on the second time round, she came upon a low curtain she had not noticed before, and behind it was a little door about fifteen inches high: she tried the little golden key in the lock, and to her great delight it fitted! +Alice opened the door and found that it led into a small passage, not much larger than a rat-hole: she knelt down and looked along the passage into the loveliest garden you ever saw. How she longed to get out of that dark hall, and wander about among those beds of bright flowers and those cool fountains, but she could not even get her head through the doorway; 'and even if my head would go through,' thought poor Alice, 'it would be of very little use without my shoulders. Oh, how I wish I could shut up like a telescope! I think I could, if I only knew how to begin.' For, you see, so many out-of-the-way things had happened lately, that Alice had begun to think that very few things indeed were really impossible. +There seemed to be no use in waiting by the little door, so she went back to the table, half hoping she might find another key on it, or at any rate a book of rules for shutting people up like telescopes: this time she found a little bottle on it, ('which certainly was not here before,' said Alice,) and round the neck of the bottle was a paper label, with the words 'DRINK ME' beautifully printed on it in large letters. +It was all very well to say 'Drink me,' but the wise little Alice was not going to do THAT in a hurry. 'No, I'll look first,' she said, 'and see whether it's marked "poison" or not'; for she had read several nice little histories about children who had got burnt, and eaten up by wild beasts and other unpleasant things, all because they WOULD not remember the simple rules their friends had taught them: such as, that a red-hot poker will burn you if you hold it too long; and that if you cut your finger VERY deeply with a knife, it usually bleeds; and she had never forgotten that, if you drink much from a bottle marked 'poison,' it is almost certain to disagree with you, sooner or later. +However, this bottle was NOT marked 'poison,' so Alice ventured to taste it, and finding it very nice, (it had, in fact, a sort of mixed flavour of cherry-tart, custard, pine-apple, roast turkey, toffee, and hot buttered toast,) she very soon finished it off. diff --git a/testing.py b/test/testing.py similarity index 96% rename from testing.py rename to test/testing.py index c68b010..367886e 100644 --- a/testing.py +++ b/test/testing.py @@ -1,3 +1,8 @@ +import numpy as np +import pandas as pd +import pytest +from src.assessment import Assessment + def test_count_characters(self): string = "abafdcggfaabe" answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2} diff --git a/titanic.ipynb b/titanic.ipynb new file mode 100644 index 0000000..b4fbf64 --- /dev/null +++ b/titanic.ipynb @@ -0,0 +1,3578 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import statsmodels.api as sm\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import r2_score\n", + "from pandas.plotting import scatter_matrix\n", + "from sklearn.linear_model import LogisticRegression\n", + "import sklearn.linear_model as lm\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('train.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 83.6+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Survived 1.000000\n", + "Fare 0.257307\n", + "Parch 0.081629\n", + "PassengerId -0.005007\n", + "SibSp -0.035322\n", + "Age -0.077221\n", + "Pclass -0.338481\n", + "Name: Survived, dtype: float64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.corr()['Survived'].sort_values(ascending = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 549\n", + "1 342\n", + "Name: Survived, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Survived.value_counts() ##Base Model is 549/(549 + 342) = .616" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "male 577\n", + "female 314\n", + "Name: Sex, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Sex.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "347082 7\n", + "1601 7\n", + "CA. 2343 7\n", + "347088 6\n", + "CA 2144 6\n", + "3101295 6\n", + "S.O.C. 14879 5\n", + "382652 5\n", + "113781 4\n", + "LINE 4\n", + "4133 4\n", + "W./C. 6608 4\n", + "113760 4\n", + "PC 17757 4\n", + "17421 4\n", + "349909 4\n", + "347077 4\n", + "2666 4\n", + "19950 4\n", + "363291 3\n", + "PC 17582 3\n", + "29106 3\n", + "PC 17755 3\n", + "248727 3\n", + "110413 3\n", + "230080 3\n", + "13502 3\n", + "PC 17760 3\n", + "347742 3\n", + "PC 17572 3\n", + " ..\n", + "349205 1\n", + "349240 1\n", + "STON/O2. 3101283 1\n", + "693 1\n", + "PC 17482 1\n", + "113051 1\n", + "113796 1\n", + "323592 1\n", + "2669 1\n", + "F.C.C. 13528 1\n", + "350029 1\n", + "112059 1\n", + "A./5. 3235 1\n", + "19988 1\n", + "345770 1\n", + "27849 1\n", + "7267 1\n", + "349217 1\n", + "3101276 1\n", + "349246 1\n", + "PC 17474 1\n", + "6563 1\n", + "347060 1\n", + "65303 1\n", + "3460 1\n", + "367232 1\n", + "350025 1\n", + "17463 1\n", + "349249 1\n", + "342826 1\n", + "Name: Ticket, Length: 681, dtype: int64" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Embarked_CEmbarked_QEmbarked_SSex_femaleSex_male
000101
110010
200110
300110
400101
501001
600101
700101
800110
910010
1000110
1100110
1200101
1300101
1400110
1500110
1601001
1700101
1800110
1910010
2000101
2100101
2201010
2300101
2400110
2500110
2610001
2700101
2801010
2900101
..................
86100101
86200110
86300110
86400101
86500110
86610010
86700101
86800101
86900101
87000101
87100110
87200101
87300101
87410010
87510010
87600101
87700101
87800101
87910010
88000110
88100101
88200110
88300101
88400101
88501010
88600101
88700110
88800110
88910001
89001001
\n", + "

891 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " Embarked_C Embarked_Q Embarked_S Sex_female Sex_male\n", + "0 0 0 1 0 1\n", + "1 1 0 0 1 0\n", + "2 0 0 1 1 0\n", + "3 0 0 1 1 0\n", + "4 0 0 1 0 1\n", + "5 0 1 0 0 1\n", + "6 0 0 1 0 1\n", + "7 0 0 1 0 1\n", + "8 0 0 1 1 0\n", + "9 1 0 0 1 0\n", + "10 0 0 1 1 0\n", + "11 0 0 1 1 0\n", + "12 0 0 1 0 1\n", + "13 0 0 1 0 1\n", + "14 0 0 1 1 0\n", + "15 0 0 1 1 0\n", + "16 0 1 0 0 1\n", + "17 0 0 1 0 1\n", + "18 0 0 1 1 0\n", + "19 1 0 0 1 0\n", + "20 0 0 1 0 1\n", + "21 0 0 1 0 1\n", + "22 0 1 0 1 0\n", + "23 0 0 1 0 1\n", + "24 0 0 1 1 0\n", + "25 0 0 1 1 0\n", + "26 1 0 0 0 1\n", + "27 0 0 1 0 1\n", + "28 0 1 0 1 0\n", + "29 0 0 1 0 1\n", + ".. ... ... ... ... ...\n", + "861 0 0 1 0 1\n", + "862 0 0 1 1 0\n", + "863 0 0 1 1 0\n", + "864 0 0 1 0 1\n", + "865 0 0 1 1 0\n", + "866 1 0 0 1 0\n", + "867 0 0 1 0 1\n", + "868 0 0 1 0 1\n", + "869 0 0 1 0 1\n", + "870 0 0 1 0 1\n", + "871 0 0 1 1 0\n", + "872 0 0 1 0 1\n", + "873 0 0 1 0 1\n", + "874 1 0 0 1 0\n", + "875 1 0 0 1 0\n", + "876 0 0 1 0 1\n", + "877 0 0 1 0 1\n", + "878 0 0 1 0 1\n", + "879 1 0 0 1 0\n", + "880 0 0 1 1 0\n", + "881 0 0 1 0 1\n", + "882 0 0 1 1 0\n", + "883 0 0 1 0 1\n", + "884 0 0 1 0 1\n", + "885 0 1 0 1 0\n", + "886 0 0 1 0 1\n", + "887 0 0 1 1 0\n", + "888 0 0 1 1 0\n", + "889 1 0 0 0 1\n", + "890 0 1 0 0 1\n", + "\n", + "[891 rows x 5 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.get_dummies(df[['Embarked', 'Sex']])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Braund,', 'Mr.', 'Owen', 'Harris']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Name'][0].split()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "titles = []\n", + "for i in range(len(df['Name'])):\n", + " nl = df['Name'][i].split()\n", + " if 'Mr.' in nl:\n", + " titles.append('Mr.')\n", + " elif 'Miss.' in nl:\n", + " titles.append('Miss.')\n", + " else:\n", + " titles.append('Master.')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "df['title'] = titles" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "dummies = pd.get_dummies(df[['Embarked', 'Sex', 'title']]) ##all categorical variables that could matter in some capacity" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "full = pd.concat([df, dummies], axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFare...EmbarkedtitleEmbarked_CEmbarked_QEmbarked_SSex_femaleSex_maletitle_Master.title_Miss.title_Mr.
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500...SMr.00101001
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833...CMaster.10010100
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250...SMiss.00110010
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000...SMaster.00110100
4503Allen, Mr. William Henrymale35.0003734508.0500...SMr.00101001
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare ... Embarked title Embarked_C \\\n", + "0 0 A/5 21171 7.2500 ... S Mr. 0 \n", + "1 0 PC 17599 71.2833 ... C Master. 1 \n", + "2 0 STON/O2. 3101282 7.9250 ... S Miss. 0 \n", + "3 0 113803 53.1000 ... S Master. 0 \n", + "4 0 373450 8.0500 ... S Mr. 0 \n", + "\n", + " Embarked_Q Embarked_S Sex_female Sex_male title_Master. title_Miss. \\\n", + "0 0 1 0 1 0 0 \n", + "1 0 0 1 0 1 0 \n", + "2 0 1 1 0 0 1 \n", + "3 0 1 1 0 1 0 \n", + "4 0 1 0 1 0 0 \n", + "\n", + " title_Mr. \n", + "0 1 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 1 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "full.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Survived 1.000000\n", + "Sex_female 0.543351\n", + "title_Master. 0.338476\n", + "title_Miss. 0.327093\n", + "Fare 0.257307\n", + "Embarked_C 0.168240\n", + "Parch 0.081629\n", + "Embarked_Q 0.003650\n", + "PassengerId -0.005007\n", + "SibSp -0.035322\n", + "Age -0.077221\n", + "Embarked_S -0.155660\n", + "Pclass -0.338481\n", + "Sex_male -0.543351\n", + "title_Mr. -0.549199\n", + "Name: Survived, dtype: float64" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "full.corr()['Survived'].sort_values(ascending = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 21 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "title 891 non-null object\n", + "Embarked_C 891 non-null uint8\n", + "Embarked_Q 891 non-null uint8\n", + "Embarked_S 891 non-null uint8\n", + "Sex_female 891 non-null uint8\n", + "Sex_male 891 non-null uint8\n", + "title_Master. 891 non-null uint8\n", + "title_Miss. 891 non-null uint8\n", + "title_Mr. 891 non-null uint8\n", + "dtypes: float64(2), int64(5), object(6), uint8(8)\n", + "memory usage: 97.5+ KB\n" + ] + } + ], + "source": [ + "full.info() #looks like age is missing values\n", + "age_median = full.Age.median()\n", + "full['Age'] = full.Age.fillna(age_median)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeSibSpParchFareEmbarked_QEmbarked_SSex_femaletitle_Master.title_Miss.
0322.0107.250001000
1138.01071.283300110
2326.0007.925001101
3135.01053.100001110
4335.0008.050001000
5328.0008.458310000
6154.00051.862501000
732.03121.075001010
8327.00211.133301110
9214.01030.070800110
1034.01116.700001101
11158.00026.550001101
12320.0008.050001000
13339.01531.275001000
14314.0007.854201101
15255.00016.000001110
1632.04129.125010010
17228.00013.000001000
18331.01018.000001110
19328.0007.225000110
20235.00026.000001000
21234.00013.000001000
22315.0008.029210101
23128.00035.500001000
2438.03121.075001101
25338.01531.387501110
26328.0007.225000000
27119.032263.000001000
28328.0007.879210101
29328.0007.895801000
.................................
861221.01011.500001000
862148.00025.929201110
863328.08269.550001101
864224.00013.000001000
865242.00013.000001110
866227.01013.858300101
867131.00050.495801000
868328.0009.500001000
86934.01111.133301010
870326.0007.895801000
871147.01152.554201110
872133.0005.000001000
873347.0009.000001000
874228.01024.000000110
875315.0007.225000101
876320.0009.845801000
877319.0007.895801000
878328.0007.895801000
879156.00183.158300110
880225.00126.000001110
881333.0007.895801000
882322.00010.516701101
883228.00010.500001000
884325.0007.050001000
885339.00529.125010110
886227.00013.000001010
887119.00030.000001101
888328.01223.450001101
889126.00030.000000000
890332.0007.750010000
\n", + "

891 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Embarked_Q Embarked_S Sex_female \\\n", + "0 3 22.0 1 0 7.2500 0 1 0 \n", + "1 1 38.0 1 0 71.2833 0 0 1 \n", + "2 3 26.0 0 0 7.9250 0 1 1 \n", + "3 1 35.0 1 0 53.1000 0 1 1 \n", + "4 3 35.0 0 0 8.0500 0 1 0 \n", + "5 3 28.0 0 0 8.4583 1 0 0 \n", + "6 1 54.0 0 0 51.8625 0 1 0 \n", + "7 3 2.0 3 1 21.0750 0 1 0 \n", + "8 3 27.0 0 2 11.1333 0 1 1 \n", + "9 2 14.0 1 0 30.0708 0 0 1 \n", + "10 3 4.0 1 1 16.7000 0 1 1 \n", + "11 1 58.0 0 0 26.5500 0 1 1 \n", + "12 3 20.0 0 0 8.0500 0 1 0 \n", + "13 3 39.0 1 5 31.2750 0 1 0 \n", + "14 3 14.0 0 0 7.8542 0 1 1 \n", + "15 2 55.0 0 0 16.0000 0 1 1 \n", + "16 3 2.0 4 1 29.1250 1 0 0 \n", + "17 2 28.0 0 0 13.0000 0 1 0 \n", + "18 3 31.0 1 0 18.0000 0 1 1 \n", + "19 3 28.0 0 0 7.2250 0 0 1 \n", + "20 2 35.0 0 0 26.0000 0 1 0 \n", + "21 2 34.0 0 0 13.0000 0 1 0 \n", + "22 3 15.0 0 0 8.0292 1 0 1 \n", + "23 1 28.0 0 0 35.5000 0 1 0 \n", + "24 3 8.0 3 1 21.0750 0 1 1 \n", + "25 3 38.0 1 5 31.3875 0 1 1 \n", + "26 3 28.0 0 0 7.2250 0 0 0 \n", + "27 1 19.0 3 2 263.0000 0 1 0 \n", + "28 3 28.0 0 0 7.8792 1 0 1 \n", + "29 3 28.0 0 0 7.8958 0 1 0 \n", + ".. ... ... ... ... ... ... ... ... \n", + "861 2 21.0 1 0 11.5000 0 1 0 \n", + "862 1 48.0 0 0 25.9292 0 1 1 \n", + "863 3 28.0 8 2 69.5500 0 1 1 \n", + "864 2 24.0 0 0 13.0000 0 1 0 \n", + "865 2 42.0 0 0 13.0000 0 1 1 \n", + "866 2 27.0 1 0 13.8583 0 0 1 \n", + "867 1 31.0 0 0 50.4958 0 1 0 \n", + "868 3 28.0 0 0 9.5000 0 1 0 \n", + "869 3 4.0 1 1 11.1333 0 1 0 \n", + "870 3 26.0 0 0 7.8958 0 1 0 \n", + "871 1 47.0 1 1 52.5542 0 1 1 \n", + "872 1 33.0 0 0 5.0000 0 1 0 \n", + "873 3 47.0 0 0 9.0000 0 1 0 \n", + "874 2 28.0 1 0 24.0000 0 0 1 \n", + "875 3 15.0 0 0 7.2250 0 0 1 \n", + "876 3 20.0 0 0 9.8458 0 1 0 \n", + "877 3 19.0 0 0 7.8958 0 1 0 \n", + "878 3 28.0 0 0 7.8958 0 1 0 \n", + "879 1 56.0 0 1 83.1583 0 0 1 \n", + "880 2 25.0 0 1 26.0000 0 1 1 \n", + "881 3 33.0 0 0 7.8958 0 1 0 \n", + "882 3 22.0 0 0 10.5167 0 1 1 \n", + "883 2 28.0 0 0 10.5000 0 1 0 \n", + "884 3 25.0 0 0 7.0500 0 1 0 \n", + "885 3 39.0 0 5 29.1250 1 0 1 \n", + "886 2 27.0 0 0 13.0000 0 1 0 \n", + "887 1 19.0 0 0 30.0000 0 1 1 \n", + "888 3 28.0 1 2 23.4500 0 1 1 \n", + "889 1 26.0 0 0 30.0000 0 0 0 \n", + "890 3 32.0 0 0 7.7500 1 0 0 \n", + "\n", + " title_Master. title_Miss. \n", + "0 0 0 \n", + "1 1 0 \n", + "2 0 1 \n", + "3 1 0 \n", + "4 0 0 \n", + "5 0 0 \n", + "6 0 0 \n", + "7 1 0 \n", + "8 1 0 \n", + "9 1 0 \n", + "10 0 1 \n", + "11 0 1 \n", + "12 0 0 \n", + "13 0 0 \n", + "14 0 1 \n", + "15 1 0 \n", + "16 1 0 \n", + "17 0 0 \n", + "18 1 0 \n", + "19 1 0 \n", + "20 0 0 \n", + "21 0 0 \n", + "22 0 1 \n", + "23 0 0 \n", + "24 0 1 \n", + "25 1 0 \n", + "26 0 0 \n", + "27 0 0 \n", + "28 0 1 \n", + "29 0 0 \n", + ".. ... ... \n", + "861 0 0 \n", + "862 1 0 \n", + "863 0 1 \n", + "864 0 0 \n", + "865 1 0 \n", + "866 0 1 \n", + "867 0 0 \n", + "868 0 0 \n", + "869 1 0 \n", + "870 0 0 \n", + "871 1 0 \n", + "872 0 0 \n", + "873 0 0 \n", + "874 1 0 \n", + "875 0 1 \n", + "876 0 0 \n", + "877 0 0 \n", + "878 0 0 \n", + "879 1 0 \n", + "880 1 0 \n", + "881 0 0 \n", + "882 0 1 \n", + "883 0 0 \n", + "884 0 0 \n", + "885 1 0 \n", + "886 1 0 \n", + "887 0 1 \n", + "888 0 1 \n", + "889 0 0 \n", + "890 0 0 \n", + "\n", + "[891 rows x 10 columns]" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = full.drop(columns = ['Survived','Sex_male', 'PassengerId', 'title_Mr.', 'Embarked_C', 'Sex', 'title', 'Cabin', 'Name', 'Embarked', 'Ticket'])\n", + "#Remove response variable, the string categorical variables, and one of the dummy categories classified from the initial string\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.series.Series" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = full.Survived\n", + "type(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(891, 10)" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.head()\n", + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 891 entries, 0 to 890\n", + "Data columns (total 10 columns):\n", + "Pclass 891 non-null int64\n", + "Age 891 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Fare 891 non-null float64\n", + "Embarked_Q 891 non-null uint8\n", + "Embarked_S 891 non-null uint8\n", + "Sex_female 891 non-null uint8\n", + "title_Master. 891 non-null uint8\n", + "title_Miss. 891 non-null uint8\n", + "dtypes: float64(2), int64(3), uint8(5)\n", + "memory usage: 39.2 KB\n" + ] + } + ], + "source": [ + "X.info() #looks like some age are missing" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state = 42)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PassengerId -0.005007\n", + "Survived 1.000000\n", + "Pclass -0.338481\n", + "Age -0.064910\n", + "SibSp -0.035322\n", + "Parch 0.081629\n", + "Fare 0.257307\n", + "Embarked_C 0.168240\n", + "Embarked_Q 0.003650\n", + "Embarked_S -0.155660\n", + "Sex_female 0.543351\n", + "Sex_male -0.543351\n", + "title_Master. 0.338476\n", + "title_Miss. 0.327093\n", + "title_Mr. -0.549199\n", + "Name: Survived, dtype: float64" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "full.corr()['Survived']" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = lm.LogisticRegression()\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassAgeSibSpParchFareEmbarked_QEmbarked_SSex_femaletitle_Master.title_Miss.
709328.01115.245800010
439231.00010.500001000
840320.0007.925001000
72026.00133.000001101
39314.01011.241700101
290126.00078.850001101
300328.0007.750010101
333316.02018.000001000
208316.0007.750010101
136119.00226.283301101
137137.01053.100001000
696344.0008.050001000
485328.03125.466701101
244330.0007.225000000
344236.00013.000001000
853116.00139.400001101
621142.01052.554201000
653328.0007.829210101
886227.00013.000001010
110147.00052.000001000
294324.0007.895801000
447134.00026.550001000
192319.0107.854201101
682320.0009.225001000
538328.00014.500001000
819310.03227.900001010
30140.00027.720800010
673231.00013.000001000
6334.03227.900001010
396331.0007.854201101
.................................
456165.00026.550001000
500317.0008.662501000
430128.00026.550001000
44514.00281.858301010
650328.0007.895801000
17231.01111.133301101
450236.01227.750001000
314243.01126.250001000
332138.001153.462501000
801231.01126.250001110
90329.0008.050001000
834318.0008.300001000
181228.00015.050000000
581139.011110.883300110
795239.00013.000001000
69326.0208.662501000
131320.0007.050001000
334128.010133.650001110
597349.0000.000001000
135223.00015.045800000
16431.04139.687501010
28328.0007.879210101
783328.01223.450001000
19323.01126.000001010
86934.01111.133301010
715319.0007.650001000
525340.5007.750010000
38131.00215.741700101
140328.00215.245800110
173321.0007.925001000
\n", + "

295 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " Pclass Age SibSp Parch Fare Embarked_Q Embarked_S Sex_female \\\n", + "709 3 28.0 1 1 15.2458 0 0 0 \n", + "439 2 31.0 0 0 10.5000 0 1 0 \n", + "840 3 20.0 0 0 7.9250 0 1 0 \n", + "720 2 6.0 0 1 33.0000 0 1 1 \n", + "39 3 14.0 1 0 11.2417 0 0 1 \n", + "290 1 26.0 0 0 78.8500 0 1 1 \n", + "300 3 28.0 0 0 7.7500 1 0 1 \n", + "333 3 16.0 2 0 18.0000 0 1 0 \n", + "208 3 16.0 0 0 7.7500 1 0 1 \n", + "136 1 19.0 0 2 26.2833 0 1 1 \n", + "137 1 37.0 1 0 53.1000 0 1 0 \n", + "696 3 44.0 0 0 8.0500 0 1 0 \n", + "485 3 28.0 3 1 25.4667 0 1 1 \n", + "244 3 30.0 0 0 7.2250 0 0 0 \n", + "344 2 36.0 0 0 13.0000 0 1 0 \n", + "853 1 16.0 0 1 39.4000 0 1 1 \n", + "621 1 42.0 1 0 52.5542 0 1 0 \n", + "653 3 28.0 0 0 7.8292 1 0 1 \n", + "886 2 27.0 0 0 13.0000 0 1 0 \n", + "110 1 47.0 0 0 52.0000 0 1 0 \n", + "294 3 24.0 0 0 7.8958 0 1 0 \n", + "447 1 34.0 0 0 26.5500 0 1 0 \n", + "192 3 19.0 1 0 7.8542 0 1 1 \n", + "682 3 20.0 0 0 9.2250 0 1 0 \n", + "538 3 28.0 0 0 14.5000 0 1 0 \n", + "819 3 10.0 3 2 27.9000 0 1 0 \n", + "30 1 40.0 0 0 27.7208 0 0 0 \n", + "673 2 31.0 0 0 13.0000 0 1 0 \n", + "63 3 4.0 3 2 27.9000 0 1 0 \n", + "396 3 31.0 0 0 7.8542 0 1 1 \n", + ".. ... ... ... ... ... ... ... ... \n", + "456 1 65.0 0 0 26.5500 0 1 0 \n", + "500 3 17.0 0 0 8.6625 0 1 0 \n", + "430 1 28.0 0 0 26.5500 0 1 0 \n", + "445 1 4.0 0 2 81.8583 0 1 0 \n", + "650 3 28.0 0 0 7.8958 0 1 0 \n", + "172 3 1.0 1 1 11.1333 0 1 1 \n", + "450 2 36.0 1 2 27.7500 0 1 0 \n", + "314 2 43.0 1 1 26.2500 0 1 0 \n", + "332 1 38.0 0 1 153.4625 0 1 0 \n", + "801 2 31.0 1 1 26.2500 0 1 1 \n", + "90 3 29.0 0 0 8.0500 0 1 0 \n", + "834 3 18.0 0 0 8.3000 0 1 0 \n", + "181 2 28.0 0 0 15.0500 0 0 0 \n", + "581 1 39.0 1 1 110.8833 0 0 1 \n", + "795 2 39.0 0 0 13.0000 0 1 0 \n", + "69 3 26.0 2 0 8.6625 0 1 0 \n", + "131 3 20.0 0 0 7.0500 0 1 0 \n", + "334 1 28.0 1 0 133.6500 0 1 1 \n", + "597 3 49.0 0 0 0.0000 0 1 0 \n", + "135 2 23.0 0 0 15.0458 0 0 0 \n", + "164 3 1.0 4 1 39.6875 0 1 0 \n", + "28 3 28.0 0 0 7.8792 1 0 1 \n", + "783 3 28.0 1 2 23.4500 0 1 0 \n", + "193 2 3.0 1 1 26.0000 0 1 0 \n", + "869 3 4.0 1 1 11.1333 0 1 0 \n", + "715 3 19.0 0 0 7.6500 0 1 0 \n", + "525 3 40.5 0 0 7.7500 1 0 0 \n", + "381 3 1.0 0 2 15.7417 0 0 1 \n", + "140 3 28.0 0 2 15.2458 0 0 1 \n", + "173 3 21.0 0 0 7.9250 0 1 0 \n", + "\n", + " title_Master. title_Miss. \n", + "709 1 0 \n", + "439 0 0 \n", + "840 0 0 \n", + "720 0 1 \n", + "39 0 1 \n", + "290 0 1 \n", + "300 0 1 \n", + "333 0 0 \n", + "208 0 1 \n", + "136 0 1 \n", + "137 0 0 \n", + "696 0 0 \n", + "485 0 1 \n", + "244 0 0 \n", + "344 0 0 \n", + "853 0 1 \n", + "621 0 0 \n", + "653 0 1 \n", + "886 1 0 \n", + "110 0 0 \n", + "294 0 0 \n", + "447 0 0 \n", + "192 0 1 \n", + "682 0 0 \n", + "538 0 0 \n", + "819 1 0 \n", + "30 1 0 \n", + "673 0 0 \n", + "63 1 0 \n", + "396 0 1 \n", + ".. ... ... \n", + "456 0 0 \n", + "500 0 0 \n", + "430 0 0 \n", + "445 1 0 \n", + "650 0 0 \n", + "172 0 1 \n", + "450 0 0 \n", + "314 0 0 \n", + "332 0 0 \n", + "801 1 0 \n", + "90 0 0 \n", + "834 0 0 \n", + "181 0 0 \n", + "581 1 0 \n", + "795 0 0 \n", + "69 0 0 \n", + "131 0 0 \n", + "334 1 0 \n", + "597 0 0 \n", + "135 0 0 \n", + "164 1 0 \n", + "28 0 1 \n", + "783 0 0 \n", + "193 1 0 \n", + "869 1 0 \n", + "715 0 0 \n", + "525 0 0 \n", + "381 0 1 \n", + "140 1 0 \n", + "173 0 0 \n", + "\n", + "[295 rows x 10 columns]" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n", + " 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,\n", + " 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,\n", + " 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,\n", + " 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,\n", + " 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 1, 0, 1, 0, 0, 0, 1, 1, 0], dtype=int64)" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.8101694915254237" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.score(X_test, y_test) ## does about 20% better than base model performance of 61.6%" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.68818243, -0.02576249, -0.36500498, -0.27925771, 0.00578152,\n", + " -0.144344 , -0.54206936, 1.77298728, 1.71801348, 0.60544203]])" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1.11797972])" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.intercept_" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'LogisticRegression' object has no attribute 'summary'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msummary\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m: 'LogisticRegression' object has no attribute 'summary'" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}