From f902e6cff08fbd26d482de9ab123365e8c33b1fa Mon Sep 17 00:00:00 2001 From: Ricardo Beato Date: Wed, 18 Oct 2023 16:59:44 +0200 Subject: [PATCH] commiting panda --- your-code/pandas_1.ipynb | 2495 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 2417 insertions(+), 78 deletions(-) diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index 4f428ac..2ff35d7 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -44,10 +44,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "panda_series = pd.Series(lst)\n", + "panda_series" + ] }, { "cell_type": "markdown", @@ -60,10 +84,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "74.4" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "panda_series[2]" + ] }, { "cell_type": "markdown", @@ -74,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 111, "metadata": {}, "outputs": [], "source": [ @@ -92,10 +129,145 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 112, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first_dataframe = pd.DataFrame(b)\n", + "first_dataframe" + ] }, { "cell_type": "markdown", @@ -106,37 +278,521 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 113, "metadata": {}, "outputs": [], "source": [ - "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", - " [61.3, 40.8, 30.8, 37.8, 87.6],\n", - " [20.6, 73.2, 44.2, 14.6, 91.8],\n", - " [57.4, 0.1, 96.1, 4.2, 69.5],\n", - " [83.6, 20.5, 85.4, 22.8, 35.9],\n", - " [49.0, 69.0, 0.1, 31.8, 89.1],\n", - " [23.3, 40.7, 95.0, 83.8, 26.9],\n", - " [27.6, 26.4, 53.8, 88.8, 68.5],\n", - " [96.6, 96.4, 53.4, 72.4, 50.1],\n", - " [73.7, 39.0, 43.2, 81.6, 34.7]]" + "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 114, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']" + "first_dataframe.columns = [i for i in colnames]\n", + "first_dataframe" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 100, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (728751949.py, line 7)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[100], line 7\u001b[1;36m\u001b[0m\n\u001b[1;33m print(first_dataframe.shape)b\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "#(ignore this, just trying this in class)\n", + "\"rows\"\n", + "print(first_dataframe.shape[0])\n", + "\"columns\"\n", + "print(first_dataframe.shape[1])\n", + "\"rows x columns\"\n", + "print(first_dataframe.shape)b\n", + "\n", + "#random values from the table\n", + "first_dataframe.sample(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first_dataframe.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "first_dataframe.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "DataFrame.sort_values() missing 1 required positional argument: 'by'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[103], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m first_dataframe\u001b[38;5;241m.\u001b[39msort_values()\n", + "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m 326\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m 327\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m 328\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m 329\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m 330\u001b[0m )\n\u001b[1;32m--> 331\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "\u001b[1;31mTypeError\u001b[0m: DataFrame.sort_values() missing 1 required positional argument: 'by'" + ] + } + ], + "source": [ + 
"first_dataframe.sort_values()" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
count10.0000010.0000010.00000010.00000010.000000
mean54.6200050.1100056.95000047.28000063.250000
std25.6489932.1220430.16827831.39344624.562313
min20.600000.100000.1000004.20000026.900000
25%32.9500029.5500043.45000025.05000039.450000
50%55.2500040.7500053.60000036.40000069.000000
75%70.6000072.1500080.92500079.30000085.300000
max96.6000096.4000096.10000088.80000091.800000
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "count 10.00000 10.00000 10.000000 10.000000 10.000000\n", + "mean 54.62000 50.11000 56.950000 47.280000 63.250000\n", + "std 25.64899 32.12204 30.168278 31.393446 24.562313\n", + "min 20.60000 0.10000 0.100000 4.200000 26.900000\n", + "25% 32.95000 29.55000 43.450000 25.050000 39.450000\n", + "50% 55.25000 40.75000 53.600000 36.400000 69.000000\n", + "75% 70.60000 72.15000 80.925000 79.300000 85.300000\n", + "max 96.60000 96.40000 96.100000 88.800000 91.800000" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#ignore this\n", + "first_dataframe.describe()" + ] }, { "cell_type": "markdown", @@ -147,10 +803,151 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 107, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_3Score_5
053.167.578.4
161.330.887.6
220.644.291.8
357.496.169.5
483.685.435.9
549.00.189.1
623.395.026.9
727.653.868.5
896.653.450.1
973.743.234.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Making a copy cause it will be useful to preserve the original DF for next exercise\n", + "removed_cols_df = first_dataframe.copy()\n", + "removed_cols_df.drop([\"Score_2\", \"Score_4\"],axis=1,inplace=True)\n", + "removed_cols_df" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n", + "5\n", + "(10, 5)\n" + ] + } + ], + "source": [ + "#Ignore\n", + "\n", + "\"rows\"\n", + "print(first_dataframe.shape[0])\n", + "\"columns\"\n", + "print(first_dataframe.shape[1])\n", + "\"rows x columns\"\n", + "print(first_dataframe.shape)\n" + ] }, { "cell_type": "markdown", @@ -161,10 +958,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 116, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "56.95000000000001" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_first_dataframe = first_dataframe[\"Score_3\"].mean()\n", + "mean_first_dataframe" + ] }, { "cell_type": "markdown", @@ -175,10 +986,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 115, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "88.8" + ] + }, + "execution_count": 115, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "max_first_dataframe = first_dataframe[\"Score_4\"].max()\n", + "max_first_dataframe" + ] }, { "cell_type": "markdown", @@ -189,10 
+1014,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 117, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "40.75" + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "median_first_dataframe = first_dataframe[\"Score_2\"].median()\n", + "median_first_dataframe" + ] }, { "cell_type": "markdown", @@ -203,7 +1042,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 118, "metadata": {}, "outputs": [], "source": [ @@ -224,10 +1063,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 119, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
0LUNCH BAG APPLE DESIGN11.651.65
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
2RIBBON REEL STRIPES DESIGN11.651.65
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
4PLAYING CARDS JUBILEE UNION JACK21.252.50
5POPCORN HOLDER70.855.95
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
7PARTY BUNTING44.9519.80
8JAZZ HEARTS ADDRESS BOOK100.191.90
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
\n", + "
" + ], + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orders_dataframe = pd.DataFrame(orders)\n", + "orders_dataframe" + ] }, { "cell_type": "markdown", @@ -238,10 +1201,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total quantity is 2978\n", + "Total revenue is 637.0\n" + ] + } + ], + "source": [ + "orders_dataframe_total_quant = orders_dataframe[\"Quantity\"].sum()\n", + "print(f\"Total quantity is {orders_dataframe_total_quant}\")\n", + "\n", + "orders_dataframe_total_revenue = orders_dataframe[\"Revenue\"].sum()\n", + "print(f\"Total revenue is {orders_dataframe_total_revenue}\")" + ] }, { "cell_type": "markdown", @@ -252,10 +1230,161 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 146, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11.95\n", + "0.18\n", + "difference is 11.77\n", + "0.18\n", + "11.95\n", + "difference is 11.77\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
8JAZZ HEARTS ADDRESS BOOK100.191.90
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
5POPCORN HOLDER70.855.95
4PLAYING CARDS JUBILEE UNION JACK21.252.50
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
0LUNCH BAG APPLE DESIGN11.651.65
2RIBBON REEL STRIPES DESIGN11.651.65
7PARTY BUNTING44.9519.80
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
\n", + "
" + ], + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "orders_dataframe.sort_values(by=\"UnitPrice\",axis=0,ascending=True,inplace=True)\n", + "\n", + "#Approach 1: trying to print the difference dynamically:\n", + "print(orders_dataframe['UnitPrice'].iloc[-1]) #last row after the ascending sort, i.e. the highest UnitPrice\n", + "print(orders_dataframe['UnitPrice'].iloc[0]) #first row after the ascending sort, i.e. the lowest UnitPrice\n", + "print(f\"difference is {orders_dataframe['UnitPrice'].iloc[-1]-orders_dataframe['UnitPrice'].iloc[0]}\")\n", + "\n", + "#Approach 2: min and max method (also dynamic after all...):\n", + "print(orders_dataframe[\"UnitPrice\"].min())\n", + "print(orders_dataframe[\"UnitPrice\"].max())\n", + "#assigning to variables so the f-string below stays readable:\n", + "min_price = orders_dataframe[\"UnitPrice\"].min()\n", + "max_price = orders_dataframe[\"UnitPrice\"].max()\n", + "print(f\"difference is {max_price-min_price}\")\n", + "\n", + "\n", + "orders_dataframe\n" + ] }, { "cell_type": "markdown", @@ -266,7 +1395,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 160, "metadata": {}, "outputs": [], "source": [ @@ -285,10 +1414,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 161, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
1231610433.03.58.0010.72
2332211033.52.58.6710.80
3431410322.03.08.2100.65
4533011554.53.09.3410.90
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 " + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.head()" + ] }, { "cell_type": "markdown", @@ -299,10 +1548,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 162, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.isna(admissions).sum()\n", + "#Apparently there is no missing data as the retrieved series for the column names with the sum of Trues is 0 for all" + ] }, { "cell_type": "markdown", @@ -313,10 +1585,218 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 163, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
Serial No.
133711844.54.59.6510.92
231610433.03.58.0010.72
332211033.52.58.6710.80
431410322.03.08.2100.65
533011554.53.09.3410.90
...........................
38132411033.53.59.0410.82
38232510733.03.59.1110.84
38333011645.04.59.4510.91
38431210333.54.08.7800.67
38533311745.04.09.6610.95
\n", + "

385 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "2 316 104 3 3.0 3.5 8.00 \n", + "3 322 110 3 3.5 2.5 8.67 \n", + "4 314 103 2 2.0 3.0 8.21 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "... ... ... ... ... ... ... \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "384 312 103 3 3.5 4.0 8.78 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit \n", + "Serial No. \n", + "1 1 0.92 \n", + "2 1 0.72 \n", + "3 1 0.80 \n", + "4 0 0.65 \n", + "5 1 0.90 \n", + "... ... ... \n", + "381 1 0.82 \n", + "382 1 0.84 \n", + "383 1 0.91 \n", + "384 0 0.67 \n", + "385 1 0.95 \n", + "\n", + "[385 rows x 8 columns]" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.set_index(\"Serial No.\")" + ] }, { "cell_type": "code", @@ -334,10 +1814,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 169, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 169, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.duplicated(subset=[\"GRE Score\",\"CGPA\"]).sum()\n", + "#by returning a 0, we see that the sum of all Trues (True when duplicated) is 0, meaning there are none" + ] }, { "cell_type": "markdown", @@ -348,10 +1842,228 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 193, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "101\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
4533011554.53.09.3410.90
101132811244.04.59.1010.78
192032811655.05.09.5010.94
202133411955.04.59.7010.95
..............................
37938032911144.54.09.2310.89
38038132411033.53.59.0410.82
38138232510733.03.59.1110.84
38238333011645.04.59.4510.91
38438533311745.04.09.6610.95
\n", + "

101 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "10 11 328 112 4 4.0 4.5 9.10 \n", + "19 20 328 116 5 5.0 5.0 9.50 \n", + "20 21 334 119 5 5.0 4.5 9.70 \n", + ".. ... ... ... ... ... ... ... \n", + "379 380 329 111 4 4.5 4.0 9.23 \n", + "380 381 324 110 3 3.5 3.5 9.04 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "382 383 330 116 4 5.0 4.5 9.45 \n", + "384 385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "4 1 0.90 \n", + "10 1 0.78 \n", + "19 1 0.94 \n", + "20 1 0.95 \n", + ".. ... ... \n", + "379 1 0.89 \n", + "380 1 0.82 \n", + "381 1 0.84 \n", + "382 1 0.91 \n", + "384 1 0.95 \n", + "\n", + "[101 rows x 9 columns]" + ] + }, + "execution_count": 193, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "condition1_CGPAgreater9 = admissions[\"CGPA\"] > 9\n", + "condition2_stud_investig = admissions[\"Research\"] == 1\n", + "\n", + "print(len(admissions[condition1_CGPAgreater9 & condition2_stud_investig]))\n", + "admissions[condition1_CGPAgreater9 & condition2_stud_investig]\n" + ] }, { "cell_type": "markdown", @@ -362,17 +2074,153 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 196, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
282933811843.04.59.4010.91
626332711433.03.09.0200.61
14014132611433.03.09.1110.83
21721832411143.03.09.0110.82
38138232510733.03.59.1110.84
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "28 29 338 118 4 3.0 4.5 9.40 \n", + "62 63 327 114 3 3.0 3.0 9.02 \n", + "140 141 326 114 3 3.0 3.0 9.11 \n", + "217 218 324 111 4 3.0 3.0 9.01 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "\n", + " Research Chance of Admit \n", + "28 1 0.91 \n", + "62 0 0.61 \n", + "140 1 0.83 \n", + "217 1 0.82 \n", + "381 1 0.84 " + ] + }, + "execution_count": 196, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "condition1_CGPAgreater9 = admissions[\"CGPA\"] > 9\n", + "condition3_SOPless35 = admissions[\"SOP\"] < 3.5\n", + "\n", + "admissions[condition1_CGPAgreater9 & condition3_SOPless35]" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 198, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0.8019999999999999" + ] + }, + "execution_count": 198, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions[condition1_CGPAgreater9 & condition3_SOPless35][\"Chance of Admit\"].mean()" + ] }, { "cell_type": "markdown", @@ -384,10 +2232,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 204, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def toefl_greater_100_is_true(number):\n", + " if number > 100:\n", + " return True\n", + " else:\n", + " return False" + ] }, { "cell_type": "markdown", @@ -398,10 +2252,242 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 205, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of AdmitTOEFL > 100Decision
17117230910832.53.08.1200.72TrueTrue
36236329010011.52.07.5600.47FalseFalse
18718830610523.02.58.2600.73TrueTrue
20820932411043.03.58.9710.84TrueTrue
565730910023.03.08.1000.48FalseFalse
....................................
808134011554.54.59.4510.94TrueTrue
32732830810633.03.08.2400.58TrueTrue
969731210522.53.08.1200.64TrueTrue
787932011055.04.59.2210.92TrueTrue
35735833611944.54.09.6210.95TrueTrue
\n", + "

100 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "171 172 309 108 3 2.5 3.0 8.12 \n", + "362 363 290 100 1 1.5 2.0 7.56 \n", + "187 188 306 105 2 3.0 2.5 8.26 \n", + "208 209 324 110 4 3.0 3.5 8.97 \n", + "56 57 309 100 2 3.0 3.0 8.10 \n", + ".. ... ... ... ... ... ... ... \n", + "80 81 340 115 5 4.5 4.5 9.45 \n", + "327 328 308 106 3 3.0 3.0 8.24 \n", + "96 97 312 105 2 2.5 3.0 8.12 \n", + "78 79 320 110 5 5.0 4.5 9.22 \n", + "357 358 336 119 4 4.5 4.0 9.62 \n", + "\n", + " Research Chance of Admit TOEFL > 100 Decision \n", + "171 0 0.72 True True \n", + "362 0 0.47 False False \n", + "187 0 0.73 True True \n", + "208 1 0.84 True True \n", + "56 0 0.48 False False \n", + ".. ... ... ... ... \n", + "80 1 0.94 True True \n", + "327 0 0.58 True True \n", + "96 0 0.64 True True \n", + "78 1 0.92 True True \n", + "357 1 0.95 True True \n", + "\n", + "[100 rows x 11 columns]" + ] + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions[\"Decision\"] = admissions[\"TOEFL Score\"].apply(toefl_greater_100_is_true)\n", + "admissions.sample(100)" + ] }, { "cell_type": "code", @@ -425,6 +2511,259 @@ "HINT (use np.where)" ] }, + { + "cell_type": "code", + "execution_count": 208, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of AdmitTOEFL > 100Decisiondecision2
0133711844.54.59.6510.92TrueTrue1
1231610433.03.58.0010.72TrueTrue0
2332211033.52.58.6710.80TrueTrue1
3431410322.03.08.2100.65TrueTrue0
4533011554.53.09.3410.90TrueTrue1
.......................................
38038132411033.53.59.0410.82TrueTrue1
38138232510733.03.59.1110.84TrueTrue0
38238333011645.04.59.4510.91TrueTrue1
38338431210333.54.08.7800.67TrueTrue1
38438533311745.04.09.6610.95TrueTrue1
\n", + "

385 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + ".. ... ... ... ... ... ... ... \n", + "380 381 324 110 3 3.5 3.5 9.04 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "382 383 330 116 4 5.0 4.5 9.45 \n", + "383 384 312 103 3 3.5 4.0 8.78 \n", + "384 385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit TOEFL > 100 Decision decision2 \n", + "0 1 0.92 True True 1 \n", + "1 1 0.72 True True 0 \n", + "2 1 0.80 True True 1 \n", + "3 0 0.65 True True 0 \n", + "4 1 0.90 True True 1 \n", + ".. ... ... ... ... ... \n", + "380 1 0.82 True True 1 \n", + "381 1 0.84 True True 0 \n", + "382 1 0.91 True True 1 \n", + "383 0 0.67 True True 1 \n", + "384 1 0.95 True True 1 \n", + "\n", + "[385 rows x 12 columns]" + ] + }, + "execution_count": 208, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions[\"decision2\"] = np.where(admissions[\"SOP\"]>3,1,0)\n", + "admissions\n", + "\n", + "#np.where(value > 10, condition_2, else)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -449,7 +2788,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.4" }, "toc": { "base_numbering": "",