diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb
index 4f428ac..2ff35d7 100644
--- a/your-code/pandas_1.ipynb
+++ b/your-code/pandas_1.ipynb
@@ -18,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
@@ -44,10 +44,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 5.7\n",
+ "1 75.2\n",
+ "2 74.4\n",
+ "3 84.0\n",
+ "4 66.5\n",
+ "5 66.3\n",
+ "6 55.8\n",
+ "7 75.7\n",
+ "8 29.1\n",
+ "9 43.7\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "panda_series = pd.Series(lst)  # build a Series from `lst` (defined outside this cell)\n",
+ "panda_series"
+ ]
},
{
"cell_type": "markdown",
@@ -60,10 +84,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "74.4"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "panda_series[2]  # label 2 on the default integer index, i.e. the third value"
+ ]
},
{
"cell_type": "markdown",
@@ -74,7 +111,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
@@ -92,10 +129,145 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 112,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 53.1 | \n",
+ " 95.0 | \n",
+ " 67.5 | \n",
+ " 35.0 | \n",
+ " 78.4 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 61.3 | \n",
+ " 40.8 | \n",
+ " 30.8 | \n",
+ " 37.8 | \n",
+ " 87.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20.6 | \n",
+ " 73.2 | \n",
+ " 44.2 | \n",
+ " 14.6 | \n",
+ " 91.8 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 57.4 | \n",
+ " 0.1 | \n",
+ " 96.1 | \n",
+ " 4.2 | \n",
+ " 69.5 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 83.6 | \n",
+ " 20.5 | \n",
+ " 85.4 | \n",
+ " 22.8 | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 49.0 | \n",
+ " 69.0 | \n",
+ " 0.1 | \n",
+ " 31.8 | \n",
+ " 89.1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 23.3 | \n",
+ " 40.7 | \n",
+ " 95.0 | \n",
+ " 83.8 | \n",
+ " 26.9 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 27.6 | \n",
+ " 26.4 | \n",
+ " 53.8 | \n",
+ " 88.8 | \n",
+ " 68.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 96.6 | \n",
+ " 96.4 | \n",
+ " 53.4 | \n",
+ " 72.4 | \n",
+ " 50.1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 73.7 | \n",
+ " 39.0 | \n",
+ " 43.2 | \n",
+ " 81.6 | \n",
+ " 34.7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2 3 4\n",
+ "0 53.1 95.0 67.5 35.0 78.4\n",
+ "1 61.3 40.8 30.8 37.8 87.6\n",
+ "2 20.6 73.2 44.2 14.6 91.8\n",
+ "3 57.4 0.1 96.1 4.2 69.5\n",
+ "4 83.6 20.5 85.4 22.8 35.9\n",
+ "5 49.0 69.0 0.1 31.8 89.1\n",
+ "6 23.3 40.7 95.0 83.8 26.9\n",
+ "7 27.6 26.4 53.8 88.8 68.5\n",
+ "8 96.6 96.4 53.4 72.4 50.1\n",
+ "9 73.7 39.0 43.2 81.6 34.7"
+ ]
+ },
+ "execution_count": 112,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "first_dataframe = pd.DataFrame(b)  # each inner list of `b` becomes a row; columns get default labels 0-4\n",
+ "first_dataframe"
+ ]
},
{
"cell_type": "markdown",
@@ -106,37 +278,521 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
- "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n",
- " [61.3, 40.8, 30.8, 37.8, 87.6],\n",
- " [20.6, 73.2, 44.2, 14.6, 91.8],\n",
- " [57.4, 0.1, 96.1, 4.2, 69.5],\n",
- " [83.6, 20.5, 85.4, 22.8, 35.9],\n",
- " [49.0, 69.0, 0.1, 31.8, 89.1],\n",
- " [23.3, 40.7, 95.0, 83.8, 26.9],\n",
- " [27.6, 26.4, 53.8, 88.8, 68.5],\n",
- " [96.6, 96.4, 53.4, 72.4, 50.1],\n",
- " [73.7, 39.0, 43.2, 81.6, 34.7]]"
+ "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']  # labels for the five columns of first_dataframe"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 114,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Score_1 | \n",
+ " Score_2 | \n",
+ " Score_3 | \n",
+ " Score_4 | \n",
+ " Score_5 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 53.1 | \n",
+ " 95.0 | \n",
+ " 67.5 | \n",
+ " 35.0 | \n",
+ " 78.4 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 61.3 | \n",
+ " 40.8 | \n",
+ " 30.8 | \n",
+ " 37.8 | \n",
+ " 87.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20.6 | \n",
+ " 73.2 | \n",
+ " 44.2 | \n",
+ " 14.6 | \n",
+ " 91.8 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 57.4 | \n",
+ " 0.1 | \n",
+ " 96.1 | \n",
+ " 4.2 | \n",
+ " 69.5 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 83.6 | \n",
+ " 20.5 | \n",
+ " 85.4 | \n",
+ " 22.8 | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 49.0 | \n",
+ " 69.0 | \n",
+ " 0.1 | \n",
+ " 31.8 | \n",
+ " 89.1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 23.3 | \n",
+ " 40.7 | \n",
+ " 95.0 | \n",
+ " 83.8 | \n",
+ " 26.9 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 27.6 | \n",
+ " 26.4 | \n",
+ " 53.8 | \n",
+ " 88.8 | \n",
+ " 68.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 96.6 | \n",
+ " 96.4 | \n",
+ " 53.4 | \n",
+ " 72.4 | \n",
+ " 50.1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 73.7 | \n",
+ " 39.0 | \n",
+ " 43.2 | \n",
+ " 81.6 | \n",
+ " 34.7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Score_1 Score_2 Score_3 Score_4 Score_5\n",
+ "0 53.1 95.0 67.5 35.0 78.4\n",
+ "1 61.3 40.8 30.8 37.8 87.6\n",
+ "2 20.6 73.2 44.2 14.6 91.8\n",
+ "3 57.4 0.1 96.1 4.2 69.5\n",
+ "4 83.6 20.5 85.4 22.8 35.9\n",
+ "5 49.0 69.0 0.1 31.8 89.1\n",
+ "6 23.3 40.7 95.0 83.8 26.9\n",
+ "7 27.6 26.4 53.8 88.8 68.5\n",
+ "8 96.6 96.4 53.4 72.4 50.1\n",
+ "9 73.7 39.0 43.2 81.6 34.7"
+ ]
+ },
+ "execution_count": 114,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']"
+ "first_dataframe.columns = colnames  # assign the labels directly; a copy comprehension adds nothing\n",
+ "first_dataframe"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 100,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "ename": "SyntaxError",
+ "evalue": "invalid syntax (728751949.py, line 7)",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;36m Cell \u001b[1;32mIn[100], line 7\u001b[1;36m\u001b[0m\n\u001b[1;33m print(first_dataframe.shape)b\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
+ ]
+ }
+ ],
+ "source": [
+ "# (ignore this, just trying this in class)\n",
+ "# rows\n",
+ "print(first_dataframe.shape[0])\n",
+ "# columns\n",
+ "print(first_dataframe.shape[1])\n",
+ "# rows x columns\n",
+ "print(first_dataframe.shape)\n",
+ "\n",
+ "# two random rows from the table\n",
+ "first_dataframe.sample(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Score_1 | \n",
+ " Score_2 | \n",
+ " Score_3 | \n",
+ " Score_4 | \n",
+ " Score_5 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 53.1 | \n",
+ " 95.0 | \n",
+ " 67.5 | \n",
+ " 35.0 | \n",
+ " 78.4 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 61.3 | \n",
+ " 40.8 | \n",
+ " 30.8 | \n",
+ " 37.8 | \n",
+ " 87.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20.6 | \n",
+ " 73.2 | \n",
+ " 44.2 | \n",
+ " 14.6 | \n",
+ " 91.8 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 57.4 | \n",
+ " 0.1 | \n",
+ " 96.1 | \n",
+ " 4.2 | \n",
+ " 69.5 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 83.6 | \n",
+ " 20.5 | \n",
+ " 85.4 | \n",
+ " 22.8 | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Score_1 Score_2 Score_3 Score_4 Score_5\n",
+ "0 53.1 95.0 67.5 35.0 78.4\n",
+ "1 61.3 40.8 30.8 37.8 87.6\n",
+ "2 20.6 73.2 44.2 14.6 91.8\n",
+ "3 57.4 0.1 96.1 4.2 69.5\n",
+ "4 83.6 20.5 85.4 22.8 35.9"
+ ]
+ },
+ "execution_count": 101,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "first_dataframe.head()  # first 5 rows (head's default)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Score_1 | \n",
+ " Score_2 | \n",
+ " Score_3 | \n",
+ " Score_4 | \n",
+ " Score_5 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 5 | \n",
+ " 49.0 | \n",
+ " 69.0 | \n",
+ " 0.1 | \n",
+ " 31.8 | \n",
+ " 89.1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 23.3 | \n",
+ " 40.7 | \n",
+ " 95.0 | \n",
+ " 83.8 | \n",
+ " 26.9 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 27.6 | \n",
+ " 26.4 | \n",
+ " 53.8 | \n",
+ " 88.8 | \n",
+ " 68.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 96.6 | \n",
+ " 96.4 | \n",
+ " 53.4 | \n",
+ " 72.4 | \n",
+ " 50.1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 73.7 | \n",
+ " 39.0 | \n",
+ " 43.2 | \n",
+ " 81.6 | \n",
+ " 34.7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Score_1 Score_2 Score_3 Score_4 Score_5\n",
+ "5 49.0 69.0 0.1 31.8 89.1\n",
+ "6 23.3 40.7 95.0 83.8 26.9\n",
+ "7 27.6 26.4 53.8 88.8 68.5\n",
+ "8 96.6 96.4 53.4 72.4 50.1\n",
+ "9 73.7 39.0 43.2 81.6 34.7"
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "first_dataframe.tail()  # last 5 rows (tail's default)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "TypeError",
+ "evalue": "DataFrame.sort_values() missing 1 required positional argument: 'by'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[103], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m first_dataframe\u001b[38;5;241m.\u001b[39msort_values()\n",
+ "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m 326\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m 327\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m 328\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m 329\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m 330\u001b[0m )\n\u001b[1;32m--> 331\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+ "\u001b[1;31mTypeError\u001b[0m: DataFrame.sort_values() missing 1 required positional argument: 'by'"
+ ]
+ }
+ ],
+ "source": [
+ "first_dataframe.sort_values(by=\"Score_1\")  # `by` is required; Score_1 is used here as an example sort key"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Score_1 | \n",
+ " Score_2 | \n",
+ " Score_3 | \n",
+ " Score_4 | \n",
+ " Score_5 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 10.00000 | \n",
+ " 10.00000 | \n",
+ " 10.000000 | \n",
+ " 10.000000 | \n",
+ " 10.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 54.62000 | \n",
+ " 50.11000 | \n",
+ " 56.950000 | \n",
+ " 47.280000 | \n",
+ " 63.250000 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 25.64899 | \n",
+ " 32.12204 | \n",
+ " 30.168278 | \n",
+ " 31.393446 | \n",
+ " 24.562313 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 20.60000 | \n",
+ " 0.10000 | \n",
+ " 0.100000 | \n",
+ " 4.200000 | \n",
+ " 26.900000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 32.95000 | \n",
+ " 29.55000 | \n",
+ " 43.450000 | \n",
+ " 25.050000 | \n",
+ " 39.450000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 55.25000 | \n",
+ " 40.75000 | \n",
+ " 53.600000 | \n",
+ " 36.400000 | \n",
+ " 69.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 70.60000 | \n",
+ " 72.15000 | \n",
+ " 80.925000 | \n",
+ " 79.300000 | \n",
+ " 85.300000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 96.60000 | \n",
+ " 96.40000 | \n",
+ " 96.100000 | \n",
+ " 88.800000 | \n",
+ " 91.800000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Score_1 Score_2 Score_3 Score_4 Score_5\n",
+ "count 10.00000 10.00000 10.000000 10.000000 10.000000\n",
+ "mean 54.62000 50.11000 56.950000 47.280000 63.250000\n",
+ "std 25.64899 32.12204 30.168278 31.393446 24.562313\n",
+ "min 20.60000 0.10000 0.100000 4.200000 26.900000\n",
+ "25% 32.95000 29.55000 43.450000 25.050000 39.450000\n",
+ "50% 55.25000 40.75000 53.600000 36.400000 69.000000\n",
+ "75% 70.60000 72.15000 80.925000 79.300000 85.300000\n",
+ "max 96.60000 96.40000 96.100000 88.800000 91.800000"
+ ]
+ },
+ "execution_count": 104,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Scratch cell, not an exercise answer (ignore this)\n",
+ "first_dataframe.describe()  # count, mean, std, min, quartiles and max for each column"
+ ]
},
{
"cell_type": "markdown",
@@ -147,10 +803,151 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 107,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Score_1 | \n",
+ " Score_3 | \n",
+ " Score_5 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 53.1 | \n",
+ " 67.5 | \n",
+ " 78.4 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 61.3 | \n",
+ " 30.8 | \n",
+ " 87.6 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 20.6 | \n",
+ " 44.2 | \n",
+ " 91.8 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 57.4 | \n",
+ " 96.1 | \n",
+ " 69.5 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 83.6 | \n",
+ " 85.4 | \n",
+ " 35.9 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 49.0 | \n",
+ " 0.1 | \n",
+ " 89.1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 23.3 | \n",
+ " 95.0 | \n",
+ " 26.9 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 27.6 | \n",
+ " 53.8 | \n",
+ " 68.5 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 96.6 | \n",
+ " 53.4 | \n",
+ " 50.1 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 73.7 | \n",
+ " 43.2 | \n",
+ " 34.7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Score_1 Score_3 Score_5\n",
+ "0 53.1 67.5 78.4\n",
+ "1 61.3 30.8 87.6\n",
+ "2 20.6 44.2 91.8\n",
+ "3 57.4 96.1 69.5\n",
+ "4 83.6 85.4 35.9\n",
+ "5 49.0 0.1 89.1\n",
+ "6 23.3 95.0 26.9\n",
+ "7 27.6 53.8 68.5\n",
+ "8 96.6 53.4 50.1\n",
+ "9 73.7 43.2 34.7"
+ ]
+ },
+ "execution_count": 107,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# drop() returns a new frame by default, so the original DF is preserved for the\n",
+ "# next exercise without needing an explicit copy() or inplace=True.\n",
+ "removed_cols_df = first_dataframe.drop(columns=[\"Score_2\", \"Score_4\"])\n",
+ "removed_cols_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10\n",
+ "5\n",
+ "(10, 5)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Ignore: scratch cell\n",
+ "\n",
+ "# rows\n",
+ "print(first_dataframe.shape[0])\n",
+ "# columns\n",
+ "print(first_dataframe.shape[1])\n",
+ "# rows x columns\n",
+ "print(first_dataframe.shape)\n"
+ ]
},
{
"cell_type": "markdown",
@@ -161,10 +958,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 116,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "56.95000000000001"
+ ]
+ },
+ "execution_count": 116,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mean_first_dataframe = first_dataframe[\"Score_3\"].mean()  # mean of the Score_3 column only, despite the variable name\n",
+ "mean_first_dataframe"
+ ]
},
{
"cell_type": "markdown",
@@ -175,10 +986,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 115,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "88.8"
+ ]
+ },
+ "execution_count": 115,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "max_first_dataframe = first_dataframe[\"Score_4\"].max()  # maximum of the Score_4 column only, despite the variable name\n",
+ "max_first_dataframe"
+ ]
},
{
"cell_type": "markdown",
@@ -189,10 +1014,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 117,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "40.75"
+ ]
+ },
+ "execution_count": 117,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "median_first_dataframe = first_dataframe[\"Score_2\"].median()  # median of the Score_2 column only, despite the variable name\n",
+ "median_first_dataframe"
+ ]
},
{
"cell_type": "markdown",
@@ -203,7 +1042,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
@@ -224,10 +1063,134 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 119,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Description | \n",
+ " Quantity | \n",
+ " UnitPrice | \n",
+ " Revenue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " LUNCH BAG APPLE DESIGN | \n",
+ " 1 | \n",
+ " 1.65 | \n",
+ " 1.65 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " SET OF 60 VINTAGE LEAF CAKE CASES | \n",
+ " 24 | \n",
+ " 0.55 | \n",
+ " 13.20 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " RIBBON REEL STRIPES DESIGN | \n",
+ " 1 | \n",
+ " 1.65 | \n",
+ " 1.65 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " WORLD WAR 2 GLIDERS ASSTD DESIGNS | \n",
+ " 2880 | \n",
+ " 0.18 | \n",
+ " 518.40 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " PLAYING CARDS JUBILEE UNION JACK | \n",
+ " 2 | \n",
+ " 1.25 | \n",
+ " 2.50 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " POPCORN HOLDER | \n",
+ " 7 | \n",
+ " 0.85 | \n",
+ " 5.95 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " BOX OF VINTAGE ALPHABET BLOCKS | \n",
+ " 1 | \n",
+ " 11.95 | \n",
+ " 11.95 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " PARTY BUNTING | \n",
+ " 4 | \n",
+ " 4.95 | \n",
+ " 19.80 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " JAZZ HEARTS ADDRESS BOOK | \n",
+ " 10 | \n",
+ " 0.19 | \n",
+ " 1.90 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " SET OF 4 SANTA PLACE SETTINGS | \n",
+ " 48 | \n",
+ " 1.25 | \n",
+ " 60.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Description Quantity UnitPrice Revenue\n",
+ "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n",
+ "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n",
+ "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n",
+ "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n",
+ "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n",
+ "5 POPCORN HOLDER 7 0.85 5.95\n",
+ "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n",
+ "7 PARTY BUNTING 4 4.95 19.80\n",
+ "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n",
+ "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00"
+ ]
+ },
+ "execution_count": 119,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "orders_dataframe = pd.DataFrame(orders)  # `orders` is defined outside this cell; result has Description, Quantity, UnitPrice, Revenue columns\n",
+ "orders_dataframe"
+ ]
},
{
"cell_type": "markdown",
@@ -238,10 +1201,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 122,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total quantity is 2978\n",
+ "Total revenue is 637.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "orders_dataframe_total_quant = orders_dataframe[\"Quantity\"].sum()\n",
+ "print(f\"Total quantity is {orders_dataframe_total_quant}\")\n",
+ "\n",
+ "orders_dataframe_total_revenue = orders_dataframe[\"Revenue\"].sum()\n",
+ "print(f\"Total revenue is {orders_dataframe_total_revenue}\")"
+ ]
},
{
"cell_type": "markdown",
@@ -252,10 +1230,161 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 146,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "11.95\n",
+ "0.18\n",
+ "difference is 11.77\n",
+ "0.18\n",
+ "11.95\n",
+ "difference is 11.77\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Description | \n",
+ " Quantity | \n",
+ " UnitPrice | \n",
+ " Revenue | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " WORLD WAR 2 GLIDERS ASSTD DESIGNS | \n",
+ " 2880 | \n",
+ " 0.18 | \n",
+ " 518.40 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " JAZZ HEARTS ADDRESS BOOK | \n",
+ " 10 | \n",
+ " 0.19 | \n",
+ " 1.90 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " SET OF 60 VINTAGE LEAF CAKE CASES | \n",
+ " 24 | \n",
+ " 0.55 | \n",
+ " 13.20 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " POPCORN HOLDER | \n",
+ " 7 | \n",
+ " 0.85 | \n",
+ " 5.95 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " PLAYING CARDS JUBILEE UNION JACK | \n",
+ " 2 | \n",
+ " 1.25 | \n",
+ " 2.50 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " SET OF 4 SANTA PLACE SETTINGS | \n",
+ " 48 | \n",
+ " 1.25 | \n",
+ " 60.00 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " LUNCH BAG APPLE DESIGN | \n",
+ " 1 | \n",
+ " 1.65 | \n",
+ " 1.65 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " RIBBON REEL STRIPES DESIGN | \n",
+ " 1 | \n",
+ " 1.65 | \n",
+ " 1.65 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " PARTY BUNTING | \n",
+ " 4 | \n",
+ " 4.95 | \n",
+ " 19.80 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " BOX OF VINTAGE ALPHABET BLOCKS | \n",
+ " 1 | \n",
+ " 11.95 | \n",
+ " 11.95 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Description Quantity UnitPrice Revenue\n",
+ "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n",
+ "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n",
+ "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n",
+ "5 POPCORN HOLDER 7 0.85 5.95\n",
+ "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n",
+ "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00\n",
+ "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n",
+ "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n",
+ "7 PARTY BUNTING 4 4.95 19.80\n",
+ "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95"
+ ]
+ },
+ "execution_count": 146,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sort by unit price (ascending) so the cheapest item comes first and the priciest last.\n",
+ "orders_dataframe = orders_dataframe.sort_values(by=\"UnitPrice\", ascending=True)\n",
+ "unit_prices = orders_dataframe[\"UnitPrice\"]\n",
+ "\n",
+ "# Approach 1: read both ends of the sorted column by position (dynamic, but relies on the sort above).\n",
+ "print(unit_prices.iloc[-1])  # last row = highest price\n",
+ "print(unit_prices.iloc[0])  # first row = lowest price\n",
+ "print(f\"difference is {unit_prices.iloc[-1] - unit_prices.iloc[0]}\")\n",
+ "\n",
+ "# Approach 2: min()/max(), which give the same answer whether or not the frame is sorted.\n",
+ "min_price = unit_prices.min()\n",
+ "max_price = unit_prices.max()\n",
+ "print(min_price)\n",
+ "print(max_price)\n",
+ "print(f\"difference is {max_price - min_price}\")\n",
+ "\n",
+ "orders_dataframe\n"
+ ]
},
{
"cell_type": "markdown",
@@ -266,7 +1395,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 160,
"metadata": {},
"outputs": [],
"source": [
@@ -285,10 +1414,130 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 161,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Serial No. | \n",
+ " GRE Score | \n",
+ " TOEFL Score | \n",
+ " University Rating | \n",
+ " SOP | \n",
+ " LOR | \n",
+ " CGPA | \n",
+ " Research | \n",
+ " Chance of Admit | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 337 | \n",
+ " 118 | \n",
+ " 4 | \n",
+ " 4.5 | \n",
+ " 4.5 | \n",
+ " 9.65 | \n",
+ " 1 | \n",
+ " 0.92 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 316 | \n",
+ " 104 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 8.00 | \n",
+ " 1 | \n",
+ " 0.72 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 322 | \n",
+ " 110 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 2.5 | \n",
+ " 8.67 | \n",
+ " 1 | \n",
+ " 0.80 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 314 | \n",
+ " 103 | \n",
+ " 2 | \n",
+ " 2.0 | \n",
+ " 3.0 | \n",
+ " 8.21 | \n",
+ " 0 | \n",
+ " 0.65 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 330 | \n",
+ " 115 | \n",
+ " 5 | \n",
+ " 4.5 | \n",
+ " 3.0 | \n",
+ " 9.34 | \n",
+ " 1 | \n",
+ " 0.90 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n",
+ "0 1 337 118 4 4.5 4.5 9.65 \n",
+ "1 2 316 104 3 3.0 3.5 8.00 \n",
+ "2 3 322 110 3 3.5 2.5 8.67 \n",
+ "3 4 314 103 2 2.0 3.0 8.21 \n",
+ "4 5 330 115 5 4.5 3.0 9.34 \n",
+ "\n",
+ " Research Chance of Admit \n",
+ "0 1 0.92 \n",
+ "1 1 0.72 \n",
+ "2 1 0.80 \n",
+ "3 0 0.65 \n",
+ "4 1 0.90 "
+ ]
+ },
+ "execution_count": 161,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "admissions.head()  # preview the first 5 rows of the admissions data"
+ ]
},
{
"cell_type": "markdown",
@@ -299,10 +1548,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 162,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Serial No. 0\n",
+ "GRE Score 0\n",
+ "TOEFL Score 0\n",
+ "University Rating 0\n",
+ "SOP 0\n",
+ "LOR 0\n",
+ "CGPA 0\n",
+ "Research 0\n",
+ "Chance of Admit 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 162,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.isna(admissions).sum()\n",
+ "# No missing data: the per-column count of missing-value flags (sum of Trues) is 0 for every column"
+ ]
},
{
"cell_type": "markdown",
@@ -313,10 +1585,218 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 163,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GRE Score | \n",
+ " TOEFL Score | \n",
+ " University Rating | \n",
+ " SOP | \n",
+ " LOR | \n",
+ " CGPA | \n",
+ " Research | \n",
+ " Chance of Admit | \n",
+ "
\n",
+ " \n",
+ " Serial No. | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 337 | \n",
+ " 118 | \n",
+ " 4 | \n",
+ " 4.5 | \n",
+ " 4.5 | \n",
+ " 9.65 | \n",
+ " 1 | \n",
+ " 0.92 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 316 | \n",
+ " 104 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 8.00 | \n",
+ " 1 | \n",
+ " 0.72 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 322 | \n",
+ " 110 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 2.5 | \n",
+ " 8.67 | \n",
+ " 1 | \n",
+ " 0.80 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 314 | \n",
+ " 103 | \n",
+ " 2 | \n",
+ " 2.0 | \n",
+ " 3.0 | \n",
+ " 8.21 | \n",
+ " 0 | \n",
+ " 0.65 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 330 | \n",
+ " 115 | \n",
+ " 5 | \n",
+ " 4.5 | \n",
+ " 3.0 | \n",
+ " 9.34 | \n",
+ " 1 | \n",
+ " 0.90 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 381 | \n",
+ " 324 | \n",
+ " 110 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 3.5 | \n",
+ " 9.04 | \n",
+ " 1 | \n",
+ " 0.82 | \n",
+ "
\n",
+ " \n",
+ " 382 | \n",
+ " 325 | \n",
+ " 107 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 9.11 | \n",
+ " 1 | \n",
+ " 0.84 | \n",
+ "
\n",
+ " \n",
+ " 383 | \n",
+ " 330 | \n",
+ " 116 | \n",
+ " 4 | \n",
+ " 5.0 | \n",
+ " 4.5 | \n",
+ " 9.45 | \n",
+ " 1 | \n",
+ " 0.91 | \n",
+ "
\n",
+ " \n",
+ " 384 | \n",
+ " 312 | \n",
+ " 103 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 4.0 | \n",
+ " 8.78 | \n",
+ " 0 | \n",
+ " 0.67 | \n",
+ "
\n",
+ " \n",
+ " 385 | \n",
+ " 333 | \n",
+ " 117 | \n",
+ " 4 | \n",
+ " 5.0 | \n",
+ " 4.0 | \n",
+ " 9.66 | \n",
+ " 1 | \n",
+ " 0.95 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
385 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n",
+ "Serial No. \n",
+ "1 337 118 4 4.5 4.5 9.65 \n",
+ "2 316 104 3 3.0 3.5 8.00 \n",
+ "3 322 110 3 3.5 2.5 8.67 \n",
+ "4 314 103 2 2.0 3.0 8.21 \n",
+ "5 330 115 5 4.5 3.0 9.34 \n",
+ "... ... ... ... ... ... ... \n",
+ "381 324 110 3 3.5 3.5 9.04 \n",
+ "382 325 107 3 3.0 3.5 9.11 \n",
+ "383 330 116 4 5.0 4.5 9.45 \n",
+ "384 312 103 3 3.5 4.0 8.78 \n",
+ "385 333 117 4 5.0 4.0 9.66 \n",
+ "\n",
+ " Research Chance of Admit \n",
+ "Serial No. \n",
+ "1 1 0.92 \n",
+ "2 1 0.72 \n",
+ "3 1 0.80 \n",
+ "4 0 0.65 \n",
+ "5 1 0.90 \n",
+ "... ... ... \n",
+ "381 1 0.82 \n",
+ "382 1 0.84 \n",
+ "383 1 0.91 \n",
+ "384 0 0.67 \n",
+ "385 1 0.95 \n",
+ "\n",
+ "[385 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 163,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "admissions.set_index(\"Serial No.\")  # NOTE(review): returns a new frame that is only displayed here; `admissions` keeps its default index unless the result is assigned back"
+ ]
},
{
"cell_type": "code",
@@ -334,10 +1814,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 169,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "execution_count": 169,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "admissions.duplicated(subset=[\"GRE Score\",\"CGPA\"]).sum()\n",
+ "# duplicated() flags rows whose (GRE Score, CGPA) pair already appeared earlier; a sum of 0 means every pair is unique"
+ ]
},
{
"cell_type": "markdown",
@@ -348,10 +1842,228 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 193,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "101\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Serial No. | \n",
+ " GRE Score | \n",
+ " TOEFL Score | \n",
+ " University Rating | \n",
+ " SOP | \n",
+ " LOR | \n",
+ " CGPA | \n",
+ " Research | \n",
+ " Chance of Admit | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 337 | \n",
+ " 118 | \n",
+ " 4 | \n",
+ " 4.5 | \n",
+ " 4.5 | \n",
+ " 9.65 | \n",
+ " 1 | \n",
+ " 0.92 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 330 | \n",
+ " 115 | \n",
+ " 5 | \n",
+ " 4.5 | \n",
+ " 3.0 | \n",
+ " 9.34 | \n",
+ " 1 | \n",
+ " 0.90 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 328 | \n",
+ " 112 | \n",
+ " 4 | \n",
+ " 4.0 | \n",
+ " 4.5 | \n",
+ " 9.10 | \n",
+ " 1 | \n",
+ " 0.78 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 20 | \n",
+ " 328 | \n",
+ " 116 | \n",
+ " 5 | \n",
+ " 5.0 | \n",
+ " 5.0 | \n",
+ " 9.50 | \n",
+ " 1 | \n",
+ " 0.94 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 21 | \n",
+ " 334 | \n",
+ " 119 | \n",
+ " 5 | \n",
+ " 5.0 | \n",
+ " 4.5 | \n",
+ " 9.70 | \n",
+ " 1 | \n",
+ " 0.95 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 379 | \n",
+ " 380 | \n",
+ " 329 | \n",
+ " 111 | \n",
+ " 4 | \n",
+ " 4.5 | \n",
+ " 4.0 | \n",
+ " 9.23 | \n",
+ " 1 | \n",
+ " 0.89 | \n",
+ "
\n",
+ " \n",
+ " 380 | \n",
+ " 381 | \n",
+ " 324 | \n",
+ " 110 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 3.5 | \n",
+ " 9.04 | \n",
+ " 1 | \n",
+ " 0.82 | \n",
+ "
\n",
+ " \n",
+ " 381 | \n",
+ " 382 | \n",
+ " 325 | \n",
+ " 107 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 9.11 | \n",
+ " 1 | \n",
+ " 0.84 | \n",
+ "
\n",
+ " \n",
+ " 382 | \n",
+ " 383 | \n",
+ " 330 | \n",
+ " 116 | \n",
+ " 4 | \n",
+ " 5.0 | \n",
+ " 4.5 | \n",
+ " 9.45 | \n",
+ " 1 | \n",
+ " 0.91 | \n",
+ "
\n",
+ " \n",
+ " 384 | \n",
+ " 385 | \n",
+ " 333 | \n",
+ " 117 | \n",
+ " 4 | \n",
+ " 5.0 | \n",
+ " 4.0 | \n",
+ " 9.66 | \n",
+ " 1 | \n",
+ " 0.95 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
101 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n",
+ "0 1 337 118 4 4.5 4.5 9.65 \n",
+ "4 5 330 115 5 4.5 3.0 9.34 \n",
+ "10 11 328 112 4 4.0 4.5 9.10 \n",
+ "19 20 328 116 5 5.0 5.0 9.50 \n",
+ "20 21 334 119 5 5.0 4.5 9.70 \n",
+ ".. ... ... ... ... ... ... ... \n",
+ "379 380 329 111 4 4.5 4.0 9.23 \n",
+ "380 381 324 110 3 3.5 3.5 9.04 \n",
+ "381 382 325 107 3 3.0 3.5 9.11 \n",
+ "382 383 330 116 4 5.0 4.5 9.45 \n",
+ "384 385 333 117 4 5.0 4.0 9.66 \n",
+ "\n",
+ " Research Chance of Admit \n",
+ "0 1 0.92 \n",
+ "4 1 0.90 \n",
+ "10 1 0.78 \n",
+ "19 1 0.94 \n",
+ "20 1 0.95 \n",
+ ".. ... ... \n",
+ "379 1 0.89 \n",
+ "380 1 0.82 \n",
+ "381 1 0.84 \n",
+ "382 1 0.91 \n",
+ "384 1 0.95 \n",
+ "\n",
+ "[101 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 193,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "condition1_CGPAgreater9 = admissions[\"CGPA\"] > 9  # boolean mask: CGPA above 9\n",
+ "condition2_stud_investig = admissions[\"Research\"] == 1  # boolean mask: Research flag equals 1\n",
+ "high_cgpa_researchers = admissions[condition1_CGPAgreater9 & condition2_stud_investig]  # filter once, reuse below\n",
+ "print(len(high_cgpa_researchers))\n",
+ "high_cgpa_researchers\n"
+ ]
},
{
"cell_type": "markdown",
@@ -362,17 +2074,153 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 196,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Serial No. | \n",
+ " GRE Score | \n",
+ " TOEFL Score | \n",
+ " University Rating | \n",
+ " SOP | \n",
+ " LOR | \n",
+ " CGPA | \n",
+ " Research | \n",
+ " Chance of Admit | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 28 | \n",
+ " 29 | \n",
+ " 338 | \n",
+ " 118 | \n",
+ " 4 | \n",
+ " 3.0 | \n",
+ " 4.5 | \n",
+ " 9.40 | \n",
+ " 1 | \n",
+ " 0.91 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " 63 | \n",
+ " 327 | \n",
+ " 114 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.0 | \n",
+ " 9.02 | \n",
+ " 0 | \n",
+ " 0.61 | \n",
+ "
\n",
+ " \n",
+ " 140 | \n",
+ " 141 | \n",
+ " 326 | \n",
+ " 114 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.0 | \n",
+ " 9.11 | \n",
+ " 1 | \n",
+ " 0.83 | \n",
+ "
\n",
+ " \n",
+ " 217 | \n",
+ " 218 | \n",
+ " 324 | \n",
+ " 111 | \n",
+ " 4 | \n",
+ " 3.0 | \n",
+ " 3.0 | \n",
+ " 9.01 | \n",
+ " 1 | \n",
+ " 0.82 | \n",
+ "
\n",
+ " \n",
+ " 381 | \n",
+ " 382 | \n",
+ " 325 | \n",
+ " 107 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 9.11 | \n",
+ " 1 | \n",
+ " 0.84 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n",
+ "28 29 338 118 4 3.0 4.5 9.40 \n",
+ "62 63 327 114 3 3.0 3.0 9.02 \n",
+ "140 141 326 114 3 3.0 3.0 9.11 \n",
+ "217 218 324 111 4 3.0 3.0 9.01 \n",
+ "381 382 325 107 3 3.0 3.5 9.11 \n",
+ "\n",
+ " Research Chance of Admit \n",
+ "28 1 0.91 \n",
+ "62 0 0.61 \n",
+ "140 1 0.83 \n",
+ "217 1 0.82 \n",
+ "381 1 0.84 "
+ ]
+ },
+ "execution_count": 196,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "condition1_CGPAgreater9 = admissions[\"CGPA\"] > 9\n",
+ "condition3_SOPless35 = admissions[\"SOP\"] < 3.5\n",
+ "# Keep only the rows where both masks are True (element-wise AND).\n",
+ "admissions.loc[condition1_CGPAgreater9 & condition3_SOPless35]"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 198,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8019999999999999"
+ ]
+ },
+ "execution_count": 198,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "admissions.loc[condition1_CGPAgreater9 & condition3_SOPless35, \"Chance of Admit\"].mean()"
+ ]
},
{
"cell_type": "markdown",
@@ -384,10 +2232,16 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 204,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "def toefl_greater_100_is_true(number):\n",
+ "    \"\"\"Return True when `number` is strictly greater than 100, otherwise False.\"\"\"\n",
+ "    # bool() keeps the result a plain Python bool even when `number` is a NumPy scalar.\n",
+ "    is_above_threshold = bool(number > 100)\n",
+ "    return is_above_threshold"
+ ]
},
{
"cell_type": "markdown",
@@ -398,10 +2252,242 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 205,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Serial No. | \n",
+ " GRE Score | \n",
+ " TOEFL Score | \n",
+ " University Rating | \n",
+ " SOP | \n",
+ " LOR | \n",
+ " CGPA | \n",
+ " Research | \n",
+ " Chance of Admit | \n",
+ " TOEFL > 100 | \n",
+ " Decision | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 171 | \n",
+ " 172 | \n",
+ " 309 | \n",
+ " 108 | \n",
+ " 3 | \n",
+ " 2.5 | \n",
+ " 3.0 | \n",
+ " 8.12 | \n",
+ " 0 | \n",
+ " 0.72 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 362 | \n",
+ " 363 | \n",
+ " 290 | \n",
+ " 100 | \n",
+ " 1 | \n",
+ " 1.5 | \n",
+ " 2.0 | \n",
+ " 7.56 | \n",
+ " 0 | \n",
+ " 0.47 | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 187 | \n",
+ " 188 | \n",
+ " 306 | \n",
+ " 105 | \n",
+ " 2 | \n",
+ " 3.0 | \n",
+ " 2.5 | \n",
+ " 8.26 | \n",
+ " 0 | \n",
+ " 0.73 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 208 | \n",
+ " 209 | \n",
+ " 324 | \n",
+ " 110 | \n",
+ " 4 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 8.97 | \n",
+ " 1 | \n",
+ " 0.84 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " 57 | \n",
+ " 309 | \n",
+ " 100 | \n",
+ " 2 | \n",
+ " 3.0 | \n",
+ " 3.0 | \n",
+ " 8.10 | \n",
+ " 0 | \n",
+ " 0.48 | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " 81 | \n",
+ " 340 | \n",
+ " 115 | \n",
+ " 5 | \n",
+ " 4.5 | \n",
+ " 4.5 | \n",
+ " 9.45 | \n",
+ " 1 | \n",
+ " 0.94 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 327 | \n",
+ " 328 | \n",
+ " 308 | \n",
+ " 106 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.0 | \n",
+ " 8.24 | \n",
+ " 0 | \n",
+ " 0.58 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " 97 | \n",
+ " 312 | \n",
+ " 105 | \n",
+ " 2 | \n",
+ " 2.5 | \n",
+ " 3.0 | \n",
+ " 8.12 | \n",
+ " 0 | \n",
+ " 0.64 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " 79 | \n",
+ " 320 | \n",
+ " 110 | \n",
+ " 5 | \n",
+ " 5.0 | \n",
+ " 4.5 | \n",
+ " 9.22 | \n",
+ " 1 | \n",
+ " 0.92 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 357 | \n",
+ " 358 | \n",
+ " 336 | \n",
+ " 119 | \n",
+ " 4 | \n",
+ " 4.5 | \n",
+ " 4.0 | \n",
+ " 9.62 | \n",
+ " 1 | \n",
+ " 0.95 | \n",
+ " True | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n",
+ "171 172 309 108 3 2.5 3.0 8.12 \n",
+ "362 363 290 100 1 1.5 2.0 7.56 \n",
+ "187 188 306 105 2 3.0 2.5 8.26 \n",
+ "208 209 324 110 4 3.0 3.5 8.97 \n",
+ "56 57 309 100 2 3.0 3.0 8.10 \n",
+ ".. ... ... ... ... ... ... ... \n",
+ "80 81 340 115 5 4.5 4.5 9.45 \n",
+ "327 328 308 106 3 3.0 3.0 8.24 \n",
+ "96 97 312 105 2 2.5 3.0 8.12 \n",
+ "78 79 320 110 5 5.0 4.5 9.22 \n",
+ "357 358 336 119 4 4.5 4.0 9.62 \n",
+ "\n",
+ " Research Chance of Admit TOEFL > 100 Decision \n",
+ "171 0 0.72 True True \n",
+ "362 0 0.47 False False \n",
+ "187 0 0.73 True True \n",
+ "208 1 0.84 True True \n",
+ "56 0 0.48 False False \n",
+ ".. ... ... ... ... \n",
+ "80 1 0.94 True True \n",
+ "327 0 0.58 True True \n",
+ "96 0 0.64 True True \n",
+ "78 1 0.92 True True \n",
+ "357 1 0.95 True True \n",
+ "\n",
+ "[100 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 205,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "admissions[\"Decision\"] = admissions[\"TOEFL Score\"].apply(toefl_greater_100_is_true)  # element-wise TOEFL > 100 flag\n",
+ "admissions.sample(100, random_state=42)  # fixed seed so the displayed rows are reproducible on re-run"
+ ]
},
{
"cell_type": "code",
@@ -425,6 +2511,259 @@
"HINT (use np.where)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 208,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Serial No. | \n",
+ " GRE Score | \n",
+ " TOEFL Score | \n",
+ " University Rating | \n",
+ " SOP | \n",
+ " LOR | \n",
+ " CGPA | \n",
+ " Research | \n",
+ " Chance of Admit | \n",
+ " TOEFL > 100 | \n",
+ " Decision | \n",
+ " decision2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 337 | \n",
+ " 118 | \n",
+ " 4 | \n",
+ " 4.5 | \n",
+ " 4.5 | \n",
+ " 9.65 | \n",
+ " 1 | \n",
+ " 0.92 | \n",
+ " True | \n",
+ " True | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 316 | \n",
+ " 104 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 8.00 | \n",
+ " 1 | \n",
+ " 0.72 | \n",
+ " True | \n",
+ " True | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 322 | \n",
+ " 110 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 2.5 | \n",
+ " 8.67 | \n",
+ " 1 | \n",
+ " 0.80 | \n",
+ " True | \n",
+ " True | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 314 | \n",
+ " 103 | \n",
+ " 2 | \n",
+ " 2.0 | \n",
+ " 3.0 | \n",
+ " 8.21 | \n",
+ " 0 | \n",
+ " 0.65 | \n",
+ " True | \n",
+ " True | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 330 | \n",
+ " 115 | \n",
+ " 5 | \n",
+ " 4.5 | \n",
+ " 3.0 | \n",
+ " 9.34 | \n",
+ " 1 | \n",
+ " 0.90 | \n",
+ " True | \n",
+ " True | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 380 | \n",
+ " 381 | \n",
+ " 324 | \n",
+ " 110 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 3.5 | \n",
+ " 9.04 | \n",
+ " 1 | \n",
+ " 0.82 | \n",
+ " True | \n",
+ " True | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 381 | \n",
+ " 382 | \n",
+ " 325 | \n",
+ " 107 | \n",
+ " 3 | \n",
+ " 3.0 | \n",
+ " 3.5 | \n",
+ " 9.11 | \n",
+ " 1 | \n",
+ " 0.84 | \n",
+ " True | \n",
+ " True | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 382 | \n",
+ " 383 | \n",
+ " 330 | \n",
+ " 116 | \n",
+ " 4 | \n",
+ " 5.0 | \n",
+ " 4.5 | \n",
+ " 9.45 | \n",
+ " 1 | \n",
+ " 0.91 | \n",
+ " True | \n",
+ " True | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 383 | \n",
+ " 384 | \n",
+ " 312 | \n",
+ " 103 | \n",
+ " 3 | \n",
+ " 3.5 | \n",
+ " 4.0 | \n",
+ " 8.78 | \n",
+ " 0 | \n",
+ " 0.67 | \n",
+ " True | \n",
+ " True | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 384 | \n",
+ " 385 | \n",
+ " 333 | \n",
+ " 117 | \n",
+ " 4 | \n",
+ " 5.0 | \n",
+ " 4.0 | \n",
+ " 9.66 | \n",
+ " 1 | \n",
+ " 0.95 | \n",
+ " True | \n",
+ " True | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
385 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n",
+ "0 1 337 118 4 4.5 4.5 9.65 \n",
+ "1 2 316 104 3 3.0 3.5 8.00 \n",
+ "2 3 322 110 3 3.5 2.5 8.67 \n",
+ "3 4 314 103 2 2.0 3.0 8.21 \n",
+ "4 5 330 115 5 4.5 3.0 9.34 \n",
+ ".. ... ... ... ... ... ... ... \n",
+ "380 381 324 110 3 3.5 3.5 9.04 \n",
+ "381 382 325 107 3 3.0 3.5 9.11 \n",
+ "382 383 330 116 4 5.0 4.5 9.45 \n",
+ "383 384 312 103 3 3.5 4.0 8.78 \n",
+ "384 385 333 117 4 5.0 4.0 9.66 \n",
+ "\n",
+ " Research Chance of Admit TOEFL > 100 Decision decision2 \n",
+ "0 1 0.92 True True 1 \n",
+ "1 1 0.72 True True 0 \n",
+ "2 1 0.80 True True 1 \n",
+ "3 0 0.65 True True 0 \n",
+ "4 1 0.90 True True 1 \n",
+ ".. ... ... ... ... ... \n",
+ "380 1 0.82 True True 1 \n",
+ "381 1 0.84 True True 0 \n",
+ "382 1 0.91 True True 1 \n",
+ "383 0 0.67 True True 1 \n",
+ "384 1 0.95 True True 1 \n",
+ "\n",
+ "[385 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 208,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# np.where(condition, value_if_true, value_if_false): 1 where SOP is above 3, otherwise 0.\n",
+ "sop_above_3 = admissions[\"SOP\"] > 3\n",
+ "admissions[\"decision2\"] = np.where(sop_above_3, 1, 0)\n",
+ "admissions"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -449,7 +2788,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.8"
+ "version": "3.11.4"
},
"toc": {
"base_numbering": "",