diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index 4f428ac..ebebb1b 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 73, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -44,37 +44,87 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "ls = pd.Series(lst)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "print(ls)" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 3. Use indexing to return the third value in the Series above.\n", - "\n", - "*Hint: Remember that indexing begins at 0.*" + "### 4. Create a Pandas DataFrame from the list of lists below. Each sublist should be represented as a row." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0\n", + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n" + ] + } + ], + "source": [ + "df = pd.DataFrame(ls)\n", + "print(df)" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### 4. Create a Pandas DataFrame from the list of lists below. Each sublist should be represented as a row." + "### 3. 
Use indexing to return the third value in the Series above.\n", + "\n", + "*Hint: Remember that indexing begins at 0.*" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -92,10 +142,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "67.5\n" + ] + } + ], + "source": [ + "print(b[:][0][2])" + ] }, { "cell_type": "markdown", @@ -106,9 +166,27 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7\n" + ] + } + ], "source": [ "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", " [61.3, 40.8, 30.8, 37.8, 87.6],\n", @@ -119,12 +197,15 @@ " [23.3, 40.7, 95.0, 83.8, 26.9],\n", " [27.6, 26.4, 53.8, 88.8, 68.5],\n", " [96.6, 96.4, 53.4, 72.4, 50.1],\n", - " [73.7, 39.0, 43.2, 81.6, 34.7]]" + " [73.7, 39.0, 43.2, 81.6, 34.7]]\n", + "\n", + "b1 = pd.DataFrame(b)\n", + "print(b1)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -133,10 +214,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 
96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7\n" + ] + } + ], + "source": [ + "b2 = b1.rename(columns={0: 'Score_1', 1: 'Score_2', 2: 'Score_3', 3: 'Score_4', 4: 'Score_5'})\n", + "print(b2)\n", + " " + ] }, { "cell_type": "markdown", @@ -147,10 +250,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7\n" + ] + } + ], + "source": [ + "b3 = b2[['Score_1', 'Score_3', 'Score_5']]\n", + "print(b3)" + ] }, { "cell_type": "markdown", @@ -161,10 +285,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 2\n", + "0 67.5\n", + "1 30.8\n", + "2 44.2\n", + "3 96.1\n", + "4 85.4\n", + "5 0.1\n", + "6 95.0\n", + "7 53.8\n", + "8 53.4\n", + "9 43.2\n" + ] + } + ], + "source": [ + "sc_3 = b1[[2]]\n", + "print(sc_3)\n", + "sc_3_av = sc_3.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 56.95\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "print(sc_3_av)" + ] }, { "cell_type": "markdown", @@ -175,10 +339,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 
88.8\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "sc_4 = b1[[3]]\n", + "sc_4max = sc_4.max()\n", + "print(sc_4max)\n" + ] }, { "cell_type": "markdown", @@ -189,10 +366,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 40.75\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "sc_2 = b1[[1]]\n", + "sc_2med = sc_2.median()\n", + "print(sc_2med)" + ] }, { "cell_type": "markdown", @@ -203,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -224,10 +414,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00\n" + ] + } + ], + "source": [ + "orders_1 = pd.DataFrame(orders)\n", + "print(orders_1)" + ] }, { "cell_type": "markdown", @@ -238,10 +449,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Quantity 2978\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "t_q = orders_1[['Quantity']]\n", + "t_qsum = t_q.sum()\n", + "print(t_qsum)" + ] + }, + { + "cell_type": "code", + 
"execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Revenue 637.0\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "t_r = orders_1[['Revenue']]\n", + "t_rsum = t_r.sum()\n", + "print(t_rsum)" + ] }, { "cell_type": "markdown", @@ -252,10 +496,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 69, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "UnitPrice 11.77\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "exp = orders_1[['UnitPrice']]\n", + "max = exp.max()\n", + "min = exp.min()\n", + "diff = max - min\n", + "print(diff)\n" + ] }, { "cell_type": "markdown", @@ -266,7 +525,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 112, "metadata": {}, "outputs": [], "source": [ @@ -285,10 +544,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 113, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
1231610433.03.58.0010.72
2332211033.52.58.6710.80
3431410322.03.08.2100.65
4533011554.53.09.3410.90
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 " + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.head()" + ] }, { "cell_type": "markdown", @@ -299,31 +678,117 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 114, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.isnull().sum()" + ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 115, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 385 entries, 0 to 384\n", + "Data columns (total 9 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Serial No. 385 non-null int64 \n", + " 1 GRE Score 385 non-null int64 \n", + " 2 TOEFL Score 385 non-null int64 \n", + " 3 University Rating 385 non-null int64 \n", + " 4 SOP 385 non-null float64\n", + " 5 LOR 385 non-null float64\n", + " 6 CGPA 385 non-null float64\n", + " 7 Research 385 non-null int64 \n", + " 8 Chance of Admit 385 non-null float64\n", + "dtypes: float64(4), int64(5)\n", + "memory usage: 27.2 KB\n" + ] + } + ], "source": [ - "### 2 - Interestingly, there is a column that uniquely identifies the applicants. 
This column is the serial number column. Instead of having our own index, we should make this column our index. Do this in the cell below. Keep the column in the dataframe in addition to making it an index." + "admissions.info()" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "### 2 - Interestingly, there is a column that uniquely identifies the applicants. This column is the serial number column. Instead of having our own index, we should make this column our index. Do this in the cell below. Keep the column in the dataframe in addition to making it an index." + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 121, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "2 316 104 3 3.0 3.5 8.00 \n", + "3 322 110 3 3.5 2.5 8.67 \n", + "4 314 103 2 2.0 3.0 8.21 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "... ... ... ... ... ... ... \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "384 312 103 3 3.5 4.0 8.78 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit \n", + "Serial No. \n", + "1 1 0.92 \n", + "2 1 0.72 \n", + "3 1 0.80 \n", + "4 0 0.65 \n", + "5 1 0.90 \n", + "... ... ... 
\n", + "381 1 0.82 \n", + "382 1 0.84 \n", + "383 1 0.91 \n", + "384 0 0.67 \n", + "385 1 0.95 \n", + "\n", + "[385 rows x 8 columns]\n" + ] + } + ], + "source": [ + "admission_1 = admissions.set_index('Serial No.')\n", + "print(admission_1)" + ] }, { "cell_type": "markdown", @@ -334,10 +799,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 126, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admission_1[\"gre_cgpa\"] = admission_1[\"GRE Score\"].astype(str) + \" - \"+ admission_1[\"CGPA\"].astype(str)\n", + "admission_1[\"gre_cgpa\"].is_unique" + ] }, { "cell_type": "markdown", @@ -348,10 +827,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 128, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "11 328 112 4 4.0 4.5 9.10 \n", + "20 328 116 5 5.0 5.0 9.50 \n", + "21 334 119 5 5.0 4.5 9.70 \n", + "... ... ... ... ... ... ... \n", + "380 329 111 4 4.5 4.0 9.23 \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit gre_cgpa \n", + "Serial No. \n", + "1 1 0.92 337 - 9.65 \n", + "5 1 0.90 330 - 9.34 \n", + "11 1 0.78 328 - 9.1 \n", + "20 1 0.94 328 - 9.5 \n", + "21 1 0.95 334 - 9.7 \n", + "... ... ... ... 
\n", + "380 1 0.89 329 - 9.23 \n", + "381 1 0.82 324 - 9.04 \n", + "382 1 0.84 325 - 9.11 \n", + "383 1 0.91 330 - 9.45 \n", + "385 1 0.95 333 - 9.66 \n", + "\n", + "[101 rows x 9 columns]\n" + ] + } + ], + "source": [ + "condition1 = (admission_1['CGPA'] > 9) & (admission_1['Research'] == 1)\n", + "filterdata = admission_1[condition1]\n", + "print(filterdata)" + ] }, { "cell_type": "markdown", @@ -362,17 +881,69 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 129, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "29 338 118 4 3.0 4.5 9.40 \n", + "63 327 114 3 3.0 3.0 9.02 \n", + "141 326 114 3 3.0 3.0 9.11 \n", + "218 324 111 4 3.0 3.0 9.01 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "\n", + " Research Chance of Admit gre_cgpa \n", + "Serial No. \n", + "29 1 0.91 338 - 9.4 \n", + "63 0 0.61 327 - 9.02 \n", + "141 1 0.83 326 - 9.11 \n", + "218 1 0.82 324 - 9.01 \n", + "382 1 0.84 325 - 9.11 \n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "condition3 = (admission_1['CGPA'] > 9)\n", + "condition4 = (admission_1['SOP'] < 3.5)\n", + "filterdata1 = admission_1[condition3 & condition4]\n", + "print(filterdata1)\n", + "\n", + "0" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 130, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chance of Admit 0.802\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "chance = filterdata1[['Chance of Admit']]\n", + "mean_chance = chance.mean()\n", + "print(mean_chance)" + ] }, { "cell_type": "markdown", @@ -384,10 +955,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 131, "metadata": {}, - 
"outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "2 316 104 3 3.0 3.5 8.00 \n", + "3 322 110 3 3.5 2.5 8.67 \n", + "4 314 103 2 2.0 3.0 8.21 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "... ... ... ... ... ... ... \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "384 312 103 3 3.5 4.0 8.78 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit gre_cgpa Decision \n", + "Serial No. \n", + "1 1 0.92 337 - 9.65 True \n", + "2 1 0.72 316 - 8.0 True \n", + "3 1 0.80 322 - 8.67 True \n", + "4 0 0.65 314 - 8.21 True \n", + "5 1 0.90 330 - 9.34 True \n", + "... ... ... ... ... \n", + "381 1 0.82 324 - 9.04 True \n", + "382 1 0.84 325 - 9.11 True \n", + "383 1 0.91 330 - 9.45 True \n", + "384 0 0.67 312 - 8.78 True \n", + "385 1 0.95 333 - 9.66 True \n", + "\n", + "[385 rows x 10 columns]\n" + ] + } + ], + "source": [ + "def toefl_condition(score):\n", + " return score > 100\n", + "\n", + "admission_1['Decision'] = admission_1['TOEFL Score'].apply(toefl_condition)\n", + "print(admission_1)\n" + ] }, { "cell_type": "markdown", @@ -404,25 +1017,63 @@ "source": [] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "Create a column called `decision2` in the `admissions` dataframe. Assign 1 to this column if the value of `SOP` is greater than 3 and 0 otherwise. \n", + "HINT (use np.where)" + ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", + "execution_count": 132, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. 
\n", + "1 337 118 4 4.5 4.5 9.65 \n", + "2 316 104 3 3.0 3.5 8.00 \n", + "3 322 110 3 3.5 2.5 8.67 \n", + "4 314 103 2 2.0 3.0 8.21 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "... ... ... ... ... ... ... \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "384 312 103 3 3.5 4.0 8.78 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit gre_cgpa Decision Decision2 \n", + "Serial No. \n", + "1 1 0.92 337 - 9.65 True 1 \n", + "2 1 0.72 316 - 8.0 True 0 \n", + "3 1 0.80 322 - 8.67 True 1 \n", + "4 0 0.65 314 - 8.21 True 0 \n", + "5 1 0.90 330 - 9.34 True 1 \n", + "... ... ... ... ... ... \n", + "381 1 0.82 324 - 9.04 True 1 \n", + "382 1 0.84 325 - 9.11 True 0 \n", + "383 1 0.91 330 - 9.45 True 1 \n", + "384 0 0.67 312 - 8.78 True 1 \n", + "385 1 0.95 333 - 9.66 True 1 \n", + "\n", + "[385 rows x 11 columns]\n" + ] + } + ], "source": [ - "Create a column called `decision2` in the `admissions` dataframe. Assign 1 to this column if the value of `SOP` is greater than 3 and 0 otherwise. \n", - "HINT (use np.where)" + "def SOP_greater(value):\n", + " if value > 3: # strictly greater than 3, as the task statement requires\n", + " return 1\n", + " else:\n", + " return 0\n", + "\n", + "admission_1['Decision2'] = admission_1['SOP'].apply(SOP_greater)\n", + "print(admission_1)" ] }, { @@ -449,7 +1100,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": "",