diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index 4f428ac..b3373ea 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -44,10 +44,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df1 = pd.Series(lst)" + ] }, { "cell_type": "markdown", @@ -60,10 +62,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n", + "1\n", + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", + "RangeIndex(start=0, stop=10, step=1)\n" + ] + } + ], + "source": [ + "df1.index\n", + "\n", + "for i in df1.index:\n", + " print(i)\n", + "print(df1.index)" + ] }, { "cell_type": "markdown", @@ -74,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -92,10 +118,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df = pd.DataFrame(b)" + ] }, { "cell_type": "markdown", @@ -106,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -124,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -133,10 +161,287 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(b)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename(columns={0:'Score_1', 1:'Score_2',2: 'Score_3', 3:'Score_4', 4:'Score_5'}, inplace=True)\n", + "df" + ] }, { "cell_type": "markdown", @@ -147,10 +452,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_3Score_5
053.167.578.4
161.330.887.6
220.644.291.8
357.496.169.5
483.685.435.9
549.00.189.1
623.395.026.9
727.653.868.5
896.653.450.1
973.743.234.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_subset = df[['Score_1', 'Score_3','Score_5']]\n", + "df_subset" + ] }, { "cell_type": "markdown", @@ -161,10 +579,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "56.95000000000001" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "average_score_3 = df['Score_3'].mean()\n", + "average_score_3" + ] }, { "cell_type": "markdown", @@ -175,10 +607,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "88.8" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "maximum_socre_4 = df['Score_4'].max()\n", + "maximum_socre_4" + ] }, { "cell_type": "markdown", @@ -189,10 +635,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "median_score_2 = df['Score_2'].median\n", + "median_score_2" + ] }, { "cell_type": "markdown", @@ -203,7 +673,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -224,10 +694,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
0LUNCH BAG APPLE DESIGN11.651.65
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
2RIBBON REEL STRIPES DESIGN11.651.65
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
4PLAYING CARDS JUBILEE UNION JACK21.252.50
5POPCORN HOLDER70.855.95
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
7PARTY BUNTING44.9519.80
8JAZZ HEARTS ADDRESS BOOK100.191.90
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
\n", + "
" + ], + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_orders = pd.DataFrame.from_dict(orders)\n", + "df_orders" + ] }, { "cell_type": "markdown", @@ -238,10 +832,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The Total Quantity is: 2978 and the Total Revenue is: 637\n" + ] + } + ], + "source": [ + "total_quantity = np.sum(df_orders['Quantity'])\n", + "total_revenue = np.sum(df_orders['Revenue'])\n", + "print(\"The Total Quantity is:\", total_quantity,\" and the Total Revenue is: \", int(total_revenue))" + ] }, { "cell_type": "markdown", @@ -252,10 +858,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The most expensive price is: 11.95 and the least expensive is: 0.18\n" + ] + } + ], + "source": [ + "least_exp = np.min(df_orders['UnitPrice'])\n", + "most_exp = np.max(df_orders['UnitPrice'])\n", + "print(\"The most expensive price is: \", most_exp,\"and the least expensive is: \", least_exp)" + ] }, { "cell_type": "markdown", @@ -266,7 +884,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 69, "metadata": {}, "outputs": [], "source": [ @@ -285,10 +903,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 85, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_var = admissions.head\n", + "new_var" + ] }, { "cell_type": "markdown", @@ -299,10 +957,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.isnull().sum()\n", + "\n", + "# We dont have any missing values on the table" + ] }, { "cell_type": "markdown", @@ -313,17 +995,231 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 89, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
Serial No.
1133711844.54.59.6510.92
2231610433.03.58.0010.72
3332211033.52.58.6710.80
4431410322.03.08.2100.65
5533011554.53.09.3410.90
..............................
38138132411033.53.59.0410.82
38238232510733.03.59.1110.84
38338333011645.04.59.4510.91
38438431210333.54.08.7800.67
38538533311745.04.09.6610.95
\n", + "

385 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR \\\n", + "Serial No. \n", + "1 1 337 118 4 4.5 4.5 \n", + "2 2 316 104 3 3.0 3.5 \n", + "3 3 322 110 3 3.5 2.5 \n", + "4 4 314 103 2 2.0 3.0 \n", + "5 5 330 115 5 4.5 3.0 \n", + "... ... ... ... ... ... ... \n", + "381 381 324 110 3 3.5 3.5 \n", + "382 382 325 107 3 3.0 3.5 \n", + "383 383 330 116 4 5.0 4.5 \n", + "384 384 312 103 3 3.5 4.0 \n", + "385 385 333 117 4 5.0 4.0 \n", + "\n", + " CGPA Research Chance of Admit \n", + "Serial No. \n", + "1 9.65 1 0.92 \n", + "2 8.00 1 0.72 \n", + "3 8.67 1 0.80 \n", + "4 8.21 0 0.65 \n", + "5 9.34 1 0.90 \n", + "... ... ... ... \n", + "381 9.04 1 0.82 \n", + "382 9.11 1 0.84 \n", + "383 9.45 1 0.91 \n", + "384 8.78 0 0.67 \n", + "385 9.66 1 0.95 \n", + "\n", + "[385 rows x 9 columns]" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.set_index(\"Serial No.\", inplace=False, drop=False)" + ] }, { "cell_type": "markdown", @@ -334,10 +1230,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 115, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "We have 385 unique combinations between them, that is the same number os Serial Numbers\n" + ] + } + ], + "source": [ + "new_set = admissions[['GRE Score','CGPA']]\n", + "print(new_set.nunique)\n", + "\n", + "print('We have 385 unique combinations between them, that is the same number os Serial Numbers')" + ] }, { "cell_type": "markdown", @@ -435,9 +1358,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "ironhack", "language": "python", - "name": "python3" + "name": "ironhack" }, "language_info": { "codemirror_mode": { @@ -449,7 +1372,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": "",