From d5a433bf93c83f2b931a65afa527ee75eb6debe3 Mon Sep 17 00:00:00 2001 From: Scott McCormack Date: Sat, 17 Sep 2022 20:28:06 +0800 Subject: [PATCH 1/4] ch03: notebook updates (#1) - Updates to `03-dict-set/03-dict-set.ipynb` to include 2nd edition updates (executed using Python 3.10) - Minor updates to `03-dict-set/README.md` to target script in `03-dict-set/` --- 03-dict-set/03-dict-set.ipynb | 1723 +++++++++++++++++++++++++++++++-- 03-dict-set/README.md | 2 +- 2 files changed, 1654 insertions(+), 71 deletions(-) diff --git a/03-dict-set/03-dict-set.ipynb b/03-dict-set/03-dict-set.ipynb index 1bfc9d5..556a63f 100644 --- a/03-dict-set/03-dict-set.ipynb +++ b/03-dict-set/03-dict-set.ipynb @@ -1,28 +1,47 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Chapter 3 — Dictionaries and Sets" + ] + }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "3.8.0 (v3.8.0:fa919fdf25, Oct 14 2019, 10:23:27) \n", - "[Clang 6.0 (clang-600.0.57)]\n" + "3.10.4 (main, Mar 31 2022, 08:41:55) [GCC 7.5.0]\n" ] } ], "source": [ "import sys\n", + "\n", "print(sys.version)" ] }, { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -37,6 +56,7 @@ ], "source": [ "from collections import abc\n", + "\n", "my_dict = {}\n", "isinstance(my_dict, abc.Mapping)" ] @@ -44,7 +64,11 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -64,7 +88,11 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -84,8 +112,12 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + 
"execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "name": "stdout", @@ -105,8 +137,12 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, + "execution_count": 6, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -114,7 +150,7 @@ "5149391500123939311" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -126,8 +162,12 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -135,7 +175,7 @@ "True" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -151,8 +191,12 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -160,7 +204,7 @@ "{'one': 1, 'two': 2, 'three': 3}" ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -171,8 +215,12 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -180,7 +228,7 @@ "['one', 'two', 'three']" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -191,8 +239,12 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -200,7 +252,7 @@ "{'two': 2, 'one': 1, 'three': 3}" ] }, - "execution_count": 12, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -211,8 +263,12 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, + "execution_count": 11, + "metadata": { + "pycharm": { + "name": 
"#%%\n" + } + }, "outputs": [ { "data": { @@ -220,7 +276,7 @@ "('three', 3)" ] }, - "execution_count": 13, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -231,8 +287,12 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 12, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -240,7 +300,7 @@ "{'two': 2, 'one': 1}" ] }, - "execution_count": 14, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -249,30 +309,60 @@ "c" ] }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Modern dict Syntax" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 3-1. Examples of dict comprehensions" + ] + }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ - "dial_codes = [ # <1>\n", + "dial_codes = [ # <1>\n", " (880, 'Bangladesh'),\n", - " (55, 'Brazil'),\n", - " (86, 'China'),\n", - " (91, 'India'),\n", - " (62, 'Indonesia'),\n", - " (81, 'Japan'),\n", + " (55, 'Brazil'),\n", + " (86, 'China'),\n", + " (91, 'India'),\n", + " (62, 'Indonesia'),\n", + " (81, 'Japan'),\n", " (234, 'Nigeria'),\n", - " (92, 'Pakistan'),\n", - " (7, 'Russia'),\n", - " (1, 'United States'),\n", + " (92, 'Pakistan'),\n", + " (7, 'Russia'),\n", + " (1, 'United States'),\n", "]" ] }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -289,20 +379,24 @@ " 'United States': 1}" ] }, - "execution_count": 17, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "country_dial = {country: code for code, country in dial_codes}\n", + "country_dial = {country: 
code for code, country in dial_codes} # <2>\n", "country_dial" ] }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -310,55 +404,1544 @@ "{55: 'BRAZIL', 62: 'INDONESIA', 7: 'RUSSIA', 1: 'UNITED STATES'}" ] }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{code: country.upper() # <3>\n", + " for country, code in sorted(country_dial.items())\n", + " if code < 70}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'x': 1, 'y': 2, 'z': 3}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def dump(**kwargs):\n", + " return kwargs\n", + "\n", + "\n", + "dump(**{'x': 1}, y=2, **{'z': 3})" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': 2, 'b': 4, 'c': 6}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1 = {'a': 1, 'b': 3}\n", + "d2 = {'a': 2, 'b': 4, 'c': 6}\n", + "d1 | d2" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': 1, 'b': 3}" + ] + }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "{code: country.upper() \n", - " for country, code in sorted(country_dial.items())\n", - " if code < 70}" + "d1" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': 2, 'b': 4, 'c': 6}" + ] + }, + "execution_count": 19, +
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1 |= d2\n", + "d1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Pattern Matching with Mappings" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 3-2. `creator.py`: `get_creators()` extracts names of creators from media records" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "**Note**: Python 3.10 is required" ] }, { "cell_type": "code", "execution_count": 20, - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def get_creators(record: dict) -> list:\n", + " match record:\n", + " case {'type': 'book', 'api': 2, 'authors': [*names]}: # <1>\n", + " return names\n", + " case {'type': 'book', 'api': 1, 'author': name}: # <2>\n", + " return [name]\n", + " case {'type': 'book'}: # <3>\n", + " raise ValueError(f\"Invalid 'book' record: {record!r}\")\n", + " case {'type': 'movie', 'director': name}: # <4>\n", + " return [name]\n", + " case _: # <5>\n", + " raise ValueError(f'Invalid record: {record!r}')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { "text/plain": [ - "{'Pakistan': 92,\n", - " 'Indonesia': 62,\n", - " 'Russia': 7,\n", - " 'Japan': 81,\n", - " 'United States': 1,\n", - " 'China': 86,\n", - " 'Brazil': 55,\n", - " 'Bangladesh': 880,\n", - " 'Nigeria': 234,\n", - " 'India': 91}" + "['Douglas Hofstadter']" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from random import shuffle\n", - "shuffle(dial_codes)\n", - "country_dial = {country: code for code, country in dial_codes}\n", - "country_dial" + "b1 = dict(api=1, author='Douglas 
Hofstadter',\n", + " type='book', title='Gödel, Escher, Bach')\n", + "get_creators(b1)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 22, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], - "source": [] + "source": [ + "from collections import OrderedDict\n", + "\n", + "b2 = OrderedDict(api=2, type='book',\n", + " title='Python in a Nutshell',\n", + " authors='Martelli Ravenscroft Holden'.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Martelli', 'Ravenscroft', 'Holden']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_creators(b2)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Invalid 'book' record: {'type': 'book', 'pages': 770}", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)", + "Input \u001B[0;32mIn [24]\u001B[0m, in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43mget_creators\u001B[49m\u001B[43m(\u001B[49m\u001B[43m{\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mtype\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mbook\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mpages\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m770\u001B[39;49m\u001B[43m}\u001B[49m\u001B[43m)\u001B[49m\n", + 
"Input \u001B[0;32mIn [20]\u001B[0m, in \u001B[0;36mget_creators\u001B[0;34m(record)\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget_creators\u001B[39m(record: \u001B[38;5;28mdict\u001B[39m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;28mlist\u001B[39m:\n\u001B[1;32m 2\u001B[0m \u001B[38;5;28;01mmatch\u001B[39;00m record:\n\u001B[1;32m 3\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mapi\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;241m2\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauthors\u001B[39m\u001B[38;5;124m'\u001B[39m: [\u001B[38;5;241m*\u001B[39mnames]}: \u001B[38;5;66;03m# <1>\u001B[39;00m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m names\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mapi\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;241m1\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauthor\u001B[39m\u001B[38;5;124m'\u001B[39m: name}: \u001B[38;5;66;03m# <2>\u001B[39;00m\n\u001B[1;32m 6\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m [name]\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m}: \u001B[38;5;66;03m# <3>\u001B[39;00m\n\u001B[0;32m----> 8\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mInvalid 
\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m record: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mrecord\u001B[38;5;132;01m!r}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 9\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmovie\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mdirector\u001B[39m\u001B[38;5;124m'\u001B[39m: name}: \u001B[38;5;66;03m# <4>\u001B[39;00m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m [name]\n\u001B[1;32m 11\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m \u001B[38;5;28;01m_\u001B[39;00m: \u001B[38;5;66;03m# <5>\u001B[39;00m\n\u001B[1;32m 12\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mInvalid record: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mrecord\u001B[38;5;132;01m!r}\u001B[39;00m\u001B[38;5;124m'\u001B[39m)\n", + "\u001B[0;31mValueError\u001B[0m: Invalid 'book' record: {'type': 'book', 'pages': 770}" + ] + } + ], + "source": [ + "get_creators({'type': 'book', 'pages': 770})" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Invalid record: 'Spam, spam, spam'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)", + "Input \u001B[0;32mIn [25]\u001B[0m, in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43mget_creators\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mSpam, spam, 
spam\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n", + "Input \u001B[0;32mIn [20]\u001B[0m, in \u001B[0;36mget_creators\u001B[0;34m(record)\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mget_creators\u001B[39m(record: \u001B[38;5;28mdict\u001B[39m) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;28mlist\u001B[39m:\n\u001B[1;32m 2\u001B[0m \u001B[38;5;28;01mmatch\u001B[39;00m record:\n\u001B[1;32m 3\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mapi\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;241m2\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauthors\u001B[39m\u001B[38;5;124m'\u001B[39m: [\u001B[38;5;241m*\u001B[39mnames]}: \u001B[38;5;66;03m# <1>\u001B[39;00m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m names\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mapi\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;241m1\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mauthor\u001B[39m\u001B[38;5;124m'\u001B[39m: name}: \u001B[38;5;66;03m# <2>\u001B[39;00m\n\u001B[1;32m 6\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m [name]\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m}: \u001B[38;5;66;03m# <3>\u001B[39;00m\n\u001B[1;32m 8\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m 
\u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mInvalid \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mbook\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m record: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mrecord\u001B[38;5;132;01m!r}\u001B[39;00m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 9\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m {\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtype\u001B[39m\u001B[38;5;124m'\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmovie\u001B[39m\u001B[38;5;124m'\u001B[39m, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mdirector\u001B[39m\u001B[38;5;124m'\u001B[39m: name}: \u001B[38;5;66;03m# <4>\u001B[39;00m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m [name]\n\u001B[1;32m 11\u001B[0m \u001B[38;5;28;01mcase\u001B[39;00m \u001B[38;5;28;01m_\u001B[39;00m: \u001B[38;5;66;03m# <5>\u001B[39;00m\n\u001B[0;32m---> 12\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mInvalid record: \u001B[39m\u001B[38;5;132;01m{\u001B[39;00mrecord\u001B[38;5;132;01m!r}\u001B[39;00m\u001B[38;5;124m'\u001B[39m)\n", + "\u001B[0;31mValueError\u001B[0m: Invalid record: 'Spam, spam, spam'" + ] + } + ], + "source": [ + "get_creators('Spam, spam, spam')" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Ice cream details: {'flavor': 'vanilla', 'cost': 199}\n" + ] + } + ], + "source": [ + "food = dict(category='ice cream', flavor='vanilla', cost=199)\n", + "match food:\n", + " case {'category': 'ice cream', **details}:\n", + " print(f'Ice cream details: {details}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Standard API of Mapping Types" + ] 
+ }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What is Hashable" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "my_dict = {}\n", + "isinstance(my_dict, abc.Mapping)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "isinstance(my_dict, abc.MutableMapping)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "-3907003130834322577" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tt = (1, 2, (30, 40))\n", + "hash(tt)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "unhashable type: 'list'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)", + "Input \u001B[0;32mIn [30]\u001B[0m, in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[1;32m 1\u001B[0m tl \u001B[38;5;241m=\u001B[39m (\u001B[38;5;241m1\u001B[39m, \u001B[38;5;241m2\u001B[39m, [\u001B[38;5;241m30\u001B[39m, \u001B[38;5;241m40\u001B[39m])\n\u001B[0;32m----> 2\u001B[0m \u001B[38;5;28;43mhash\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mtl\u001B[49m\u001B[43m)\u001B[49m\n", + "\u001B[0;31mTypeError\u001B[0m: unhashable 
type: 'list'" + ] + } + ], + "source": [ + "tl = (1, 2, [30, 40])\n", + "hash(tl)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5149391500123939311" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tf = (1, 2, frozenset([30, 40]))\n", + "hash(tf)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### Inserting or Updating Mutable Values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 3-4. [`index0.py`](index0.py): Processing the \"Zen of Python\"; each line shows a word and a list of occurrences coded as pairs" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "# !python3 index0.py zen.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "source": [ + "#### Example 3-5. [`index.py`](index.py) uses `dict.setdefault` to fetch and update a list of word occurrences from the index in a single line; contrast with Example 3-4." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# python3 index.py zen.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Automatic handling of Missing Keys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `defaultdict`: Another take on missing keys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 3-6. 
[`index_default.py`](index_default.py): using defaultdict instead of the setdefault method" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "scrolled": true, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# !python index_default.py zen.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The `__missing__` method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 3-7. When searching for a nonstring key, [`StrKeyDict0`](strkeydict0.py) converts it to `str` when it is not found" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'two'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from strkeydict0 import StrKeyDict0\n", + "\n", + "d = StrKeyDict0([('2', 'two'), ('4', 'four')])\n", + "d['2']" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'four'" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[4]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'1'", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", + "Input \u001B[0;32mIn [37]\u001B[0m, in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43md\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;241;43m1\u001B[39;49m\u001B[43m]\u001B[49m\n", + "File \u001B[0;32m~/git/training/example-code-2e/03-dict-set/strkeydict0.py:44\u001B[0m, in \u001B[0;36mStrKeyDict0.__missing__\u001B[0;34m(self, 
key)\u001B[0m\n\u001B[1;32m 42\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(key, \u001B[38;5;28mstr\u001B[39m): \u001B[38;5;66;03m# <2>\u001B[39;00m\n\u001B[1;32m 43\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key)\n\u001B[0;32m---> 44\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m[\u001B[49m\u001B[38;5;28;43mstr\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\u001B[43m]\u001B[49m\n", + "File \u001B[0;32m~/git/training/example-code-2e/03-dict-set/strkeydict0.py:43\u001B[0m, in \u001B[0;36mStrKeyDict0.__missing__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 41\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21m__missing__\u001B[39m(\u001B[38;5;28mself\u001B[39m, key):\n\u001B[1;32m 42\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(key, \u001B[38;5;28mstr\u001B[39m): \u001B[38;5;66;03m# <2>\u001B[39;00m\n\u001B[0;32m---> 43\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key)\n\u001B[1;32m 44\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m[\u001B[38;5;28mstr\u001B[39m(key)]\n", + "\u001B[0;31mKeyError\u001B[0m: '1'" + ] + } + ], + "source": [ + "d[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'two'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.get('2')" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'four'" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.get(4)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'N/A'" + ] + }, + 
"execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d.get(1, 'N/A')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "2 in d" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1 in d" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Variations of `dict`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `collections.ChainMap`" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1 = dict(a=1, b=3)\n", + "d2 = dict(a=2, b=4, c=6)\n", + "from collections import ChainMap\n", + "chain = ChainMap(d1, d2)\n", + "chain['a']" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain['c']" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': 1, 'b': 3, 'c': -1}" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain['c'] = -1\n", + "d1" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': 2, 'b': 4, 'c': 6}" + ] + }, + "execution_count": 46, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "d2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `collections.Counter`" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import collections\n", + "ct = collections.Counter('abracadabra')\n", + "ct" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Counter({'a': 10, 'b': 2, 'r': 2, 'c': 1, 'd': 1, 'z': 3})" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ct.update('aaaaazzz')\n", + "ct" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('a', 10), ('z', 3), ('b', 2)]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ct.most_common(3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Immutable Mappings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 3-10. 
`MappingProxyType` builds a read-only `mappingproxy` instance from a `dict`" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mappingproxy({1: 'A'})" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from types import MappingProxyType\n", + "d = {1: 'A'}\n", + "d_proxy = MappingProxyType(d)\n", + "d_proxy" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'A'" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_proxy[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'mappingproxy' object does not support item assignment", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)", + "Input \u001B[0;32mIn [52]\u001B[0m, in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m d_proxy[\u001B[38;5;241m2\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mx\u001B[39m\u001B[38;5;124m'\u001B[39m\n", + "\u001B[0;31mTypeError\u001B[0m: 'mappingproxy' object does not support item assignment" + ] + } + ], + "source": [ + "d_proxy[2] = 'x'" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mappingproxy({1: 'A', 2: 'B'})" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d[2] = 'B'\n", + "d_proxy" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'B'" + ] 
+ }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d_proxy[2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dictionary Views" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 3-11. The `.values()` method returns a view of the values in a `dict`" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_values([10, 20, 30])" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d = dict(a=10, b=20, c=30)\n", + "values = d.values()\n", + "values" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(values)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[10, 20, 30]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(values)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reversed(values)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'dict_values' object is not subscriptable", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)", + "Input \u001B[0;32mIn [59]\u001B[0m, in 
\u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43mvalues\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;241;43m0\u001B[39;49m\u001B[43m]\u001B[49m\n", + "\u001B[0;31mTypeError\u001B[0m: 'dict_values' object is not subscriptable" + ] + } + ], + "source": [ + "values[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': 10, 'b': 20, 'c': 30, 'z': 99}" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d['z'] = 99\n", + "d" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_values([10, 20, 30, 99])" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "values" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "cannot create 'dict_values' instances", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)", + "Input \u001B[0;32mIn [62]\u001B[0m, in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[1;32m 1\u001B[0m values_class \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mtype\u001B[39m({}\u001B[38;5;241m.\u001B[39mvalues())\n\u001B[0;32m----> 2\u001B[0m v \u001B[38;5;241m=\u001B[39m \u001B[43mvalues_class\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n", + "\u001B[0;31mTypeError\u001B[0m: cannot create 'dict_values' instances" + ] + } + ], + "source": [ + "values_class = type({}.values())\n", + "v = values_class()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set Theory" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, +
"outputs": [ + { + "data": { + "text/plain": [ + "{'bacon', 'eggs', 'spam'}" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l = ['spam', 'spam', 'eggs', 'spam', 'bacon', 'eggs']\n", + "set(l)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['bacon', 'spam', 'eggs']" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(set(l))" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['spam', 'eggs', 'bacon'])" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Preserve the order of the first occurrence\n", + "dict.fromkeys(l).keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['spam', 'eggs', 'bacon']" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(dict.fromkeys(l).keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set Literals" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "set" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = {1}\n", + "type(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{1}" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 69, + "metadata":
{}, + "output_type": "execute_result" + } + ], + "source": [ + "s.pop()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "set()" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "frozenset(range(10))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set Comprehensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 3-15. Build a set of Latin-1 characters that have the word “SIGN” in their Unicode names" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'#',\n", + " '$',\n", + " '%',\n", + " '+',\n", + " '<',\n", + " '=',\n", + " '>',\n", + " '¢',\n", + " '£',\n", + " '¤',\n", + " '¥',\n", + " '§',\n", + " '©',\n", + " '¬',\n", + " '®',\n", + " '°',\n", + " '±',\n", + " 'µ',\n", + " '¶',\n", + " '×',\n", + " '÷'}" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unicodedata import name\n", + "{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i),'')}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set Operations on dict Views" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'b', 'd'}" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1 = dict(a=1, b=2, c=3, d=4)\n", + "d2 = dict(b=20, d=40, e=50)\n", + "d1.keys() & d2.keys()" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a'}" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = {'a', 'e', 'i'}\n", + "d1.keys() & s" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a', 'b', 'c', 'd', 'e', 'i'}" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.keys() | s" + ] } ], "metadata": { @@ -377,9 +1960,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.0" + "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/03-dict-set/README.md b/03-dict-set/README.md index 6a059a1..fc29774 100644 --- a/03-dict-set/README.md +++ b/03-dict-set/README.md @@ -8,7 +8,7 @@ Sample code for Chapter 3 of _Fluent Python 2e_ by Luciano Ramalho (O'Reilly, 20 Use Python's standard ``doctest`` module, for example: - $ python3 -m doctest bisect_demo.py -v + $ python3 -m doctest creator.py -v ### Jupyter Notebook From 2d483ceef1c34ad499ffa6367e5d590e274be1a4 Mon Sep 17 00:00:00 2001 From: Scott McCormack Date: Sat, 17 Sep 2022 20:29:48 +0800 Subject: [PATCH 2/4] ch04: notebook and README.md updates (#2) * Add ch04 notebook * ch04: README.rst updates for 2nd ed * Ignore cafe.txt --- 04-text-byte/.gitignore | 1 + 04-text-byte/04-text-byte.ipynb | 2238 +++++++++++++++++++++++++++++++ 04-text-byte/README.rst | 6 +- 3 files changed, 2242 insertions(+), 3 deletions(-) create mode 100644 04-text-byte/04-text-byte.ipynb diff --git a/04-text-byte/.gitignore b/04-text-byte/.gitignore index 421376d..b9ad381 100644 --- a/04-text-byte/.gitignore +++ b/04-text-byte/.gitignore @@ -1 +1,2 @@ dummy +cafe.txt diff --git a/04-text-byte/04-text-byte.ipynb b/04-text-byte/04-text-byte.ipynb new file 
mode 100644 index 0000000..eb566fc --- /dev/null +++ b/04-text-byte/04-text-byte.ipynb @@ -0,0 +1,2238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9bbc5481", + "metadata": {}, + "source": [ + "# Chapter 4 — Unicode Text Versus Bytes" + ] + }, + { + "cell_type": "markdown", + "id": "ca979304", + "metadata": {}, + "source": [ + "## Character Issues" + ] + }, + { + "cell_type": "markdown", + "id": "dcbc6239", + "metadata": {}, + "source": [ + "#### Example 4-1. Encoding and decoding" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6a3b3a4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = 'café'\n", + "len(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "53523bce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'caf\\xc3\\xa9'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b = s.encode('utf8')\n", + "b" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6ba8b10d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(b)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "92f1f056", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'café'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b.decode('utf8')" + ] + }, + { + "cell_type": "markdown", + "id": "f9a606ee", + "metadata": {}, + "source": [ + "## Byte Essentials" + ] + }, + { + "cell_type": "markdown", + "id": "7c1ec7f1", + "metadata": {}, + "source": [ + "#### Example 4-2. 
A five-byte sequence as `bytes` and as `bytearray`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3992eecc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'caf\\xc3\\xa9'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cafe = bytes('café', encoding='utf_8')\n", + "cafe" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c9dbce26", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "99" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cafe[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cbe46c5b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'c'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cafe[:1]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d29feb33", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "bytearray(b'caf\\xc3\\xa9')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cafe_arr = bytearray(cafe)\n", + "cafe_arr" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "dd6fb270", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "bytearray(b'\\xa9')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cafe_arr[-1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cf5f74b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'1K\\xce\\xa9'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bytes.fromhex('31 4B CE A9')" + ] + }, + { + "cell_type": "markdown", + "id": "d9ff5124", + "metadata": {}, + "source": [ + "#### Example 4-3. 
Initializing bytes from the raw data of an array" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fe69c5fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'\\xfe\\xff\\xff\\xff\\x00\\x00\\x01\\x00\\x02\\x00'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import array\n", + "numbers = array.array('h', [-2, -1, 0, 1, 2])\n", + "octets = bytes(numbers)\n", + "octets" + ] + }, + { + "cell_type": "markdown", + "id": "7bf1245c", + "metadata": {}, + "source": [ + "### Basic Encoders/Decoders" + ] + }, + { + "cell_type": "markdown", + "id": "ff4ae5c5", + "metadata": {}, + "source": [ + "#### Example 4-4. The string “El Niño” encoded with three codecs producing very different byte sequences" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "60c8b066", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "latin_1\tb'El Ni\\xf1o'\n", + "utf_8\tb'El Ni\\xc3\\xb1o'\n", + "utf_16\tb'\\xff\\xfeE\\x00l\\x00 \\x00N\\x00i\\x00\\xf1\\x00o\\x00'\n" + ] + } + ], + "source": [ + "for codec in ['latin_1', 'utf_8', 'utf_16']:\n", + " print(codec, 'El Niño'.encode(codec), sep='\\t')" + ] + }, + { + "cell_type": "markdown", + "id": "0204104d", + "metadata": {}, + "source": [ + "## Understanding Encode/Decode Problems" + ] + }, + { + "cell_type": "markdown", + "id": "0547bdf0", + "metadata": {}, + "source": [ + "### Coping with UnicodeEncodeError" + ] + }, + { + "cell_type": "markdown", + "id": "b9edceb8", + "metadata": {}, + "source": [ + "#### Example 4-5.
Encoding to bytes: success and error handling" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "7aa1d383", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'S\\xc3\\xa3o Paulo'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city = 'São Paulo'\n", + "city.encode('utf_8')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "69e2ea38", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'\\xff\\xfeS\\x00\\xe3\\x00o\\x00 \\x00P\\x00a\\x00u\\x00l\\x00o\\x00'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city.encode('utf_16')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8cf10d99", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'S\\xe3o Paulo'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city.encode('iso8859_1')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "0a0becf5", + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "UnicodeEncodeError", + "evalue": "'charmap' codec can't encode character '\\xe3' in position 1: character maps to <undefined>", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mUnicodeEncodeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [16]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcity\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcp437\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/fluent-python/lib/python3.10/encodings/cp437.py:12\u001b[0m, in
\u001b[0;36mCodec.encode\u001b[0;34m(self, input, errors)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mencode\u001b[39m(\u001b[38;5;28mself\u001b[39m,\u001b[38;5;28minput\u001b[39m,errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstrict\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcodecs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcharmap_encode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43mencoding_map\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mUnicodeEncodeError\u001b[0m: 'charmap' codec can't encode character '\\xe3' in position 1: character maps to <undefined>" + ] + } + ], + "source": [ + "city.encode('cp437')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "fe6eae98", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'So Paulo'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city.encode('cp437', errors='ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "11e913e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'S?o Paulo'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city.encode('cp437', errors='replace')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "c6e189b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'S&#227;o Paulo'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city.encode('cp437', errors='xmlcharrefreplace')" + ] + }, + { + "cell_type": "markdown", + "id": "cd350b72", + "metadata": {}, + "source": [ + "### Coping with UnicodeDecodeError" + ] + }, + { + "cell_type": "markdown", +
"id": "cabd0d66", + "metadata": {}, + "source": [ + "#### Example 4-6. Decoding from bytes to str: success and error handling" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7f1faf3c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Montréal'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "octets = b'Montr\\xe9al'\n", + "octets.decode('cp1252')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "8fd19098", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Montrιal'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "octets.decode('iso8859_7')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a3d75506", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'MontrИal'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "octets.decode('koi8_r')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "a245459d", + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "UnicodeDecodeError", + "evalue": "'utf-8' codec can't decode byte 0xe9 in position 5: invalid continuation byte", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [23]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43moctets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mutf_8\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0xe9 in
position 5: invalid continuation byte" + ] + } + ], + "source": [ + "octets.decode('utf_8')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "52bcaa93", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Montr�al'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "octets.decode('utf_8', errors='replace')" + ] + }, + { + "cell_type": "markdown", + "id": "601a4482", + "metadata": {}, + "source": [ + "### SyntaxError when Loading Modules with Unexpected Encoding" + ] + }, + { + "cell_type": "markdown", + "id": "b5de81cc", + "metadata": {}, + "source": [ + "#### Example 4-7. [ola.py](ola.py): “Hello, World!” in Portuguese" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "7edc2d56", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Olá, Mundo!\r\n" + ] + } + ], + "source": [ + "!python3 ola.py" + ] + }, + { + "cell_type": "markdown", + "id": "a38ba9db", + "metadata": {}, + "source": [ + "### BOM: A Useful Gremlin" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "ce7d9a63", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'\\xff\\xfeE\\x00l\\x00 \\x00N\\x00i\\x00\\xf1\\x00o\\x00'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u16 = 'El Niño'.encode('utf_16')\n", + "u16" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "1969bff8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[255, 254, 69, 0, 108, 0, 32, 0, 78, 0, 105, 0, 241, 0, 111, 0]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(u16)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "6915d186", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[69, 0, 108, 0, 32, 0, 78, 0, 105, 0, 241, 
0, 111, 0]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u16le = 'El Niño'.encode('utf_16le')\n", + "list(u16le)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ab36987f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 69, 0, 108, 0, 32, 0, 78, 0, 105, 0, 241, 0, 111]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "u16be = 'El Niño'.encode('utf_16be')\n", + "list(u16be)" + ] + }, + { + "cell_type": "markdown", + "id": "c988acf3", + "metadata": {}, + "source": [ + "## Handling Text Files" + ] + }, + { + "cell_type": "markdown", + "id": "f75ef2a6", + "metadata": {}, + "source": [ + "#### Example 4-8. A platform encoding issue" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "8b3e254a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "open('cafe.txt', 'w', encoding='utf_8').write('café')" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "2060dc22", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'cafÃ©'" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Note: We are forcing the bug by assigning the encoding 'cp1252' in this example\n", + "open('cafe.txt', encoding='cp1252').read()" + ] + }, + { + "cell_type": "markdown", + "id": "8854f3ae", + "metadata": {}, + "source": [ + "#### Example 4-9.
Closer inspection of Example 4-8 running on Windows reveals the bug and how to fix it" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "01032ff2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<_io.TextIOWrapper name='cafe.txt' mode='w' encoding='utf_8'>" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp = open('cafe.txt', 'w', encoding='utf_8')\n", + "fp" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "4e09658b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp.write('café')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "df0be9ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp.close()\n", + "import os\n", + "os.stat('cafe.txt').st_size" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "3cbb9923", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<_io.TextIOWrapper name='cafe.txt' mode='r' encoding='cp1252'>" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Note: We are forcing the issue by assigning the encoding 'cp1252' in this example\n", + "fp2 = open('cafe.txt', encoding='cp1252')\n", + "fp2" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "6a73e37f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'cp1252'" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp2.encoding" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "40648ac6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"<_io.TextIOWrapper name='cafe.txt' mode='r' encoding='utf_8'>" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp3 = open('cafe.txt', encoding='utf_8')\n", + "fp3" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "987b598d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'café'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp3.read()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "68ebf58c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<_io.BufferedReader name='cafe.txt'>" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp4 = open('cafe.txt', 'rb')\n", + "fp4" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "e16c94e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "b'caf\\xc3\\xa9'" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fp4.read()" + ] + }, + { + "cell_type": "markdown", + "id": "be0f460d", + "metadata": {}, + "source": [ + "### Beware of Encoding Defaults" + ] + }, + { + "cell_type": "markdown", + "id": "b73e7809", + "metadata": {}, + "source": [ + "#### Example 4-10. 
Exploring encoding defaults: [default_encodings.py](default_encodings.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "e2707f00", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " locale.getpreferredencoding() -> 'UTF-8'\r\n", + " type(my_file) -> <class '_io.TextIOWrapper'>\r\n", + " my_file.encoding -> 'UTF-8'\r\n", + " sys.stdout.isatty() -> True\r\n", + " sys.stdout.encoding -> 'utf-8'\r\n", + " sys.stdin.isatty() -> True\r\n", + " sys.stdin.encoding -> 'utf-8'\r\n", + " sys.stderr.isatty() -> True\r\n", + " sys.stderr.encoding -> 'utf-8'\r\n", + " sys.getdefaultencoding() -> 'utf-8'\r\n", + " sys.getfilesystemencoding() -> 'utf-8'\r\n" + ] + } + ], + "source": [ + "!python default_encodings.py # Unix machine output" + ] + }, + { + "cell_type": "markdown", + "id": "51a80f0f", + "metadata": {}, + "source": [ + "#### Example 4-12. [stdout_check.py](stdout_check.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "4a8166e7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.10.4 (main, Mar 31 2022, 08:41:55) [GCC 7.5.0]\r\n", + "\r\n", + "sys.stdout.isatty(): True\r\n", + "sys.stdout.encoding: utf-8\r\n", + "\r\n", + "Trying to output HORIZONTAL ELLIPSIS:\r\n", + "…\r\n", + "Trying to output INFINITY:\r\n", + "∞\r\n", + "Trying to output CIRCLED NUMBER FORTY TWO:\r\n", + "㊷\r\n" + ] + } + ], + "source": [ + "!python stdout_check.py # Unix machine output" + ] + }, + { + "cell_type": "markdown", + "id": "60bada3a", + "metadata": {}, + "source": [ + "## Normalizing Unicode for Reliable Comparisons" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "09177041", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('café', 'café')" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1 = 'café'\n", + "s2 = 'cafe\\N{COMBINING ACUTE ACCENT}'\n", + "s1, s2" +
] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "c4140d32", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 5)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(s1), len(s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "2a8c687f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1 == s2" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "6dd29d8d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 5)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unicodedata import normalize\n", + "s1 = 'café'\n", + "s2 = 'cafe\\N{COMBINING ACUTE ACCENT}'\n", + "len(s1), len(s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "2abcf125", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 4)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(normalize('NFC', s1)), len(normalize('NFC', s2))" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "be5825d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 5)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(normalize('NFD', s1)), len(normalize('NFD', s2))" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "dafcd872", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalize('NFC', s1) == normalize('NFC', s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": 
"f54dd326", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalize('NFD', s1) == normalize('NFD', s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "2cbde3b5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'OHM SIGN'" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unicodedata import name\n", + "ohm = '\\u2126'\n", + "name(ohm)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "76f3b515", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'GREEK CAPITAL LETTER OMEGA'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ohm_c = normalize('NFC', ohm)\n", + "name(ohm_c)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "fb2f0252", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ohm == ohm_c" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "d047eba9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalize('NFC', ohm) == normalize('NFC', ohm_c)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "0196edb7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "½\n" + ] + } + ], + "source": [ + "half = '\\N{VULGAR FRACTION ONE HALF}'\n", + "print(half)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "086f76a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1⁄2'" + ] + }, + "execution_count": 
56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalize('NFKC', half)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "bf79ba8e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\tDIGIT ONE\n", + "⁄\tFRACTION SLASH\n", + "2\tDIGIT TWO\n" + ] + } + ], + "source": [ + "for char in normalize('NFKC', half):\n", + " print(char, name(char), sep='\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "60e5a57a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'42'" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "four_squared = '4²'\n", + "normalize('NFKC', four_squared)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "72ce42f5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('µ', 'μ')" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "micro = 'µ'\n", + "micro_kc = normalize('NFKC', micro)\n", + "micro, micro_kc" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "c5d23eb6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(181, 956)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ord(micro), ord(micro_kc)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "63e672c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('MICRO SIGN', 'GREEK SMALL LETTER MU')" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "name(micro), name(micro_kc)" + ] + }, + { + "cell_type": "markdown", + "id": "18f6e399", + "metadata": {}, + "source": [ + "### Case Folding" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "99edfd13", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "'MICRO SIGN'" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "micro = 'µ'\n", + "name(micro)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "9f8c6938", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'GREEK SMALL LETTER MU'" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "micro_cf = micro.casefold()\n", + "name(micro_cf)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "a1eb1706", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('µ', 'μ')" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "micro, micro_cf" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "175f9b49", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'LATIN SMALL LETTER SHARP S'" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eszett = 'ß'\n", + "name(eszett)" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "8e80d6c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('ß', 'ss')" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "eszett_cf = eszett.casefold()\n", + "eszett, eszett_cf" + ] + }, + { + "cell_type": "markdown", + "id": "43a3a7d9", + "metadata": {}, + "source": [ + "### Utility Functions for Normalized Text Matching" + ] + }, + { + "cell_type": "markdown", + "id": "6d1e8f67", + "metadata": {}, + "source": [ + "#### Example 4-13. 
[normeq.py](normeq.py): normalized Unicode string comparison" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "ca8c7d47", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from normeq import nfc_equal\n", + "s1 = 'café'\n", + "s2 = 'cafe\\u0301'\n", + "s1 == s2" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "e7f35dfc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nfc_equal(s1, s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "69e21e21", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nfc_equal('A', 'a')" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "0ee92f88", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from normeq import fold_equal\n", + "s3 = 'Straße'\n", + "s4 = 'strasse'\n", + "s3 == s4" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "f0477359", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nfc_equal(s3, s4)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "395d4b33", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fold_equal(s3, s4)" + ] + }, + { + "cell_type": "code", + "execution_count": 
73, + "id": "c66f0238", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fold_equal(s1, s2)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "14157966", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fold_equal('A', 'a')" + ] + }, + { + "cell_type": "markdown", + "id": "af41b1ad", + "metadata": {}, + "source": [ + "### Extreme “Normalization”: Taking Out Diacritics" + ] + }, + { + "cell_type": "markdown", + "id": "0c8c2c61", + "metadata": {}, + "source": [ + "#### Example 4-14. [simplify.py](simplify.py): function to remove all combining marks" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "0506ed43", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'“Herr Voß: • ½ cup of Œtker™ caffe latte • bowl of acai.”'" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from simplify import shave_marks\n", + "order = '“Herr Voß: • ½ cup of Œtker™ caffè latte • bowl of açaí.”'\n", + "shave_marks(order)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "b074ef2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Ζεφυρος, Zefiro'" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Greek = 'Ζέφυρος, Zéfiro'\n", + "shave_marks(Greek)" + ] + }, + { + "cell_type": "markdown", + "id": "afe70e19", + "metadata": {}, + "source": [ + "#### Example 4-16. 
Function to remove combining marks from Latin characters" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "1b9c02a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'“Herr Voß: • ½ cup of Œtker™ caffe latte • bowl of acai.”'" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from simplify import shave_marks_latin\n", + "shave_marks_latin(order)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "a00b8761", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Ζέφυρος, Zefiro'" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shave_marks_latin(Greek)" + ] + }, + { + "cell_type": "markdown", + "id": "5300f138", + "metadata": {}, + "source": [ + "#### Example 4-17. Transform some Western typographical symbols into ASCII" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "d230e514", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\"Herr Voß: - ½ cup of OEtker(TM) caffè latte - bowl of açaí.\"'" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from simplify import dewinize, asciize\n", + "dewinize(order)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "40c30bd2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\"Herr Voss: - 1⁄2 cup of OEtker(TM) caffe latte - bowl of acai.\"'" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "asciize(order)" + ] + }, + { + "cell_type": "markdown", + "id": "55173114", + "metadata": {}, + "source": [ + "## Sorting Unicode Text" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "28d9bdff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['acerola', 'atemoia', 'açaí', 'caju', 'cajá']" + ] + 
}, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fruits = ['caju', 'atemoia', 'cajá', 'açaí', 'acerola']\n", + "sorted(fruits)\n", + "# Ideal result should be: ['açaí', 'acerola', 'atemoia', 'cajá', 'caju']" + ] + }, + { + "cell_type": "markdown", + "id": "e83c363f", + "metadata": {}, + "source": [ + "\n", + "#### Example 4-19. [locale_sort.py](locale_sort.py): using the `locale.strxfrm` function as the sort key\n", + "\n", + "**Note**: Requires the locale '`pt_BR.UTF-8`' to be installed " + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "c5bb098a", + "metadata": {}, + "outputs": [], + "source": [ + "# !python locale_sort.py" + ] + }, + { + "cell_type": "markdown", + "id": "6feb4d63", + "metadata": {}, + "source": [ + "### Sorting with the Unicode Collation Algorithm" + ] + }, + { + "cell_type": "markdown", + "id": "c6c3d053", + "metadata": {}, + "source": [ + "#### Example 4-20. Using the pyuca.Collator.sort_key method" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "347d71a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['açaí', 'acerola', 'atemoia', 'cajá', 'caju']" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pyuca\n", + "coll = pyuca.Collator()\n", + "sorted_fruits = sorted(fruits, key=coll.sort_key)\n", + "sorted_fruits" + ] + }, + { + "cell_type": "markdown", + "id": "df4396ea", + "metadata": {}, + "source": [ + "## The Unicode Database" + ] + }, + { + "cell_type": "markdown", + "id": "38e8db95", + "metadata": {}, + "source": [ + "### Finding Characters by Name" + ] + }, + { + "cell_type": "markdown", + "id": "6d6278c3", + "metadata": {}, + "source": [ + "#### Example 4-21. 
[cf.py](charfinder/cf.py): the character finder utility" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "9d3ff51b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "U+1F638\t😸\tGRINNING CAT FACE WITH SMILING EYES\r\n", + "U+1F63A\t😺\tSMILING CAT FACE WITH OPEN MOUTH\r\n", + "U+1F63B\t😻\tSMILING CAT FACE WITH HEART-SHAPED EYES\r\n" + ] + } + ], + "source": [ + "!python3 charfinder/cf.py smiling cat" + ] + }, + { + "cell_type": "markdown", + "id": "f7913801", + "metadata": {}, + "source": [ + "### Numeric Meaning of Characters" + ] + }, + { + "cell_type": "markdown", + "id": "fe0cc8e5", + "metadata": {}, + "source": [ + "#### Example 4-22. [numerics_demo.py](numerics_demo.py): Demo of Unicode database numerical character metadata (callouts describe each column in the output)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "86ab5e1e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "U+0031\t 1 \tre_dig\tisdig\tisnum\t 1.00\tDIGIT ONE\r\n", + "U+00bc\t ¼ \t-\t-\tisnum\t 0.25\tVULGAR FRACTION ONE QUARTER\r\n", + "U+00b2\t ² \t-\tisdig\tisnum\t 2.00\tSUPERSCRIPT TWO\r\n", + "U+0969\t ३ \tre_dig\tisdig\tisnum\t 3.00\tDEVANAGARI DIGIT THREE\r\n", + "U+136b\t ፫ \t-\tisdig\tisnum\t 3.00\tETHIOPIC DIGIT THREE\r\n", + "U+216b\t Ⅻ \t-\t-\tisnum\t12.00\tROMAN NUMERAL TWELVE\r\n", + "U+2466\t ⑦ \t-\tisdig\tisnum\t 7.00\tCIRCLED DIGIT SEVEN\r\n", + "U+2480\t ⒀ \t-\t-\tisnum\t13.00\tPARENTHESIZED NUMBER THIRTEEN\r\n", + "U+3285\t ㊅ \t-\t-\tisnum\t 6.00\tCIRCLED IDEOGRAPH SIX\r\n" + ] + } + ], + "source": [ + "!python3 numerics_demo.py" + ] + }, + { + "cell_type": "markdown", + "id": "3153f715", + "metadata": {}, + "source": [ + "## Dual-Mode str and bytes APIs" + ] + }, + { + "cell_type": "markdown", + "id": "ff0c0af0", + "metadata": {}, + "source": [ + "### str Versus bytes in Regular Expressions" + ] + }, + { + "cell_type": "markdown", + "id":
"bffe3b5f", + "metadata": {}, + "source": [ + "#### Example 4-23. [ramanujan.py](ramanujan.py): compare behavior of simple str and bytes regular expressions" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "7a831ae4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Text\r\n", + " 'Ramanujan saw ௧௭௨௯ as 1729 = 1³ + 12³ = 9³ + 10³.'\r\n", + "Numbers\r\n", + " str : ['௧௭௨௯', '1729', '1', '12', '9', '10']\r\n", + " bytes: [b'1729', b'1', b'12', b'9', b'10']\r\n", + "Words\r\n", + " str : ['Ramanujan', 'saw', '௧௭௨௯', 'as', '1729', '1³', '12³', '9³', '10³']\r\n", + " bytes: [b'Ramanujan', b'saw', b'as', b'1729', b'1', b'12', b'9', b'10']\r\n" + ] + } + ], + "source": [ + "!python3 ramanujan.py" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/04-text-byte/README.rst b/04-text-byte/README.rst index 60e8138..de75cb6 100644 --- a/04-text-byte/README.rst +++ b/04-text-byte/README.rst @@ -1,4 +1,4 @@ -Sample code for Chapter 4 - "Text and bytes" +Sample code for Chapter 4 - "Unicode Text Versus Bytes" -From the book "Fluent Python" by Luciano Ramalho (O'Reilly, 2015) -http://shop.oreilly.com/product/0636920032519.do +From the book **Fluent Python, Second Edition** by Luciano Ramalho (O'Reilly, 2022). 
+https://learning.oreilly.com/library/view/fluent-python-2nd/9781492056348/ From f13cf2990c77bf428c5deeeaf649853a3b832060 Mon Sep 17 00:00:00 2001 From: Scott McCormack Date: Sat, 17 Sep 2022 20:31:50 +0800 Subject: [PATCH 3/4] ch05: notebook addition (#3) * Init ch05 notebook * Additions to ch05 notebook --- 05-data-classes/05-data-classes.ipynb | 1774 +++++++++++++++++++++++++ 1 file changed, 1774 insertions(+) create mode 100644 05-data-classes/05-data-classes.ipynb diff --git a/05-data-classes/05-data-classes.ipynb b/05-data-classes/05-data-classes.ipynb new file mode 100644 index 0000000..aea9d71 --- /dev/null +++ b/05-data-classes/05-data-classes.ipynb @@ -0,0 +1,1774 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%%\n" + } + }, + "source": [ + "# Chapter 5 — Data Class Builders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview of Data Class Builders" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-1. 
[class/coordinates.py](class/coordinates.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "class Coordinate:\n", + "\n", + " def __init__(self, lat, lon):\n", + " self.lat = lat\n", + " self.lon = lon" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<__main__.Coordinate at 0x7f117e307b80>" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "moscow = Coordinate(55.76, 37.62)\n", + "moscow" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "location = Coordinate(55.76, 37.62)\n", + "location == moscow" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(location.lat, location.lon) == (moscow.lat, moscow.lon)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Coordinate` class built with `collections.namedtuple`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from collections import namedtuple\n", + "Coordinate = namedtuple('Coordinate', 'lat lon')\n", + "issubclass(Coordinate, tuple)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Coordinate(lat=55.756, lon=37.617)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"moscow = Coordinate(55.756, 37.617)\n", + "moscow" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "moscow == Coordinate(55.756, 37.617)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Coordinate` class built with `typing.NamedTuple`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import typing\n", + "Coordinate = typing.NamedTuple('Coordinate', [('lat', float), ('lon', float)])\n", + "# Coordinate = typing.NamedTuple('Coordinate', lat=float, lon=float)\n", + "issubclass(Coordinate, tuple)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'lat': float, 'lon': float}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "typing.get_type_hints(Coordinate)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-2. 
[typing_namedtuple/coordinates.py](typing_namedtuple/coordinates.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import NamedTuple\n", + "\n", + "class Coordinate(NamedTuple):\n", + " lat: float\n", + " lon: float\n", + "\n", + " def __str__(self):\n", + " ns = 'N' if self.lat >= 0 else 'S'\n", + " we = 'E' if self.lon >= 0 else 'W'\n", + " return f'{abs(self.lat):.1f}°{ns}, {abs(self.lon):.1f}°{we}'" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "issubclass() arg 2 must be a class, a tuple of classes, or a union\n" + ] + } + ], + "source": [ + "try:\n", + " issubclass(Coordinate, typing.NamedTuple)\n", + "except TypeError as err:\n", + " print(err)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issubclass(Coordinate, tuple)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-3. 
[dataclass/coordinates.py](dataclass/coordinates.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "\n", + "@dataclass(frozen=True)\n", + "class Coordinate:\n", + " lat: float\n", + " lon: float\n", + "\n", + " def __str__(self):\n", + " ns = 'N' if self.lat >= 0 else 'S'\n", + " we = 'E' if self.lon >= 0 else 'W'\n", + " return f'{abs(self.lat):.1f}°{ns}, {abs(self.lon):.1f}°{we}'" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issubclass(Coordinate, object)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classic Named Tuples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-4. Defining and using a named tuple type" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "City = namedtuple('City', 'name country population coordinates')\n", + "tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))\n", + "tokyo" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "36.933" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokyo.population" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(35.689722, 139.691667)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + 
} + ], + "source": [ + "tokyo.coordinates" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'JP'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tokyo[1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-5. Named tuple attributes and methods (continued from the previous example)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('name', 'country', 'population', 'coordinates')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "City._fields" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'Delhi NCR',\n", + " 'country': 'IN',\n", + " 'population': 21.935,\n", + " 'coordinates': Coordinate(lat=28.613889, lon=77.208889)}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Coordinate = namedtuple('Coordinate', 'lat lon')\n", + "delhi_data = ('Delhi NCR', 'IN', 21.935, Coordinate(28.613889, 77.208889))\n", + "delhi = City._make(delhi_data)\n", + "delhi._asdict()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\"name\": \"Delhi NCR\", \"country\": \"IN\", \"population\": 21.935, \"coordinates\": [28.613889, 77.208889]}'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "json.dumps(delhi._asdict())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-6. Named tuple attributes and methods, continued from Example 5-5." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Coordinate(lat=0, lon=0, reference='WGS84')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Coordinate = namedtuple('Coordinate', 'lat lon reference', defaults=['WGS84'])\n", + "Coordinate(0, 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'reference': 'WGS84'}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Coordinate._field_defaults" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hacking a `namedtuple` to inject a method" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-7. Adding a class attribute and a method to `Card`, the `namedtuple` from \"A Pythonic Card Deck\"" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import collections\n", + "\n", + "\n", + "Card = collections.namedtuple('Card', ['rank', 'suit'])\n", + "Card.suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)\n", + "\n", + "\n", + "class FrenchDeck:\n", + " ranks = [str(n) for n in range(2, 11)] + list('JQKA')\n", + " suits = 'spades diamonds clubs hearts'.split()\n", + "\n", + " def __init__(self):\n", + " self._cards = [Card(rank, suit) for suit in self.suits\n", + " for rank in self.ranks]\n", + "\n", + " def __len__(self):\n", + " return len(self._cards)\n", + "\n", + " def __getitem__(self, position):\n", + " return self._cards[position]\n", + "\n", + "\n", + "def spades_high(card):\n", + " rank_value = FrenchDeck.ranks.index(card.rank)\n", + " suit_value = card.suit_values[card.suit]\n", + " return rank_value * len(card.suit_values) + suit_value\n", + "\n", + "\n", + "Card.overall_rank = spades_high\n", + 
"lowest_card = Card('2', 'clubs')\n", + "highest_card = Card('A', 'spades')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lowest_card.overall_rank()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "51" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "highest_card.overall_rank()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Typed Named Tuples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-8. [typing_namedtuple/coordinates2.py](typing_namedtuple/coordinates2.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "class Coordinate(NamedTuple):\n", + " lat: float\n", + " lon: float\n", + " reference: str = 'WGS84'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Type Hints 101" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-9. 
Python does not enforce type hints at runtime" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "class Coordinate(typing.NamedTuple):\n", + " lat: float\n", + " lon: float" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Coordinate(lat='Ni', lon=None)\n" + ] + } + ], + "source": [ + "trash = Coordinate('Ni', None)\n", + "print(trash)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The Meaning of Variable Annotations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-10. [meaning/demo_plain.py](meaning/demo_plain.py): a plain class with type hints" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "class DemoPlainClass:\n", + " a: int\n", + " b: float = 1.1\n", + " c = 'spam'" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': int, 'b': float}" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoPlainClass.__annotations__" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "type object 'DemoPlainClass' has no attribute 'a'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_161355/1123823172.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mDemoPlainClass\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", +
"\u001b[0;31mAttributeError\u001b[0m: type object 'DemoPlainClass' has no attribute 'a'" + ] + } + ], + "source": [ + "DemoPlainClass.a" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.1" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoPlainClass.b" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'spam'" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoPlainClass.c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-11. [meaning/demo_nt.py](meaning/demo_nt.py): a class built with `typing.NamedTuple`" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "class DemoNTClass(typing.NamedTuple):\n", + " a: int\n", + " b: float = 1.1\n", + " c = 'spam'" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': int, 'b': float}" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoNTClass.__annotations__" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "_tuplegetter(0, 'Alias for field number 0')" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoNTClass.a" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "_tuplegetter(1, 'Alias for field number 1')" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoNTClass.b" + ] + }, + { + "cell_type": "code", + "execution_count":
39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'spam'" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoNTClass.c" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'DemoNTClass(a, b)'" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoNTClass.__doc__" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt = DemoNTClass(8)\n", + "nt.a" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.1" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt.b" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'spam'" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nt.c" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "can't set attribute", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_161355/1430086050.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: can't set attribute" + ] + } + ], + "source": [ + "nt.a = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "can't set attribute", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_161355/3556876644.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m17.3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: can't set attribute" + ] + } + ], + "source": [ + "nt.b = 17.3" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'DemoNTClass' object attribute 'c' is read-only", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_161355/927667453.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'spam2'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'DemoNTClass' object attribute 'c' is read-only" + ] + } + ], + "source": [ + "nt.c = 'spam2'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 
5-12. [meaning/demo_dc.py](meaning/demo_dc.py): a class decorated with `@dataclass`" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass\n", + "\n", + "@dataclass\n", + "class DemoDataClass:\n", + " a: int\n", + " b: float = 1.1\n", + " c = 'spam'" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': int, 'b': float}" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoDataClass.__annotations__" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'DemoDataClass(a: int, b: float = 1.1)'" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoDataClass.__doc__" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "type object 'DemoDataClass' has no attribute 'a'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_161355/3309639985.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mDemoDataClass\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: type object 'DemoDataClass' has no attribute 'a'" + ] + } + ], + "source": [ + "DemoDataClass.a" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.1" + ] + }, + "execution_count": 51, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "DemoDataClass.b" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'spam'" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "DemoDataClass.c" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dc = DemoDataClass(9)\n", + "dc.a" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.1" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dc.b" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'spam'" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dc.c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "No type checking is done at runtime" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "dc.a = 10\n", + "dc.b = 'oops'\n", + "dc.c = 'whatever'\n", + "dc.z = 'secret stash'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## More about `@dataclass`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Field Options" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-13. 
[dataclass/club_wrong.py](dataclass/club_wrong.py): this class raises `ValueError`" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ValueError: mutable default for field guests is not allowed: use default_factory\n" + ] + } + ], + "source": [ + "try:\n", + " @dataclass\n", + " class ClubMember:\n", + " name: str\n", + " guests: list = []\n", + "except ValueError as err:\n", + " print(f\"ValueError: {err}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-14. [dataclass/club.py](dataclass/club.py): this `ClubMember` definition works" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import field\n", + "\n", + "\n", + "@dataclass\n", + "class ClubMember:\n", + " name: str\n", + " guests: list = field(default_factory=list)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-15. [dataclass/club_generic.py](dataclass/club_generic.py): this `ClubMember` definition is more precise" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "@dataclass\n", + "class ClubMember:\n", + " name: str\n", + " guests: list[str] = field(default_factory=list)\n", + " athelete: bool = field(default=False, repr=False) # Omitted from __repr__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Post-init Processing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-16. 
[dataclass/hackerclub.py](dataclass/hackerclub.py)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"HackerClubMember(name: str, guests: list = , handle: str = '')\"" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dataclass.club import ClubMember\n", + "\n", + "\n", + "@dataclass\n", + "class HackerClubMember(ClubMember):\n", + " all_handles = set()\n", + " # all_handles: ClassVar[set[str]] = set() # For mypy type checking\n", + " handle: str = ''\n", + " \n", + " def __post_init__(self):\n", + " cls = self.__class__\n", + " if self.handle == '':\n", + " self.handle = self.name.split()[0]\n", + " if self.handle in cls.all_handles:\n", + " msg = f'handle {self.handle!r} already exists.'\n", + " raise ValueError(msg)\n", + " cls.all_handles.add(self.handle)\n", + " \n", + "HackerClubMember.__doc__" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "HackerClubMember(name='Anna Ravenscroft', guests=[], handle='AnnaRaven')" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anna = HackerClubMember('Anna Ravenscroft', handle='AnnaRaven')\n", + "anna" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "HackerClubMember(name='Leo Rochael', guests=[], handle='Leo')" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "leo = HackerClubMember('Leo Rochael')\n", + "leo" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ValueError: handle 'Leo' already exists.\n" + ] + } + ], + "source": [ + "try:\n", + " leo2 = HackerClubMember('Leo 
DaVinci')\n", + "except ValueError as err:\n", + " print(f\"ValueError: {err}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialization Variables That Are Not Fields" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-18. Example from [`dataclasses`](https://docs.python.org/3/library/dataclasses.html#init-only-variables) module documentation\n", + "https://docs.python.org/3/library/dataclasses.html#init-only-variables" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `@dataclass` Example: Dublin Core Resource Record" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-19. [dataclass/resource.py](dataclass/resource.py): code for `Resource`, a class based on Dublin Core terms" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import dataclass, field, fields\n", + "from typing import Optional\n", + "from enum import Enum, auto\n", + "from datetime import date\n", + "\n", + "\n", + "class ResourceType(Enum):\n", + " BOOK = auto()\n", + " EBOOK = auto()\n", + " VIDEO = auto()\n", + " \n", + "\n", + "@dataclass\n", + "class Resource:\n", + " \"\"\"Media resource description.\"\"\"\n", + " identifier: str\n", + " title: str = ''\n", + " creators: list[str] = field(default_factory=list)\n", + " date: Optional[date] = None\n", + " type: ResourceType = ResourceType.BOOK\n", + " description: str = ''\n", + " language: str = ''\n", + " subjects: list[str] = field(default_factory=list)\n", + " \n", + " def __repr__(self):\n", + " cls = self.__class__\n", + " cls_name = cls.__name__\n", + " indent = ' ' * 4\n", + " res = [f'{cls_name}(']\n", + " for f in fields(cls):\n", + " value = getattr(self, f.name)\n", + " res.append(f'{indent}{f.name} = {value!r},')\n", + " \n", + " res.append(')')\n", + " return '\\n'.join(res) " + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "#### Example 5-20. [dataclass/resource.py](dataclass/resource.py): code for `Resource`, a class based on Dublin Core terms" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Resource(\n", + " identifier = '978-0-13-475759-9',\n", + " title = 'Refactoring, 2nd edition',\n", + " creators = ['Martin Fowler', 'Kent Back'],\n", + " date = datetime.date(2018, 11, 19),\n", + " type = ,\n", + " description = 'Improving the design of existing code',\n", + " language = 'EN',\n", + " subjects = ['computer programming', 'OOP'],\n", + ")" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "description = 'Improving the design of existing code'\n", + "book = Resource('978-0-13-475759-9', 'Refactoring, 2nd edition',\n", + " ['Martin Fowler', 'Kent Back'], date(2018, 11, 19),\n", + " ResourceType.BOOK, description, 'EN',\n", + " ['computer programming', 'OOP'])\n", + "book" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pattern Matching Class Instances" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Keyword Class Patterns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 5-22. 
`City` class and a few instances" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "import typing\n", + "\n", + "\n", + "class City(typing.NamedTuple):\n", + " continent: str\n", + " name: str\n", + " country: str\n", + " \n", + "cities = [\n", + " City('Asia', 'Toyko', 'JP'),\n", + " City('Asia', 'Delhi', 'IN'),\n", + " City('North America', 'Mexico City', 'MX'),\n", + " City('North America', 'New York', 'US'),\n", + " City('South America', 'São Paulo', 'BR'), \n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[City(continent='Asia', name='Toyko', country='JP'),\n", + " City(continent='Asia', name='Delhi', country='IN')]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def match_asian_cities():\n", + " results = []\n", + " for city in cities:\n", + " match city:\n", + " case City(continent='Asia'):\n", + " results.append(city)\n", + " return results\n", + "\n", + "\n", + "match_asian_cities()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['JP', 'IN']" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def match_asian_countries():\n", + " results = []\n", + " for city in cities:\n", + " match city:\n", + " case City(continent='Asia', country=cc):\n", + " results.append(cc)\n", + " return results\n", + "\n", + "\n", + "match_asian_countries()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['JP', 'IN']" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def match_asian_cities_pos():\n", + " results = []\n", + " for city in cities:\n", + " match city:\n", + " case 
City('Asia', _, country):\n", + " results.append(country)\n", + " return results\n", + "\n", + "match_asian_cities_pos()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('continent', 'name', 'country')" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "City.__match_args__" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} From 996ca3a5feafcb9f6b0cd5f098af09ec2566ea9a Mon Sep 17 00:00:00 2001 From: Scott McCormack Date: Sat, 17 Sep 2022 20:35:41 +0800 Subject: [PATCH 4/4] ch06: Addition of notebook (#4) --- 06-obj-ref/06-obj-ref.ipynb | 1624 +++++++++++++++++++++++++++++++++++ 1 file changed, 1624 insertions(+) create mode 100644 06-obj-ref/06-obj-ref.ipynb diff --git a/06-obj-ref/06-obj-ref.ipynb b/06-obj-ref/06-obj-ref.ipynb new file mode 100644 index 0000000..5fe8127 --- /dev/null +++ b/06-obj-ref/06-obj-ref.ipynb @@ -0,0 +1,1624 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Chapter 6 — Object References, Mutability, and Recycling" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### Variables Are Not Boxes" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 6-1. 
Variables `a` and `b` hold references to the same list, not copies of the list" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = [1, 2, 3]\n", + "b = a\n", + "a.append(4)\n", + "b" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 6-2. Variables are bound to objects only after the objects are created" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Gizmo id: 140489524020752\n" + ] + } + ], + "source": [ + "class Gizmo:\n", + " def __init__(self):\n", + " print(f'Gizmo id: {id(self)}')\n", + "\n", + "\n", + "x = Gizmo()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Gizmo id: 140489524024352\n" + ] + }, + { + "ename": "TypeError", + "evalue": "unsupported operand type(s) for *: 'Gizmo' and 'int'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_54311/2818280385.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGizmo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for *: 'Gizmo' and 'int'" + ] + 
} + ], + "source": [ + "y = Gizmo() * 10" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Gizmo',\n", + " 'In',\n", + " 'Out',\n", + " '_',\n", + " '_1',\n", + " '__',\n", + " '___',\n", + " '__builtin__',\n", + " '__builtins__',\n", + " '__doc__',\n", + " '__loader__',\n", + " '__name__',\n", + " '__package__',\n", + " '__spec__',\n", + " '_dh',\n", + " '_i',\n", + " '_i1',\n", + " '_i2',\n", + " '_i3',\n", + " '_i4',\n", + " '_ih',\n", + " '_ii',\n", + " '_iii',\n", + " '_oh',\n", + " 'a',\n", + " 'b',\n", + " 'exit',\n", + " 'get_ipython',\n", + " 'quit',\n", + " 'x']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Identity, Equality, and Aliases" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 6-3. `charles` and `lewis` refer to the same object" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "charles = {'name': 'Charles L. 
Dodgson', 'born': 1832}\n", + "lewis = charles\n", + "lewis is charles" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(140489524071488, 140489524071488)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(charles), id(lewis)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "lewis['balance'] = 950" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'name': 'Charles L. Dodgson', 'born': 1832, 'balance': 950}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "charles" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 6-4. `alex` and `charles` compare equal, but `alex` is not `charles`" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "alex = {'name': 'Charles L. 
Dodgson', 'born': 1832, 'balance': 950}\n", + "alex == charles" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "alex is not charles" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### The Relative Immutability of Tuples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 6-5. `t1` and `t2` initially compare equal, but changing a mutable item inside tuple `t1` makes it different" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1 = (1, 2, [30, 40])\n", + "t2 = (1, 2, [30, 40])\n", + "t1 == t2" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "140489525623360" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(t1[-1])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 2, [30, 40, 99])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1[-1].append(99)\n", + "t1" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "140489525623360" + ] + }, + "execution_count": 14, 
+ "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(t1[-1])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1 == t2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Copies Are Shallow by Default" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 6-6. Making a shallow copy of a list containing another list" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[3, [55, 44], (7, 8, 9)]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l1 = [3, [55, 44], (7, 8, 9)]\n", + "l2 = list(l1)\n", + "l2" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l2 == l1" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "l2 is l1" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l1: [3, [44], (7, 8, 9), 100]\n" + ] + } + ], + "source": [ + "l1.append(100)\n", + "l1[1].remove(55)\n", + "print('l1:', l1)" + ] + 
}, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l2: [3, [44], (7, 8, 9)]\n" + ] + } + ], + "source": [ + "print('l2:', l2)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l1: [3, [44, 33, 22], (7, 8, 9), 100]\n" + ] + } + ], + "source": [ + "l2[1] += [33, 22]\n", + "l2[2] += (10, 11)\n", + "print('l1:', l1)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "l2: [3, [44, 33, 22], (7, 8, 9, 10, 11)]\n" + ] + } + ], + "source": [ + "print('l2:', l2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### Deep and Shallow Copies of Arbitrary Objects" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "#### Example 6-8. [`bus.py`](bus.py): Bus picks up and drops off passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "class Bus:\n", + "\n", + " def __init__(self, passengers=None):\n", + " if passengers is None:\n", + " self.passengers = []\n", + " else:\n", + " self.passengers = list(passengers)\n", + "\n", + " def pick(self, name):\n", + " self.passengers.append(name)\n", + "\n", + " def drop(self, name):\n", + " self.passengers.remove(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-9. 
Effects of using `copy` versus `deepcopy`" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(140489524296576, 140489455864176, 140489455861968)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import copy\n", + "\n", + "bus1 = Bus(['Alice', 'Bill', 'Claire', 'David'])\n", + "bus2 = copy.copy(bus1)\n", + "bus3 = copy.deepcopy(bus1)\n", + "id(bus1), id(bus2), id(bus3)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Alice', 'Claire', 'David']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus1.drop('Bill')\n", + "bus2.passengers # Bill is removed from bus2" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(140489455823424, 140489455823424, 140489524373056)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "id(bus1.passengers), id(bus2.passengers), id(bus3.passengers)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Alice', 'Bill', 'Claire', 'David']" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus3.passengers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-10. 
Cyclic references: `b` refers to `a`, and then is appended to `a`; `deepcopy` still manages to copy `a`" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[10, 20, [[...], 30]]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = [10, 20]\n", + "b = [a, 30]\n", + "a.append(b)\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[10, 20, [[...], 30]]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = copy.deepcopy(a)\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function Parameters as References" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-11. A function may change any mutable object it receives" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def f(a, b):\n", + " a += b\n", + " return a\n", + "\n", + "\n", + "x = 1\n", + "y = 2\n", + "f(x, y)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1, 2)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x, y" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2, 3, 4]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = [1, 2]\n", + "b = [3, 4]\n", + "f(a, b)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { +
"data": { + "text/plain": [ + "([1, 2, 3, 4], [3, 4])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a, b" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 20, 30, 40)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t = (10, 20)\n", + "u = (30, 40)\n", + "f(t, u)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((10, 20), (30, 40))" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t, u" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mutable Types as Parameter Defaults: Bad Idea" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-12. [`haunted_bus.py`](haunted_bus.py): A simple class to illustrate the danger of a mutable default" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "class HauntedBus:\n", + " \"\"\"A bus model haunted by ghost passengers\"\"\"\n", + "\n", + " def __init__(self, passengers=[]):\n", + " self.passengers = passengers\n", + "\n", + " def pick(self, name):\n", + " self.passengers.append(name)\n", + "\n", + " def drop(self, name):\n", + " self.passengers.remove(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-13. 
Buses haunted by ghost passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Alice', 'Bill']" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus1 = HauntedBus(['Alice', 'Bill'])\n", + "bus1.passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Bill', 'Charlie']" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus1.pick('Charlie')\n", + "bus1.drop('Alice')\n", + "bus1.passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Carrie']" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus2 = HauntedBus()\n", + "bus2.pick('Carrie')\n", + "bus2.passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Carrie']" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus3 = HauntedBus()\n", + "bus3.passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Carrie', 'Dave']" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus3.pick('Dave')\n", + "bus2.passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus2.passengers is bus3.passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": 
[ + { + "data": { + "text/plain": [ + "['Bill', 'Charlie']" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bus1.passengers" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['__annotations__',\n", + " '__builtins__',\n", + " '__call__',\n", + " '__class__',\n", + " '__closure__',\n", + " '__code__',\n", + " '__defaults__',\n", + " '__delattr__',\n", + " '__dict__',\n", + " '__dir__',\n", + " '__doc__',\n", + " '__eq__',\n", + " '__format__',\n", + " '__ge__',\n", + " '__get__',\n", + " '__getattribute__',\n", + " '__globals__',\n", + " '__gt__',\n", + " '__hash__',\n", + " '__init__',\n", + " '__init_subclass__',\n", + " '__kwdefaults__',\n", + " '__le__',\n", + " '__lt__',\n", + " '__module__',\n", + " '__name__',\n", + " '__ne__',\n", + " '__new__',\n", + " '__qualname__',\n", + " '__reduce__',\n", + " '__reduce_ex__',\n", + " '__repr__',\n", + " '__setattr__',\n", + " '__sizeof__',\n", + " '__str__',\n", + " '__subclasshook__']" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(HauntedBus.__init__)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(['Carrie', 'Dave'],)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HauntedBus.__init__.__defaults__" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "HauntedBus.__init__.__defaults__[0] is bus2.passengers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Defensive Programming with Mutable Parameters" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "#### Example 6-15. [`twilight_bus.py`](twilight_bus.py): A simple class to show the perils of mutating received arguments" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "class TwilightBus:\n", + " \"\"\"A bus model that makes passengers vanish\"\"\"\n", + "\n", + " def __init__(self, passengers=None):\n", + " if passengers is None:\n", + " self.passengers = []\n", + " else:\n", + " self.passengers = passengers\n", + "\n", + " def pick(self, name):\n", + " self.passengers.append(name)\n", + "\n", + " def drop(self, name):\n", + " self.passengers.remove(name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-14. Passengers disappear when dropped by a `TwilightBus`" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Sue', 'Maya', 'Diana']" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "basketball_team = ['Sue', 'Tina', 'Maya', 'Diana', 'Pat']\n", + "bus = TwilightBus(basketball_team)\n", + "bus.drop('Tina')\n", + "bus.drop('Pat')\n", + "basketball_team" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## del and Garbage Collection" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 2]" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = [1, 2]\n", + "b = a\n", + "del a\n", + "b" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "b = [3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-16. 
Watching the end of an object when no more references point to it" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import weakref\n", + "\n", + "s1 = {1, 2, 3}\n", + "s2 = s1\n", + "\n", + "\n", + "def bye():\n", + " print('...like tears in the rain.')\n", + "\n", + "\n", + "ender = weakref.finalize(s1, bye)\n", + "ender.alive" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "del s1\n", + "ender.alive" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "...like tears in the rain.\n" + ] + } + ], + "source": [ + "s2 = 'spam'" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ender.alive" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tricks Python Plays with Immutables" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-17. 
A tuple built from another is actually the same tuple" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1 = (1, 2, 3)\n", + "t2 = tuple(t1)\n", + "t2 is t1" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t3 = t1[:]\n", + "t3 is t1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 6-18. String literals may create shared objects" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t1 = (1, 2, 3)\n", + "t3 = (1, 2, 3)\n", + "t3 is t1" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s1 = 'ABC'\n", + "s2 = 'ABC'\n", + "s1 is s2" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}