Dec 2023 update (#59)

gwu-libraries · Dec 27, 2023 · ad7d4ef · ad7d4ef
1 parent 58c62af
commit ad7d4ef
Show file tree

Hide file tree

Showing 41 changed files with 122,290 additions and 2,575 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -1,10 +1,3 @@
-[submodule "python-camp-hw-2-gr"]
-	path = homework-modules/python-camp-hw-2-gr
-	url = https://github.com/gwu-libraries/python-camp-hw-2-gr.git
-	ignore = dirty
-[submodule "homework-modules/python-camp-hw-4-gr"]
-	path = homework-modules/python-camp-hw-4-gr
-	url = https://github.com/gwu-libraries/python-camp-hw-4-gr.git
-[submodule "homework-modules/python-camp-hw-3-gr"]
-	path = homework-modules/python-camp-hw-3-gr
-	url = https://github.com/gwu-libraries/python-camp-hw-3-gr.git
+[submodule "homework-modules/python-camp-hw-final-gr"]
+	path = homework-modules/python-camp-hw-final-gr
+	url = https://github.com/gwu-libraries/python-camp-hw-final-gr.git
diff --git a/course_utils/autograder.py b/course_utils/autograder.py
@@ -19,18 +19,26 @@ def extract_nb_code(nb_json):
     Expect each relevant metadata tag to follow the above format.
     '''
     code = []
+    ex_idx = 1
     for cell in nb_json['cells']:
         # Filter for code cells
         if cell['cell_type'] == 'code':
             # Filter for the right kind of metadata
             tags = [t for t in cell['metadata'].get('tags', [])
-                   if t.startswith('setup') or t.startswith('solution') or t.startswith('test-case')]
+                   if t.startswith('setup') or t.startswith('test-case')]
             if tags:
                 # Assume only one relevant tag per cell
                 tag_type, tag_idx = tags[0].split(':')
                 code.append({'code': cell['source'],
                              'type': tag_type,
                              'index': tag_idx})
+                if tag_type == 'test-case':
+                    # Increment at every test case
+                    ex_idx += 1
+            elif cell['source'] and cell['source'][0].startswith('#Your code below'):
+                code.append({'code': cell['source'],
+                             'type': 'solution',
+                             'index': str(ex_idx)}) # This supports proper indexing of exercises without setup cells
     # Group code cells according to their index (so that the setup, solution and test code for each exercise will be in the same dict)
     groups =  [list(g) for k, g in groupby(code, lambda x: x['index'])]
     # Create one dict per exercise, containing setup, solution, and test code
@@ -51,17 +59,18 @@ def setUp(self):
         self.hw_code = extract_nb_code(homework_nb)
 
     def test_homework(self):
+        test_vars = {}
         for i, ex in enumerate(self.hw_code):
             with self.subTest(exercise=i+1):
-               # Pass this as the "locals" param to the first two exec statements to store variables set by the code
-               test_vars = {}
-               # Run setup code, if it exists
-               exec(ex.get('setup', ''), None, test_vars)
-               # Run solution code
-               exec(ex['solution'], None, test_vars)
-               # Run test code as assertion
-               # Pass our local vars dict as globals so that it can be accessed at compile time by exec()
-               exec(ex['test-case'], test_vars)
+                # test_vars (defined once above the loop, so shared across exercises) is passed as the "locals" param to the first two exec statements to store variables set by the code
+                # Run setup code, if it exists
+                exec(ex.get('setup', ''), None, test_vars)
+                # Run solution code
+                exec(ex['solution'], None, test_vars)
+                # Run test code as assertion
+                # Pass our local vars dict as globals so that it can be accessed at compile time by exec()
+                exec(ex['test-case'], test_vars)
+
 
 
 if __name__ == '__main__':

diff --git a/course_utils/dataset_prep.py b/course_utils/dataset_prep.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+
+import json
+from random import sample
+from itertools import tee, filterfalse
+import click
+import re
+
+COURSE_META = 'courseSectionDTO'
+COURSE_KEYS = ['department', 'course', 'section', 'instructor', 'termName']
+TEXT_META = 'courseMaterialResultsList'
+TEXT_KEYS = ['title', 'edition', 'author', 'isbn', 'materialType', 'requirementType', 'copyRightYear', 'publisher']
+PRINT_INVENTORY = 'printItemDTOs'
+E_INVENTORY = 'digitalItemDTOs'
+INVENTORY_KEYS = ['typeCondition', 'priceDisplay', 'binding',]
+
+def extract_print_inventory(course_dict):
+    for record in course_dict.get(PRINT_INVENTORY, {}).values():
+        yield dict([(transform_keys(k), course_dict.get(k)) for k in TEXT_KEYS] + [(transform_keys(k), record[k]) for k in INVENTORY_KEYS if record.get(k)] + [('item_type', 'print')])
+
+def extract_e_inventory(course_dict):
+    for record in course_dict.get(E_INVENTORY, []):
+        yield dict([(transform_keys(k), course_dict.get(k)) for k in TEXT_KEYS] + [(transform_keys(k), record[k]) for k in INVENTORY_KEYS if record.get(k)] + [('item_type', 'digital')])
+
+def transform_keys(key):
+    # Transform key from camel case to snake case
+    if key == 'course':
+        return 'course_num'
+    return re.sub(r'([A-Z])', r'_\1', key).lower()
+
+def extract_data(bkst_data):
+    cleaned_data = []
+    for course in bkst_data:
+        course_dict = {transform_keys(k): course[COURSE_META][0].get(k) for k in COURSE_KEYS}
+        course_dict['texts'] = []
+        for text in course[COURSE_META][0].get(TEXT_META, []):
+            for i in extract_print_inventory(text):
+                course_dict['texts'].append(i)
+            for i in extract_e_inventory(text):
+                course_dict['texts'].append(i)
+        cleaned_data.append(course_dict)
+    return cleaned_data
+
+
+def dedupe_courses(data):
+    courses_seen = []
+    for course in data:
+        course_key = " ".join([course[transform_keys(k)] for k in COURSE_KEYS if course[transform_keys(k)]])
+        if not course_key in courses_seen:
+            courses_seen.append(course_key)
+            yield course
+
+def partition(pred, iterable):
+    """Partition entries into false entries and true entries.
+
+    If *pred* is slow, consider wrapping it with functools.lru_cache().
+    """
+    # partition(is_odd, range(10)) --> 0 2 4 6 8   and  1 3 5 7 9
+    t1, t2 = tee(iterable)
+    return list(filterfalse(pred, t1)), list(filter(pred, t2))
+
+
+def reshuffle_data(data, key, factor=2):
+    # weighted shuffle: ensures that elements with the key are distributed more toward the beginning of the dataset
+    # factor is the divisor that sets how many elements with the key go into the front segment, i.e., factor=2 puts 1/2 of them there
+    without, with_key = partition(lambda x: x.get(key), data)
+    n = len(with_key) // factor
+    front_list = with_key[:n] + without[:n]
+    back_list = with_key[n:] + without[n:]
+    return sample(front_list, k=len(front_list)) + sample(back_list, k=len(back_list))
+
+@click.command()
+@click.option('--infile', default='../data/bookstore-data.json')
+@click.option('--outfile', default='../textbook/static-assets/data/bookstore-data.json')
+def main(infile, outfile):
+    with open(infile) as f:
+        bkst_data = json.load(f)
+    cleaned_data = extract_data(bkst_data)
+
+    if len({" ".join([course[transform_keys(k)] for k in COURSE_KEYS if course[transform_keys(k)]]) for course in cleaned_data}) != len(cleaned_data):
+        cleaned_data = [c for c in dedupe_courses(cleaned_data)]
+
+    with open(outfile, 'w') as f:
+        json.dump(reshuffle_data(cleaned_data, 'texts'), f, indent=4)
+
+if __name__ == '__main__':
+    main()
+
+
+
diff --git a/homework-modules/python-camp-hw-2-gr b/homework-modules/python-camp-hw-2-gr
diff --git a/homework-modules/python-camp-hw-3-gr b/homework-modules/python-camp-hw-3-gr
diff --git a/homework-modules/python-camp-hw-4-gr b/homework-modules/python-camp-hw-4-gr
diff --git a/homework-modules/python-camp-hw-final-gr b/homework-modules/python-camp-hw-final-gr
diff --git a/textbook/_toc.yml b/textbook/_toc.yml
@@ -12,10 +12,12 @@ parts:
     title: "Homework for Python Camp"
 - caption: Day 1
   chapters:
-  - file: notebooks/lessons/1_1_choreographing_code
-    title: Choreographing Code
-  - file: notebooks/lessons/1_2_from_data_to_code
-    title: From Data to Code
+  - file: notebooks/lessons/1_1_modeling_code
+    title: Modeling Code
+- caption: Day 1-2
+  chapters:
+    - file: notebooks/lessons/1_2_from_data_to_code
+      title: From Data to Code
 - caption: Day 1 Homework
   chapters:
   - file: notebooks/homework/HW_1_from_code_to_data
@@ -24,32 +26,26 @@ parts:
   chapters: 
   - file: notebooks/lessons/2_1_describing_the_team
     title: Describing the Team
-  - file: notebooks/lessons/2_2_querying_data
-    title: Querying Data
 - caption: Day 2 Homework
   chapters: 
   - file: notebooks/homework/HW_2_programming_techniques
     title: Programming Techniques
-  - file: notebooks/homework/HW_2_GR
-    title: "Graded Homework: Day 2"
-- caption: Days 3-4
+- caption: Day 3
   chapters:
-  - file: notebooks/lessons/3_1_from_story_to_code
-    title: From Story to Code
+  - file: notebooks/lessons/2_2_programming_with_data
+    title: Programming with Data
 - caption: Day 3 Homework
   chapters:
   - file: notebooks/homework/HW_3_error_stories
     title: Error Stories
-  - file: notebooks/homework/HW_3_GR
-    title: "Graded Homework: Day 3"
-- caption: Day 4 (SUPPLEMENTAL)
+- caption: Day 4
   chapters:
-  - file: notebooks/lessons/4_1_modeling_the_world
-    title: Modeling the World
-- caption: Bonus Homework
+  - file: notebooks/lessons/3_1_from_story_to_code
+    title: From Story to Code
+- caption: Final Homework
   chapters:
-  - file: notebooks/homework/HW_4_GR
-    title: "Bonus Homework: Day 4"
+  - file: notebooks/homework/HW_Final_GR
+    title: "Final Homework (Graded)"
 - caption: Python Reference
   chapters:
   - file: keep_learning

diff --git a/textbook/glossary.md b/textbook/glossary.md
@@ -96,6 +96,10 @@ Boolean value
         if x:
             print(2/x) 
 
+call
+
+    To call a method or function is to instruct Python to execute the function or method's predefined behavior. And a function or method is really just a kind of shortcut: instead of our having to type out the same code whenever we want to perform a certain task (like splitting a string, for instance), we call the function or method in which that behavior has been defined. It's sort of like using the address book on your phone: you can tap `Mom` in order to call or text Mom, instead of having to enter her number every time.
+
 character
 
     A character is the basic unit of a Python {term}`string`. In Python, characters are defined according to the [Unicode standard](https://home.unicode.org/) by default. Unicode characters comprise the following (and more):
@@ -151,7 +155,15 @@ comment
 
     We use comments to document our code for ourselves and for others.
 
+conditional logic
+
+    A general term for the set of operations that allow computers to behave differently based on different inputs or conditions. At the most fundamental level, all other mathematical operations in a computer (addition, multiplication, etc.) are implemented using combinations of {term}`Boolean operator`s implemented in electronic circuits. 
+
+    In higher-level languages like Python, when we speak of conditional logic, we're often referring to {term}`if statement`s. 
 
+CSV
+
+    An acronym for "comma-separated values," CSV is a data format compatible with spreadsheet applications like Excel. It is also a format widely used for storing tabular data: data structured in rows and columns. Unlike {term}`JSON`, CSV format does not generally work well with nested data. 
 
 curly braces
 
@@ -169,10 +181,17 @@ dictionary
 
         print(my_info['job_title'])     # Prints "Librarian"
 
+documentation
+
+    Human-readable text that explains how a program or a part of a program is intended to work and/or the rationale for a particular choice made by the programmer. Some documentation resides in separate files from the program itself; an example of this kind of documentation is found on the Python docs [website](https://docs.python.org/3/). Other documentation is mixed in with the program code. Such documentation is usually offset by particular characters that tell the computer running this code to ignore the documentation itself. In Python, to include documentation within our code, we either prefix a line with a hashtag (`#`) or enclose multiple lines within triple quotes (`'''`).
+
 dot notation
 
     When an {term}`instance` of a Python {term}`type` (or {term}`class`) has {term}`attributes` or {term}`method`s, we use dot notation to access the attributes/methods. For instance, a {term}`list` has an `append()` method, so for a given list `my_list`, we can write `my_list.append(3)` (to add the integer `3` to the end of the list).
 
+exception
+
+    An error message in Python arising from a problem in the logic of the code or its inputs. As opposed to a syntax error, which will always cause code to fail, an exception is Python's way of saying, "this code might run, EXCEPT not in this case." 
 
 double equals sign
 
@@ -265,7 +284,7 @@ indented block
 
 index
 
-    The position of an element in a {term}`list` or a {term}`character` in a {term}`string`. We can use the index to extract a single element/character, or we can use a range of indices to extract a {term}`slice` (a subset of the string or list). 
+    The position of an element in a {term}`list`, or of a {term}`character` in a {term}`string`. We can use the index to extract a single element/character, or we can use a range of indices to extract a {term}`slice` (a subset of the string or list). 
 
     Indices in Python start at `0` (meaning, the first element or character is considered to have an index of 0, not 1) and increase from left to right. 
 
@@ -347,6 +366,10 @@ list
 
     It's more common to use a {term}`dictionary` to represent complex data of multiple types, since each element in a dictionary has a {term}`key` that can be more descriptive. For example, if our dictionary has an `age` key, we might guess that this key will correspond to a numeric type, not a string. 
 
+list of dictionaries
+
+    A common approach to representing data that consist of multiple elements (like books or university courses or people in a class) where each element has more or less the same set of attributes. (For books, those attributes might be `title`, `author`, `publisher`, etc. For people, `name`, `age`, `email address`, etc.) Each element in the list is a dictionary with the same set of keys; only the values are different. 
+
 loop variable
 
     The {term}`variable` in a {term}`for loop` that assumes the value of each element in the {term}`collection` over which the loop runs. 
@@ -418,6 +441,12 @@ return
 
     See {term}`function` for more information.
 
+set
+
+    A Python data type that is like a {term}`list` but that can contain only unique elements. Use the `set()` function to create a set from a list: `set([1,2,1,3,2,4,3,5])` returns `{1, 2, 3, 4, 5}`. (Note that the curly braces are used to delimit sets as well as dictionaries; unlike dictionaries, sets do not have key/value pairs, only single elements separated by commas.)
+
+    Only certain Python types can be used to create sets. You can't, for instance, convert a list of dictionaries into a set. Sets are mostly useful when dealing with numeric values and/or strings.
+
 slice
 
     A slice is a subset of a Python {term}`list` or {term}`string`. To create a slice, we use {term}`square brackets` around a pair of numbers separated by a {term}`colon`, where the first number is the position of the first element we want to include, and the second number is **one plus** the position of the last element to include. 
@@ -434,6 +463,12 @@ slice
 
     `my_list[3:5]` is the same as `my_list[3:]`. 
 
+source code
+
+    Refers to the code (in Python or any other language) of which a piece of software (application, library, module, or script) consists. The Python community is very committed to [open source](https://en.wikipedia.org/wiki/Open_source) software; this commitment, coupled with the nature of the Python language itself, means that for virtually any Python {term}`library` that you might use, it's possible to inspect the source code to see how it was written. This fact can be very useful, especially for libraries lacking good documentation.
+
+    The [source code](https://github.com/python/cpython) for the core of the Python language is also open source. However, a lot of it is implemented in C (or another low-level language, depending on the flavor of Python).
+
 square brackets
 
     Square brackets (`[]`) are used in a few different ways in Python:
@@ -442,6 +477,11 @@ square brackets
         - In creating a {term}`slice` of a string or list: `my_list[0:2]`
         - In accessing the value in a dictionary by its {term}`key`: `print(my_dict['name'])` prints the value associated with the key `name` in `my_dict`.
 
+
+standard library
+
+    The set of functions, data types, methods, and other tools that are available with the basic installation of Python.
+
 string
 
     A basic Python {term}`type` comprising a {term}`collection` of {term}`character`s. We can create a string by enclosing any text (really, anything you can type on your keyboard) between quotation marks. 
@@ -450,6 +490,10 @@ string
 
     Strings have {term}`method`s, such as `split()`. See the [Python documentation](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) for a complete list.
 
+test
+
+    A test in the context of programming is typically some code designed to check the functionality of other code. For instance, when developing an application, programmers will typically write multiple tests that can be used to confirm that various parts of the application work as intended. This approach is particularly helpful when the application goes through multiple development cycles, receiving bug fixes and enhancements. In such situations, having a suite of tests ensures that changes to one part of the application don't break other parts of the application. 
+
 type
 
     A Python type is, in essence, a set of behaviors associated with a certain way of representing data. Let's unpack this a bit.
@@ -468,6 +512,10 @@ type
 
     Just as cooks choose the right appliance for the task at hand, so programmers choose the kind of `type` to use depending on goals and context. 
 
+Unicode
+
+    A system for representing characters from the world's many languages along with other symbols (mathematical symbols, emojis, etc.). Python handles Unicode by default, which means that you can use Python to work with text in languages other than English. See {term}`character` for more information. 
+
 value
 
     The word _value_ is ambiguous when talking about Python. On the one hand, we can talk about any occurrence of data as a _value_, as when we say that in defining a {term}`variable`, the value goes on the right of the equals sign. 
@@ -492,10 +540,6 @@ variable
 
     In creating variables, we assign a name to a value with the single equals sign. The name goes on the left of the equals sign, the value on the right.
 
-Unicode
-
-    A system for representing characters from the world's many languages along with other symbols (mathematical symbols, emojis, etc.). Python handles Unicode by default, which means that you can use Python to work with text in languages other than English. See {term}`character` for more information. 
-
 white space
 
     The phrase _white space_ refers to characters created by the spacebar, the tab key, and/or the return/enter key on your keyboard.