Skip to content

Commit

Permalink
Dec 2023 update (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
dolsysmith authored Dec 27, 2023
1 parent 58c62af commit ad7d4ef
Show file tree
Hide file tree
Showing 41 changed files with 122,290 additions and 2,575 deletions.
13 changes: 3 additions & 10 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,10 +1,3 @@
[submodule "python-camp-hw-2-gr"]
path = homework-modules/python-camp-hw-2-gr
url = https://github.com/gwu-libraries/python-camp-hw-2-gr.git
ignore = dirty
[submodule "homework-modules/python-camp-hw-4-gr"]
path = homework-modules/python-camp-hw-4-gr
url = https://github.com/gwu-libraries/python-camp-hw-4-gr.git
[submodule "homework-modules/python-camp-hw-3-gr"]
path = homework-modules/python-camp-hw-3-gr
url = https://github.com/gwu-libraries/python-camp-hw-3-gr.git
[submodule "homework-modules/python-camp-hw-final-gr"]
path = homework-modules/python-camp-hw-final-gr
url = https://github.com/gwu-libraries/python-camp-hw-final-gr.git
29 changes: 19 additions & 10 deletions course_utils/autograder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,26 @@ def extract_nb_code(nb_json):
Expect each relevant metadata tag to follow the above format.
'''
code = []
ex_idx = 1
for cell in nb_json['cells']:
# Filter for code cells
if cell['cell_type'] == 'code':
# Filter for the right kind of metadata
tags = [t for t in cell['metadata'].get('tags', [])
if t.startswith('setup') or t.startswith('solution') or t.startswith('test-case')]
if t.startswith('setup') or t.startswith('test-case')]
if tags:
# Assume only one relevant tag per cell
tag_type, tag_idx = tags[0].split(':')
code.append({'code': cell['source'],
'type': tag_type,
'index': tag_idx})
if tag_type == 'test-case':
# Increment at every test case
ex_idx += 1
elif cell['source'] and cell['source'][0].startswith('#Your code below'):
code.append({'code': cell['source'],
'type': 'solution',
'index': str(ex_idx)}) # This supports proper indexing of exercises without setup cells
# Group code cells according to their index (so that the setup, solution and test code for each exercise will be in the same dict)
groups = [list(g) for k, g in groupby(code, lambda x: x['index'])]
# Create one dict per exercise, containing setup, solution, and test code
Expand All @@ -51,17 +59,18 @@ def setUp(self):
self.hw_code = extract_nb_code(homework_nb)

def test_homework(self):
test_vars = {}
for i, ex in enumerate(self.hw_code):
with self.subTest(exercise=i+1):
# Pass this as the "locals" param to the first two exec statements to store variables set by the code
test_vars = {}
# Run setup code, if it exists
exec(ex.get('setup', ''), None, test_vars)
# Run solution code
exec(ex['solution'], None, test_vars)
# Run test code as assertion
# Pass our local vars dict as globals so that it can be accessed at compile time by exec()
exec(ex['test-case'], test_vars)
# Pass this as the "locals" param to the first two exec statements to store variables set by the code
# Run setup code, if it exists
exec(ex.get('setup', ''), None, test_vars)
# Run solution code
exec(ex['solution'], None, test_vars)
# Run test code as assertion
# Pass our local vars dict as globals so that it can be accessed at compile time by exec()
exec(ex['test-case'], test_vars)



if __name__ == '__main__':
Expand Down
90 changes: 90 additions & 0 deletions course_utils/dataset_prep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# coding: utf-8

import json
from random import sample
from itertools import tee, filterfalse
import click
import re

# Key in each raw course record; its value is indexed with [0], and only that first element is read.
COURSE_META = 'courseSectionDTO'
# Fields copied from the section record into each cleaned course dict (renamed via transform_keys).
COURSE_KEYS = ['department', 'course', 'section', 'instructor', 'termName']
# Key in the section record whose value is iterated as the course's texts.
TEXT_META = 'courseMaterialResultsList'
# Fields copied from a text record onto every inventory item generated for it.
TEXT_KEYS = ['title', 'edition', 'author', 'isbn', 'materialType', 'requirementType', 'copyRightYear', 'publisher']
# Key in a text record for print inventory; read with .values(), so a mapping is expected.
PRINT_INVENTORY = 'printItemDTOs'
# Key in a text record for digital inventory; iterated directly, so a sequence is expected.
E_INVENTORY = 'digitalItemDTOs'
# Fields copied from an inventory record onto an item, but only when their value is truthy.
INVENTORY_KEYS = ['typeCondition', 'priceDisplay', 'binding',]

def extract_print_inventory(course_dict):
    """Yield one flat dict per print inventory record found in *course_dict*.

    Each yielded dict holds every TEXT_KEYS field read from *course_dict*
    (None when absent), followed by the INVENTORY_KEYS fields of the record
    whose values are truthy, followed by ``'item_type': 'print'``. All field
    names are passed through transform_keys().
    """
    # The print inventory is read as a mapping; only its values are used.
    for record in course_dict.get(PRINT_INVENTORY, {}).values():
        item = {}
        for name in TEXT_KEYS:
            item[transform_keys(name)] = course_dict.get(name)
        for name in INVENTORY_KEYS:
            # Falsy or missing inventory fields are left out entirely
            if record.get(name):
                item[transform_keys(name)] = record[name]
        item['item_type'] = 'print'
        yield item

def extract_e_inventory(course_dict):
    """Yield one flat dict per digital inventory record found in *course_dict*.

    Each yielded dict holds every TEXT_KEYS field read from *course_dict*
    (None when absent), followed by the INVENTORY_KEYS fields of the record
    whose values are truthy, followed by ``'item_type': 'digital'``. All field
    names are passed through transform_keys().
    """
    # The digital inventory is iterated directly (defaults to an empty list).
    for record in course_dict.get(E_INVENTORY, []):
        item = {}
        for name in TEXT_KEYS:
            item[transform_keys(name)] = course_dict.get(name)
        for name in INVENTORY_KEYS:
            # Falsy or missing inventory fields are left out entirely
            if record.get(name):
                item[transform_keys(name)] = record[name]
        item['item_type'] = 'digital'
        yield item

def transform_keys(key):
    """Return *key* converted from camel case to snake case.

    The key ``'course'`` is special-cased and always becomes ``'course_num'``.
    For any other key, an underscore is inserted before each ASCII capital
    letter and the whole result is lowercased.
    """
    if key != 'course':
        # Mark each capital with a leading underscore before lowercasing
        underscored = re.sub(r'([A-Z])', r'_\1', key)
        return underscored.lower()
    return 'course_num'

def extract_data(bkst_data):
    """Flatten raw bookstore records into a list of cleaned course dicts.

    For every course in *bkst_data*, the first element under COURSE_META is
    read. Its COURSE_KEYS fields (renamed with transform_keys) form the course
    dict, and a ``'texts'`` list collects the print items followed by the
    digital items of each text listed under TEXT_META.
    """
    cleaned_data = []
    for course in bkst_data:
        # Only the first section entry of each course is used
        section = course[COURSE_META][0]
        course_dict = {transform_keys(k): section.get(k) for k in COURSE_KEYS}
        texts = []
        course_dict['texts'] = texts
        for text in section.get(TEXT_META, []):
            # Print items come before digital items for each text
            texts.extend(extract_print_inventory(text))
            texts.extend(extract_e_inventory(text))
        cleaned_data.append(course_dict)
    return cleaned_data


def dedupe_courses(data):
    """Yield each course in *data* the first time its identifying key appears.

    The identifying key is the space-joined, truthy values of the course's
    COURSE_KEYS fields (looked up under their transform_keys() names). Later
    courses with an already-seen key are dropped; input order is preserved.

    Raises:
        KeyError: if a course lacks one of the transformed COURSE_KEYS fields.
    """
    # A set gives constant-time membership checks; the keys are plain strings.
    courses_seen = set()
    for course in data:
        values = (course[transform_keys(k)] for k in COURSE_KEYS)
        course_key = " ".join(v for v in values if v)
        if course_key not in courses_seen:
            courses_seen.add(course_key)
            yield course

def partition(pred, iterable):
    """Partition entries into false entries and true entries.

    Returns a tuple ``(false_entries, true_entries)`` of two lists, each
    preserving the order of *iterable*. *pred* is called exactly once per
    entry, and *iterable* is consumed in a single pass. As with ``filter()``,
    passing ``None`` as *pred* tests the truthiness of the entries themselves.
    """
    # partition(is_odd, range(10)) --> [0, 2, 4, 6, 8] and [1, 3, 5, 7, 9]
    if pred is None:
        pred = bool
    false_entries, true_entries = [], []
    for entry in iterable:
        if pred(entry):
            true_entries.append(entry)
        else:
            false_entries.append(entry)
    return false_entries, true_entries


def reshuffle_data(data, key, factor=2):
    """Return *data* in a weighted random order favouring entries that have *key*.

    Entries are split by whether ``entry.get(key)`` is truthy. The first
    ``len(with_key) // factor`` entries of each group form the front portion
    and the remainder form the back portion; each portion is shuffled
    independently and the front is placed before the back.

    factor is the proportion of elements with the key to weight, i.e., 2 = 1/2.
    """
    lacking, having = partition(lambda entry: entry.get(key), data)
    cutoff = len(having) // factor
    head = having[:cutoff] + lacking[:cutoff]
    tail = having[cutoff:] + lacking[cutoff:]
    # sample() with k == len(...) yields a shuffled copy without mutating the input
    shuffled_head = sample(head, k=len(head))
    shuffled_tail = sample(tail, k=len(tail))
    return shuffled_head + shuffled_tail

@click.command()
@click.option('--infile', default='../data/bookstore-data.json')
@click.option('--outfile', default='../textbook/static-assets/data/bookstore-data.json')
def main(infile, outfile):
    """Clean, de-duplicate and reshuffle the bookstore dataset.

    Reads raw JSON from INFILE, flattens it with extract_data(), drops repeated
    courses, and writes the reshuffled result to OUTFILE as indented JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default encoding.
    with open(infile, encoding='utf-8') as f:
        bkst_data = json.load(f)

    # dedupe_courses() preserves order and returns every course when there are
    # no duplicates, so it is applied unconditionally.
    cleaned_data = list(dedupe_courses(extract_data(bkst_data)))

    with open(outfile, 'w', encoding='utf-8') as f:
        json.dump(reshuffle_data(cleaned_data, 'texts'), f, indent=4)


if __name__ == '__main__':
    main()



1 change: 0 additions & 1 deletion homework-modules/python-camp-hw-2-gr
Submodule python-camp-hw-2-gr deleted from 37e0fe
1 change: 0 additions & 1 deletion homework-modules/python-camp-hw-3-gr
Submodule python-camp-hw-3-gr deleted from 865c82
1 change: 0 additions & 1 deletion homework-modules/python-camp-hw-4-gr
Submodule python-camp-hw-4-gr deleted from 3ef378
1 change: 1 addition & 0 deletions homework-modules/python-camp-hw-final-gr
34 changes: 15 additions & 19 deletions textbook/_toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ parts:
title: "Homework for Python Camp"
- caption: Day 1
chapters:
- file: notebooks/lessons/1_1_choreographing_code
title: Choreographing Code
- file: notebooks/lessons/1_2_from_data_to_code
title: From Data to Code
- file: notebooks/lessons/1_1_modeling_code
title: Modeling Code
- caption: Day 1-2
chapters:
- file: notebooks/lessons/1_2_from_data_to_code
title: From Data to Code
- caption: Day 1 Homework
chapters:
- file: notebooks/homework/HW_1_from_code_to_data
Expand All @@ -24,32 +26,26 @@ parts:
chapters:
- file: notebooks/lessons/2_1_describing_the_team
title: Describing the Team
- file: notebooks/lessons/2_2_querying_data
title: Querying Data
- caption: Day 2 Homework
chapters:
- file: notebooks/homework/HW_2_programming_techniques
title: Programming Techniques
- file: notebooks/homework/HW_2_GR
title: "Graded Homework: Day 2"
- caption: Days 3-4
- caption: Day 3
chapters:
- file: notebooks/lessons/3_1_from_story_to_code
title: From Story to Code
- file: notebooks/lessons/2_2_programming_with_data
title: Programming with Data
- caption: Day 3 Homework
chapters:
- file: notebooks/homework/HW_3_error_stories
title: Error Stories
- file: notebooks/homework/HW_3_GR
title: "Graded Homework: Day 3"
- caption: Day 4 (SUPPLEMENTAL)
- caption: Day 4
chapters:
- file: notebooks/lessons/4_1_modeling_the_world
title: Modeling the World
- caption: Bonus Homework
- file: notebooks/lessons/3_1_from_story_to_code
title: From Story to Code
- caption: Final Homework
chapters:
- file: notebooks/homework/HW_4_GR
title: "Bonus Homework: Day 4"
- file: notebooks/homework/HW_Final_GR
title: "Final Homework (Graded)"
- caption: Python Reference
chapters:
- file: keep_learning
Expand Down
54 changes: 49 additions & 5 deletions textbook/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ Boolean value
if x:
print(2/x)
call
To call a method or function is to instruct Python to execute the function or method's predefined behavior. And a function or method is really just a kind of shortcut: instead of our having to type out the same code whenever we want to perform a certain task (like splitting a string, for instance), we call the function or method in which that behavior has been defined. It's sort of like using the address book on your phone: you can tap `Mom` in order to call or text Mom, instead of having to enter her number every time.
character
A character is the basic unit of a Python {term}`string`. In Python, characters are defined according to the [Unicode standard](https://home.unicode.org/) by default. Unicode characters comprise the following (and more):
Expand Down Expand Up @@ -151,7 +155,15 @@ comment
We use comments to document our code for ourselves and for others.
conditional logic
A general term for the set of operations that allow computers to behave differently based on different inputs or conditions. At the most fundamental level, all other mathematical operations in a computer (addition, multiplication, etc.) are implemented using combinations of {term}`Boolean operator`s implemented in electronic circuits.
In higher-level languages like Python, when we speak of conditional logic, we're often referring to {term}`if statement`s.
CSV
An acronym for "comma-separated values," CSV is a data format compatible with spreadsheet applications like Excel. It is also a format widely used for storing tabular data: data structured in rows and columns. Unlike {term}`JSON`, CSV format does not generally work well with nested data.
curly braces
Expand All @@ -169,10 +181,17 @@ dictionary
print(my_info['job_title']) # Prints "Librarian"
documentation
Human-readable text that explains how a program or a part of a program is intended to work and/or the rationale for a particular choice made by the programmer. Some documentation resides in separate files from the program itself; an example of this kind of documentation is found on the Python docs [website](https://docs.python.org/3/). Other documentation is mixed in with the program code. Such documentation is usually offset by particular characters that tell the computer running this code to ignore the documentation itself. In Python, to include documentation within our code we either prefix a line with a hashtag (`#`) or enclose multiple lines within triple quotes (`'''`).
dot notation
When an {term}`instance` of a Python {term}`type` (or {term}`class`) has {term}`attributes` or {term}`method`s, we use dot notation to access the attributes/methods. For instance, a {term}`list` has an `append()` method, so for a given list `my_list`, we can write `my_list.append(3)` (to add the integer `3` to the end of the list).
exception
An error message in Python arising from a problem in the logic of the code or its inputs. As opposed to a syntax error, which will always cause code to fail, an exception is Python's way of saying, "this code might run, EXCEPT not in this case."
double equals sign
Expand Down Expand Up @@ -265,7 +284,7 @@ indented block
index
The position of an element in a {term}`list` or a {term}`character` in a {term}`string`. We can use the index to extract a single element/character, or we can use a range of indices to extract a {term}`slice` (a subset of the string or list).
The position of an element in a {term}`list`, or of a {term}`character` in a {term}`string`. We can use the index to extract a single element/character, or we can use a range of indices to extract a {term}`slice` (a subset of the string or list).
Indices in Python start at `0` (meaning, the first element or character is considered to have an index of 0, not 1) and increase from left to right.
Expand Down Expand Up @@ -347,6 +366,10 @@ list
It's more common to use a {term}`dictionary` to represent complex data of multiple types, since each element in a dictionary has a {term}`key` that can be more descriptive. For example, if our dictionary has an `age` key, we might guess that this key will correspond to a numeric type, not a string.
list of dictionaries
A common approach to representing data that consist of multiple elements (like books or university courses or people in a class) where each element has more or less the same set of attributes. (For books, those attributes might be `title`, `author`, `publisher`, etc. For people, `name`, `age`, `email address`, etc.) Each element in the list is a dictionary with the same set of keys; only the values are different.
loop variable
The {term}`variable` in a {term}`for loop` that assumes the value of each element in the {term}`collection` over which the loop runs.
Expand Down Expand Up @@ -418,6 +441,12 @@ return
See {term}`function` for more information.
set
A Python data type that is like a {term}`list` but that can contain only unique elements. Use the `set()` function to create a set from a list: `set([1,2,1,3,2,4,3,5])` returns `{1, 2, 3, 4, 5}`. (Note that the curly braces are used to delimit sets as well as dictionaries; unlike dictionaries, sets do not have key/value pairs, only single elements separated by commas.)
Only certain Python types can be used to create sets. You can't, for instance, convert a list of dictionaries into a set. Sets are mostly useful when dealing with numeric values and/or strings.
slice
A slice is a subset of a Python {term}`list` or {term}`string`. To create a slice, we use {term}`square brackets` around a pair of numbers separated by a {term}`colon`, where the first number is the position of the first element we want to include, and the second number is **one plus** the position of the last element to include.
Expand All @@ -434,6 +463,12 @@ slice
`my_list[3:5]` is the same as `my_list[3:]`.
source code
Refers to the code (in Python or any other language) of which a piece of software (application, library, module, or script) consists. The Python community is very committed to [open source](https://en.wikipedia.org/wiki/Open_source) software; this commitment, coupled with the nature of the Python language itself, means that for virtually any Python {term}`library` that you might use, it's possible to inspect the source code to see how it was written. This fact can be very useful, especially for libraries lacking good documentation.
The [source code](https://github.com/python/cpython) for the core of the Python language is also open source. However, a lot of it is implemented in C (or another low-level language, depending on the flavor of Python).
square brackets
Square brackets (`[]`) are used in a few different ways in Python:
Expand All @@ -442,6 +477,11 @@ square brackets
- In creating a {term}`slice` of a string or list: `my_list[0:2]`
- In accessing the value in a dictionary by its {term}`key`: `print(my_dict['name'])` prints the value associated with the key `name` in `my_dict`.
standard library
The set of functions, data types, methods, and other tools that are available with the basic installation of Python.
string
A basic Python {term}`type` comprising a {term}`collection` of {term}`character`s. We can create a string by enclosing any text (really, anything you can type on your keyboard) between quotation marks.
Expand All @@ -450,6 +490,10 @@ string
Strings have {term}`method`s, such as `split()`. See the [Python documentation](https://docs.python.org/3/library/stdtypes.html#text-sequence-type-str) for a complete list.
test
A test in the context of programming is typically some code designed to check the functionality of other code. For instance, when developing an application, programmers will typically write multiple tests that can be used to confirm that various parts of the application work as intended. This approach is particularly helpful when the application goes through multiple development cycles, receiving bug fixes and enhancements. In such situations, having a suite of tests ensures that changes to one part of the application don't break other parts of the application.
type
A Python type is, in essence, a set of behaviors associated with a certain way of representing data. Let's unpack this a bit.
Expand All @@ -468,6 +512,10 @@ type
Just as cooks choose the right appliance for the task at hand, so programmers choose the kind of `type` to use depending on goals and context.
Unicode
A system for representing characters from the world's many languages along with other symbols (mathematical symbols, emojis, etc.). Python handles Unicode by default, which means that you can use Python to work with text in languages other than English. See {term}`character` for more information.
value
The word _value_ is ambiguous when talking about Python. On the one hand, we can talk about any occurrence of data as a _value_, as when we say that in defining a {term}`variable`, the value goes on the right of the equals sign.
Expand All @@ -492,10 +540,6 @@ variable
In creating variables, we assign a name to a value with the single equals sign. The name goes on the left of the equals sign, the value on the right.
Unicode
A system for representing characters from the world's many languages along with other symbols (mathematical symbols, emojis, etc.). Python handles Unicode by default, which means that you can use Python to work with text in languages other than English. See {term}`character` for more information.
white space
The phrase _white space_ refers to characters created by the spacebar, the tab key, and/or the return/enter key on your keyboard.
Expand Down
Loading

0 comments on commit ad7d4ef

Please sign in to comment.