Skip to content

Commit

Permalink
Sampling data
Browse files Browse the repository at this point in the history
  • Loading branch information
dolsysmith committed Jan 15, 2025
1 parent 3deda1b commit 7492ba9
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 6,790 deletions.
57 changes: 54 additions & 3 deletions text-as-data/course_bulletin.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,56 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "ad769062-acf7-4be8-90c2-7577d2e8e573",
"metadata": {},
"outputs": [],
"source": [
"from random import sample"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e2d53e37-7f6c-494b-acf7-609995603067",
"metadata": {},
"outputs": [],
"source": [
"sample_keys = sample(list(cleaned_courses.keys()), k=10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e306ed9c-13ff-4753-930e-6d67f9880140",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['International Business (IBUS)',\n",
" 'Anatomy and Cell Biology (ANAT)',\n",
" 'Management (MGT)',\n",
" 'Sustainability (SUST)',\n",
" 'Health Services Management and Leadership (HSML)',\n",
" 'Counseling (CNSL)',\n",
" 'Sociology (SOC)',\n",
" 'Geology (GEOL)',\n",
" 'Speech, Language, and Hearing Science (SLHS)',\n",
" 'Business Administration (BADM)']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_keys"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b1eab6dc-3e72-49db-bf7c-02252e17f5e6",
"metadata": {},
"outputs": [],
Expand All @@ -174,7 +224,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"id": "363ea68b-5af8-4fd0-9d39-bdfff3162499",
"metadata": {},
"outputs": [],
Expand All @@ -184,8 +234,9 @@
" writer = DictWriter(f, fieldnames)\n",
" writer.writeheader()\n",
" for dept, course_dict in cleaned_courses.items():\n",
" for title, desc in course_dict.items():\n",
" writer.writerow(dict(zip(fieldnames, (dept, title, '|'.join(desc)))))"
" if dept in sample_keys:\n",
" for title, desc in course_dict.items():\n",
" writer.writerow(dict(zip(fieldnames, (dept, title, '|'.join(desc)))))"
]
}
],
Expand Down
Loading

0 comments on commit 7492ba9

Please sign in to comment.