Skip to content

Commit 335c992

Browse files
committed
Added the csv_to_pandas snippet
1 parent 136bc83 commit 335c992

35 files changed

+14135
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"metadata": {
3+
"name": "",
4+
"signature": "sha256:24d12ab64e5e93c82e0acf22e043d94d7006b033acc4160a5a99e702b6873929"
5+
},
6+
"nbformat": 3,
7+
"nbformat_minor": 0,
8+
"worksheets": []
9+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"metadata": {
3+
"name": "",
4+
"signature": "sha256:24d12ab64e5e93c82e0acf22e043d94d7006b033acc4160a5a99e702b6873929"
5+
},
6+
"nbformat": 3,
7+
"nbformat_minor": 0,
8+
"worksheets": []
9+
}

.ipynb_checkpoints/csv_to_python_code-checkpoint.ipynb

+316
Large diffs are not rendered by default.

.ipynb_checkpoints/example_exploring_border_crossing_data-checkpoint.ipynb

+336
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
{
2+
"metadata": {
3+
"name": "",
4+
"signature": "sha256:ffe99b859253d609c4a3adf1e1824a10c76bc4f11d786f310837a964c7660619"
5+
},
6+
"nbformat": 3,
7+
"nbformat_minor": 0,
8+
"worksheets": [
9+
{
10+
"cells": [
11+
{
12+
"cell_type": "markdown",
13+
"metadata": {},
14+
"source": [
15+
"# Data Wrangling DHS Border Crossing Data\n",
16+
"\n",
17+
"This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in an IPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/)."
18+
]
19+
},
20+
{
21+
"cell_type": "code",
22+
"collapsed": false,
23+
"input": [
24+
"import pandas as pd\n",
25+
"import numpy as np\n",
26+
"import os"
27+
],
28+
"language": "python",
29+
"metadata": {},
30+
"outputs": [],
31+
"prompt_number": 10
32+
},
33+
{
34+
"cell_type": "code",
35+
"collapsed": false,
36+
"input": [
37+
"cur_dir = os.path.dirname(os.path.realpath('__file__'))"
38+
],
39+
"language": "python",
40+
"metadata": {},
41+
"outputs": [],
42+
"prompt_number": 11
43+
},
44+
{
45+
"cell_type": "code",
46+
"collapsed": false,
47+
"input": [
48+
"# Create a list of elements counting from 1995 to 2013\n",
49+
"years = list(range(1995, 2014, 1))"
50+
],
51+
"language": "python",
52+
"metadata": {},
53+
"outputs": [],
54+
"prompt_number": 12
55+
},
56+
{
57+
"cell_type": "code",
58+
"collapsed": false,
59+
"input": [
60+
"df = pd.DataFrame()"
61+
],
62+
"language": "python",
63+
"metadata": {},
64+
"outputs": [],
65+
"prompt_number": 13
66+
},
67+
{
68+
"cell_type": "code",
69+
"collapsed": false,
70+
"input": [
71+
"for year in years:\n",
72+
" data = pd.read_csv(cur_dir + '/data/cross_raw_data/bc_' + str(year) + '.csv', header=1, skipfooter=8)\n",
73+
" df = pd.concat([df, data])"
74+
],
75+
"language": "python",
76+
"metadata": {},
77+
"outputs": [],
78+
"prompt_number": 14
79+
},
80+
{
81+
"cell_type": "code",
82+
"collapsed": false,
83+
"input": [
84+
"df = df.ix[:, 0:-1]"
85+
],
86+
"language": "python",
87+
"metadata": {},
88+
"outputs": [],
89+
"prompt_number": 15
90+
},
91+
{
92+
"cell_type": "code",
93+
"collapsed": false,
94+
"input": [
95+
"# Create two lists for the loop results to be placed\n",
96+
"city = []\n",
97+
"state = []\n",
98+
"\n",
99+
"# For each row in a variable,\n",
100+
"for row in df['Port Name']:\n",
101+
" # Try to,\n",
102+
" try:\n",
103+
" # Split the row on ': ' and append\n",
104+
" # the part after the separator to city\n",
105+
" city.append(row.split(': ')[1])\n",
106+
" # Split the row on ': ' and append\n",
107+
" # the part before the separator to state\n",
108+
" state.append(row.split(': ')[0])\n",
109+
" # But if you get an error\n",
110+
" except:\n",
111+
" # append a missing value to city\n",
112+
" city.append(np.NaN)\n",
113+
" # append a missing value to state\n",
114+
" state.append(np.NaN)\n",
115+
"\n",
116+
"# Create two new columns from city and state\n",
117+
"df['City'] = city\n",
118+
"df['State'] = state"
119+
],
120+
"language": "python",
121+
"metadata": {},
122+
"outputs": [],
123+
"prompt_number": 16
124+
},
125+
{
126+
"cell_type": "code",
127+
"collapsed": false,
128+
"input": [
129+
"df = df.reset_index()\n",
130+
"df = df.drop('index', axis=1)\n",
131+
"df = df.rename(columns=lambda x: x.strip())"
132+
],
133+
"language": "python",
134+
"metadata": {},
135+
"outputs": [],
136+
"prompt_number": 17
137+
},
138+
{
139+
"cell_type": "code",
140+
"collapsed": false,
141+
"input": [
142+
"df.to_csv(cur_dir + '/data/cross_raw_data/bc_' + 'full_crossing_data.csv', index=False)"
143+
],
144+
"language": "python",
145+
"metadata": {},
146+
"outputs": [],
147+
"prompt_number": 18
148+
}
149+
],
150+
"metadata": {}
151+
}
152+
]
153+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
{
2+
"metadata": {
3+
"name": "",
4+
"signature": "sha256:4d5e4a899b7a80d17f1cfe0dc49eceabc761f60773a785a4e296e7d291fc585d"
5+
},
6+
"nbformat": 3,
7+
"nbformat_minor": 0,
8+
"worksheets": [
9+
{
10+
"cells": [
11+
{
12+
"cell_type": "markdown",
13+
"metadata": {},
14+
"source": [
15+
"# Iterate An Ifelse Over A List\n",
16+
"\n",
17+
"This snippet was written by [Chris R. Albon](http://www.chrisralbon.com/) and is part of his collection of [well-documented Python snippets](https://github.com/chrisalbon/code_py). All code is written in Python 3 in an IPython notebook and offered under the [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/).\n",
18+
"\n",
19+
"[Source](http://www.jeffknupp.com/blog/2014/05/28/a-nice-little-bit-of-python/)"
20+
]
21+
},
22+
{
23+
"cell_type": "markdown",
24+
"metadata": {},
25+
"source": [
26+
"## Create some data"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"collapsed": false,
32+
"input": [
33+
"word_list = ['Egypt',\n",
34+
" 'Watching',\n",
35+
" 'Eleanor']\n",
36+
"\n",
37+
"vowels = ['A',\n",
38+
" 'E',\n",
39+
" 'I',\n",
40+
" 'O',\n",
41+
" 'U']"
42+
],
43+
"language": "python",
44+
"metadata": {},
45+
"outputs": [
46+
{
47+
"output_type": "stream",
48+
"stream": "stdout",
49+
"text": [
50+
"Is valid\n",
51+
"Invalid\n",
52+
"Is valid\n"
53+
]
54+
}
55+
],
56+
"prompt_number": 11
57+
},
58+
{
59+
"cell_type": "markdown",
60+
"metadata": {},
61+
"source": [
62+
"## Create a for loop"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"collapsed": false,
68+
"input": [
69+
"# for each item in the word_list,\n",
70+
"for word in word_list:\n",
71+
" # if the word starts with any of the vowels,\n",
72+
" if any([word.startswith(e) for e in vowels]):\n",
73+
" # then print is valid,\n",
74+
" print('Is valid')\n",
75+
" # if not, \n",
76+
" else:\n",
77+
" # print invalid\n",
78+
" print('Invalid')"
79+
],
80+
"language": "python",
81+
"metadata": {},
82+
"outputs": [
83+
{
84+
"output_type": "stream",
85+
"stream": "stdout",
86+
"text": [
87+
"Is valid\n",
88+
"Invalid\n",
89+
"Is valid\n"
90+
]
91+
}
92+
],
93+
"prompt_number": 12
94+
}
95+
],
96+
"metadata": {}
97+
}
98+
]
99+
}

0 commit comments

Comments
 (0)