From 7f83d43064f349f75145966191e809d84cf5eb0f Mon Sep 17 00:00:00 2001 From: Michael Crotty <74121022+mcrotty1175@users.noreply.github.com> Date: Fri, 4 Oct 2024 16:06:58 -0400 Subject: [PATCH] Full extraction notebook (course visualization currently broken) --- notebooks/full_extraction/README.md | 107 +++ notebooks/full_extraction/constants.py | 33 + notebooks/full_extraction/requirements.txt | 56 ++ .../full_extraction/rtk_extraction.ipynb | 665 ++++++++++++++++++ notebooks/full_extraction/world.py | 110 +++ 5 files changed, 971 insertions(+) create mode 100644 notebooks/full_extraction/README.md create mode 100644 notebooks/full_extraction/constants.py create mode 100644 notebooks/full_extraction/requirements.txt create mode 100644 notebooks/full_extraction/rtk_extraction.ipynb create mode 100644 notebooks/full_extraction/world.py diff --git a/notebooks/full_extraction/README.md b/notebooks/full_extraction/README.md new file mode 100644 index 0000000..3df5f72 --- /dev/null +++ b/notebooks/full_extraction/README.md @@ -0,0 +1,107 @@ +# RTK Full Extraction Pipeline + +## Installation +0. (Optional) Copy this folder to somewhere you'll remember it +1. Ensure python is installed (highly recommend running from within VSCode) +2. Create a virtual environment: +```bash +python3 -m venv rtk # Creates a virtual environment called "rtk" +source rtk/bin/activate # Enters the virtual environment +pip install -r requirements.txt # Installs the required packages +``` +3. Install RTKLIB (https://github.com/tomojitakasu/RTKLIB) +- MacOS users will have to modify some Makefiles and build from source +- Linux users will have to build from source +- Windows users can install a prebuilt version (google RTKLIB) + + +## How to use +1. Download the Hammerschlag Hall Basestaion Data +2. Open this folder with VSCode +3. 
Open `rtk_extraction.ipynb` and edit the fields in the first code section:
+ - BUGGIES: List of rovers that you want to do data analysis on
+ - DATE: Date you want to do data analysis for (format: YYYY-MM-DD)
+ - CLEANUP: If true, deletes all the temp files created during analysis
+ - DEBUG: Adds extra print statements if getting unexpected outputs
+4. Run the first 3 code sections. They will create a bunch of directories to
+   ensure data can be batch processed easily.
+5. Move rover ubx files (firmware uses extension .BIN) to the `raw_ubx` folder
+   corresponding to the date in the corresponding rover directories.
+6. If zipped, extract the basestation data and move it into the basestation_data
+   directory corresponding to the date.
+7. Attempt to run all cells in the notebook. If unsuccessful, ask for help.
+
+## Known issues
+- The map-tile API for the buggy course is broken, so plotted courses are
+  displayed on a white background
+
+### (MacOS users) Installing from source
+1. Edit the below files as per the diff
+2. 
Run `make && make install` from `(RTKLIB)/app/consapp` + +```diff +diff --git a/app/consapp/convbin/gcc/makefile b/app/consapp/convbin/gcc/makefile +index 1df0e93..6532465 100644 +--- a/app/consapp/convbin/gcc/makefile ++++ b/app/consapp/convbin/gcc/makefile +@@ -11,7 +11,7 @@ OPTIONS= -DTRACE -DENAGLO -DENAQZS -DENAGAL -DENACMP -DENAIRN -DNFREQ=3 -DNEXOBS + + CFLAGS = -O3 -ansi -pedantic -Wall -Wno-unused-but-set-variable $(INCLUDE) $(OPTIONS) -g + +-LDLIBS = -lm -lrt ++LDLIBS = -lm + + all : convbin + +diff --git a/app/consapp/pos2kml/gcc/makefile b/app/consapp/pos2kml/gcc/makefile +index 032f309..3874383 100644 +--- a/app/consapp/pos2kml/gcc/makefile ++++ b/app/consapp/pos2kml/gcc/makefile +@@ -3,7 +3,7 @@ + BINDIR = /usr/local/bin + SRC = ../../../../src + CFLAGS = -Wall -O3 -ansi -pedantic -I$(SRC) -DTRACE +-LDLIBS = -lm -lrt ++LDLIBS = -lm + + pos2kml : pos2kml.o convkml.o convgpx.o solution.o geoid.o rtkcmn.o preceph.o + +diff --git a/app/consapp/rnx2rtkp/gcc/makefile b/app/consapp/rnx2rtkp/gcc/makefile +index 0e33e2d..e76cf05 100644 +--- a/app/consapp/rnx2rtkp/gcc/makefile ++++ b/app/consapp/rnx2rtkp/gcc/makefile +@@ -8,7 +8,7 @@ OPTS = -DTRACE -DENAGLO -DENAQZS -DENAGAL -DENACMP -DENAIRN -DNFREQ=3 -DNEXOB + + # for no lapack + CFLAGS = -Wall -O3 -ansi -pedantic -Wno-unused-but-set-variable -I$(SRC) $(OPTS) +-LDLIBS = -lgfortran -lm -lrt ++LDLIBS = -lm + + #CFLAGS = -Wall -O3 -ansi -pedantic -Wno-unused-but-set-variable -I$(SRC) -DLAPACK $(OPTS) + #LDLIBS = -lm -lrt -llapack -lblas +diff --git a/app/consapp/rtkrcv/gcc/makefile b/app/consapp/rtkrcv/gcc/makefile +index 89ae61c..b6d7830 100644 +--- a/app/consapp/rtkrcv/gcc/makefile ++++ b/app/consapp/rtkrcv/gcc/makefile +@@ -7,7 +7,7 @@ CTARGET= -DTRACE -DENAGLO -DENAQZS -DENACMP -DENAGAL -DENAIRN -DNFREQ=3 -DNEXOBS + #CTARGET= -DENAGLO -DENAQZS -DENACMP -DENAGAL -DENAIRN -DNFREQ=3 -DIERS_MODEL -DSVR_REUSEADDR + + CFLAGS = -Wall -O3 -ansi -pedantic -Wno-unused-but-set-variable -I$(SRC) -I.. 
# Column names for rows parsed verbatim from RTKLIB .pos output (Pipe 2).
PARSE_CSV_HEADER = [
    "timestamp",  # TODO change this to "timestamp_str"
    "latitude",
    "longitude",
    "altitude",
    "fix-type",
    "satellites",
    "sdn",
    "sde",
    "sdu",
    "sdne",
    "sdeu",
    "sdun",
    "age",
    "ratio",
]

# Column names for the derived-kinematics CSV written by the analysis pipe
# (Pipe 3); several columns are computed, not copied, from the parsed data.
CALC_CSV_HEADER = [
    "timestamp",      # UTC time (float)
    "timestamp_str",
    "fix_type",
    "latitude",
    "latitude_processed",
    "longitude",
    "longitude_processed",
    "altitude",       # meters
    "velocity",       # 2d vector
    "speed",          # scalar (abs val of vector)
    "acceleration_vector",
    "acceleration_magnitude",
    "horizontal_accuracy",
    "vertical_accuracy",
    "speed_accuracy",
]
### Constants
### This should be the only part of the file that needs editing

# Rovers to run the analysis over; each name becomes a data directory.
BUGGIES = ["rover"]

# Date of the rolls to analyze, formatted YYYY-MM-DD.
DATE = "YYYY-MM-DD"

# When True, temp files created during analysis are deleted at the end.
CLEANUP = False
# When True, extra diagnostic prints are emitted throughout the pipeline.
DEBUG = False
# TODO: Figure out what these args do and utilize them if helpful
rnx2rtkpArgs = ["-m 5"]  # , "-i -d 9 -v 2", "-d 9 -v 2"]

BASESTATION_NAME = "basestation"
ROOT_DATA_DIR = "rolls_data"
BASESTATION_DIR = os.path.join(ROOT_DATA_DIR, f"{BASESTATION_NAME}_data")

# Per-roll subdirectory names; the dot-prefixed ones hold intermediates
# that the cleanup pipe may delete.
RAW_DATA_DIR = "raw_ubx"
DECODE_DIR = ".decoded_ubx"
CSV_DIR = ".csv"
PROCESSED_CSV_DIR = "calc_csv"
GRAPHS_DIR = "graphs"

# os.makedirs(..., exist_ok=True) creates any missing parents and is a no-op
# for directories that already exist.  The previous mkdir-if-missing pattern
# broke on partially created trees: if ROOT_DATA_DIR existed but
# BASESTATION_DIR did not, BASESTATION_DIR was never created and the next
# mkdir raised FileNotFoundError.
os.makedirs(BASESTATION_DIR, exist_ok=True)

BASESTATION_DATE_DIR = os.path.join(BASESTATION_DIR, DATE)
os.makedirs(BASESTATION_DATE_DIR, exist_ok=True)

# One directory tree per rover per date, with one subdirectory per
# pipeline stage, so data can be batch processed easily.
buggy_dirs = []
for buggy in BUGGIES:
    roll_dir = os.path.join(ROOT_DATA_DIR, buggy.lower(), DATE)

    for stage in (RAW_DATA_DIR, DECODE_DIR, CSV_DIR, PROCESSED_CSV_DIR, GRAPHS_DIR):
        os.makedirs(os.path.join(roll_dir, stage), exist_ok=True)

    buggy_dirs.append(roll_dir)
{}, + "source": [ + "## Pipe 1: Postprocessing\n", + "\n", + "`.ubx` --> `.pos`, `.pos.stat`, and `_events.pos` files.\n", + "\n", + "This notebook contains scripts for obtaining RTK solutions through various configurations and validating/assessing the quality of those solutions.\n", + "\n", + "`convbin`:\n", + "Input files are `base.ubx` and `rover.ubx`.\n", + "Generates `.obs`, `.nav`, and `.sbs` files.\n", + "\n", + "`rnx2rtkp`:\n", + "Input files are `rover.obs`, `base.obs`, and `rover.nav`.\n", + "Generates `.pos`, `.pos.stat`, and `_events.pos` files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basestation_files = [file for file in os.listdir(BASESTATION_DATE_DIR) if file.endswith(\".ubx\")]\n", + "if not basestation_files:\n", + " # Remember to add and/or unzip the basestation files!\n", + " raise FileNotFoundError\n", + " \n", + "for i, base_file in enumerate(basestation_files):\n", + " date, time, ext = base_file.split('_')\n", + " \n", + " if date != DATE:\n", + " print(\"Found file with wrong date:\", base_file)\n", + " continue\n", + "\n", + " if DEBUG: print(base_file)\n", + " \n", + " base_full_path = os.path.join(BASESTATION_DATE_DIR, base_file)\n", + " base_obs = os.path.join(BASESTATION_DATE_DIR, f\"{BASESTATION_NAME}-{time}.obs\")\n", + " base_nav = os.path.join(BASESTATION_DATE_DIR, f\"{BASESTATION_NAME}-{time}.nav\")\n", + " base_sbs = os.path.join(BASESTATION_DATE_DIR, f\"{BASESTATION_NAME}-{time}.sbs\")\n", + "\n", + " if os.path.exists(base_obs):\n", + " print(\"obs file\", base_obs, \"already exists. 
# Decode each rover .ubx with convbin, collecting (rover_dir, roll_no)
# pairs for every roll that produced usable .obs/.nav files.
valid_rolls = []

for rover_dir in buggy_dirs:
    intake_dir = os.path.join(rover_dir, RAW_DATA_DIR)
    decode_dir = os.path.join(rover_dir, DECODE_DIR)
    rover_ubx = sorted(f for f in os.listdir(intake_dir) if f.endswith(".ubx"))

    for file in rover_ubx:
        if DEBUG: print(rover_dir, file)
        # Roll number = the digits of the file name, zero-padded to 4 places.
        # str.zfill is used because the original f"{roll_no:04}" raises
        # ValueError when roll_no is a str (zero-padding implies '='
        # alignment, which string format specs forbid).
        roll_no = ''.join(c for c in os.path.basename(file) if c.isdigit()).zfill(4)
        obs = os.path.join(decode_dir, f"{roll_no}.obs")
        nav = os.path.join(decode_dir, f"{roll_no}.nav")
        sbs = os.path.join(decode_dir, f"{roll_no}.sbs")

        if os.path.exists(obs):
            print("obs file", obs, "already exists. Skipping")
            # Keep the tuple shape identical to the success path below.
            # (The original appended (rover_dir, obs, nav) here, which broke
            # downstream code that indexes roll[1] as the roll number.)
            valid_rolls.append((rover_dir, roll_no))
            continue

        raw_ubx_path = os.path.join(intake_dir, file)
        subprocess.run(["convbin", raw_ubx_path, "-o", obs, "-n", nav, "-s", sbs])

        # Sanity check files are good
        if not os.path.exists(obs):
            print(f"Files not created from {raw_ubx_path}")
            continue

        obs_size = os.path.getsize(obs)
        nav_size = os.path.getsize(nav)
        if obs_size < 5000 or nav_size < 1000:
            print("Obs Size:", obs_size)
            print("Nav Size:", nav_size)
            raise Exception(f"convbin borked. Originating from {raw_ubx_path}")

        valid_rolls.append((rover_dir, roll_no))
if DEBUG: print(valid_rolls)
+ "\n", + " rows = []\n", + " for line in lines:\n", + " row = {}\n", + " for i in range(len(line)):\n", + " row[PARSE_CSV_HEADER[i]] = line[i]\n", + " rows.append(row)\n", + " \n", + " with open(outputPath, 'w') as file:\n", + " writer = csv.DictWriter(file, fieldnames=PARSE_CSV_HEADER)\n", + " writer.writeheader()\n", + " writer.writerows(rows)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if DEBUG: print(good_pos_files)\n", + "\n", + "for dir, file in good_pos_files:\n", + " input_path = os.path.join(dir, DECODE_DIR, file)\n", + "\n", + " prefix, _ = os.path.splitext(file)\n", + " buggy = os.path.basename(os.path.dirname(dir))\n", + " new_name = '_'.join([buggy, DATE, prefix]) + \".csv\"\n", + " output_path = os.path.join(dir, CSV_DIR, new_name)\n", + " \n", + " posToCsv(input_path, output_path)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pipe 3: Analysis\n", + "\n", + "This notebook contains scripts for calculating kinematic data.\n", + "\n", + "Input files must be `.csv` files generated by `rtklib-pos-to-csv.ipynb`.\n", + "Its headers are defined in the code below.\n", + "\n", + "Outputs another `.csv` file _with the **different** headers_.\n", + "Those new headers are also defined in the code below." 
def getMagnitude(vector):
    """Return the Euclidean length of a 2-component vector."""
    return math.sqrt(vector[0] ** 2 + vector[1] ** 2)

# Conversion factor from m/s to mph
FREEDOM_MULTIPLIER = 2.236936
# Coordinate-conversion helper adapted from RoboBuggy2's world.py:
# https://github.com/CMU-Robotics-Club/RoboBuggy2/blob/sw/20230331/rb_ws/src/buggy/scripts/auton/world.py
#
# Raw lat/lon is awkward for path planning: nearby points differ only far
# down the decimals (roundoff-prone), and the North-East-Down convention
# puts headings clockwise.  Converting to UTM yields metric East-North-Up
# coordinates ("easting"/"northing" inside a grid cell); offsetting by an
# arbitrary zero point keeps the working numbers near zero for debugging.

def gps_to_world(lat, lon):
    """Converts GPS coordinates to world coordinates

    Args:
        lat (float): latitude
        lon (float): longitude

    Returns:
        tuple: (x, y) in meters from some arbitrary zero point
    """
    easting, northing = utm.from_latlon(lat, lon)[:2]
    x = easting - World.WORLD_EAST_ZERO
    y = northing - World.WORLD_NORTH_ZERO

    return x, y
positions = interpolator(input_df.loc[:, \"timestamp\"].map(timestampToValue), nu=0)\n", + " velocities = interpolator(input_df.loc[:, \"timestamp\"].map(timestampToValue), nu=1)\n", + " accelerations = interpolator(input_df.loc[:, \"timestamp\"].map(timestampToValue), nu=2)\n", + "\n", + " output_df = pd.DataFrame(columns=CALC_CSV_HEADER)\n", + "\n", + " output_df[\"timestamp\"] = input_df.loc[:, \"timestamp\"].map(timestampToValue)\n", + " output_df[\"timestamp_str\"] = input_df[\"timestamp\"].map(timestampToLocal)\n", + "\n", + " output_df[\"fix_type\"] = input_df[\"fix-type\"]\n", + "\n", + " output_df[\"latitude\"] = input_df[\"latitude\"]\n", + " output_df[\"longitude\"] = input_df[\"longitude\"]\n", + " output_df[\"altitude\"] = input_df[\"altitude\"]\n", + "\n", + " output_df[\"velocity\"] = velocities.tolist()\n", + " output_df[\"speed\"] = [getMagnitude(vector) * FREEDOM_MULTIPLIER for vector in velocities]\n", + "\n", + " output_df[\"acceleration_vector\"] = accelerations.tolist()\n", + " output_df[\"acceleration_magnitude\"] = [getMagnitude(vector) for vector in accelerations]\n", + "\n", + " output_df = output_df[output_df[\"speed\"] < 44] # Filter out anything faster than 50mph\n", + "\n", + " output_df.to_csv(outputPath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for rover_dir in buggy_dirs:\n", + " raw_csv_dir = os.path.join(rover_dir, CSV_DIR)\n", + " clean_csv_files = sorted([f for f in os.listdir(raw_csv_dir) if f.endswith(\".csv\")])\n", + " if DEBUG: print(clean_csv_files)\n", + "\n", + " for name in clean_csv_files:\n", + " input_path = os.path.join(rover_dir, CSV_DIR, name)\n", + " output_path = os.path.join(rover_dir, PROCESSED_CSV_DIR, name)\n", + " calc(input_path, output_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pipe 4: Visualization\n", + "\n", + "Notebook contains scripts for visualizing kinematic data.\n", + "\n", + 
def _plot_single_series(df, column, outputFilepath):
    # Shared helper: wide single-series line plot, saved straight to disk.
    plt.figure().set_figwidth(15)
    plt.plot(df["timestamp"], df[column])
    plt.savefig(outputFilepath)

def plot_speed_graph(df, outputFilepath):
    """Save a wide line plot of speed over time."""
    _plot_single_series(df, "speed", outputFilepath)

def plot_accel_graph(df, outputFilepath):
    """Save a wide line plot of acceleration magnitude over time."""
    _plot_single_series(df, "acceleration_magnitude", outputFilepath)

def plot_speed_accel_graph(df, outputFilepath):
    """Save stacked plots: speed on top, acceleration magnitude below."""
    fig, (speed_ax, accel_ax) = plt.subplots(2)
    fig.set_figwidth(15)
    speed_ax.plot(df["timestamp"], df["speed"])
    speed_ax.set_ylim([0, 20])
    accel_ax.plot(df["timestamp"], df["acceleration_magnitude"])
    accel_ax.set_ylim([-1, 50])
    plt.savefig(outputFilepath)
class World:
    """Abstraction for the world coordinate system.

    Raw GPS (lat/lon) is a poor fit for path planning: nearby points differ
    only far down the decimals (inviting roundoff error), and the
    North-East-Down convention puts headings clockwise.  We want an
    East-North-Up frame with counter-clockwise headings, so coordinates are
    converted to UTM — which divides the world into grid cells and expresses
    positions as metric "easting" (x) and "northing" (y) from the cell's
    southwest corner — and then offset by an arbitrary zero point so the
    final world coordinates stay close to zero, which makes debugging easier.

    Conversion methods are provided both for single coordinates and for
    numpy arrays of coordinates.
    """

    # Geolocates to around the southwest corner of Phipps
    WORLD_EAST_ZERO = 589106
    WORLD_NORTH_ZERO = 4476929

    @staticmethod
    def gps_to_world(lat, lon):
        """Converts GPS coordinates to world coordinates

        Args:
            lat (float): latitude
            lon (float): longitude

        Returns:
            tuple: (x, y) in meters from some arbitrary zero point
        """
        easting, northing = utm.from_latlon(lat, lon)[:2]
        x = easting - World.WORLD_EAST_ZERO
        y = northing - World.WORLD_NORTH_ZERO

        return x, y

    @staticmethod
    def world_to_gps(x, y):
        """Converts world coordinates to GPS coordinates

        Args:
            x (float): x in meters from some arbitrary zero point
            y (float): y in meters from some arbitrary zero point

        Returns:
            tuple: (lat, lon)
        """
        # Pittsburgh is in UTM zone 17T.
        lat, lon = utm.to_latlon(
            x + World.WORLD_EAST_ZERO, y + World.WORLD_NORTH_ZERO, 17, "T"
        )

        return lat, lon

    @staticmethod
    def gps_to_world_numpy(coords):
        """Converts GPS coordinates to world coordinates

        Args:
            coords (numpy.ndarray [size: (N,2)]): array of lat, lon pairs

        Returns:
            numpy.ndarray [size: (N,2)]: array of x, y pairs
        """
        easting, northing = utm.from_latlon(coords[:, 0], coords[:, 1])[:2]

        return np.stack(
            (easting - World.WORLD_EAST_ZERO, northing - World.WORLD_NORTH_ZERO),
            axis=1,
        )

    @staticmethod
    def world_to_gps_numpy(coords):
        """Converts world coordinates to GPS coordinates

        Args:
            coords (numpy.ndarray [size: (N,2)]): array of x, y pairs

        Returns:
            numpy.ndarray [size: (N,2)]: array of lat, lon pairs
        """
        # Pittsburgh is in UTM zone 17T.
        lat, lon = utm.to_latlon(
            coords[:, 0] + World.WORLD_EAST_ZERO,
            coords[:, 1] + World.WORLD_NORTH_ZERO,
            17,
            "T",
        )

        return np.stack((lat, lon), axis=1)