From f2ae3ff1f66836401f799459fdc7715fda5ecfaf Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Tue, 24 Sep 2024 19:42:49 +0000 Subject: [PATCH 1/8] Switch to poetry for requirements --- pyproject.toml | 13 +++++++++++++ requirements.txt | 8 -------- 2 files changed, 13 insertions(+), 8 deletions(-) create mode 100644 pyproject.toml delete mode 100644 requirements.txt diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..d604fb28 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,13 @@ +[tool.poetry] +package-mode = false + +[tool.poetry.dependencies] +python = "^3.9" +beautifulsoup4 = "4.12.3" +everypolitician = "0.0.13" +lxml = "5.2.1" +python-dateutil = "2.2" +requests = { version = "2.32.3", extras = ["security"] } +requests-cache = "0.4.13" +Click = "7.0" +click-log = "0.3.2" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 373b954c..00000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -beautifulsoup4==4.12.3 -everypolitician==0.0.13 -lxml==5.2.1 -python-dateutil==2.2 -requests[security]==2.21.0 -requests-cache==0.4.13 -Click==7.0 -click-log==0.3.2 From 11f67344d545ad7a3d74472759807fcaf2a1f1db Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Tue, 24 Sep 2024 19:43:01 +0000 Subject: [PATCH 2/8] Add dockerfile --- .devcontainer/devcontainer.json | 22 +++++++++++++++++++ .vscode/settings.json | 38 +++++++++++++++++++++++++++++++++ Dockerfile | 15 +++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 .devcontainer/devcontainer.json create mode 100644 .vscode/settings.json create mode 100644 Dockerfile diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..9fa553c8 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,22 @@ +{ + "name": "parlparse", + "build": + { + "dockerfile": "../Dockerfile" + }, + "workspaceFolder": "/workspaces/parlparse", + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode.test-adapter-converter", + "ms-azuretools.vscode-docker", + "bmewburn.vscode-intelephense-client", + "bungcip.better-toml", + "ms-python.python", + "ms-python.vscode-pylance", + "charliermarsh.ruff", + "mhutchie.git-graph" + ] + } + } +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..6b061139 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,38 @@ +{ + "[python]": { + "editor.formatOnSave": true + }, + "python.defaultInterpreterPath": "/usr/bin/python", + "python.terminal.activateEnvironment": false, + "python.analysis.typeCheckingMode": "basic", + "editor.formatOnSave": true, + "files.exclude": { + "**/.git": true, + "**/.svn": true, + "**/.hg": true, + "**/CVS": true, + "**/.DS_Store": true, + "**/*.pyc": { + "when": "$(basename).py" + }, + "**/__pycache__": true + }, + "files.associations": { + "**/*.html": "html", + "**/templates/**/*.html": "django-html", + "**/templates/**/*": "django-txt", + "**/requirements{/**,*}.{txt,in}": "pip-requirements" + }, + "[markdown]": { + "editor.quickSuggestions": { + "comments": "on", + "strings": "on", + "other": "on" + } + }, + "python.testing.pytestArgs": [ + "tests/" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..684ee5b9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM mysocietyorg/debian:bullseye +RUN apt-get update && \ + apt-get install python3-distutils python3-pip libxml2-dev libxslt-dev 
python-dev libffi-dev -y && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \ + update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 && \ + pip install --upgrade pip + +RUN curl -sSL https://install.python-poetry.org | /usr/bin/python3 - +ENV PATH="/root/.local/bin:$PATH" + +ENV PYTHONPATH=$PYTHONPATH:/usr/lib/python3.9/site-packages +ENV POETRY_VIRTUALENVS_CREATE=false + +COPY pyproject.toml poetry.loc[k] /tmp/pyproject/ +RUN cd /tmp/pyproject && poetry install \ No newline at end of file From de1ba3c44b3c063d840d103d78b800c8c7582064 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Tue, 24 Sep 2024 20:00:08 +0000 Subject: [PATCH 3/8] Add ruff linting options --- pyproject.toml | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ script/lint | 10 +++++++++ 2 files changed, 69 insertions(+) create mode 100755 script/lint diff --git a/pyproject.toml b/pyproject.toml index d604fb28..a7b7661f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,3 +11,62 @@ requests = { version = "2.32.3", extras = ["security"] } requests-cache = "0.4.13" Click = "7.0" click-log = "0.3.2" + +[tool.poetry.group.dev.dependencies] +ruff = "^0.6.7" + +[tool.ruff] + +extend-include = ["scripts/*"] +extend-exclude = [ + 'scripts/.gitignore', + 'scripts/2016_data_update/README.txt', + 'scripts/config.pm.incvs', + 'scripts/consts', + 'scripts/crontab', + 'scripts/dailyupdate', + 'scripts/datadotparl/mp-party-check', + 'scripts/datadotparl/one-off-add-pims-ids', + 'scripts/datadotparl/one-off-sync-lord-parties', + 'scripts/dircmp', + 'scripts/divisionextractor.pl', + 'scripts/morningupdate', + 'scripts/ni-format-revert', + 'scripts/ni_membership.php', + 'scripts/one-off-move-names-to-persons', + 'scripts/other-sites-update', + 'scripts/updatedaterange-parse', + 'scripts/updatedaterange-scrape', + 'scripts/weeklyupdate', + 'scripts/ynmp/tests.txt' +] + + + +[tool.ruff.lint] +select = [ + "E", + # flake8 + "F", + # isort + "I", +] +ignore = [ + # line too long, sorted with formatter where it can be + "E501", +] + + +[tool.ruff.lint.isort] +known-first-party = ["hub"] +section-order = [ + "future", + "standard-library", + "django", + "third-party", + "first-party", + "local-folder" +] + +[tool.ruff.lint.isort.sections] +django = ["django"] \ No newline at end of file diff --git a/script/lint b/script/lint new file mode 100755 index 00000000..28d42b1c --- /dev/null +++ b/script/lint @@ -0,0 +1,10 @@ +#!/bin/bash + +poetry run ruff format . + +# This ignores a number of linting checks that are *problems* and so +# we want to be able to see in editor (and can't put in pyproject.toml) +# but we don't want to have to fix everything (given it's working fineish) +# to see new issues +# this is the 'using is' for equality, top module imports broken by chdir, don't use lambdas, etc +poetry run ruff check . 
--fix --config 'lint.ignore = ["E501", "E402", "E731", "E722", "F841", "E711", "E712"]' \ No newline at end of file From 395019f26059336809359bc8478e9345a2876d6c Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Tue, 24 Sep 2024 20:00:24 +0000 Subject: [PATCH 4/8] Automatic formatting --- filtersentence_xml.py | 177 ++- london-mayors-questions/questions.py | 815 +++++----- members/parl-old-check-party.py | 253 +++- members/wikipedia-commons.py | 60 +- members/wikipedia-lords.py | 32 +- members/wikipedia-standingdown.py | 21 +- pyscraper/base_resolver.py | 281 ++-- pyscraper/contextexception.py | 4 +- pyscraper/get_links_from_ep.py | 26 +- pyscraper/gettwittermps.py | 37 +- pyscraper/gidmatching.py | 686 +++++---- pyscraper/lazyrunall.py | 145 +- pyscraper/lords/resolvenames.py | 180 ++- pyscraper/miscfuncs.py | 460 +++--- pyscraper/new_hansard.py | 1332 +++++++++-------- pyscraper/ni/parse.py | 179 ++- pyscraper/ni/resolvenames.py | 137 +- pyscraper/ni/scrape.py | 119 +- pyscraper/ni/wikipedia-mla.py | 44 +- pyscraper/parlphrases.py | 408 +++-- pyscraper/patchtool.py | 252 ++-- pyscraper/process_hansard.py | 71 +- pyscraper/pullgluepages.py | 50 +- pyscraper/regmem/filter.py | 133 +- pyscraper/regmem/pullgluepages.py | 199 ++- pyscraper/resolvemembernames.py | 374 +++-- pyscraper/runfilters.py | 99 +- pyscraper/sp/common.py | 48 +- pyscraper/sp/fastest-msps.py | 184 ++- pyscraper/sp/get-official-reports-new.py | 111 +- pyscraper/sp/parse-official-reports-new.py | 659 +++++--- pyscraper/sp/resolvenames.py | 142 +- pyscraper/sp/wikipedia-msp.py | 25 +- pyscraper/sp_2024/__main__.py | 10 +- pyscraper/sp_2024/common.py | 8 +- pyscraper/sp_2024/convert.py | 8 +- pyscraper/sp_2024/download.py | 2 +- pyscraper/sp_2024/parse.py | 6 +- pyscraper/test.py | 26 +- pyscraper/unpack_hansard_zips.py | 76 +- pyscraper/wa/parse.py | 319 ++-- pyscraper/wa/resolvenames.py | 30 +- pyscraper/wa/scrape.py | 66 +- pyscraper/xmlfilewrite.py | 17 +- scripts/2016_data_update/dadem_import_ni.py | 181 ++- scripts/2016_data_update/dadem_import_sp.py | 187 ++- scripts/2021-lam-update | 126 +- scripts/2021-msp-update | 139 +- scripts/add-new-lords | 209 +-- scripts/add-new-mlas | 96 +- scripts/datadotparl/crawl-members | 47 +- scripts/datadotparl/json-add-new-parl-ids | 94 +- scripts/datadotparl/update-members | 400 ++--- scripts/fetch-mp-eu-ref-positions | 33 +- scripts/fetch-pw-json | 21 +- scripts/fetch_london_assembly.py | 378 ++--- scripts/fetch_scottish_ministers.py | 56 +- .../fetch_wikidata_from_everypolitician.py | 111 +- scripts/json-add-membership | 136 +- scripts/json-body-end | 14 +- scripts/json-change-party | 85 +- scripts/json-edit-person | 34 +- scripts/json-end-membership | 25 +- scripts/json-merge-people | 43 +- scripts/json-new-ids | 1 + scripts/json-nia-2017-new | 94 +- scripts/json-nia-2022-new | 86 +- scripts/popolo/__init__.py | 196 ++- scripts/popolo/menu.py | 26 +- scripts/popolo/utils.py | 24 +- scripts/quickupdate | 84 +- scripts/welsh-parliament/dual-posts.py | 74 +- scripts/welsh-parliament/memberships.py | 138 +- scripts/welsh-parliament/official-ids.py | 73 +- scripts/welsh-parliament/organizations.py | 102 +- scripts/welsh-parliament/persons.py | 108 +- scripts/welsh-parliament/posts.py | 108 +- scripts/ynmp/update.py | 274 ++-- wrans-2014/parse.py | 183 ++- 79 files changed, 7013 insertions(+), 5284 deletions(-) diff --git a/filtersentence_xml.py b/filtersentence_xml.py index 2e64fbb7..fe5723c5 100644 --- a/filtersentence_xml.py +++ b/filtersentence_xml.py @@ -1,13 +1,10 @@ -from 
datetime import datetime import re - -from lxml import etree +from datetime import datetime from contextexception import ContextException from parlphrases import parlPhrases from resolvemembernames import memberList - # this code fits onto the paragraphs before the fixhtmlentities and # performs difficult regular expression matching that can be # used for embedded links. @@ -33,22 +30,26 @@ reqnum = re.compile("\s*\[(\d+)\]\s*$") refqnum = re.compile("\s*\[(\d+)\]\s*") -redatephraseval = re.compile('(?:(?:%s),? )?(\d+(?: | )*(?:%s)( \d+)?)' % (parlPhrases.daysofweek, parlPhrases.monthsofyear)) +redatephraseval = re.compile( + "(?:(?:%s),? )?(\d+(?: | )*(?:%s)( \d+)?)" + % (parlPhrases.daysofweek, parlPhrases.monthsofyear) +) def TokenDate(ldate, phrtok): sdate_year = phrtok.sdate[0:4] - tdate = ldate.group(0).replace(' ', ' ') + tdate = ldate.group(0).replace(" ", " ") if not ldate.group(2): tdate += " %s" % sdate_year try: - lldate = datetime.strptime(tdate, '%A, %d %B %Y') + lldate = datetime.strptime(tdate, "%A, %d %B %Y") phrtok.lastdate = lldate.date().isoformat() except: - phrtok.lastdate = '' - return ('phrase', ' class="date" code="%s"' % phrtok.lastdate) + phrtok.lastdate = "" + return ("phrase", ' class="date" code="%s"' % phrtok.lastdate) -restandingo = re.compile('''(?x) + +restandingo = re.compile("""(?x) (?:)? Standing\sOrder\sNo\.\s* ( @@ -60,7 +61,7 @@ def TokenDate(ldate, phrtok): \(([^()]*(?:\([^()]*\))?)\) # inclusion of title for clarity )? (?:)? -''') +""") restandingomarg = re.compile("Standing Order No") @@ -68,23 +69,26 @@ def TokenDate(ldate, phrtok): def TokenStandingOrder(mstandingo, phrtok): if mstandingo.group(2): return ( - 'phrase', ' class="standing-order" code="%s" title="%s"' % - (mstandingo.group(1), re.sub('<[^>]*>', '', mstandingo.group(2))) + "phrase", + ' class="standing-order" code="%s" title="%s"' + % (mstandingo.group(1), re.sub("<[^>]*>", "", mstandingo.group(2))), ) - return ( - 'phrase', ' class="standing-order" code="%s"' % mstandingo.group(1) - ) + return ("phrase", ' class="standing-order" code="%s"' % mstandingo.group(1)) + + +rehtlink = re.compile("(?\s*official(?:||\s)*report # Official Report (?:||[,;\s])* (Commons|House\sof\sCommons|House\sof\sLords)? # Optional house (1) @@ -96,7 +100,7 @@ def TokenHrefLink(mhttp, phrtok): (?:(W[AS]?)\s*)? # Optional column number prefix (2) (\d+(?:(?:&\#150;|-)\d+)?) 
# Column number or numbers (3) ([WHSA]*) # Optional column suffix (4) -''') +""") def TokenOffRep(qoffrep, phrtok): @@ -108,30 +112,31 @@ def TokenOffRep(qoffrep, phrtok): if qcolsuffix: qcolsuffix = qcolsuffix.upper() # print '*', qoffrep.group(0), loc1, qcolprefix, qcolsuffix, qoffrep.group(3) - qcpart = re.match('(\d+)(?:(?:–|-)(\d+))?(?i)$', qoffrep.group(3)) + qcpart = re.match("(\d+)(?:(?:–|-)(\d+))?(?i)$", qoffrep.group(3)) qcolnum = qcpart.group(1) if qcpart.group(2): - qcpartlead = qcpart.group(1)[len(qcpart.group(1)) - len(qcpart.group(2)):] + qcpartlead = qcpart.group(1)[len(qcpart.group(1)) - len(qcpart.group(2)) :] if int(qcpartlead) >= int(qcpart.group(2)): - print(' non-following column leadoff ', qoffrep.group(0)) + print(" non-following column leadoff ", qoffrep.group(0)) # raise Exception, ' non-following column leadoff ' - if qcolsuffix == 'WH': - sect = 'westminhall' - elif qcolprefix == 'WS' or qcolsuffix == 'WS': - sect = 'wms' - elif qcolprefix == 'WA' or qcolsuffix == 'W' or qcolsuffix == 'WA': - sect = 'wrans' - elif loc1 == 'House of Lords': - sect = 'lords' + if qcolsuffix == "WH": + sect = "westminhall" + elif qcolprefix == "WS" or qcolsuffix == "WS": + sect = "wms" + elif qcolprefix == "WA" or qcolsuffix == "W" or qcolsuffix == "WA": + sect = "wrans" + elif loc1 == "House of Lords": + sect = "lords" else: - sect = 'debates' + sect = "debates" + + offrepid = "%s/%s.%s" % (sect, phrtok.lastdate, qcolnum) + return ("phrase", ' class="offrep" id="%s"' % offrepid) - offrepid = '%s/%s.%s' % (sect, phrtok.lastdate, qcolnum) - return ('phrase', ' class="offrep" id="%s"' % offrepid) # Date in the middle, so need to match before the date-only parsing... -reoffrepwdate = re.compile('''(?ix) +reoffrepwdate = re.compile("""(?ix) \s*official(?:||\s)*report # Official Report (?:(?:||,|\s)*(Westminster\sHall|House\sof\sLords|House\sof\sCommons))? # Optionally followed by a chamber (1) [,;]?\s*(?:)?[,;]?\s* @@ -144,47 +149,48 @@ def TokenOffRep(qoffrep, phrtok): (?:(W[AS]?)\s*)? # Optional column number prefix (4) (\d+)(?:(?:&\#150;|-)\d+)? 
# Column number or numbers (5) ([WHS]*) # Optional column number suffix (6) -''') +""") def TokenOffRepWDate(qoffrep, phrtok): # print qoffrep.group(0) loc1 = qoffrep.group(1) loc2 = qoffrep.group(2) - date = qoffrep.group(3).replace(' ', ' ') + date = qoffrep.group(3).replace(" ", " ") qcolprefix = qoffrep.group(4) qcolnum = qoffrep.group(5) qcolsuffix = qoffrep.group(6) - m = re.match('(\d+)/(\d+)/(\d+)', date) + m = re.match("(\d+)/(\d+)/(\d+)", date) if m: lordsdate = True - date = datetime.strptime(date, '%d/%m/%Y').date().isoformat() + date = datetime.strptime(date, "%d/%m/%Y").date().isoformat() else: lordsdate = False - date = datetime.strptime(date, '%d %B %Y').date().isoformat() + date = datetime.strptime(date, "%d %B %Y").date().isoformat() if qcolprefix: qcolprefix = qcolprefix.upper() if qcolsuffix: qcolsuffix = qcolsuffix.upper() - if loc1 == 'Westminster Hall' or qcolsuffix == 'WH': - sect = 'westminhall' - elif qcolprefix == 'WS' or qcolsuffix == 'WS': - sect = 'wms' - elif qcolprefix == 'WA' or qcolsuffix == 'W': - sect = 'wrans' - elif loc1 == 'House of Commons' or loc2 == 'Commons': - sect = 'debates' - elif loc1 == 'House of Lords' or loc2 == 'Lords' or lordsdate: - sect = 'lords' + if loc1 == "Westminster Hall" or qcolsuffix == "WH": + sect = "westminhall" + elif qcolprefix == "WS" or qcolsuffix == "WS": + sect = "wms" + elif qcolprefix == "WA" or qcolsuffix == "W": + sect = "wrans" + elif loc1 == "House of Commons" or loc2 == "Commons": + sect = "debates" + elif loc1 == "House of Lords" or loc2 == "Lords" or lordsdate: + sect = "lords" else: - sect = 'debates' + sect = "debates" + + offrepid = "%s/%s.%s" % (sect, date, qcolnum) + return ("phrase", ' class="offrep" id="%s"' % offrepid) - offrepid = '%s/%s.%s' % (sect, date, qcolnum) - return ('phrase', ' class="offrep" id="%s"' % offrepid) -#my hon. Friend the Member for Regent's Park and Kensington, North (Ms Buck) +# my hon. Friend the Member for Regent's Park and Kensington, North (Ms Buck) # (sometimes there are spurious adjectives -rehonfriend = re.compile('''(?ix) +rehonfriend = re.compile("""(?ix) the\.? # Privy counsellors, barrister, armed forces, status, etc. (?:(?:\s|&.{4};)*(?:right\.?|rt\.|very|old|new|now|current|then|visiting|former|distinguished|hon\.?|honourable|and|learned|gallant|Labour|Liberal Democrat|Conservative|reverend|independent|excellent|poor|rude|courageous|wonderful|brutal|redoubtable|mute|present|pious|formidable|fragrant))* @@ -193,14 +199,16 @@ def TokenOffRepWDate(qoffrep, phrtok): ([^(]{3,60}?) # group 1 the name of the constituency \s* \(([^)]{5,60}?)(?:&\#(?:146|8217);s)?\) # group 2 the name of the MP, inserted for clarity. 
-''') -rehonfriendmarg = re.compile('the\s+(hon\.\s*)?member for [^(]{0,60}\((?i)') +""") +rehonfriendmarg = re.compile("the\s+(hon\.\s*)?member for [^(]{0,60}\((?i)") def TokenHonFriend(mhonfriend, phrtok): # will match for ids orgname = mhonfriend.group(2) - res = memberList.matchfullnamecons(orgname, mhonfriend.group(1), phrtok.sdate, alwaysmatchcons=False) + res = memberList.matchfullnamecons( + orgname, mhonfriend.group(1), phrtok.sdate, alwaysmatchcons=False + ) if not res[0]: # comes back as None nid = "unknown" mname = orgname @@ -212,38 +220,36 @@ def TokenHonFriend(mhonfriend, phrtok): # remove any xml entities from the name orgname = res[1] - return ('phrase', ' class="honfriend" person_id="%s" name="%s"' % (nid, orgname)) + return ("phrase", ' class="honfriend" person_id="%s" name="%s"' % (nid, orgname)) # the array of tokens which we will detect on the way through tokenchain = [ - ('hreflink', rehreflink, None, TokenHrefLink), - ('offrepwdate', reoffrepwdate, None, TokenOffRepWDate), - ("date", redatephraseval, None, TokenDate), - ("offrep", reoffrepw, None, TokenOffRep), - ("standing order", restandingo, restandingomarg, TokenStandingOrder), - ("httplink", rehtlink, None, TokenHttpLink), - ("honfriend", rehonfriend, rehonfriendmarg, TokenHonFriend), + ("hreflink", rehreflink, None, TokenHrefLink), + ("offrepwdate", reoffrepwdate, None, TokenOffRepWDate), + ("date", redatephraseval, None, TokenDate), + ("offrep", reoffrepw, None, TokenOffRep), + ("standing order", restandingo, restandingomarg, TokenStandingOrder), + ("httplink", rehtlink, None, TokenHttpLink), + ("honfriend", rehonfriend, rehonfriendmarg, TokenHonFriend), ] # this handles the chain of tokenization of a paragraph class PhraseTokenize: - # recurses over itc < len(tokenchain) def TokenizePhraseRecurse(self, qs, stex, itc): - # end of the chain if itc == len(tokenchain): - self.toklist.append(('', '', stex)) + self.toklist.append(("", "", stex)) return # keep eating through the pieces for the same token while stex: # attempt to split the token mtoken = tokenchain[itc][1].search(stex) - if mtoken: # the and/or method fails with this - headtex = stex[:mtoken.span(0)[0]] + if mtoken: # the and/or method fails with this + headtex = stex[: mtoken.span(0)[0]] else: headtex = stex @@ -268,29 +274,34 @@ def TokenizePhraseRecurse(self, qs, stex, itc): # print "Token detected:", mtoken.group(0) # the tail part - stex = stex[mtoken.span(0)[1]:] + stex = stex[mtoken.span(0)[1] :] def __init__(self, date, stex): - self.lastdate = '' + self.lastdate = "" self.toklist = [] self.sdate = date - stex = re.sub('&(?!amp;)', '&', stex) + stex = re.sub("&(?!amp;)", "&", stex) # separate out any qnums at end of paragraph self.rmqnum = reqnum.search(stex) if self.rmqnum: - stex = stex[:self.rmqnum.span(0)[0]] + stex = stex[: self.rmqnum.span(0)[0]] # separate out qnums stuffed into front of paragraph (by the grabber of the speakername) frqnum = refqnum.match(stex) if frqnum: if self.rmqnum: - raise ContextException('Found question number [%s] in para, but already found [%s] at end (this probably just means it is being quoted, and you just need to change [] to ().' % (frqnum.group(1), self.rmqnum.group(1))) + raise ContextException( + "Found question number [%s] in para, but already found [%s] at end (this probably just means it is being quoted, and you just need to change [] to ()." 
+ % (frqnum.group(1), self.rmqnum.group(1)) + ) self.rmqnum = frqnum - stex = stex[frqnum.span(0)[1]:] - stex_nohtml = re.sub('<[^>]*>', '', stex) + stex = stex[frqnum.span(0)[1] :] + stex_nohtml = re.sub("<[^>]*>", "", stex) if len(stex_nohtml) < 10: - raise ContextException('Removing question number from para appears to have removed all text (this probably just means a footnote marker is using [], just change to ()).') + raise ContextException( + "Removing question number from para appears to have removed all text (this probably just means a footnote marker is using [], just change to ())." + ) self.TokenizePhraseRecurse(date, stex, 0) @@ -299,10 +310,10 @@ def GetPara(self): for tok in self.toklist: if tok[0]: - res.append('<%s%s>' % (tok[0], tok[1])) + res.append("<%s%s>" % (tok[0], tok[1])) res.append(tok[2]) - res.append('' % tok[0]) + res.append("" % tok[0]) else: res.append(tok[2]) - return ''.join(res) + return "".join(res) diff --git a/london-mayors-questions/questions.py b/london-mayors-questions/questions.py index 73db32a8..f5123ed4 100755 --- a/london-mayors-questions/questions.py +++ b/london-mayors-questions/questions.py @@ -1,95 +1,90 @@ #! /usr/bin/env python3 -import os +import datetime +import json import logging +import os +import re +import string import click import click_log - -import json -import datetime import dateutil.parser -import re - import requests import requests_cache - -import string - from bs4 import BeautifulSoup, element from lxml import etree -from lxml.html import soupparser # Set up logging logger = logging.getLogger(__name__) click_log.basic_config(logger) # Set up the requests cache -cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'cache') -requests_cache.install_cache(cache_path, expire_after=60*60*12) +cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cache") +requests_cache.install_cache(cache_path, expire_after=60 * 60 * 12) # Load and parsethe configuration file -with open('config.json') as config_json_file: - logger.debug('Reading config file') +with open("config.json") as config_json_file: + logger.debug("Reading config file") config = json.load(config_json_file) # Set our constants -ASSEMBLY_DOMAIN = config['assembly_domain'] -DEFAULT_START_DATE = config['default_start_date'] -PUBLIC_WHIP_QUESTION_ID_PREFIX = config['public_whip_question_id_prefix'] -CURRENT_MAYOR_NAME = config['current_mayor_name'] -NAME_REGEX_TO_STRIP = config['name_regex_to_strip'] -NAME_CORRECTIONS = config['name_corrections'] +ASSEMBLY_DOMAIN = config["assembly_domain"] +DEFAULT_START_DATE = config["default_start_date"] +PUBLIC_WHIP_QUESTION_ID_PREFIX = config["public_whip_question_id_prefix"] +CURRENT_MAYOR_NAME = config["current_mayor_name"] +NAME_REGEX_TO_STRIP = config["name_regex_to_strip"] +NAME_CORRECTIONS = config["name_corrections"] # This needs to match the type from xml2db.pl in TWFY -XML_FILE_PREFIX = config['xml_file_prefix'] +XML_FILE_PREFIX = config["xml_file_prefix"] -CLI_DATETIME_FORMAT = click.DateTime(formats=('%Y-%m-%d',)) +CLI_DATETIME_FORMAT = click.DateTime(formats=("%Y-%m-%d",)) -STATE_JSON_FILENAME = 'state.json' +STATE_JSON_FILENAME = "state.json" -EMPTY_STATE_OBJECT = { - 'dates': {}, - 'questions': {} -} +EMPTY_STATE_OBJECT = {"dates": {}, "questions": {}} def getScraperState(output_folder): - ''' Load the scraper's state from file. 
''' + """Load the scraper's state from file.""" state_file = os.path.join(output_folder, STATE_JSON_FILENAME) # Check this file exists before we load it if os.path.exists(state_file): - with open(state_file) as state_json_file: - logger.debug('Reading state file') + logger.debug("Reading state file") state = json.load(state_json_file) # If not, just use the empty object. It'll be written at wrap-up. else: - logger.warning('Could not find existing state file at {}, creating new one'.format(state_file)) + logger.warning( + "Could not find existing state file at {}, creating new one".format( + state_file + ) + ) state = EMPTY_STATE_OBJECT return state def writeScraperState(state, output_folder): - ''' Write the scraper's state back out to file. ''' + """Write the scraper's state back out to file.""" output_file = os.path.join(output_folder, STATE_JSON_FILENAME) try: json_string = json.dumps(state, indent=2, default=str) - with open(output_file, 'w') as state_json_file: - logger.debug('Writing state file') + with open(output_file, "w") as state_json_file: + logger.debug("Writing state file") state_json_file.write(json_string) except TypeError as e: - logger.error('Could not serialise to valid JSON: {}'.format(str(e))) + logger.error("Could not serialise to valid JSON: {}".format(str(e))) def getDatesInRange(start, end): - ''' Return an array of dates between (and inclusive of) those given. ''' + """Return an array of dates between (and inclusive of) those given.""" delta = end - start dates = [] @@ -102,55 +97,57 @@ def getDatesInRange(start, end): def scrapeAssemblyMeetingOnDate(date): - ''' Scrape the Mayor's Questions meeting page for the provided date ''' + """Scrape the Mayor's Questions meeting page for the provided date""" - meeting_date_string = date.strftime('%Y/%m/%d') + meeting_date_string = date.strftime("%Y/%m/%d") - meeting_date_url = ASSEMBLY_DOMAIN + '/questions/meeting/mqt/' + meeting_date_string + meeting_date_url = ASSEMBLY_DOMAIN + "/questions/meeting/mqt/" + meeting_date_string - logger.debug('Scraping meeting page at {}'.format(meeting_date_url)) + logger.debug("Scraping meeting page at {}".format(meeting_date_url)) meeting_page = requests.get(meeting_date_url) - scraped_data = { - 'http_status': str(meeting_page.status_code) - } + scraped_data = {"http_status": str(meeting_page.status_code)} if meeting_page.status_code == 404: - logger.info('Meeting on {} returned HTTP 404'.format(date)) - scraped_data['to_scrape'] = False + logger.info("Meeting on {} returned HTTP 404".format(date)) + scraped_data["to_scrape"] = False elif meeting_page.status_code == 200: - logger.info('Meeting on {} returned HTTP 200'.format(date)) - - scraped_data['sessions'] = parseAssemblyMeetingToSessions(meeting_page.content) - scraped_data['questions'] = [] - - if len(scraped_data['sessions']) > 0: - scraped_data['to_scrape'] = False - for session in scraped_data['sessions']: - scraped_data['questions'] += scrapeSessionAtUrl(session) - elif meeting_date_string != '2019/02/25': # Exempt date we know lacks sessions - logger.warning('Meeting on {} doesn\'t seem to have any sessions!'.format(date)) - scraped_data['to_scrape'] = True + logger.info("Meeting on {} returned HTTP 200".format(date)) + + scraped_data["sessions"] = parseAssemblyMeetingToSessions(meeting_page.content) + scraped_data["questions"] = [] + + if len(scraped_data["sessions"]) > 0: + scraped_data["to_scrape"] = False + for session in scraped_data["sessions"]: + scraped_data["questions"] += scrapeSessionAtUrl(session) + elif 
meeting_date_string != "2019/02/25": # Exempt date we know lacks sessions + logger.warning( + "Meeting on {} doesn't seem to have any sessions!".format(date) + ) + scraped_data["to_scrape"] = True else: - logger.warning('Meeting on {} returned HTTP {}'.format(date, meeting_page.status_code)) - scraped_data['to_scrape'] = True + logger.warning( + "Meeting on {} returned HTTP {}".format(date, meeting_page.status_code) + ) + scraped_data["to_scrape"] = True return scraped_data def parseAssemblyMeetingToSessions(content): - ''' Parse an assembly meeting page and return a list of its sessions. ''' + """Parse an assembly meeting page and return a list of its sessions.""" soup = BeautifulSoup(content, features="lxml") - sessions_in_content = soup.find_all('div', class_='entity-meetingsession') + sessions_in_content = soup.find_all("div", class_="entity-meetingsession") sessions_in_meeting = [] for session in sessions_in_content: session_title = session.a.text - session_url = session.a.get('href') + session_url = session.a.get("href") logger.debug('Found session "{}" at URL {}'.format(session_title, session_url)) @@ -160,11 +157,11 @@ def parseAssemblyMeetingToSessions(content): def scrapeSessionAtUrl(session_url): - ''' Scrape a given session URL and extract the questions within. ''' + """Scrape a given session URL and extract the questions within.""" session_full_url = ASSEMBLY_DOMAIN + session_url - logger.debug('Scraping session page at {}'.format(session_full_url)) + logger.debug("Scraping session page at {}".format(session_full_url)) session_page = requests.get(session_full_url) @@ -174,46 +171,48 @@ def scrapeSessionAtUrl(session_url): def parseSessionToQuestions(content): - soup = BeautifulSoup(content, features="lxml") - questions_in_content = soup.find_all('tr', class_='question') + questions_in_content = soup.find_all("tr", class_="question") questions_in_session = [] for question_row in questions_in_content: - - question_row_cells = question_row.findAll('td') + question_row_cells = question_row.findAll("td") question_number = question_row_cells[1].text - logger.debug('Found question {}'.format(question_number)) + logger.debug("Found question {}".format(question_number)) questions_in_session.append(question_number) return questions_in_session -def scrapeQuestionWithId(question_id,context): - ''' Scrape the page for a given question ID and return structured data. 
''' +def scrapeQuestionWithId(question_id, context): + """Scrape the page for a given question ID and return structured data.""" - logger.debug('Scraping question {}'.format(question_id)) + logger.debug("Scraping question {}".format(question_id)) - question_full_url = ASSEMBLY_DOMAIN + '/questions/' + question_id + question_full_url = ASSEMBLY_DOMAIN + "/questions/" + question_id - logger.debug('Scraping question page at {}'.format(question_full_url)) + logger.debug("Scraping question page at {}".format(question_full_url)) question_page = requests.get(question_full_url) if question_page.status_code == 200: - logger.debug('Question {} returned HTTP 200'.format(question_id)) + logger.debug("Question {} returned HTTP 200".format(question_id)) question_parsed_data = parseQuestionPage(question_page.content) else: if question_page.status_code != 403: - logger.warning('Question {} returned HTTP {}'.format(question_id, question_page.status_code)) - context.obj['state']['questions'][question_id]['to_scrape'] = True + logger.warning( + "Question {} returned HTTP {}".format( + question_id, question_page.status_code + ) + ) + context.obj["state"]["questions"][question_id]["to_scrape"] = True question_parsed_data = None @@ -221,56 +220,59 @@ def scrapeQuestionWithId(question_id,context): def parseQuestionPage(content): - ''' Actually take the HTML from a scraped question page and turn it into a structured object. ''' + """Actually take the HTML from a scraped question page and turn it into a structured object.""" soup = BeautifulSoup(content, features="lxml") # We use the canonical URL just in case anything exotic has happened with redirects. - canonical_url = soup.find('link', {'rel': 'canonical'})['href'] + canonical_url = soup.find("link", {"rel": "canonical"})["href"] - main_content = soup.find('div', role='main') + main_content = soup.find("div", role="main") # Pull the title question_title = main_content.h1.text.strip() - logger.debug('Question title is {}'.format(question_title)) + logger.debug("Question title is {}".format(question_title)) # Extract who asked it - asked_by_name = main_content.find('div', class_='field--name-field-asked-by').find('div', class_='field__item').text.strip() + asked_by_name = ( + main_content.find("div", class_="field--name-field-asked-by") + .find("div", class_="field__item") + .text.strip() + ) asked_by_person = getSpeakerObjectFromName(asked_by_name) - logger.debug('Question asked by {}'.format(asked_by_person['name'])) + logger.debug("Question asked by {}".format(asked_by_person["name"])) # Try to extract the actual question - question_text = main_content.find('div', class_='field--name-body').find('div', class_='field__item') + question_text = main_content.find("div", class_="field--name-body").find( + "div", class_="field__item" + ) - question_p_elements = main_content\ - .find('section', class_='question')\ - .findAll('p') + question_p_elements = main_content.find("section", class_="question").findAll("p") question_paragraphs = [] for paragraph in question_p_elements: - # Some paragraphs are helpfully empty. 
Deal with those - if paragraph.text.strip() != '': + if paragraph.text.strip() != "": # NB at this point we're still sending BeautifulSoup objects question_paragraphs.append(paragraph) # We ignore the speaker which comes back with this, but this function otherwise does all the tidying needed question_with_speaker = splitTextToSpeeches(question_text)[0] - question_text_paragraphs = question_with_speaker['paragraphs'] + question_text_paragraphs = question_with_speaker["paragraphs"] # Now we know the title and the question, assemble the basic question object to send back question_object = { - 'title': question_title, - 'canonical_url': canonical_url, - 'question_text_paragraphs': question_text_paragraphs, - 'asked_by': asked_by_person + "title": question_title, + "canonical_url": canonical_url, + "question_text_paragraphs": question_text_paragraphs, + "asked_by": asked_by_person, } # Try parse the actual answers out @@ -278,12 +280,12 @@ def parseQuestionPage(content): # Got answers? - if len(answers_object['answers']) > 0: - question_object['answered'] = True - question_object['answers'] = answers_object['answers'] - question_object['answered_date'] = answers_object['answered_date'] + if len(answers_object["answers"]) > 0: + question_object["answered"] = True + question_object["answers"] = answers_object["answers"] + question_object["answered_date"] = answers_object["answered_date"] else: - question_object['answered'] = False + question_object["answered"] = False # Send the parsed data back upstream @@ -291,102 +293,126 @@ def parseQuestionPage(content): def parseAnswersFromQuestionPage(page_content): - ''' Given page content, see if we can get answers. ''' + """Given page content, see if we can get answers.""" # Look to see if there are any answers given - answers_div = page_content.find('div', class_='answers') + answers_div = page_content.find("div", class_="answers") - answers_object = { - 'answers': [] - } + answers_object = {"answers": []} - answer_articles = answers_div.findAll('article', class_='node--answer') + answer_articles = answers_div.findAll("article", class_="node--answer") for answer_article in answer_articles: # If there's a paragraph with a class of 'holding', we're waiting for an answer. - if answer_article.find('p', class_='holding'): - logger.debug('Question is awaiting an answer') + if answer_article.find("p", class_="holding"): + logger.debug("Question is awaiting an answer") continue # Sometimes the question just has no answer. Because this is "currently", still assume it's unanswered. 
- elif answer_article.find('div', class_='no-answer'): - logger.debug('Question has no available answers.') + elif answer_article.find("div", class_="no-answer"): + logger.debug("Question has no available answers.") continue # Get the date this was answered - this is the important one, not when it was asked, - answer_date = answer_article.find('div', class_='field--name-post-date').find('div', class_='field__item').text + answer_date = ( + answer_article.find("div", class_="field--name-post-date") + .find("div", class_="field__item") + .text + ) - if 'answered_date' not in answers_object: - answers_object['answered_date'] = dateutil.parser.parse(answer_date).date() - logger.debug('Question first answered on {}'.format(answers_object['answered_date'])) + if "answered_date" not in answers_object: + answers_object["answered_date"] = dateutil.parser.parse(answer_date).date() + logger.debug( + "Question first answered on {}".format(answers_object["answered_date"]) + ) # Find who answered it - answered_by_name = answer_article.find('div', class_='field--name-field-answered-by').find('div', class_='field__item').text.strip() + answered_by_name = ( + answer_article.find("div", class_="field--name-field-answered-by") + .find("div", class_="field__item") + .text.strip() + ) answered_by_person = getSpeakerObjectFromName(answered_by_name) - logger.debug('Question answered by {}'.format(answered_by_person['name'])) + logger.debug("Question answered by {}".format(answered_by_person["name"])) answer_paragraphs = [] - answer_body = answer_article.find('div', class_='field--name-body') + answer_body = answer_article.find("div", class_="field--name-body") if answer_body: - answer_p_elements = answer_body.findAll('p') + answer_p_elements = answer_body.findAll("p") for paragraph in answer_p_elements: # Some paragraphs are helpfully empty. Deal with those - if paragraph.text.strip() != '': + if paragraph.text.strip() != "": # NB at this point we're still sending BeautifulSoup objects answer_paragraphs.append(paragraph) - logger.debug('Found {} paragraphs of non-empty answers on page'.format(len(answer_paragraphs))) + logger.debug( + "Found {} paragraphs of non-empty answers on page".format( + len(answer_paragraphs) + ) + ) # Send the paragraphs of answers off to be sliced if this is multiple parts of a conversation answers_by_speech = splitTextToSpeeches(answer_paragraphs) - logger.debug('Found {} individual speeches within this answer'.format(len(answers_by_speech))) + logger.debug( + "Found {} individual speeches within this answer".format( + len(answers_by_speech) + ) + ) for i, answer in enumerate(answers_by_speech): - # This makes sure the answer has a speaker - if it doesn't, something is wrong - if answer['speaker']: - answers_object['answers'].append({ - 'speaker': answer['speaker'], - 'paragraphs': answer['paragraphs'] - }) + if answer["speaker"]: + answers_object["answers"].append( + {"speaker": answer["speaker"], "paragraphs": answer["paragraphs"]} + ) else: # If this is the first speech with no speaker, it's the answerer. 
- if (i == 0): - logger.debug('First speech with no detected speaker, using "Answered By"') - answers_object['answers'].append({ - 'speaker': answered_by_person, - 'paragraphs': answer['paragraphs'] - }) + if i == 0: + logger.debug( + 'First speech with no detected speaker, using "Answered By"' + ) + answers_object["answers"].append( + { + "speaker": answered_by_person, + "paragraphs": answer["paragraphs"], + } + ) else: - logger.warning('Speech with no detected speaker in question {}!'.format(canonical_url)) - - answer_attachment_div = answer_article.find('div', class_='field--name-field-attachments') + logger.warning( + "Speech with no detected speaker in question {}!".format( + canonical_url + ) + ) + + answer_attachment_div = answer_article.find( + "div", class_="field--name-field-attachments" + ) if answer_attachment_div: - attachments = answer_attachment_div.findAll('a') + attachments = answer_attachment_div.findAll("a") attachments = [str(a) for a in attachments] - answers_object['answers'].append({ - 'speaker': answered_by_person, - 'attachments': attachments, - }) + answers_object["answers"].append( + { + "speaker": answered_by_person, + "attachments": attachments, + } + ) return answers_object def stripPatternsFromName(name): - patterns_to_strip = True while patterns_to_strip: - original_name = name for pattern in NAME_REGEX_TO_STRIP: - name = re.sub(pattern, '', name) + name = re.sub(pattern, "", name) if name == original_name: patterns_to_strip = False @@ -395,9 +421,9 @@ def stripPatternsFromName(name): def getPersonIDFromName(name): - ''' Turn a name into a speaker ID. ''' + """Turn a name into a speaker ID.""" - if name == 'The Mayor': + if name == "The Mayor": name = CURRENT_MAYOR_NAME # If this person's name has a correction, use that instead @@ -408,26 +434,24 @@ def getPersonIDFromName(name): def getSpeakerObjectFromName(name): - ''' Given a name, try to find a speaker ID and return a whole object. ''' + """Given a name, try to find a speaker ID and return a whole object.""" - name = name.replace('\u00a0', ' ') + name = name.replace("\u00a0", " ") name = stripPatternsFromName(name) id = getPersonIDFromName(name) if not id: - if 'Liz Peace' not in name: - logger.warning('Could not match name {} to any assembly member'.format(name)) - id = 'unknown' + if "Liz Peace" not in name: + logger.warning( + "Could not match name {} to any assembly member".format(name) + ) + id = "unknown" - return { - 'id': id, - 'name': name - } + return {"id": id, "name": name} def cleanParagraphText(text): - # Remove non-breaking spaces followed by a space. - text = text.replace('\u00a0 ', ' ') + text = text.replace("\u00a0 ", " ") # Strip trailing whitespace text = text.strip() @@ -436,34 +460,31 @@ def cleanParagraphText(text): def getSpeakerAndTextFromParagraph(paragraph): - ''' For the given paragraph text, try to detect if it is led by a speaker's name. 
''' + """For the given paragraph text, try to detect if it is led by a speaker's name.""" # Strong tags are used to mark speaker names in the source - name_candidate = paragraph.find('strong') + name_candidate = paragraph.find("strong") if name_candidate: - # Sanity check if this matches the expected format of speaker names - a name followed by a colon - if re.match('.*:$', name_candidate.text): - + if re.match(".*:$", name_candidate.text): # extract() removes the element from the beautifulsoup tree and returns it speaker_name = name_candidate.extract() - speaker = getSpeakerObjectFromName(speaker_name.text.replace(':', '').strip()) + speaker = getSpeakerObjectFromName( + speaker_name.text.replace(":", "").strip() + ) else: - speaker = False + speaker = False else: speaker = False - return { - 'speaker': speaker, - 'text': cleanParagraphText(paragraph.text) - } + return {"speaker": speaker, "text": cleanParagraphText(paragraph.text)} def splitTextToSpeeches(text_paragraphs): - ''' Sometimes text has several speeches by different people within it. Try isolate those. ''' + """Sometimes text has several speeches by different people within it. Try isolate those.""" answers_by_speech = [] @@ -471,274 +492,352 @@ def splitTextToSpeeches(text_paragraphs): current_speaker = False for paragraph in text_paragraphs: - if isinstance(paragraph, element.NavigableString): - logger.debug('Ignored NavigableString') + logger.debug("Ignored NavigableString") else: - # Ignore entirely empty paragraphs - if paragraph.text != '': - + if paragraph.text != "": paragraph_with_speaker = getSpeakerAndTextFromParagraph(paragraph) # If this paragraph is a new speaker, wrap up the answer and start a new one - if paragraph_with_speaker['speaker']: + if paragraph_with_speaker["speaker"]: if len(paragraphs_in_speech) > 0: - answers_by_speech.append({ - 'paragraphs': paragraphs_in_speech, - 'speaker': current_speaker - }) - - logger.debug('New speaker! Last speech was {} paragraphs'.format(len(paragraphs_in_speech))) - - paragraphs_in_speech = [paragraph_with_speaker['text']] - current_speaker = paragraph_with_speaker['speaker'] + answers_by_speech.append( + { + "paragraphs": paragraphs_in_speech, + "speaker": current_speaker, + } + ) + + logger.debug( + "New speaker! Last speech was {} paragraphs".format( + len(paragraphs_in_speech) + ) + ) + + paragraphs_in_speech = [paragraph_with_speaker["text"]] + current_speaker = paragraph_with_speaker["speaker"] # If this isn't a new speaker, just append to the current one else: - paragraphs_in_speech.append(paragraph_with_speaker['text']) + paragraphs_in_speech.append(paragraph_with_speaker["text"]) # Finally, wrap up the whole thing if there's anything remaining if len(paragraphs_in_speech) > 0: + logger.debug("Final speech was {} paragraphs".format(len(paragraphs_in_speech))) - logger.debug('Final speech was {} paragraphs'.format(len(paragraphs_in_speech))) + answers_by_speech.append( + {"paragraphs": paragraphs_in_speech, "speaker": current_speaker} + ) - answers_by_speech.append({ - 'paragraphs': paragraphs_in_speech, - 'speaker': current_speaker - }) - - logger.debug('Split {} paragraphs into {} speeches'.format(len(text_paragraphs), len(answers_by_speech))) + logger.debug( + "Split {} paragraphs into {} speeches".format( + len(text_paragraphs), len(answers_by_speech) + ) + ) return answers_by_speech def buildXMLForQuestions(questions): - ''' Given a date, collect answered questions and output the appropriate XML file. 
''' + """Given a date, collect answered questions and output the appropriate XML file.""" - pwxml = etree.Element('publicwhip') + pwxml = etree.Element("publicwhip") for question_id, question in questions.items(): - - question_number = '{}.{}'.format(question['answered_date'].strftime('%Y-%m-%d'), question['canonical_url'].split('/')[-1]) - pw_root_id = '{}{}'.format(PUBLIC_WHIP_QUESTION_ID_PREFIX, question_number) - - pw_heading_id = pw_root_id + '.h' - heading_element = etree.SubElement(pwxml, 'minor-heading', nospeaker='true', id=pw_heading_id) - heading_element.text = question['title'] - - pw_question_id = pw_root_id + '.q0' - question_element = etree.SubElement(pwxml, 'question', - id=pw_question_id, - url=question['canonical_url'], - speakername=question['asked_by']['name'], - person_id=question['asked_by']['id'] - ) - - for paragraph in question['question_text_paragraphs']: - paragraph_element = etree.SubElement(question_element, 'p') + question_number = "{}.{}".format( + question["answered_date"].strftime("%Y-%m-%d"), + question["canonical_url"].split("/")[-1], + ) + pw_root_id = "{}{}".format(PUBLIC_WHIP_QUESTION_ID_PREFIX, question_number) + + pw_heading_id = pw_root_id + ".h" + heading_element = etree.SubElement( + pwxml, "minor-heading", nospeaker="true", id=pw_heading_id + ) + heading_element.text = question["title"] + + pw_question_id = pw_root_id + ".q0" + question_element = etree.SubElement( + pwxml, + "question", + id=pw_question_id, + url=question["canonical_url"], + speakername=question["asked_by"]["name"], + person_id=question["asked_by"]["id"], + ) + + for paragraph in question["question_text_paragraphs"]: + paragraph_element = etree.SubElement(question_element, "p") paragraph_element.text = paragraph - for answer_index, answer in enumerate(question['answers']): - - pw_answer_id = pw_root_id + '.r' + str(answer_index) + for answer_index, answer in enumerate(question["answers"]): + pw_answer_id = pw_root_id + ".r" + str(answer_index) - answer_element = etree.SubElement(pwxml, 'reply', - id=pw_answer_id, - speakername=answer['speaker']['name'], - person_id=answer['speaker']['id'] - ) + answer_element = etree.SubElement( + pwxml, + "reply", + id=pw_answer_id, + speakername=answer["speaker"]["name"], + person_id=answer["speaker"]["id"], + ) - for paragraph in answer.get('paragraphs', []): - paragraph_element = etree.SubElement(answer_element, 'p') + for paragraph in answer.get("paragraphs", []): + paragraph_element = etree.SubElement(answer_element, "p") paragraph_element.text = paragraph - for attachment in answer.get('attachments', []): - paragraph_element = etree.SubElement(answer_element, 'p') + for attachment in answer.get("attachments", []): + paragraph_element = etree.SubElement(answer_element, "p") paragraph_element.append(etree.fromstring(attachment)) return pwxml def writeXMLToFile(lxml, file): - ''' Write an lxml element out to file. ''' + """Write an lxml element out to file.""" # Make a new document tree xmldoc = etree.ElementTree(lxml) # Save to XML file - with open(file, 'w') as outFile: - xmldoc.write(outFile, pretty_print=True, encoding='utf-8') - logger.debug('Written XML to {}'.format(file)) + with open(file, "w") as outFile: + xmldoc.write(outFile, pretty_print=True, encoding="utf-8") + logger.debug("Written XML to {}".format(file)) def buildDateStatusObjectFromScrape(meeting_scrape_data): - ''' Format a date's status for storing in the state file. 
''' + """Format a date's status for storing in the state file.""" status_object = { - 'http_status': meeting_scrape_data['http_status'], - 'to_scrape': meeting_scrape_data['to_scrape'] if 'to_scrape' in meeting_scrape_data else True, - 'updated': datetime.datetime.today() + "http_status": meeting_scrape_data["http_status"], + "to_scrape": meeting_scrape_data["to_scrape"] + if "to_scrape" in meeting_scrape_data + else True, + "updated": datetime.datetime.today(), } - if 'sessions' in meeting_scrape_data: - status_object['sessions_count'] = len(meeting_scrape_data['sessions']) + if "sessions" in meeting_scrape_data: + status_object["sessions_count"] = len(meeting_scrape_data["sessions"]) - if 'questions' in meeting_scrape_data: - status_object['questions_count'] = len(meeting_scrape_data['questions']) + if "questions" in meeting_scrape_data: + status_object["questions_count"] = len(meeting_scrape_data["questions"]) return status_object def loadMembershipsFromFile(members_file): - ''' Parse the provided file and extract data on Assembly members. ''' + """Parse the provided file and extract data on Assembly members.""" # We don't need to open this file, since Click deals with that members_raw_data = json.load(members_file) - logger.debug('Loaded {} people from {}'.format(len(members_raw_data['persons']), members_file.name)) + logger.debug( + "Loaded {} people from {}".format( + len(members_raw_data["persons"]), members_file.name + ) + ) people_by_id = {} post_org_by_id = {} # This unpacks all the people in the JSON so we can pull a person's name back from their ID - for person in members_raw_data['persons']: - people_by_id[person['id']] = person - for post in members_raw_data['posts']: - post_org_by_id[post['id']] = post['organization_id'] + for person in members_raw_data["persons"]: + people_by_id[person["id"]] = person + for post in members_raw_data["posts"]: + post_org_by_id[post["id"]] = post["organization_id"] # This loops through each membership, checks to see if it's for the Assembly, if so adds it to the map person_ids_by_name = {} - for membership in members_raw_data['memberships']: - if 'post_id' in membership and post_org_by_id[membership['post_id']] == 'london-assembly': - name = getNameFromPerson(people_by_id[membership['person_id']]) + for membership in members_raw_data["memberships"]: + if ( + "post_id" in membership + and post_org_by_id[membership["post_id"]] == "london-assembly" + ): + name = getNameFromPerson(people_by_id[membership["person_id"]]) if name not in person_ids_by_name: - person_ids_by_name[name] = membership['person_id'] - logger.debug('Added ID map for for {}'.format(name)) + person_ids_by_name[name] = membership["person_id"] + logger.debug("Added ID map for for {}".format(name)) else: - if person_ids_by_name[name] != membership['person_id']: - raise Exception('Multiple people with name {}'.format(name)) + if person_ids_by_name[name] != membership["person_id"]: + raise Exception("Multiple people with name {}".format(name)) - logger.debug('Added {} names with Assembly memberships'.format(len(person_ids_by_name))) + logger.debug( + "Added {} names with Assembly memberships".format(len(person_ids_by_name)) + ) return person_ids_by_name def getNameFromPerson(person): + for name in person.get("other_names", []): + if name["note"] == "Main": + return name["given_name"] + " " + name["family_name"] - for name in person.get('other_names', []): - if name['note'] == 'Main': - return name['given_name'] + ' ' + name['family_name'] - - raise Exception('Unable to find main 
name for person {}'.format(person['id'])) + raise Exception("Unable to find main name for person {}".format(person["id"])) @click.group() -@click_log.simple_verbosity_option(logger, default='warning') -@click.option('-o', '--out', required=True, type=click.Path(exists=True, file_okay=False, writable=True), help='The directory to place output and state files.') +@click_log.simple_verbosity_option(logger, default="warning") +@click.option( + "-o", + "--out", + required=True, + type=click.Path(exists=True, file_okay=False, writable=True), + help="The directory to place output and state files.", +) @click.pass_context def cli(context, out): context.ensure_object(dict) - context.obj['OUTPUT_FOLDER'] = out + context.obj["OUTPUT_FOLDER"] = out # Get the current state file, parse it and assign to the context - context.obj['state'] = getScraperState(context.obj['OUTPUT_FOLDER']) + context.obj["state"] = getScraperState(context.obj["OUTPUT_FOLDER"]) @cli.command() -@click.option('-s', '--start', type=CLI_DATETIME_FORMAT, help='The first date of the range to be scrape.') -@click.option('-e', '--end', type=CLI_DATETIME_FORMAT, help='The last date of the range to be scraped.') -@click.option('--force-scrape-dates', is_flag=True, help='Force all dates in the range to be re-scraped regardless of status') -@click.option('--force-refresh-questions', is_flag=True, help='Force all detected questions to have their state refreshed') +@click.option( + "-s", + "--start", + type=CLI_DATETIME_FORMAT, + help="The first date of the range to be scrape.", +) +@click.option( + "-e", + "--end", + type=CLI_DATETIME_FORMAT, + help="The last date of the range to be scraped.", +) +@click.option( + "--force-scrape-dates", + is_flag=True, + help="Force all dates in the range to be re-scraped regardless of status", +) +@click.option( + "--force-refresh-questions", + is_flag=True, + help="Force all detected questions to have their state refreshed", +) @click.pass_context def meetings(context, start, end, force_scrape_dates, force_refresh_questions): - ''' Get a list of questions from the London Assembly website asked between the dates given. ''' + """Get a list of questions from the London Assembly website asked between the dates given.""" - logger.info('Scraping London Assembly') + logger.info("Scraping London Assembly") if start: start_date = start.date() - logger.debug('End date has been explicitly set to {} by CLI'.format(start_date)) + logger.debug("End date has been explicitly set to {} by CLI".format(start_date)) else: - start_date = datetime.datetime.strptime(DEFAULT_START_DATE, '%Y-%m-%d').date() - logger.debug('Start date has been automatically set to {} by config'.format(start_date)) + start_date = datetime.datetime.strptime(DEFAULT_START_DATE, "%Y-%m-%d").date() + logger.debug( + "Start date has been automatically set to {} by config".format(start_date) + ) if end: end_date = end.date() - logger.debug('End date has been explicitly set to {} by CLI'.format(end_date)) + logger.debug("End date has been explicitly set to {} by CLI".format(end_date)) else: # Yesterday end_date = (datetime.datetime.today() - datetime.timedelta(days=1)).date() - logger.debug('End date has been automatically set to {} (yesterday)'.format(end_date)) + logger.debug( + "End date has been automatically set to {} (yesterday)".format(end_date) + ) if end_date < start_date: - logger.error('End date is before the start date. Aborting.') + logger.error("End date is before the start date. 
Aborting.") return dates_in_range = getDatesInRange(start_date, end_date) - logger.info('Targetting {} dates between {} and {}.'.format(len(dates_in_range), start_date, end_date)) + logger.info( + "Targetting {} dates between {} and {}.".format( + len(dates_in_range), start_date, end_date + ) + ) questions_in_range = [] with click.progressbar(dates_in_range) as bar: for date in bar: - # Check to see if we should actually scrape this date - if force_scrape_dates \ - or str(date) not in context.obj['state']['dates'] \ - or (str(date) in context.obj['state']['dates'] and context.obj['state']['dates'][str(date)]['to_scrape']): - - logger.info('Scraping date {}'.format(date)) - - meeting_scrape_data = scrapeAssemblyMeetingOnDate(date) - - if 'questions' in meeting_scrape_data: - logger.info('{} has {} questions'.format(date, len(meeting_scrape_data['questions']))) - - questions_in_range += meeting_scrape_data['questions'] - - context.obj['state']['dates'][str(date)] = buildDateStatusObjectFromScrape(meeting_scrape_data) + if ( + force_scrape_dates + or str(date) not in context.obj["state"]["dates"] + or ( + str(date) in context.obj["state"]["dates"] + and context.obj["state"]["dates"][str(date)]["to_scrape"] + ) + ): + logger.info("Scraping date {}".format(date)) + + meeting_scrape_data = scrapeAssemblyMeetingOnDate(date) + + if "questions" in meeting_scrape_data: + logger.info( + "{} has {} questions".format( + date, len(meeting_scrape_data["questions"]) + ) + ) + + questions_in_range += meeting_scrape_data["questions"] + + context.obj["state"]["dates"][str(date)] = ( + buildDateStatusObjectFromScrape(meeting_scrape_data) + ) else: + logger.debug( + "Skipping date {} (already scraped successfully)".format(date) + ) - logger.debug('Skipping date {} (already scraped successfully)'.format(date)) - - logger.info('{} questions found in this scrape'.format(len(questions_in_range))) + logger.info("{} questions found in this scrape".format(len(questions_in_range))) for question in questions_in_range: # Only do this if the question doesn't already exist, or we're forcing a refresh - if force_refresh_questions or question not in context.obj['state']['questions']: - context.obj['state']['questions'][question] = { - 'to_scrape': True, - 'scrape_requested_on': datetime.datetime.today() + if force_refresh_questions or question not in context.obj["state"]["questions"]: + context.obj["state"]["questions"][question] = { + "to_scrape": True, + "scrape_requested_on": datetime.datetime.today(), } @cli.command() -@click.option('-l', '--limit', type=int, help='The maximum number of questions to scrape') -@click.option('-m', '--members', required=True, type=click.File(), help='The members.json file to match names against.') -@click.option('--dry-run', is_flag=True, help='Should questions be marked as not needing scraping in future?') +@click.option( + "-l", "--limit", type=int, help="The maximum number of questions to scrape" +) +@click.option( + "-m", + "--members", + required=True, + type=click.File(), + help="The members.json file to match names against.", +) +@click.option( + "--dry-run", + is_flag=True, + help="Should questions be marked as not needing scraping in future?", +) @click.pass_context def questions(context, limit, members, dry_run): - ''' Update all questions which are still pending a scrape. ''' + """Update all questions which are still pending a scrape.""" # Try load in the Members data first - if that fails there's no point continuing. 
# ASSEMBLY_MEMBERS_BY_NAME is global to avoid having to pass it down every function until names are turned to IDs global ASSEMBLY_MEMBERS_BY_NAME ASSEMBLY_MEMBERS_BY_NAME = loadMembershipsFromFile(members) - logger.debug('{} questions are known to exist'.format(len(context.obj['state']['questions']))) + logger.debug( + "{} questions are known to exist".format(len(context.obj["state"]["questions"])) + ) questions_to_scrape = [] - for question_id, question_state in context.obj['state']['questions'].items(): - if question_state['to_scrape']: + for question_id, question_state in context.obj["state"]["questions"].items(): + if question_state["to_scrape"]: questions_to_scrape.append(question_id) # If a limit is provided, set it. Otherwise, scrape the lot. @@ -746,46 +845,51 @@ def questions(context, limit, members, dry_run): if limit: questions_to_scrape = questions_to_scrape[:limit] - logger.info('Scraping {} questions'.format(len(questions_to_scrape))) + logger.info("Scraping {} questions".format(len(questions_to_scrape))) scraped_questions = {} with click.progressbar(questions_to_scrape) as bar: for question_id in bar: - - scraped_questions[question_id] = scrapeQuestionWithId(question_id,context) - context.obj['state']['questions'][question_id]['scraped_at'] = datetime.datetime.today() + scraped_questions[question_id] = scrapeQuestionWithId(question_id, context) + context.obj["state"]["questions"][question_id]["scraped_at"] = ( + datetime.datetime.today() + ) answered_questions = {} for question_id, question_object in scraped_questions.items(): - # question will be None if we failed to scrape it, e.g page error - if question_object is not None and question_object['answered'] == True: - answered_date = question_object['answered_date'] - answered_questions.setdefault(answered_date, {})[question_id] = question_object + if question_object is not None and question_object["answered"] == True: + answered_date = question_object["answered_date"] + answered_questions.setdefault(answered_date, {})[question_id] = ( + question_object + ) if not dry_run: # Setting this question's scrape state to False means it won't be processed again - context.obj['state']['questions'][question_id]['to_scrape'] = False + context.obj["state"]["questions"][question_id]["to_scrape"] = False - logger.info('{} questions have had answers found in this scrape'.format(len(answered_questions))) + logger.info( + "{} questions have had answers found in this scrape".format( + len(answered_questions) + ) + ) # If there are new answers, write out our file. 
if len(answered_questions) > 0: for date, qns in answered_questions.items(): - - i = 0; - + i = 0 file_needs_writing = True while file_needs_writing: - - date_string = date.strftime('%Y-%m-%d') + date_string = date.strftime("%Y-%m-%d") letter_suffix = string.ascii_lowercase[i] - output_filename = XML_FILE_PREFIX + date_string + letter_suffix + '.xml' - output_file = os.path.join(context.obj['OUTPUT_FOLDER'], output_filename) + output_filename = XML_FILE_PREFIX + date_string + letter_suffix + ".xml" + output_file = os.path.join( + context.obj["OUTPUT_FOLDER"], output_filename + ) if os.path.exists(output_file): i = i + 1 @@ -795,67 +899,80 @@ def questions(context, limit, members, dry_run): file_needs_writing = False -@cli.command(name='set_date_scrape') -@click.option('--date', required=True, type=CLI_DATETIME_FORMAT, help='The date to alter the scrape status of.') -@click.option('--scrape/--no-scrape', required=True, help='Should the date be marked as needing scraping, or not?') +@cli.command(name="set_date_scrape") +@click.option( + "--date", + required=True, + type=CLI_DATETIME_FORMAT, + help="The date to alter the scrape status of.", +) +@click.option( + "--scrape/--no-scrape", + required=True, + help="Should the date be marked as needing scraping, or not?", +) @click.pass_context def set_date_scrape(context, date, scrape): - ''' Explicitly set if a date should be scraped or not at the next run. + """Explicitly set if a date should be scraped or not at the next run. - Used to either manually request a re-scraping of a date, or to suppress future scraping of a date. ''' + Used to either manually request a re-scraping of a date, or to suppress future scraping of a date.""" date = date.date() - click.echo('Setting scrape status of {} to {}'.format(date, scrape)) + click.echo("Setting scrape status of {} to {}".format(date, scrape)) - if date in context.obj['state']['dates']: - context.obj['state']['dates'][str(date)]['to_scrape'] = scrape + if date in context.obj["state"]["dates"]: + context.obj["state"]["dates"][str(date)]["to_scrape"] = scrape else: - context.obj['state']['dates'][str(date)] = { - 'to_scrape': scrape - } + context.obj["state"]["dates"][str(date)] = {"to_scrape": scrape} -@cli.command(name='set_question_scrape') -@click.option('--id', required=True, help='The question to alter the scrape status.') -@click.option('--scrape/--no-scrape', required=True, help='Should the question be marked as needing scraping, or not?') +@cli.command(name="set_question_scrape") +@click.option("--id", required=True, help="The question to alter the scrape status.") +@click.option( + "--scrape/--no-scrape", + required=True, + help="Should the question be marked as needing scraping, or not?", +) @click.pass_context def set_question_scrape(context, id, scrape): - ''' Explicitly set if a question should be scraped or not at the next run. + """Explicitly set if a question should be scraped or not at the next run. - Used to either manually request a re-scraping of a question, or to suppress future scraping of a question. 
''' + Used to either manually request a re-scraping of a question, or to suppress future scraping of a question.""" - click.echo('Setting scrape status of {} to {}'.format(id, scrape)) + click.echo("Setting scrape status of {} to {}".format(id, scrape)) - if id in context.obj['state']['questions']: - context.obj['state']['questions'][id]['to_scrape'] = scrape + if id in context.obj["state"]["questions"]: + context.obj["state"]["questions"][id]["to_scrape"] = scrape else: - context.obj['state']['questions'][id] = { - 'to_scrape': scrape - } + context.obj["state"]["questions"][id] = {"to_scrape": scrape} -@cli.command(name='reset_state') +@cli.command(name="reset_state") @click.pass_context def reset_state(context): - ''' Reset the scraper's state file, wiping all knowledge of dates and questions. ''' + """Reset the scraper's state file, wiping all knowledge of dates and questions.""" - click.secho('Resetting the state file will wipe all information about the states of dates and questions.', bg='red', fg='white') + click.secho( + "Resetting the state file will wipe all information about the states of dates and questions.", + bg="red", + fg="white", + ) - if click.confirm('Are you really sure you want to do this?', abort=True): - logger.info('Resetting scraper state file') + if click.confirm("Are you really sure you want to do this?", abort=True): + logger.info("Resetting scraper state file") - context.obj['state'] = EMPTY_STATE_OBJECT + context.obj["state"] = EMPTY_STATE_OBJECT - click.echo('All done. Have a nice day.') + click.echo("All done. Have a nice day.") @cli.resultcallback() @click.pass_context def process_result(context, result, **kwargs): - ''' Called after anything in the CLI command group, to write the state back to the file. ''' - writeScraperState(context.obj['state'], context.obj['OUTPUT_FOLDER']) + """Called after anything in the CLI command group, to write the state back to the file.""" + writeScraperState(context.obj["state"], context.obj["OUTPUT_FOLDER"]) -if __name__ == '__main__': +if __name__ == "__main__": cli(obj={}) diff --git a/members/parl-old-check-party.py b/members/parl-old-check-party.py index badcef9d..b01eede7 100644 --- a/members/parl-old-check-party.py +++ b/members/parl-old-check-party.py @@ -1,117 +1,208 @@ #!/usr/bin/python -# +# # Old Work-In-Progress for something using old Parliament API, comparing # parties. Would probably want reworking using new API. The purpose would be to # check for changes against our data, and alert someone that something needs # fixing (or longer term, fix it automatically). 
-import re +import sys import urllib.request + import lxml.objectify -import sys sys.path.append("../pyscraper") from lords.resolvenames import lordsList -TYPES = ( - '2 Hered Office Holders', 'Bishops and Archbishops', 'Deputy Hereditary', 'Elected Hereditary', 'Hereditary', - 'Hereditary of 1st creation', 'Hereds given LPs', 'Law Lord', 'Life peer', +TYPES = ( + "2 Hered Office Holders", + "Bishops and Archbishops", + "Deputy Hereditary", + "Elected Hereditary", + "Hereditary", + "Hereditary of 1st creation", + "Hereds given LPs", + "Law Lord", + "Life peer", +) +RANKS = ( + "Archbishop", + "Baroness", + "Bishop", + "Countess", + "Duke", + "Earl", + "Lady", + "Lord", + "Marquess", + "Prince", + "Viscount", ) -RANKS = ( 'Archbishop', 'Baroness', 'Bishop', 'Countess', 'Duke', 'Earl', 'Lady', 'Lord', 'Marquess', 'Prince', 'Viscount' ) -GENDERS = ( 'Female', 'Male' ) +GENDERS = ("Female", "Male") PARTIES = ( - '', 'Alliance', 'Bishops', 'Conservative', 'Conservative Independent', 'Crossbench', 'Democratic Unionist', - 'Independent Labour', 'Labour', 'Labour Independent', 'Liberal Democrat', 'Non-affiliated (current Member)', - 'Other', 'Plaid Cymru', 'UK Independence Party', 'Ulster Unionist Party', + "", + "Alliance", + "Bishops", + "Conservative", + "Conservative Independent", + "Crossbench", + "Democratic Unionist", + "Independent Labour", + "Labour", + "Labour Independent", + "Liberal Democrat", + "Non-affiliated (current Member)", + "Other", + "Plaid Cymru", + "UK Independence Party", + "Ulster Unionist Party", ) -STATUS = ('Active', 'Retired', 'Deceased', 'Suspended', 'Inactive', 'Disqualified', 'Resigned', 'LeaveOfAbsence') +STATUS = ( + "Active", + "Retired", + "Deceased", + "Suspended", + "Inactive", + "Disqualified", + "Resigned", + "LeaveOfAbsence", +) + class Lord: left_date = None def __init__(self, lord): - self.ids = { 'id': lord.get('id'), 'pims': lord.get('pimsId'), 'dods': lord.get('dodsId') } - self.type = TYPES.index(lord.type) - self.rank = RANKS.index(lord.rank) - self.firstName = str(getattr(lord, 'firstName', '')) - self.lastName = str(lord.lastName) - self.shortTitle = str(lord.shortTitle).replace(' ', ' ') # Used in division listings - self.longTitle = str(lord.longTitle).replace('Rdt Hon. ', '') # Used in debate speech - self.party = PARTIES.index(lord['{urn:parliament/metadata/core/2010/10/01/party}party'].partyName) - self.website = str(lord.get('website', '')) - self.gender = GENDERS.index(lord['{urn:parliament/metadata/core/2010/10/01/gender}gender']) - self.lastOath = str(lord.lastOathDate)[:10] - - honours = getattr(lord, '{urn:parliament/metadata/core/members/2010/10/01/honour}honours', None) + self.ids = { + "id": lord.get("id"), + "pims": lord.get("pimsId"), + "dods": lord.get("dodsId"), + } + self.type = TYPES.index(lord.type) + self.rank = RANKS.index(lord.rank) + self.firstName = str(getattr(lord, "firstName", "")) + self.lastName = str(lord.lastName) + self.shortTitle = str(lord.shortTitle).replace( + " ", " " + ) # Used in division listings + self.longTitle = str(lord.longTitle).replace( + "Rdt Hon. 
", "" + ) # Used in debate speech + self.party = PARTIES.index( + lord["{urn:parliament/metadata/core/2010/10/01/party}party"].partyName + ) + self.website = str(lord.get("website", "")) + self.gender = GENDERS.index( + lord["{urn:parliament/metadata/core/2010/10/01/gender}gender"] + ) + self.lastOath = str(lord.lastOathDate)[:10] + + honours = getattr( + lord, + "{urn:parliament/metadata/core/members/2010/10/01/honour}honours", + None, + ) if honours is not None: - self.honours = [ ( str(h.name), str(h.startDate) ) for h in honours['{urn:parliament/metadata/core/2010/10/01/honour}honour'] ] - - status = lord['{urn:parliament/metadata/core/2010/10/01/status}status'] - self.status = STATUS[STATUS.index(status['name'])] - self.statusInfo = status['statusInformation'] - - if self.status == 'Retired': - self.left_date = str(self.statusInfo['dateOfRetirement'])[:10] - elif self.status == 'Deceased': - self.left_date = str(self.statusInfo['dateOfDeath'])[:10] - elif self.status == 'Suspended': - start_date = str(self.statusInfo['startDate'])[:10] - end_date = str(self.statusInfo['endDate'])[:10] - reason = self.statusInfo['description'] + self.honours = [ + (str(h.name), str(h.startDate)) + for h in honours[ + "{urn:parliament/metadata/core/2010/10/01/honour}honour" + ] + ] + + status = lord["{urn:parliament/metadata/core/2010/10/01/status}status"] + self.status = STATUS[STATUS.index(status["name"])] + self.statusInfo = status["statusInformation"] + + if self.status == "Retired": + self.left_date = str(self.statusInfo["dateOfRetirement"])[:10] + elif self.status == "Deceased": + self.left_date = str(self.statusInfo["dateOfDeath"])[:10] + elif self.status == "Suspended": + start_date = str(self.statusInfo["startDate"])[:10] + end_date = str(self.statusInfo["endDate"])[:10] + reason = self.statusInfo["description"] self.status = (self.status, start_date, end_date, reason) - elif self.status == 'Inactive': - self.left_date = str(self.statusInfo['membershipEndDate'])[:10] - elif self.status == 'Disqualified': - start_date = str(self.statusInfo['startDate'])[:10] - end_date = str(self.statusInfo['endDate'])[:10] - reason = self.statusInfo['reason'] + elif self.status == "Inactive": + self.left_date = str(self.statusInfo["membershipEndDate"])[:10] + elif self.status == "Disqualified": + start_date = str(self.statusInfo["startDate"])[:10] + end_date = str(self.statusInfo["endDate"])[:10] + reason = self.statusInfo["reason"] self.status = (self.status, start_date, end_date, reason) - elif self.status == 'Resigned': - self.left_date = str(self.statusInfo['dateOfResignation'])[:10] - elif self.status == 'LeaveOfAbsence': - assert self.party in (PARTIES.index('Non-affiliated (current Member)'), PARTIES.index('Other')) - #self.party = PARTIES.index(self.statusInfo['party']['partyName']) - elif self.status == 'Active': + elif self.status == "Resigned": + self.left_date = str(self.statusInfo["dateOfResignation"])[:10] + elif self.status == "LeaveOfAbsence": + assert self.party in ( + PARTIES.index("Non-affiliated (current Member)"), + PARTIES.index("Other"), + ) + # self.party = PARTIES.index(self.statusInfo['party']['partyName']) + elif self.status == "Active": pass # Corrections - if self.longTitle == 'The Lord McAlpine of West Green': - self.left_date = '2010-05-21' # From House of Lords journal - if self.longTitle == 'The Most Hon. the Marquess of Salisbury DL': - self.status = 'Retired' # The 6th Marquess left, as I understand it - self.left_date = '1999-11-11' - if self.longTitle == 'The Rt Hon. 
the Viscount Younger of Leckie KT KCVO TD DL': - self.type = TYPES.index('Hereds given LPs') # Not a Hereditary - if self.longTitle == 'The Earl of Carnarvon KCVO KBE DL': - self.type = TYPES.index('Elected Hereditary') # One of the 92 + if self.longTitle == "The Lord McAlpine of West Green": + self.left_date = "2010-05-21" # From House of Lords journal + if self.longTitle == "The Most Hon. the Marquess of Salisbury DL": + self.status = "Retired" # The 6th Marquess left, as I understand it + self.left_date = "1999-11-11" + if self.longTitle == "The Rt Hon. the Viscount Younger of Leckie KT KCVO TD DL": + self.type = TYPES.index("Hereds given LPs") # Not a Hereditary + if self.longTitle == "The Earl of Carnarvon KCVO KBE DL": + self.type = TYPES.index("Elected Hereditary") # One of the 92 def __str__(self): - return '%s (%s) - %s' % ( self.longTitle, PARTIES[self.party], self.status ) + return "%s (%s) - %s" % (self.longTitle, PARTIES[self.party], self.status) + # Fetch the current live information -lords = urllib.request.urlopen('http://data.parliament.uk/resources/members/api/lords/all/').read() -lords = [ Lord(lord) for lord in lxml.objectify.fromstring(lords).peer ] +lords = urllib.request.urlopen( + "http://data.parliament.uk/resources/members/api/lords/all/" +).read() +lords = [Lord(lord) for lord in lxml.objectify.fromstring(lords).peer] for lord in lords: # Ignore hereditaries retired by the House of Lords Act 1999, or # others who retired or dided before our records begin - if lord.status in ('Deceased', 'Retired') and lord.left_date <= '1999-11-11': continue + if lord.status in ("Deceased", "Retired") and lord.left_date <= "1999-11-11": + continue # We don't show ones that haven't been introduced yet (and couple of bugs, looks like) - if not lord.lastOath: continue - - date = lord.left_date or '2011-12-04' - match = lordsList.MatchRevName(lord.shortTitle, date, '') - - #if '%s %s' % (lord.title, lord.lastName) in self. - if PARTIES[lord.party] == 'Conservative' and lordsList.lords[match]['party'] == 'Con': continue - if PARTIES[lord.party] == 'Labour' and lordsList.lords[match]['party'] == 'Lab': continue - if PARTIES[lord.party] == 'Liberal Democrat' and lordsList.lords[match]['party'] == 'LDem': continue - if PARTIES[lord.party] == 'Crossbench' and lordsList.lords[match]['party'] == 'XB': continue - if PARTIES[lord.party] == 'Bishops' and lordsList.lords[match]['party'] == 'Bp': continue - if PARTIES[lord.party] == 'Ulster Unionist Party' and lordsList.lords[match]['party'] == 'UUP': continue - if PARTIES[lord.party] == 'UK Independence Party' and lordsList.lords[match]['party'] == 'UKIP': continue - if PARTIES[lord.party] == 'Plaid Cymru' and lordsList.lords[match]['party'] == 'PC': continue - if PARTIES[lord.party] == 'Plaid Cymru' and lordsList.lords[match]['party'] == 'PC': continue - print(PARTIES[lord.party], lordsList.lords[match]['party']) - + if not lord.lastOath: + continue + + date = lord.left_date or "2011-12-04" + match = lordsList.MatchRevName(lord.shortTitle, date, "") + + # if '%s %s' % (lord.title, lord.lastName) in self. 
+ if ( + PARTIES[lord.party] == "Conservative" + and lordsList.lords[match]["party"] == "Con" + ): + continue + if PARTIES[lord.party] == "Labour" and lordsList.lords[match]["party"] == "Lab": + continue + if ( + PARTIES[lord.party] == "Liberal Democrat" + and lordsList.lords[match]["party"] == "LDem" + ): + continue + if PARTIES[lord.party] == "Crossbench" and lordsList.lords[match]["party"] == "XB": + continue + if PARTIES[lord.party] == "Bishops" and lordsList.lords[match]["party"] == "Bp": + continue + if ( + PARTIES[lord.party] == "Ulster Unionist Party" + and lordsList.lords[match]["party"] == "UUP" + ): + continue + if ( + PARTIES[lord.party] == "UK Independence Party" + and lordsList.lords[match]["party"] == "UKIP" + ): + continue + if PARTIES[lord.party] == "Plaid Cymru" and lordsList.lords[match]["party"] == "PC": + continue + if PARTIES[lord.party] == "Plaid Cymru" and lordsList.lords[match]["party"] == "PC": + continue + print(PARTIES[lord.party], lordsList.lords[match]["party"]) diff --git a/members/wikipedia-commons.py b/members/wikipedia-commons.py index 511684e6..b6831585 100755 --- a/members/wikipedia-commons.py +++ b/members/wikipedia-commons.py @@ -7,10 +7,10 @@ # certain conditions. However, it comes with ABSOLUTELY NO WARRANTY. # For details see the file LICENSE.html in the top level of the source. -import datetime +import re import sys import urllib.parse -import re + # import sets sys.path.append("../pyscraper") @@ -18,59 +18,61 @@ from resolvemembernames import memberList # Get region pages -wiki_index_url = "http://en.wikipedia.org/wiki/MPs_elected_in_the_UK_general_election,_2005" +wiki_index_url = ( + "http://en.wikipedia.org/wiki/MPs_elected_in_the_UK_general_election,_2005" +) date_parl = { - 1997: '1999-01-01', - 2001: '2003-01-01', - 2005: '2007-08-01', - 2010: '2014-01-01', - 2015: '2016-01-01', + 1997: "1999-01-01", + 2001: "2003-01-01", + 2005: "2007-08-01", + 2010: "2014-01-01", + 2015: "2016-01-01", } -wikimembers = {} +wikimembers = {} -# Grab page +# Grab page for year in (1997, 2001, 2005, 2010, 2015): - ur = open('../rawdata/Members_of_the_House_of_Commons_%d' % year) + ur = open("../rawdata/Members_of_the_House_of_Commons_%d" % year) content = ur.read() ur.close() -# -#West Ham -#Lyn Brown -#Labour - matcher = '\s+]*?title="[^"]+">([^<]+)(?:
\s+.*?)?\s*\s+(?:]*>\s*\s*]*>]*>[^<]*\s*\s*]*>\s*\s*)?(?:(?:[^<]*|))?(?:Dr |Sir |The Rev\. )?]*?title="[^"]+"[^>]*>([^<]+)(?:(?:){2,3})?(?: \(.*?\))?\s*|by-election,[^"]+">([^<]+) [^ ]{1,3} ([^<]+)'; + # + # West Ham + # Lyn Brown + # Labour + matcher = '\s+]*?title="[^"]+">([^<]+)(?:
\s+.*?)?\s*\s+(?:]*>\s*\s*]*>]*>[^<]*\s*\s*]*>\s*\s*)?(?:(?:[^<]*|))?(?:Dr |Sir |The Rev\. )?]*?title="[^"]+"[^>]*>([^<]+)(?:(?:){2,3})?(?: \(.*?\))?\s*|by-election,[^"]+">([^<]+) [^ ]{1,3} ([^<]+)' matches = re.findall(matcher, content) - for (cons, url, name, cons2, url2, name2) in matches: + for cons, url, name, cons2, url2, name2 in matches: id = None if cons2: cons = cons2 name = name2 url = url2 - cons = cons.replace('&', '&') + cons = cons.replace("&", "&") try: - (id, canonname, canoncons) = memberList.matchfullnamecons(name, cons, date_parl[year]) + (id, canonname, canoncons) = memberList.matchfullnamecons( + name, cons, date_parl[year] + ) except Exception as e: print(e, file=sys.stderr) if not id: continue wikimembers[id] = url -print(''' -''') +print(""" +""") k = sorted(wikimembers) for id in k: url = urllib.parse.urljoin(wiki_index_url, wikimembers[id]) print('' % (id, url)) -print('') +print("") -#wikimembers = sets.Set(wikimembers.keys()) -#print "len: ", len(wikimembers) +# wikimembers = sets.Set(wikimembers.keys()) +# print "len: ", len(wikimembers) # Check we have everybody -- ha! not likely yet -#allmembers = sets.Set(memberList.currentmpslist()) -#symdiff = allmembers.symmetric_difference(wikimembers) -#if len(symdiff) > 0: +# allmembers = sets.Set(memberList.currentmpslist()) +# symdiff = allmembers.symmetric_difference(wikimembers) +# if len(symdiff) > 0: # print >>sys.stderr, "Failed to get all MPs, these ones in symmetric difference" # print >>sys.stderr, symdiff - - diff --git a/members/wikipedia-lords.py b/members/wikipedia-lords.py index 34ac829b..852f3349 100755 --- a/members/wikipedia-lords.py +++ b/members/wikipedia-lords.py @@ -8,9 +8,9 @@ # For details see the file LICENSE.html in the top level of the source. import datetime +import re import sys import urllib.parse -import re sys.path.append("../pyscraper") from lords.resolvenames import lordsList @@ -20,39 +20,39 @@ date_today = datetime.date.today().isoformat() wikimembers = {} -# Grab page -ur = open('../rawdata/Members_of_the_House_of_Lords') +# Grab page +ur = open("../rawdata/Members_of_the_House_of_Lords") content = ur.read() ur.close() -#The Lord Ampthill -matcher = '\s+]*?title="([^"]+)"[^>]*>([^<]+)\s*'; +# The Lord Ampthill +matcher = ( + '\s+]*?title="([^"]+)"[^>]*>([^<]+)\s*' +) matches = re.findall(matcher, content) -for (url, title, name) in matches: +for url, title, name in matches: id = None try: id = lordsList.GetLordIDfname(name, None, date_today) - except Exception as e: + except Exception: continue if not id: continue wikimembers[id] = url -print(''' -''') +print(""" +""") for id, url in sorted(wikimembers.items()): url = urllib.parse.urljoin(wiki_index_url, url) print('' % (id, url)) -print('') +print("") -#print "len: ", len(wikimembers) +# print "len: ", len(wikimembers) # Check we have everybody -- ha! not likely yet -#allmembers = set(memberList.currentmpslist()) -#symdiff = allmembers.symmetric_difference(wikimembers) -#if len(symdiff) > 0: +# allmembers = set(memberList.currentmpslist()) +# symdiff = allmembers.symmetric_difference(wikimembers) +# if len(symdiff) > 0: # print >>sys.stderr, "Failed to get all MPs, these ones in symmetric difference" # print >>sys.stderr, symdiff - - diff --git a/members/wikipedia-standingdown.py b/members/wikipedia-standingdown.py index bbf0c5e6..19c92524 100755 --- a/members/wikipedia-standingdown.py +++ b/members/wikipedia-standingdown.py @@ -7,24 +7,25 @@ # certain conditions. However, it comes with ABSOLUTELY NO WARRANTY. 
# For details see the file LICENSE.html in the top level of the source. -import sys import re +import sys sys.path.append("../pyscraper") from resolvemembernames import memberList -today = '2024-05-24' +today = "2024-05-24" -page = open('../rawdata/Members_of_the_2024_standing_down').read() -page = re.sub('(?s)^.*?Members of Parliament not standing for re-election', '', page) -page = re.sub('(?s).*', '', page) +page = open("../rawdata/Members_of_the_2024_standing_down").read() +page = re.sub( + "(?s)^.*?Members of Parliament not standing for re-election", "", page +) +page = re.sub("(?s).*", "", page) -print(''' -''') +print(""" +""") m = re.findall(r'\s*.*?]*>([^<]*)', page) for row in m: url, name = row - pid, canonname, canoncons = memberList.matchfullnamecons(name, None, today) + pid, canonname, canoncons = memberList.matchfullnamecons(name, None, today) print((' ' % (pid, name))) -print('') - +print("") diff --git a/pyscraper/base_resolver.py b/pyscraper/base_resolver.py index ea7939b4..62beacab 100644 --- a/pyscraper/base_resolver.py +++ b/pyscraper/base_resolver.py @@ -2,177 +2,211 @@ import os import re -members_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'members')) +members_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "members")) + class ResolverBase(object): def __init__(self): self.reloadJSON() def reloadJSON(self): - self.members = {} # ID --> membership - self.persons = {} # ID --> person - self.fullnames = {} # "Firstname Lastname" --> memberships - self.lastnames = {} # Surname --> memberships - - self.constoidmap = {} # constituency name --> cons attributes (with date and ID) - self.considtonamemap = {} # cons ID --> name - self.considtomembermap = {} # cons ID --> memberships - self.historichansard = {} # Historic Hansard commons membership ID -> MPs - self.pims = {} # Pims membership ID and date -> MPs - self.mnis = {} # Parliament Member Names ID to person - - self.parties = {} # party --> memberships - self.membertopersonmap = {} # member ID --> person ID - self.persontomembermap = {} # person ID --> memberships + self.members = {} # ID --> membership + self.persons = {} # ID --> person + self.fullnames = {} # "Firstname Lastname" --> memberships + self.lastnames = {} # Surname --> memberships + + self.constoidmap = {} # constituency name --> cons attributes (with date and ID) + self.considtonamemap = {} # cons ID --> name + self.considtomembermap = {} # cons ID --> memberships + self.historichansard = {} # Historic Hansard commons membership ID -> MPs + self.pims = {} # Pims membership ID and date -> MPs + self.mnis = {} # Parliament Member Names ID to person + + self.parties = {} # party --> memberships + self.membertopersonmap = {} # member ID --> person ID + self.persontomembermap = {} # person ID --> memberships def import_constituencies(self): - data = json.load(open(os.path.join(members_dir, 'people.json'))) - for con in data['posts']: - if con['organization_id'] != self.import_organization_id: + data = json.load(open(os.path.join(members_dir, "people.json"))) + for con in data["posts"]: + if con["organization_id"] != self.import_organization_id: continue attr = { - 'id': con['id'], - 'start_date': con.get('start_date', '0000-00-00'), - 'end_date': con.get('end_date', '9999-12-31'), + "id": con["id"], + "start_date": con.get("start_date", "0000-00-00"), + "end_date": con.get("end_date", "9999-12-31"), } - if len(attr['start_date']) == 4: - attr['start_date'] = '%s-01-01' % attr['start_date'] - if 
len(attr['end_date']) == 4: - attr['end_date'] = '%s-12-31' % attr['end_date'] + if len(attr["start_date"]) == 4: + attr["start_date"] = "%s-01-01" % attr["start_date"] + if len(attr["end_date"]) == 4: + attr["end_date"] = "%s-12-31" % attr["end_date"] - names = [con['area']['name']] + con['area'].get('other_names', []) + names = [con["area"]["name"]] + con["area"].get("other_names", []) for name in names: - if not con['id'] in self.considtonamemap: - self.considtonamemap[con['id']] = name + if con["id"] not in self.considtonamemap: + self.considtonamemap[con["id"]] = name self.constoidmap.setdefault(name, []).append(attr) nopunc = self.strip_punctuation(name) self.constoidmap.setdefault(nopunc, []).append(attr) def strip_punctuation(self, cons): - nopunc = cons.replace(',','').replace('-','').replace(' ','').lower().strip() + nopunc = cons.replace(",", "").replace("-", "").replace(" ", "").lower().strip() return nopunc def import_people_json(self): - data = json.load(open(os.path.join(members_dir, 'people.json'))) - posts = {post['id']: post for post in data['posts']} - orgs = {org['id']: org for org in data['organizations']} - for mship in data['memberships']: + data = json.load(open(os.path.join(members_dir, "people.json"))) + posts = {post["id"]: post for post in data["posts"]} + orgs = {org["id"]: org for org in data["organizations"]} + for mship in data["memberships"]: self.import_people_membership(mship, posts, orgs) - for person in data['persons']: + for person in data["persons"]: self.import_people_names(person) def import_people_membership(self, mship, posts, orgs): - if 'post_id' not in mship or posts[mship['post_id']]['organization_id'] != self.import_organization_id: + if ( + "post_id" not in mship + or posts[mship["post_id"]]["organization_id"] != self.import_organization_id + ): return if mship["id"] in self.membertopersonmap: raise Exception("Same member id %s appeared twice" % mship["id"]) - self.membertopersonmap[mship["id"]] = mship['person_id'] - self.persontomembermap.setdefault(mship['person_id'], []).append(mship["id"]) + self.membertopersonmap[mship["id"]] = mship["person_id"] + self.persontomembermap.setdefault(mship["person_id"], []).append(mship["id"]) if self.members.get(mship["id"]): raise Exception("Repeated identifier %s in members JSON file" % mship["id"]) self.members[mship["id"]] = mship - if 'end_date' not in mship: - mship['end_date'] = '9999-12-31' + if "end_date" not in mship: + mship["end_date"] = "9999-12-31" # index by constituency - mship['constituency'] = posts[mship['post_id']]['area']['name'] - consids = self.constoidmap[mship['constituency']] + mship["constituency"] = posts[mship["post_id"]]["area"]["name"] + consids = self.constoidmap[mship["constituency"]] consid = None # find the constituency id for this person - mship_start_date = len(mship['start_date'])==4 and ('%s-01-01' % mship['start_date']) or mship['start_date'] - mship_end_date = len(mship['end_date'])==4 and ('%s-12-31' % mship['end_date']) or mship['end_date'] + mship_start_date = ( + len(mship["start_date"]) == 4 + and ("%s-01-01" % mship["start_date"]) + or mship["start_date"] + ) + mship_end_date = ( + len(mship["end_date"]) == 4 + and ("%s-12-31" % mship["end_date"]) + or mship["end_date"] + ) for cons in consids: - if (cons['start_date'] <= mship_start_date and - mship_start_date <= mship_end_date and - mship_end_date <= cons['end_date']): - if consid and consid != cons['id']: - raise Exception("Two constituency ids %s %s overlap with MP %s" % (consid, cons['id'], 
mship['id'])) - consid = cons['id'] + if ( + cons["start_date"] <= mship_start_date + and mship_start_date <= mship_end_date + and mship_end_date <= cons["end_date"] + ): + if consid and consid != cons["id"]: + raise Exception( + "Two constituency ids %s %s overlap with MP %s" + % (consid, cons["id"], mship["id"]) + ) + consid = cons["id"] if not consid: raise Exception("Constituency '%s' not found" % mship["constituency"]) # check name in members file is same as default in cons file backformed_cons = self.considtonamemap[consid] if backformed_cons != mship["constituency"]: - raise Exception("Constituency '%s' in members file differs from first constituency '%s' listed in cons file" % (mship["constituency"], backformed_cons)) + raise Exception( + "Constituency '%s' in members file differs from first constituency '%s' listed in cons file" + % (mship["constituency"], backformed_cons) + ) # check first date ranges don't overlap, MPs only # Only check modern MPs as we might have overlapping data previously - if self.import_organization_id == 'house-of-commons': + if self.import_organization_id == "house-of-commons": for cons in self.considtomembermap.get(consid, []): - if cons['end_date'] < '1997-05-01': continue - if cons['start_date'] <= mship['start_date'] <= cons['end_date'] \ - or cons['start_date'] <= mship['end_date'] <= cons['end_date'] \ - or mship['start_date'] <= cons['start_date'] <= mship['end_date'] \ - or mship['start_date'] <= cons['end_date'] <= mship['end_date']: - raise Exception("%s %s Two MP entries for constituency %s with overlapping dates" % (mship, cons, consid)) + if cons["end_date"] < "1997-05-01": + continue + if ( + cons["start_date"] <= mship["start_date"] <= cons["end_date"] + or cons["start_date"] <= mship["end_date"] <= cons["end_date"] + or mship["start_date"] <= cons["start_date"] <= mship["end_date"] + or mship["start_date"] <= cons["end_date"] <= mship["end_date"] + ): + raise Exception( + "%s %s Two MP entries for constituency %s with overlapping dates" + % (mship, cons, consid) + ) # then add in self.considtomembermap.setdefault(consid, []).append(mship) # ... 
and by party - if 'on_behalf_of_id' in mship: - mship['party'] = orgs[mship['on_behalf_of_id']]['name'] - self.parties.setdefault(mship['party'], []).append(mship) + if "on_behalf_of_id" in mship: + mship["party"] = orgs[mship["on_behalf_of_id"]]["name"] + self.parties.setdefault(mship["party"], []).append(mship) - if 'hansard_id' in mship: - self.historichansard.setdefault(int(mship['hansard_id']), []).append(mship) + if "hansard_id" in mship: + self.historichansard.setdefault(int(mship["hansard_id"]), []).append(mship) def import_people_names(self, person): - if person['id'] not in self.persontomembermap: + if person["id"] not in self.persontomembermap: return - self.persons[person['id']] = person - memberships = [self.members[x] for x in self.persontomembermap[person['id']]] - for other_name in person.get('other_names', []): - if other_name.get('note') == 'Main': + self.persons[person["id"]] = person + memberships = [self.members[x] for x in self.persontomembermap[person["id"]]] + for other_name in person.get("other_names", []): + if other_name.get("note") == "Main": self.import_people_main_name(other_name, memberships) - elif other_name.get('note') == 'Alternate': + elif other_name.get("note") == "Alternate": self.import_people_alternate_name(person, other_name, memberships) - for identifier in person.get('identifiers', []): - if identifier.get('scheme') == 'pims_id': - id = identifier.get('identifier') + for identifier in person.get("identifiers", []): + if identifier.get("scheme") == "pims_id": + id = identifier.get("identifier") for m in memberships: p = person.copy() - p['start_date'] = m['start_date'] - p['end_date'] = m['end_date'] + p["start_date"] = m["start_date"] + p["end_date"] = m["end_date"] self.pims.setdefault(id, []).append(p) - elif identifier.get('scheme') == 'datadotparl_id': - id = identifier.get('identifier') + elif identifier.get("scheme") == "datadotparl_id": + id = identifier.get("identifier") for m in memberships: p = person.copy() - p['start_date'] = m['start_date'] - p['end_date'] = m['end_date'] + p["start_date"] = m["start_date"] + p["end_date"] = m["end_date"] self.mnis.setdefault(id, []).append(p) def import_people_main_name(self, name, memberships): - mships = [m for m in memberships if m['start_date'] <= name.get('end_date', '9999-12-31') and m['end_date'] >= name.get('start_date', '1000-01-01')] - if not mships: return + mships = [ + m + for m in memberships + if m["start_date"] <= name.get("end_date", "9999-12-31") + and m["end_date"] >= name.get("start_date", "1000-01-01") + ] + if not mships: + return try: family_name = name["family_name"] given_name = name["given_name"] except: - family_name = name['lordname'] - if name['lordofname']: - family_name += ' of ' + name['lordofname'] - given_name = name['honorific_prefix'] - compoundname = '%s %s' % (given_name, family_name) - no_initial = '' - fnnomidinitial = re.findall('^(\S*)\s\S$', given_name) + family_name = name["lordname"] + if name["lordofname"]: + family_name += " of " + name["lordofname"] + given_name = name["honorific_prefix"] + compoundname = "%s %s" % (given_name, family_name) + no_initial = "" + fnnomidinitial = re.findall("^(\S*)\s\S$", given_name) if fnnomidinitial: no_initial = fnnomidinitial[0] + " " + family_name - initial_name = '' - if self.import_organization_id != 'house-of-commons' and given_name: + initial_name = "" + if self.import_organization_id != "house-of-commons" and given_name: initial_name = given_name[0] + " " + family_name for m in mships: - newattr = {'id': m['id'], 
'person_id': m['person_id']} + newattr = {"id": m["id"], "person_id": m["person_id"]} # merge date ranges - take the smallest range covered by # the membership, and the alias's range (if it has one) - newattr['start_date'] = max(m['start_date'], name.get('start_date', '1000-01-01')) - newattr['end_date'] = min(m['end_date'], name.get('end_date', '9999-12-31')) + newattr["start_date"] = max( + m["start_date"], name.get("start_date", "1000-01-01") + ) + newattr["end_date"] = min(m["end_date"], name.get("end_date", "9999-12-31")) self.fullnames.setdefault(compoundname, []).append(newattr) if no_initial: self.fullnames.setdefault(no_initial, []).append(newattr) @@ -181,39 +215,54 @@ def import_people_main_name(self, name, memberships): self.lastnames.setdefault(family_name, []).append(newattr) def import_people_alternate_name(self, person, other_name, memberships): - if other_name.get('organization_id') not in (None, self.import_organization_id): return - mships = [m for m in memberships if m['start_date'] <= other_name.get('end_date', '9999-12-31') and m['end_date'] >= other_name.get('start_date', '1000-01-01')] + if other_name.get("organization_id") not in (None, self.import_organization_id): + return + mships = [ + m + for m in memberships + if m["start_date"] <= other_name.get("end_date", "9999-12-31") + and m["end_date"] >= other_name.get("start_date", "1000-01-01") + ] for m in mships: - newattr = {'id': m['id'], 'person_id': m['person_id']} + newattr = {"id": m["id"], "person_id": m["person_id"]} # merge date ranges - take the smallest range covered by # the membership, and the alias's range (if it has one) - newattr['start_date'] = max(m['start_date'], other_name.get('start_date', '1000-01-01')) - newattr['end_date'] = min(m['end_date'], other_name.get('end_date', '9999-12-31')) - if other_name.get('family_name'): - self.lastnames.setdefault(other_name['family_name'], []).append(newattr) + newattr["start_date"] = max( + m["start_date"], other_name.get("start_date", "1000-01-01") + ) + newattr["end_date"] = min( + m["end_date"], other_name.get("end_date", "9999-12-31") + ) + if other_name.get("family_name"): + self.lastnames.setdefault(other_name["family_name"], []).append(newattr) else: - self.fullnames.setdefault(other_name['name'], []).append(newattr) + self.fullnames.setdefault(other_name["name"], []).append(newattr) # Used by Commons and NI def name_on_date(self, person_id, date): person = self.persons[person_id] - for nm in person['other_names']: - if nm['note'] != 'Main': continue - if nm.get('start_date', '0000-00-00') <= date <= nm.get('end_date', '9999-12-31'): - if 'family_name' in nm: + for nm in person["other_names"]: + if nm["note"] != "Main": + continue + if ( + nm.get("start_date", "0000-00-00") + <= date + <= nm.get("end_date", "9999-12-31") + ): + if "family_name" in nm: name = nm["family_name"] - if nm.get('given_name'): + if nm.get("given_name"): name = nm["given_name"] + " " + name - if nm.get('honorific_prefix'): + if nm.get("honorific_prefix"): name = nm["honorific_prefix"] + " " + name - else: # Lord (e.g. Lord Morrow in NI) - name = nm['honorific_prefix'] - if nm['lordname']: - name += ' %s' % nm['lordname'] - if nm['lordofname']: - name += ' of %s' % nm['lordofname'] + else: # Lord (e.g. 
Lord Morrow in NI) + name = nm["honorific_prefix"] + if nm["lordname"]: + name += " %s" % nm["lordname"] + if nm["lordofname"]: + name += " of %s" % nm["lordofname"] return name - raise Exception('No found for %s on %s' % (person['id'], date)) + raise Exception("No found for %s on %s" % (person["id"], date)) def membertoperson(self, memberid): return self.membertopersonmap[memberid] @@ -221,12 +270,12 @@ def membertoperson(self, memberid): def _match_by_id(self, lookup, id, date): matches = getattr(self, lookup).get(id, []) for m in matches: - if m['start_date'] <= date <= m['end_date']: + if m["start_date"] <= date <= m["end_date"]: return m return None def match_by_mnis(self, mnis_id, date): - return self._match_by_id('mnis', mnis_id, date) + return self._match_by_id("mnis", mnis_id, date) def match_by_pims(self, pims_id, date): - return self._match_by_id('pims', pims_id, date) + return self._match_by_id("pims", pims_id, date) diff --git a/pyscraper/contextexception.py b/pyscraper/contextexception.py index 9d1b3b7d..5547caf6 100755 --- a/pyscraper/contextexception.py +++ b/pyscraper/contextexception.py @@ -1,9 +1,9 @@ #! $Id: contextexception.py,v 1.12 2004/12/23 12:27:09 goatchurch Exp $ # vim:sw=8:ts=8:et:nowrap -class ContextException(Exception): - def __init__(self, description, stamp = None, fragment = None): +class ContextException(Exception): + def __init__(self, description, stamp=None, fragment=None): self.description = description self.stamp = stamp self.fragment = fragment diff --git a/pyscraper/get_links_from_ep.py b/pyscraper/get_links_from_ep.py index 30db8627..d7e63cac 100755 --- a/pyscraper/get_links_from_ep.py +++ b/pyscraper/get_links_from_ep.py @@ -1,37 +1,35 @@ #!/usr/bin/env python3 import operator -from lxml import etree + from everypolitician import EveryPolitician +from lxml import etree def output_file(country, legislature, filename): data = EveryPolitician().country(country).legislature(legislature) output_filename = "../members/{0}.xml".format(filename) - root = etree.Element('publicwhip') + root = etree.Element("publicwhip") - sorted_people = sorted( - data.popolo().persons, - key=operator.attrgetter('name') - ) + sorted_people = sorted(data.popolo().persons, key=operator.attrgetter("name")) for person in sorted_people: - parlparse_id = person.identifier_value('parlparse') + parlparse_id = person.identifier_value("parlparse") if parlparse_id is not None: props = {} if person.twitter: - props['twitter_username'] = person.twitter + props["twitter_username"] = person.twitter if person.facebook: - props['facebook_page'] = person.facebook + props["facebook_page"] = person.facebook if props: - props['id'] = parlparse_id - info = etree.Element('personinfo', props) + props["id"] = parlparse_id + info = etree.Element("personinfo", props) root.append(info) et = etree.ElementTree(root) et.write(output_filename, pretty_print=True) -output_file('UK', 'Commons', 'social-media-commons') -output_file('Scotland', 'Parliament', 'social-media-sp') -output_file('Northern-Ireland', 'Assembly', 'social-media-ni') +output_file("UK", "Commons", "social-media-commons") +output_file("Scotland", "Parliament", "social-media-sp") +output_file("Northern-Ireland", "Assembly", "social-media-ni") diff --git a/pyscraper/gettwittermps.py b/pyscraper/gettwittermps.py index 3a10319d..4cecc71e 100755 --- a/pyscraper/gettwittermps.py +++ b/pyscraper/gettwittermps.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -import urllib.request import csv +import urllib.request import xml.sax uri = 
"http://spreadsheets.google.com/tq?tqx=out:csv&key=0AjWA_TWMI4t_dFI5MWRWZkRWbFJ6MVhHQzVmVndrZnc&hl=en_GB" @@ -9,34 +9,39 @@ f = urllib.request.urlopen(uri) csv_data = f.read() lines = csv_data.split("\n") -rows = csv.reader(lines.__iter__(), delimiter=',', quotechar='"') +rows = csv.reader(lines.__iter__(), delimiter=",", quotechar='"') + class PeopleParser(xml.sax.handler.ContentHandler): def __init__(self): self.parser = xml.sax.make_parser() self.parser.setContentHandler(self) - def parse(self,filename): + + def parse(self, filename): self.office_id_to_person_id = {} self.parser.parse(filename) - def startElement(self,name,attrs): - if name == 'person': - self.current_person_id = attrs['id'] - elif name == 'office': - self.office_id_to_person_id[attrs['id']] = self.current_person_id - def endElement(self,name): - if name == 'person': + + def startElement(self, name, attrs): + if name == "person": + self.current_person_id = attrs["id"] + elif name == "office": + self.office_id_to_person_id[attrs["id"]] = self.current_person_id + + def endElement(self, name): + if name == "person": self.current_person_id = None + people_parser = PeopleParser() people_parser.parse("../members/people.xml") person_id_to_twitter_username = {} output_filename = "../members/twitter-commons.xml" -fp = open(output_filename,"w") -fp.write(''' +fp = open(output_filename, "w") +fp.write(""" -''') +""") for r in rows: if len(r) < 5: @@ -49,8 +54,10 @@ def endElement(self,name): if len(twitter_username) == 0: continue if member_id not in people_parser.office_id_to_person_id: - raise "No person ID found for %s in line %s" % (member_id,"#".join(r)) + raise "No person ID found for %s in line %s" % (member_id, "#".join(r)) person_id = people_parser.office_id_to_person_id[member_id] - fp.write("\n"%(person_id,twitter_username)) + fp.write( + '\n' % (person_id, twitter_username) + ) fp.write("") diff --git a/pyscraper/gidmatching.py b/pyscraper/gidmatching.py index c11ad2a5..84287746 100644 --- a/pyscraper/gidmatching.py +++ b/pyscraper/gidmatching.py @@ -1,344 +1,406 @@ +import difflib import re + import miscfuncs -import difflib -#from xmlfilewrite import PrevParsedFile + +# from xmlfilewrite import PrevParsedFile class PrevParsedFile: - pass + pass + toppath = miscfuncs.toppath pwxmldirs = miscfuncs.pwxmldirs tempfilename = miscfuncs.tempfilename -from miscfuncs import NextAlphaString, AlphaStringToOrder - - # get the min index that matches this def GetMinIndex(indx, a): - assert indx[0] == 0 and a < indx[-1] - i0, i1 = 0, len(indx) - 1 - while i0 + 1 < i1: - im = (i0 + i1) // 2 - assert i0 != im and i1 != im - if indx[im] <= a: - i0 = im - else: - i1 = im - assert indx[i0] <= a < indx[i1] - return i0 + assert indx[0] == 0 and a < indx[-1] + i0, i1 = 0, len(indx) - 1 + while i0 + 1 < i1: + im = (i0 + i1) // 2 + assert i0 != im and i1 != im + if indx[im] <= a: + i0 = im + else: + i1 = im + assert indx[i0] <= a < indx[i1] + return i0 def PrepareXMLForDiff(scrapeversion): - chks = re.findall("<(major-heading|minor-heading|oral-heading|speech|division|divisioncount|ques|reply)\s(.*?)>\n?([\s\S]*?)\n?\s*", scrapeversion) - - # make identically structured huge string over the previous xml file with heading stuff stripped out - essxlist = [ ] - essxindx = [ ] - for chk in chks: - # print chk - assert chk[0] == chk[3] # chunk type (this can fail if due to the lack of two \n's between the two labels, and thus detects an empty speech, which should not be there. 
- # new_chk = chk[2] - new_chk = re.sub( - r'(?s)(<(p|tr)\s[^>]*>)(.*?)(<\/\2>)', - lambda m: (''.join((m.group(1), re.sub('\n', ' ', m.group(3)), m.group(4)))), - chk[2] - ) - essxindx.append(len(essxlist)) - essxlist.append("HEADING-" + chk[0]) - speaker = re.search('nospeaker="true"|divnumber|(?:speakerid|person_id)="[^"]*"', chk[1]).group(0) - essxlist.append(speaker) - - if re.match("oral-heading|major-heading|minor-heading", chk[0]): - #assert not re.search("[<>]", chk[2]) - heading = new_chk.strip() - essxlist.extend(heading.split()) - else: - for ps in new_chk.split('\n'): - m = re.match("\s*<(?:p|tr)[^>]*>\s*(.*?)\s*\s*$", ps) - if m: - para = m.group(1) - else: - assert re.match("\s*]*>|&\w+;|[^<>\s]+", para)) - - essxindx.append(len(essxlist)) - assert len(chks) + 1 == len(essxindx) - return essxindx, essxlist, chks + chks = re.findall( + "<(major-heading|minor-heading|oral-heading|speech|division|divisioncount|ques|reply)\s(.*?)>\n?([\s\S]*?)\n?\s*", + scrapeversion, + ) + + # make identically structured huge string over the previous xml file with heading stuff stripped out + essxlist = [] + essxindx = [] + for chk in chks: + # print chk + assert ( + chk[0] == chk[3] + ) # chunk type (this can fail if due to the lack of two \n's between the two labels, and thus detects an empty speech, which should not be there. + # new_chk = chk[2] + new_chk = re.sub( + r"(?s)(<(p|tr)\s[^>]*>)(.*?)(<\/\2>)", + lambda m: ( + "".join((m.group(1), re.sub("\n", " ", m.group(3)), m.group(4))) + ), + chk[2], + ) + essxindx.append(len(essxlist)) + essxlist.append("HEADING-" + chk[0]) + speaker = re.search( + 'nospeaker="true"|divnumber|(?:speakerid|person_id)="[^"]*"', chk[1] + ).group(0) + essxlist.append(speaker) + + if re.match("oral-heading|major-heading|minor-heading", chk[0]): + # assert not re.search("[<>]", chk[2]) + heading = new_chk.strip() + essxlist.extend(heading.split()) + else: + for ps in new_chk.split("\n"): + m = re.match("\s*<(?:p|tr)[^>]*>\s*(.*?)\s*\s*$", ps) + if m: + para = m.group(1) + else: + assert re.match( + "\s*]*>|&\w+;|[^<>\s]+", para)) + + essxindx.append(len(essxlist)) + assert len(chks) + 1 == len(essxindx) + return essxindx, essxlist, chks + # the difficult function that finds matches in the gids # we don't use an xml parsing feature because it transforms the text # Very hard use of difflib going on here too # We make great use of the indices of the different lists def FactorChanges(flatb, scrapeversion): - essxindx, essxlist, chks = PrepareXMLForDiff(scrapeversion) - - # now make a huge string over the flatb with heading stuff stripped out - essflatblist = [ ] - essflatbindx = [ ] - for qb in flatb: - essflatbindx.append(len(essflatblist)) - essflatblist.append("HEADING-" + qb.typ) - essflatblist.append(re.search('nospeaker="true"|(?:speakerid|person_id)="[^"]*"', qb.speaker).group(0)) - - if re.match("oral-heading|major-heading|minor-heading", qb.typ): - heading = ("".join(qb.stext)).strip() - essflatblist.extend(heading.split()) - - # strip format labels out of paragraphs - else: - for ps in qb.stext: - m = re.match("\s*<(?:p|tr)[^>]*>\s*(.*?)\s*\s*$", ps) - if m: - para = m.group(1) - else: - assert re.match("\s*]*>|&\w+;|[^<>\s]+", para)) - - essflatbindx.append(len(essflatblist)) - assert len(essflatbindx) == len(flatb) + 1 - - - # make parallel sequences to the flatb and to this which are stripped down to their essence - # so that the difflib can work on them - return DoFactorDiff(essflatbindx, essflatblist, essxindx, essxlist, chks, flatb) + essxindx, 
essxlist, chks = PrepareXMLForDiff(scrapeversion) + + # now make a huge string over the flatb with heading stuff stripped out + essflatblist = [] + essflatbindx = [] + for qb in flatb: + essflatbindx.append(len(essflatblist)) + essflatblist.append("HEADING-" + qb.typ) + essflatblist.append( + re.search( + 'nospeaker="true"|(?:speakerid|person_id)="[^"]*"', qb.speaker + ).group(0) + ) + + if re.match("oral-heading|major-heading|minor-heading", qb.typ): + heading = ("".join(qb.stext)).strip() + essflatblist.extend(heading.split()) + + # strip format labels out of paragraphs + else: + for ps in qb.stext: + m = re.match("\s*<(?:p|tr)[^>]*>\s*(.*?)\s*\s*$", ps) + if m: + para = m.group(1) + else: + assert re.match( + "\s*]*>|&\w+;|[^<>\s]+", para)) + + essflatbindx.append(len(essflatblist)) + assert len(essflatbindx) == len(flatb) + 1 + + # make parallel sequences to the flatb and to this which are stripped down to their essence + # so that the difflib can work on them + return DoFactorDiff(essflatbindx, essflatblist, essxindx, essxlist, chks, flatb) def DoFactorDiff(essflatbindx, essflatblist, essxindx, essxlist, chks, flatb): - # now apply the diffing function on this - sm = difflib.SequenceMatcher(None, essxlist, essflatblist) - smblocks = [ ((smb[0], smb[0] + smb[2]), (smb[1], smb[1] + smb[2])) for smb in sm.get_matching_blocks()[:-1] ] - - # we collect the range for the previous speeches and map it to a set of ranges - # in the next speeches - - # case of missing entries map to the last speech matched to. - lastmatchg = None - - res = [ ] - for ix in range(len(chks)): - ixr = (essxindx[ix], essxindx[ix + 1]) - nixrl = [ ] - nixrlsz = 0 - - # intersect the set of ranges against the contiguous blocks and match forwards - for lsmb in smblocks: - if ixr[1] > lsmb[0][0] and ixr[0] < lsmb[0][1]: - ixi = (max(ixr[0], lsmb[0][0]), min(ixr[1], lsmb[0][1])) - assert ixi[0] < ixi[1] - offs = lsmb[1][0] - lsmb[0][0] - ixit = (ixi[0] + offs, ixi[1] + offs) - assert not nixrl or (nixrl[-1][1] <= ixit[0]) - nixrl.append(ixit) - nixrlsz += ixit[1] - ixit[0] - - # at least one word is overlapping - if nixrl: - # go through the matchint cases - matchlist = [ GetMinIndex(essflatbindx, nixrl[0][0]) ] - if nixrlsz != ixr[1] - ixr[0] or len(nixrl) > 1: - matchtype = "changes" - for ixit in nixrl: - ml = GetMinIndex(essflatbindx, ixit[0]) - if matchlist[-1] != ml: - matchlist.append(ml) - ml = GetMinIndex(essflatbindx, ixit[1] - 1) - if matchlist[-1] != ml: - matchlist.append(ml) - if len(matchlist) != 1: - matchtype = "multiplecover" - else: - assert len(nixrl) == 1 - matchtype = "perfectmatch" - - # missing speech - else: - print(chks[ix]) - if lastmatchg: - print("Missing speech matched to last matched speech") - matchlist = [ lastmatchg ] - else: - print("No match on first speech problem.") - matchlist = [] - matchtype = "missing" - - # output the (sometimes more than) one redirect of the right redirect type - chk = chks[ix] - oldgid = re.search('id="([\w\d\-\.,/]*)"', chk[1]).group(1) - for matchg in matchlist: - res.append('\n' % (oldgid, flatb[matchg].GID, matchtype)) - lastmatchg = matchg - - # output old version as well, if it's different - if matchtype != "perfectmatch": - res.append("<%s %s>\n" % (chk[0], chk[1])) - res.append(chk[2]) - res.append("\n") - res.append("\n" % chk[0]) - - return res + # now apply the diffing function on this + sm = difflib.SequenceMatcher(None, essxlist, essflatblist) + smblocks = [ + ((smb[0], smb[0] + smb[2]), (smb[1], smb[1] + smb[2])) + for smb in 
sm.get_matching_blocks()[:-1] + ] + + # we collect the range for the previous speeches and map it to a set of ranges + # in the next speeches + + # case of missing entries map to the last speech matched to. + lastmatchg = None + + res = [] + for ix in range(len(chks)): + ixr = (essxindx[ix], essxindx[ix + 1]) + nixrl = [] + nixrlsz = 0 + + # intersect the set of ranges against the contiguous blocks and match forwards + for lsmb in smblocks: + if ixr[1] > lsmb[0][0] and ixr[0] < lsmb[0][1]: + ixi = (max(ixr[0], lsmb[0][0]), min(ixr[1], lsmb[0][1])) + assert ixi[0] < ixi[1] + offs = lsmb[1][0] - lsmb[0][0] + ixit = (ixi[0] + offs, ixi[1] + offs) + assert not nixrl or (nixrl[-1][1] <= ixit[0]) + nixrl.append(ixit) + nixrlsz += ixit[1] - ixit[0] + + # at least one word is overlapping + if nixrl: + # go through the matchint cases + matchlist = [GetMinIndex(essflatbindx, nixrl[0][0])] + if nixrlsz != ixr[1] - ixr[0] or len(nixrl) > 1: + matchtype = "changes" + for ixit in nixrl: + ml = GetMinIndex(essflatbindx, ixit[0]) + if matchlist[-1] != ml: + matchlist.append(ml) + ml = GetMinIndex(essflatbindx, ixit[1] - 1) + if matchlist[-1] != ml: + matchlist.append(ml) + if len(matchlist) != 1: + matchtype = "multiplecover" + else: + assert len(nixrl) == 1 + matchtype = "perfectmatch" + + # missing speech + else: + print(chks[ix]) + if lastmatchg: + print("Missing speech matched to last matched speech") + matchlist = [lastmatchg] + else: + print("No match on first speech problem.") + matchlist = [] + matchtype = "missing" + + # output the (sometimes more than) one redirect of the right redirect type + chk = chks[ix] + oldgid = re.search('id="([\w\d\-\.,/]*)"', chk[1]).group(1) + for matchg in matchlist: + res.append( + '\n' + % (oldgid, flatb[matchg].GID, matchtype) + ) + lastmatchg = matchg + + # output old version as well, if it's different + if matchtype != "perfectmatch": + res.append("<%s %s>\n" % (chk[0], chk[1])) + res.append(chk[2]) + res.append("\n") + res.append("\n" % chk[0]) + + return res def MeasureBlockSimilarity(oldtext, qblock): - flattenoldtext = re.split("<[^>]*>|\s+", oldtext) - flattennewtext = qblock.FlattenTextWords() + flattenoldtext = re.split("<[^>]*>|\s+", oldtext) + flattennewtext = qblock.FlattenTextWords() - sm = difflib.SequenceMatcher(lambda x: x == "", flattenoldtext, flattennewtext) - return sm.ratio() + sm = difflib.SequenceMatcher(lambda x: x == "", flattenoldtext, flattennewtext) + return sm.ratio() # special case because the questions can be re-ordered def FactorChangesWrans(majblocks, scrapeversion): - - # we need to break the scrape version - # we separate out and match the major headings separately - # (anyway, these aren't really used) - - # and then match the questions - - # first extract all the oldtype gid-redirects that will have been put in here by the pre-2005 bMakeOldWransGidsToNew cases - res = re.findall('\n', scrapeversion) - - # extract major headings and match injectively exactly (till we find a failed example). 
- mhchks = re.findall(']*>\n\s*([\s\S]*?)\s*?\n', scrapeversion) - - majblocknames = [ "".join(majblock[0].stext).strip() for majblock in majblocks ] - for mhchk in mhchks: - if mhchk[1] in majblocknames: - i = majblocknames.index(mhchk[1]) - res.append('\n' % (mhchk[0], majblocks[i][0].qGID)) - majblocknames[i] = None # take it out of circulation - else: - res.append('\n' % (mhchk[0], majblocks[0][0].qGID)) - - # break into question blocks - # [0]=headingGID, [1]=further choss, [2]=headingtext, [3]=question+reply text - # the " tags have been removed, so split to end of document - qebchks = re.findall(']*)>\n([\s\S]*?)\n([\s\S]*?)\s*(?=<(?:major-heading|minor-heading|gidredirect[^>]*oldwranstype)|$)', - scrapeversion) - - # make the map from qnums to blocks - qnummissings = [ ] - qnummapq = { } - for majblock in majblocks: - for qblock in majblock[1]: - for qnum in qblock.qnums: - assert qnum not in qnummapq # failure means this qnum is found twice in the newly parsed file. - qnummapq[qnum] = qblock - if re.match("ZZZZerror", qnum): - qnummissings.append(qnum) - - - # for each block, find the map forward and check if we want to reprint it in full. - for qebchk in qebchks: - qqnums = re.findall('
]*?qnum="([\d\w]+)">', qebchk[3]) - assert qqnums - - # make sure that they all link to the same qnum in the new one - qblock = None - for qqnum in qqnums: - if qblock: - if qblock.headingqb.qGID != qnummapq[qqnum].headingqb.qGID: - print(qblock.headingqb.qGID, qnummapq[qqnum].headingqb.qGID) - assert qblock.headingqb.qGID == qnummapq[qqnum].headingqb.qGID - elif qqnum != '0' and qqnum in qnummapq: # 0 is when there is a missing qnum - qblock = qnummapq[qqnum] - - # in this case the qnums are fail for finding the match, so we either drop it, or find - # the match by closest in text. Prefer to match blocks to - if not qblock: - # find the closest match for this block out of this missing qnum blocks on the new page - # (this will need to account for all blocks if in future the correction is to add in the qnum) - if qnummissings: - qmissblocksscore = [ ] - for qqnum in qnummissings: - similarity = MeasureBlockSimilarity(qebchk[3], qnummapq[qqnum]) - qmissblocksscore.append((similarity, qqnum)) - qmissblockscorebest = max(qmissblocksscore) - qblock = qnummapq[qmissblockscorebest[1]] - if miscfuncs.IsNotQuiet(): - print("Missing qnum; mapping %s to %s with score %f" % (qebchk[0], qblock.headingqb.qGID, qmissblockscorebest[0])) - assert qmissblockscorebest[0] > 0.8 # otherwise it's not really a match and we need to look harder. - # perhaps it's matched to a block in the new file which newly has a qnum, and we then have to scan against all of them. - - # now have to check matching. - # convert both to strings and compare. - essxfq = [ ] # this forms the string which we will be comparing against. - qebchkquesids = [ ] # expect only one of each - qebchkreplids = [ ] - for wd in re.findall("<[^>]*>|&\w+;|[^<>\s]+", qebchk[3]): - mwd = re.match('<(p|tr|reply|ques)\s*(?:p?id="([^"]*)")?[^>]*>', wd) - if mwd: - essxfq.append("<%s>" % mwd.group(1)) - assert mwd.group(1) not in ("reply", "ques") or mwd.group(2) - if mwd.group(1) == "ques": - qebchkquesids.append(mwd.group(2)) - elif mwd.group(1) == "reply": - qebchkreplids.append(mwd.group(2)) - - elif not re.match("\n' % (qebchk[0], majblocks[0][0].qGID)) - for qebq in qebchkquesids: - res.append('\n' % (qebq, majblocks[0][0].qGID)) - for qebqr in qebchkreplids: - res.append('\n' % (qebqr, majblocks[0][0].qGID)) - # Is the lred current-gidredirects bit needed here too? Don't think so, but not sure - continue - - # build up the same summary from the question block - essbkfq = [ ] - for qblockqr in (qblock.queses, qblock.replies): - for qb in qblockqr: - essbkfq.append("<%s>" % qb.typ) - for wd in re.findall("<[^>]*>|&\w+;|[^<>\s]+", "\n".join(qb.stext)): - mwd = re.match("<(p|tr)[^>]*>", wd) - if mwd: - essbkfq.append("<%s>" % mwd.group(1)) - elif not re.match("" % qb.typ) - - # print the link forwards - bchanges = (essxfq != essbkfq) - matchtype = bchanges and "changes" or "perfectmatch" - if bchanges: - res.append("\n") - res.append('\n' % (qebchk[0], qblock.headingqb.qGID, matchtype)) - - # write the parallel redirects for the question and reply (both mapping to same parts of each) - # this may be more sophisticated once we see an example of failure - # ultimately this is a job for paragraph matching - - # sometimes we get more than one question. - # when we find a mismatch we'll deal with it as a special paragraph problem, or not bother. 
- if len(qebchkquesids) != len(qblock.queses): - print(len(qebchkquesids), len(qblock.queses), qblock.queses[0].qGID) - assert len(qebchkquesids) == len(qblock.queses) - for i in range(len(qebchkquesids)): - res.append('\n' % (qebchkquesids[i], qblock.queses[i].qGID, matchtype)) - - assert len(qebchkreplids) == len(qblock.replies) == 1 - for qebqr in qebchkreplids: - res.append('\n' % (qebqr, qblock.replies[0].qGID, matchtype)) - - - # if changes write out the original, else just the gidmaps - if bchanges: - res.append('\n' % qebchk[0:2]) - res.append(qebchk[2]) - res.append('\n') - res.append(qebchk[3]) - res.append("\n\n") - else: - for lred in re.findall("]*>\n", qebchk[3]): - res.append("\t") - res.append(lred) - - return res - + # we need to break the scrape version + # we separate out and match the major headings separately + # (anyway, these aren't really used) + + # and then match the questions + + # first extract all the oldtype gid-redirects that will have been put in here by the pre-2005 bMakeOldWransGidsToNew cases + res = re.findall( + '\n', + scrapeversion, + ) + + # extract major headings and match injectively exactly (till we find a failed example). + mhchks = re.findall( + ']*>\n\s*([\s\S]*?)\s*?\n', + scrapeversion, + ) + + majblocknames = ["".join(majblock[0].stext).strip() for majblock in majblocks] + for mhchk in mhchks: + if mhchk[1] in majblocknames: + i = majblocknames.index(mhchk[1]) + res.append( + '\n' + % (mhchk[0], majblocks[i][0].qGID) + ) + majblocknames[i] = None # take it out of circulation + else: + res.append( + '\n' + % (mhchk[0], majblocks[0][0].qGID) + ) + + # break into question blocks + # [0]=headingGID, [1]=further choss, [2]=headingtext, [3]=question+reply text + # the " tags have been removed, so split to end of document + qebchks = re.findall( + ']*)>\n([\s\S]*?)\n([\s\S]*?)\s*(?=<(?:major-heading|minor-heading|gidredirect[^>]*oldwranstype)|$)', + scrapeversion, + ) + + # make the map from qnums to blocks + qnummissings = [] + qnummapq = {} + for majblock in majblocks: + for qblock in majblock[1]: + for qnum in qblock.qnums: + assert ( + qnum not in qnummapq + ) # failure means this qnum is found twice in the newly parsed file. + qnummapq[qnum] = qblock + if re.match("ZZZZerror", qnum): + qnummissings.append(qnum) + + # for each block, find the map forward and check if we want to reprint it in full. + for qebchk in qebchks: + qqnums = re.findall('
]*?qnum="([\d\w]+)">', qebchk[3]) + assert qqnums + + # make sure that they all link to the same qnum in the new one + qblock = None + for qqnum in qqnums: + if qblock: + if qblock.headingqb.qGID != qnummapq[qqnum].headingqb.qGID: + print(qblock.headingqb.qGID, qnummapq[qqnum].headingqb.qGID) + assert qblock.headingqb.qGID == qnummapq[qqnum].headingqb.qGID + elif ( + qqnum != "0" and qqnum in qnummapq + ): # 0 is when there is a missing qnum + qblock = qnummapq[qqnum] + + # in this case the qnums are fail for finding the match, so we either drop it, or find + # the match by closest in text. Prefer to match blocks to + if not qblock: + # find the closest match for this block out of this missing qnum blocks on the new page + # (this will need to account for all blocks if in future the correction is to add in the qnum) + if qnummissings: + qmissblocksscore = [] + for qqnum in qnummissings: + similarity = MeasureBlockSimilarity(qebchk[3], qnummapq[qqnum]) + qmissblocksscore.append((similarity, qqnum)) + qmissblockscorebest = max(qmissblocksscore) + qblock = qnummapq[qmissblockscorebest[1]] + if miscfuncs.IsNotQuiet(): + print( + "Missing qnum; mapping %s to %s with score %f" + % (qebchk[0], qblock.headingqb.qGID, qmissblockscorebest[0]) + ) + assert ( + qmissblockscorebest[0] > 0.8 + ) # otherwise it's not really a match and we need to look harder. + # perhaps it's matched to a block in the new file which newly has a qnum, and we then have to scan against all of them. + + # now have to check matching. + # convert both to strings and compare. + essxfq = [] # this forms the string which we will be comparing against. + qebchkquesids = [] # expect only one of each + qebchkreplids = [] + for wd in re.findall("<[^>]*>|&\w+;|[^<>\s]+", qebchk[3]): + mwd = re.match('<(p|tr|reply|ques)\s*(?:p?id="([^"]*)")?[^>]*>', wd) + if mwd: + essxfq.append("<%s>" % mwd.group(1)) + assert mwd.group(1) not in ("reply", "ques") or mwd.group(2) + if mwd.group(1) == "ques": + qebchkquesids.append(mwd.group(2)) + elif mwd.group(1) == "reply": + qebchkreplids.append(mwd.group(2)) + + elif not re.match("\n' + % (qebchk[0], majblocks[0][0].qGID) + ) + for qebq in qebchkquesids: + res.append( + '\n' + % (qebq, majblocks[0][0].qGID) + ) + for qebqr in qebchkreplids: + res.append( + '\n' + % (qebqr, majblocks[0][0].qGID) + ) + # Is the lred current-gidredirects bit needed here too? Don't think so, but not sure + continue + + # build up the same summary from the question block + essbkfq = [] + for qblockqr in (qblock.queses, qblock.replies): + for qb in qblockqr: + essbkfq.append("<%s>" % qb.typ) + for wd in re.findall("<[^>]*>|&\w+;|[^<>\s]+", "\n".join(qb.stext)): + mwd = re.match("<(p|tr)[^>]*>", wd) + if mwd: + essbkfq.append("<%s>" % mwd.group(1)) + elif not re.match("" % qb.typ) + + # print the link forwards + bchanges = essxfq != essbkfq + matchtype = bchanges and "changes" or "perfectmatch" + if bchanges: + res.append("\n") + res.append( + '\n' + % (qebchk[0], qblock.headingqb.qGID, matchtype) + ) + + # write the parallel redirects for the question and reply (both mapping to same parts of each) + # this may be more sophisticated once we see an example of failure + # ultimately this is a job for paragraph matching + + # sometimes we get more than one question. + # when we find a mismatch we'll deal with it as a special paragraph problem, or not bother. 
+ if len(qebchkquesids) != len(qblock.queses): + print(len(qebchkquesids), len(qblock.queses), qblock.queses[0].qGID) + assert len(qebchkquesids) == len(qblock.queses) + for i in range(len(qebchkquesids)): + res.append( + '\n' + % (qebchkquesids[i], qblock.queses[i].qGID, matchtype) + ) + + assert len(qebchkreplids) == len(qblock.replies) == 1 + for qebqr in qebchkreplids: + res.append( + '\n' + % (qebqr, qblock.replies[0].qGID, matchtype) + ) + + # if changes write out the original, else just the gidmaps + if bchanges: + res.append('\n' % qebchk[0:2]) + res.append(qebchk[2]) + res.append("\n") + res.append(qebchk[3]) + res.append("\n\n") + else: + for lred in re.findall("]*>\n", qebchk[3]): + res.append("\t") + res.append(lred) + + return res diff --git a/pyscraper/lazyrunall.py b/pyscraper/lazyrunall.py index 1da9d401..fc3eeda5 100755 --- a/pyscraper/lazyrunall.py +++ b/pyscraper/lazyrunall.py @@ -3,19 +3,19 @@ # Run the script with --help to see command line options -import sys import os +import sys # change current directory to pyscraper folder script is in -os.chdir(os.path.dirname(sys.argv[0]) or '.') +os.chdir(os.path.dirname(sys.argv[0]) or ".") from optparse import OptionParser -from runfilters import RunFiltersDir, RunNIFilters -from regmem.filter import RunRegmemFilters -import ni.scrape -from regmem.pullgluepages import RegmemPullGluePages +import ni.scrape from miscfuncs import SetQuiet +from regmem.filter import RunRegmemFilters +from regmem.pullgluepages import RegmemPullGluePages +from runfilters import RunFiltersDir, RunNIFilters # Parse the command line parameters @@ -42,33 +42,64 @@ # See what options there are -parser.add_option("--force-parse", - action="store_true", dest="forceparse", default=False, - help="forces reprocessing of debates by first deleting output files") -parser.add_option("--force-scrape", - action="store_true", dest="forcescrape", default=False, - help="forces redownloading of HTML first deleting output files") - -parser.add_option("--from", dest="datefrom", metavar="date", default="1000-01-01", - help="date to process back to, default is start of time") -parser.add_option("--to", dest="dateto", metavar="date", default="9999-12-31", - help="date to process up to, default is present day") -parser.add_option("--date", dest="date", metavar="date", default=None, - help="date to process (overrides --from and --to)") - -parser.add_option("--patchtool", - action="store_true", dest="patchtool", default=None, - help="launch ./patchtool to fix errors in source HTML") -parser.add_option("--quietc", - action="store_true", dest="quietc", default=None, - help="low volume error messages; continue processing further files") +parser.add_option( + "--force-parse", + action="store_true", + dest="forceparse", + default=False, + help="forces reprocessing of debates by first deleting output files", +) +parser.add_option( + "--force-scrape", + action="store_true", + dest="forcescrape", + default=False, + help="forces redownloading of HTML first deleting output files", +) + +parser.add_option( + "--from", + dest="datefrom", + metavar="date", + default="1000-01-01", + help="date to process back to, default is start of time", +) +parser.add_option( + "--to", + dest="dateto", + metavar="date", + default="9999-12-31", + help="date to process up to, default is present day", +) +parser.add_option( + "--date", + dest="date", + metavar="date", + default=None, + help="date to process (overrides --from and --to)", +) + +parser.add_option( + "--patchtool", + action="store_true", + 
dest="patchtool", + default=None, + help="launch ./patchtool to fix errors in source HTML", +) +parser.add_option( + "--quietc", + action="store_true", + dest="quietc", + default=None, + help="low volume error messages; continue processing further files", +) (options, args) = parser.parse_args() -if (options.date): - options.datefrom = options.date - options.dateto = options.date +if options.date: + options.datefrom = options.date + options.dateto = options.date if options.quietc: - SetQuiet() + SetQuiet() # See what commands there are @@ -78,33 +109,33 @@ options.regmem = False options.ni = False for arg in args: - if arg == "scrape": - options.scrape = True - elif arg == "parse": - options.parse = True - elif arg == "regmem": - options.regmem = True - options.remote = True - elif arg == "regmem-local": - options.regmem = True - options.remote = False - elif arg == "ni": - options.ni = True - else: - print("error: no such option %s" % arg, file=sys.stderr) - parser.print_help() - sys.exit(1) -if len(args) == 0: + if arg == "scrape": + options.scrape = True + elif arg == "parse": + options.parse = True + elif arg == "regmem": + options.regmem = True + options.remote = True + elif arg == "regmem-local": + options.regmem = True + options.remote = False + elif arg == "ni": + options.ni = True + else: + print("error: no such option %s" % arg, file=sys.stderr) parser.print_help() sys.exit(1) +if len(args) == 0: + parser.print_help() + sys.exit(1) if not options.scrape and not options.parse: - print("error: choose what to do; scrape, parse, or both", file=sys.stderr) - parser.print_help() - sys.exit(1) + print("error: choose what to do; scrape, parse, or both", file=sys.stderr) + parser.print_help() + sys.exit(1) if not options.regmem and not options.ni: - print("error: choose what work on; regmem, several of them", file=sys.stderr) - parser.print_help() - sys.exit(1) + print("error: choose what work on; regmem, several of them", file=sys.stderr) + parser.print_help() + sys.exit(1) # Download/generate the new data @@ -116,7 +147,7 @@ # Parse it into XML if options.parse: - if options.ni: - RunFiltersDir(RunNIFilters, 'ni', options, options.forceparse) - if options.regmem: - RunFiltersDir(RunRegmemFilters, 'regmem', options, options.forceparse) + if options.ni: + RunFiltersDir(RunNIFilters, "ni", options, options.forceparse) + if options.regmem: + RunFiltersDir(RunRegmemFilters, "regmem", options, options.forceparse) diff --git a/pyscraper/lords/resolvenames.py b/pyscraper/lords/resolvenames.py index 2589de6d..8c7d68f9 100644 --- a/pyscraper/lords/resolvenames.py +++ b/pyscraper/lords/resolvenames.py @@ -1,62 +1,85 @@ -import json -import os.path import re -from contextexception import ContextException from base_resolver import ResolverBase +from contextexception import ContextException -titleconv = { 'L.':'Lord', - 'B.':'Baroness', - 'Abp.':'Archbishop', - 'Bp.':'Bishop', - 'V.':'Viscount', - 'E.':'Earl', - 'D.':'Duke', - 'M.':'Marquess', - 'C.':'Countess', - 'Ly.':'Lady', - } +titleconv = { + "L.": "Lord", + "B.": "Baroness", + "Abp.": "Archbishop", + "Bp.": "Bishop", + "V.": "Viscount", + "E.": "Earl", + "D.": "Duke", + "M.": "Marquess", + "C.": "Countess", + "Ly.": "Lady", +} # more tedious stuff to do: "earl of" and "sitting as" cases -hontitles = [ 'Lord ?Bishop', 'Bishop', 'Marquess', 'Lord', 'Baroness', 'Viscount', 'Earl', 'Countess', - 'Lord Archbishop', 'Archbishop', 'Duke', 'Lady' ] -hontitleso = '|'.join(hontitles) - -honcompl = re.compile('(?:(%s)|(%s) \s*(.*?))(?:\s+of\s+(.*))?$' % 
(hontitleso, hontitleso)) +hontitles = [ + "Lord ?Bishop", + "Bishop", + "Marquess", + "Lord", + "Baroness", + "Viscount", + "Earl", + "Countess", + "Lord Archbishop", + "Archbishop", + "Duke", + "Lady", +] +hontitleso = "|".join(hontitles) + +honcompl = re.compile( + "(?:(%s)|(%s) \s*(.*?))(?:\s+of\s+(.*))?$" % (hontitleso, hontitleso) +) + +rehonorifics = re.compile("(?: [CKO]BE| DL| TD| QC| KCMG| KCB)+$") -rehonorifics = re.compile('(?: [CKO]BE| DL| TD| QC| KCMG| KCB)+$') class LordsList(ResolverBase): - import_organization_id = 'house-of-lords' + import_organization_id = "house-of-lords" def reloadJSON(self): super(LordsList, self).reloadJSON() - self.lordnames={} # "lordnames" --> lords - self.aliases={} # Corrections to full names + self.lordnames = {} # "lordnames" --> lords + self.aliases = {} # Corrections to full names self.import_people_json() def import_people_membership(self, mship, posts, orgs): - if 'organization_id' not in mship or mship['organization_id'] != self.import_organization_id: + if ( + "organization_id" not in mship + or mship["organization_id"] != self.import_organization_id + ): return if mship["id"] in self.membertopersonmap: raise Exception("Same member id %s appeared twice" % mship["id"]) - self.membertopersonmap[mship["id"]] = mship['person_id'] - self.persontomembermap.setdefault(mship['person_id'], []).append(mship["id"]) + self.membertopersonmap[mship["id"]] = mship["person_id"] + self.persontomembermap.setdefault(mship["person_id"], []).append(mship["id"]) if self.members.get(mship["id"]): raise Exception("Repeated identifier %s in members JSON file" % mship["id"]) self.members[mship["id"]] = mship - if 'end_date' not in mship: - mship['end_date'] = '9999-12-31' + if "end_date" not in mship: + mship["end_date"] = "9999-12-31" def import_people_main_name(self, name, memberships): - mships = [m for m in memberships if m['start_date'] <= name.get('end_date', '9999-12-31') and m['end_date'] >= name.get('start_date', '1000-01-01')] - if not mships: return + mships = [ + m + for m in memberships + if m["start_date"] <= name.get("end_date", "9999-12-31") + and m["end_date"] >= name.get("start_date", "1000-01-01") + ] + if not mships: + return lname = name["lordname"] or name["lordofname"] lname = re.sub("\.", "", lname) assert lname @@ -67,17 +90,22 @@ def import_people_main_name(self, name, memberships): } for m in mships: newattr = attr.copy() - newattr['start_date'] = max(m['start_date'], name.get('start_date', '1000-01-01')) - newattr['end_date'] = min(m['end_date'], name.get('end_date', '9999-12-31')) - newattr['id'] = m["id"] + newattr["start_date"] = max( + m["start_date"], name.get("start_date", "1000-01-01") + ) + newattr["end_date"] = min(m["end_date"], name.get("end_date", "9999-12-31")) + newattr["id"] = m["id"] self.lordnames.setdefault(lname, []).append(newattr) def import_people_alternate_name(self, person, other_name, memberships): - if 'name' not in other_name: return # Only full names in Lords aliases - self.aliases[other_name['name']] = person['id'] + if "name" not in other_name: + return # Only full names in Lords aliases + self.aliases[other_name["name"]] = person["id"] # main matching function - def GetLordID(self, ltitle, llordname, llordofname, loffice, stampurl, sdate, bDivision): + def GetLordID( + self, ltitle, llordname, llordofname, loffice, stampurl, sdate, bDivision + ): if ltitle == "Lord Bishop": ltitle = "Bishop" if ltitle == "Lord Archbishop": @@ -85,15 +113,15 @@ def GetLordID(self, ltitle, llordname, llordofname, loffice, 
stampurl, sdate, bD llordofname = llordofname.replace(".", "") llordname = llordname.replace(".", "") - llordname = re.sub('&#(039|146|8217);', "'", llordname) + llordname = re.sub("&#(039|146|8217);", "'", llordname) llordofname = llordofname.strip() llordname = llordname.strip() # TODO: Need a Lords version of member-aliases.xml I guess - if ltitle == "Bishop" and llordofname == "Southwell" and sdate>='2005-07-01': + if ltitle == "Bishop" and llordofname == "Southwell" and sdate >= "2005-07-01": llordofname = "Southwell and Nottingham" - if ltitle == "Bishop" and llordname == "Southwell" and sdate>='2005-07-01': + if ltitle == "Bishop" and llordname == "Southwell" and sdate >= "2005-07-01": llordname = "Southwell and Nottingham" lname = llordname or llordofname @@ -101,11 +129,11 @@ def GetLordID(self, ltitle, llordname, llordofname, loffice, stampurl, sdate, bD lmatches = self.lordnames.get(lname, []) # match to successive levels of precision for identification - res = [ ] + res = [] for lm in lmatches: if lm["title"] != ltitle: # mismatch title continue - if llordname and llordofname: # two name case + if llordname and llordofname: # two name case if (lm["lordname"] == llordname) and (lm["lordofname"] == llordofname): if lm["start_date"] <= sdate <= lm["end_date"]: res.append(lm) @@ -128,29 +156,62 @@ def GetLordID(self, ltitle, llordname, llordofname, loffice, stampurl, sdate, bD if lname == lmlname: if lm["start_date"] <= sdate <= lm["end_date"]: if lm["lordname"] and llordofname: - #if not IsNotQuiet(): - print("cm---", ltitle, lm["lordname"], lm["lordofname"], llordname, llordofname) - raise ContextException("lordofname matches lordname in lordlist", stamp=stampurl, fragment=lname) + # if not IsNotQuiet(): + print( + "cm---", + ltitle, + lm["lordname"], + lm["lordofname"], + llordname, + llordofname, + ) + raise ContextException( + "lordofname matches lordname in lordlist", + stamp=stampurl, + fragment=lname, + ) else: assert lm["lordofname"] and llordname # of-name distinction lost in division lists if not bDivision: - raise ContextException("lordname matches lordofname in lordlist", stamp=stampurl, fragment=lname) + raise ContextException( + "lordname matches lordofname in lordlist", + stamp=stampurl, + fragment=lname, + ) res.append(lm) - elif ltitle != "Bishop" and ltitle != "Archbishop" and (ltitle, lname) not in (("Duke", "Norfolk"), ("Duke", "Wellington"), ('Earl', 'Kinnoull'), ('Earl', 'Selborne')): + elif ( + ltitle != "Bishop" + and ltitle != "Archbishop" + and (ltitle, lname) + not in ( + ("Duke", "Norfolk"), + ("Duke", "Wellington"), + ("Earl", "Kinnoull"), + ("Earl", "Selborne"), + ) + ): print(lm) - raise ContextException("wrong dates on lords with same name", stamp=stampurl, fragment=lname) + raise ContextException( + "wrong dates on lords with same name", + stamp=stampurl, + fragment=lname, + ) if not res: - raise ContextException("unknown lord %s %s %s %s on %s" % (ltitle, llordname, llordofname, stampurl, sdate), stamp=stampurl, fragment=lname) + raise ContextException( + "unknown lord %s %s %s %s on %s" + % (ltitle, llordname, llordofname, stampurl, sdate), + stamp=stampurl, + fragment=lname, + ) assert len(res) == 1 return self.membertoperson(res[0]["id"]) - def GetLordIDfname(self, name, loffice, sdate, stampurl=None): name = re.sub("^The ", "", name) - name = name.replace(' Of ', ' of ') + name = name.replace(" Of ", " of ") if name in self.aliases: return self.aliases[name] @@ -160,7 +221,9 @@ def GetLordIDfname(self, name, loffice, sdate, stampurl=None): hom 
= honcompl.match(name) if not hom: - raise ContextException("lord name format failure on '%s'" % name, stamp=stampurl, fragment=name) + raise ContextException( + "lord name format failure on '%s'" % name, stamp=stampurl, fragment=name + ) # now we have a speaker, try and break it up ltit = hom.group(1) @@ -181,25 +244,28 @@ def GetLordIDfname(self, name, loffice, sdate, stampurl=None): return self.GetLordID(ltit, lname, lplace, loffice, stampurl, sdate, False) - def MatchRevName(self, fss, sdate, stampurl): assert fss - lfn = re.match('(.*?)(?: of (.*?))?, {0,3}((?:L|B|Abp|Bp|V|E|D|M|C|Ly)\.?)$', fss) + lfn = re.match( + "(.*?)(?: of (.*?))?, {0,3}((?:L|B|Abp|Bp|V|E|D|M|C|Ly)\.?)$", fss + ) if not lfn: print("$$$%s$$$" % fss) - raise ContextException("No match of format in MatchRevName", stamp=stampurl, fragment=fss) + raise ContextException( + "No match of format in MatchRevName", stamp=stampurl, fragment=fss + ) shorttitle = lfn.group(3) - if shorttitle[-1] != '.': + if shorttitle[-1] != ".": shorttitle += "." ltitle = titleconv[shorttitle] llordname = lfn.group(1).replace(".", "") llordname = llordname.replace("'", "'") llordname = re.sub("^De ", "de ", llordname) - fullname = '%s %s' % (ltitle, llordname) + fullname = "%s %s" % (ltitle, llordname) llordofname = "" if lfn.group(2): llordofname = lfn.group(2).replace(".", "") - fullname = '%s of %s' % (fullname, llordofname) + fullname = "%s of %s" % (fullname, llordofname) if fullname in self.aliases: return self.aliases[fullname] diff --git a/pyscraper/miscfuncs.py b/pyscraper/miscfuncs.py index 1cc463e3..ec06620a 100755 --- a/pyscraper/miscfuncs.py +++ b/pyscraper/miscfuncs.py @@ -1,43 +1,51 @@ +import os import re -import sys import string -import os import tempfile # make the top path data directory value -toppath = os.path.abspath('../../parldata') +toppath = os.path.abspath("../../parldata") if not os.path.exists(toppath): - toppath = os.path.abspath('../../../parldata') + toppath = os.path.abspath("../../../parldata") if not os.path.exists(toppath): - toppath = os.path.abspath(os.path.expanduser('~/parldata/')) + toppath = os.path.abspath(os.path.expanduser("~/parldata/")) if not os.path.exists(toppath): - toppath = 'C:\\parldata' + toppath = "C:\\parldata" # output directories used for the scraper pwcmdirs = os.path.join(toppath, "cmpages") pwxmldirs = os.path.join(toppath, "scrapedxml") -pwpatchesdirs = os.path.abspath("patches") # made locally, relative to the lazyrunall.py module. Should be relative to toppath eventually +pwpatchesdirs = os.path.abspath( + "patches" +) # made locally, relative to the lazyrunall.py module. Should be relative to toppath eventually -if (not os.path.isdir(toppath)): - raise Exception('Data directory %s does not exist, please create' % (toppath)) +if not os.path.isdir(toppath): + raise Exception("Data directory %s does not exist, please create" % (toppath)) # print "Data directory (set in miscfuncs.py): %s" % toppath # temporary files are stored here tmppath = os.path.join(toppath, "tmp") -if (not os.path.isdir(tmppath)): - os.mkdir(tmppath) +if not os.path.isdir(tmppath): + os.mkdir(tmppath) tempfilename = tempfile.mktemp("", "pw-gluetemp-", tmppath) # find raw data path rawdatapath = os.path.join(os.getcwd(), "../rawdata") -if (not os.path.isdir(toppath)): - raise Exception('Raw data directory %s does not exist, you\'ve not got a proper checkout from CVS.' 
% (toppath)) +if not os.path.isdir(toppath): + raise Exception( + "Raw data directory %s does not exist, you've not got a proper checkout from CVS." + % (toppath) + ) # quiet flag bNotQuiet = True + + def SetQuiet(): global bNotQuiet bNotQuiet = False + + def IsNotQuiet(): return bNotQuiet @@ -48,16 +56,17 @@ def IsNotQuiet(): # use this to generate chronological scraped files of the same page def NextAlphaString(s): - assert re.match('[a-z]*$', s) + assert re.match("[a-z]*$", s) if not s: - return 'a' + return "a" i = string.ascii_lowercase.find(s[-1]) + 1 if i < len(string.ascii_lowercase): return s[:-1] + string.ascii_lowercase[i] - return NextAlphaString(s[:-1]) + 'a' + return NextAlphaString(s[:-1]) + "a" + def AlphaStringToOrder(s): - assert re.match('[a-z]*$', s) + assert re.match("[a-z]*$", s) res = 0 while s: i = string.ascii_lowercase.find(s[0]) + 1 @@ -65,6 +74,7 @@ def AlphaStringToOrder(s): s = s[1:] return res + # Impossible to do 6pm, 7.15pm, 6.30pm, 6.45pm, 7pm without future timestamps # So not caring any more about timestamp errors # Need good timestamps for video ;-) So turning back on, might try different tack at some point @@ -73,8 +83,10 @@ def AlphaStringToOrder(s): regparsetime = re.compile("^(\d+)[\.:]\s*(\d+)(?:\s?| )([\w\.]*)$") # 7 pm regparsetimeonhour = re.compile("^(\d+)()(?:\s?| )([\w\.]+)$") + + def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): - #print "time ", time + # print "time ", time previoustime = None if previoustimearr: @@ -88,7 +100,7 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): timeparts = regparsetimeonhour.match(time) if timeparts: hour = int(timeparts.group(1)) - if (timeparts.group(2) != ""): + if timeparts.group(2) != "": mins = int(timeparts.group(2)) else: mins = 0 @@ -114,12 +126,15 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): hour -= 12 if previoustime and previoustimehour + 12 <= hour: - print("TIME: time shift by 12 (from %s to %s) -- should a p.m. be an a.m.? %s" % (previoustime, time, repr(stampurl))) + print( + "TIME: time shift by 12 (from %s to %s) -- should a p.m. be an a.m.? 
%s" + % (previoustime, time, repr(stampurl)) + ) - elif time == 'Midnight': + elif time == "Midnight": hour = 24 mins = 0 - elif time == 'Noon': + elif time == "Noon": hour = 12 mins = 0 else: @@ -127,11 +142,10 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): res = "%03d:%02d:00" % (hour, mins) - # day-rotate situation where they went on beyond midnight # it's uncommon enough to handle by listing exceptional days # (sometimes the division time is out of order because that is where it is inserted in the record -- maybe should patch to handle) - #print previoustime, res, bIsDivisionTime, stampurl.sdate + # print previoustime, res, bIsDivisionTime, stampurl.sdate if previoustime and res < previoustime: if stampurl.sdate in ["2005-03-10"]: if previoustime < "024": @@ -142,14 +156,21 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): elif stampurl.sdate in ["2002-10-28"]: return res - elif hour in [0, 1, 2, 3, 4] or stampurl.sdate in ["2003-10-20", "2000-10-03", "2000-07-24", "2011-01-17"]: + elif hour in [0, 1, 2, 3, 4] or stampurl.sdate in [ + "2003-10-20", + "2000-10-03", + "2000-07-24", + "2011-01-17", + ]: hour += 24 else: - print('TIME: time rotation (from %s to %s %s) not close to midnight %s' % (previoustime, time, res, repr(stampurl))) + print( + "TIME: time rotation (from %s to %s %s) not close to midnight %s" + % (previoustime, time, res, repr(stampurl)) + ) res = "%03d:%02d:00" % (hour, mins) - # capture the case where we are out of order by more than a few minutes # (divisions are often out of order slightly) @@ -160,7 +181,10 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): previoustimeminutes = previoustimehour * 60 + int(prevtimeMatch.group(2)) if timeminutes < previoustimeminutes: if not bIsDivisionTime or (previoustimeminutes - timeminutes > 10): - print('TIME: time out of order, from %s to %s (division=%s) %s' % (previoustime, res, bIsDivisionTime, repr(stampurl))) + print( + "TIME: time out of order, from %s to %s (division=%s) %s" + % (previoustime, res, bIsDivisionTime, repr(stampurl)) + ) return res @@ -168,84 +192,72 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): # http://www.bigbaer.com/reference/character_entity_reference.htm # Make sure you update WriteXMLHeader in xmlfilewrite.py also! 
entitymap = { - ' ':' ', - '&':'&', - - # see http://www.cs.tut.fi/~jkorpela/www/windows-chars.html for a useful, if now dated in - # terms of browser support for the proper solutions, info on windows ndash/mdash (150/151) - '–':'–', # convert windows latin-1 extension ndash into a real one - '—':'—', # likewise mdash - '¡':'¡', # inverted exclamation mark - '÷':'÷', # division sign - - 'è':'è', # this is e-grave - 'é':'é', # this is e-acute - 'ê':'ê', # this is e-hat - 'ë':'ë', # this is e-double-dot - - 'ß':'ß', - 'à':'à', # this is a-grave - 'á':'á', # this is a-acute - 'â':'â', # this is a-hat as in debacle - 'ã':'ã', # this is a-hat as in debacle - 'ä':'ä', - - 'ô':'ô', # this is o-hat - 'ö':'ö', # this is o-double-dot - 'Ö':'Ö', # this is capital o-double-dot - 'ó':'ó', # this is o-acute - 'ø':'ø', # this is o-slash - 'õ':'õ', # this is o-tilde - - 'í':'í', # this is i-acute - 'î':'î', # this is i-circumflex - 'ï':'ï', # this is i-double-dot, as in naive - - 'ç':'ç', # this is cedilla - 'ú':'ú', - 'ü':'ü', # this is u-double-dot - 'ñ':'ñ', # spanish n as in Senor - 'þ':'þ', - - '±':'±', # this is +/- symbol - '£':'£', # UK currency - '§':'§', # UK currency - '©':'©', - '·':'·', # middle dot - '°':'°', # this is the degrees - 'º':'º', # this is the M ordinal - '®':'®', # this is the degrees - '¶':'¶', # end-paragraph (pi) symbol - - 'µ':'µ', # this is one quarter symbol - '¼':'¼', # this is one quarter symbol - '½':'½', # this is one half symbol - '¾':'¾', # this is three quarter symbol - - '#':'#', # this is hash - '_':'_', # this is underscore symbol - '_':'_', # this is underscore symbol - - ''':"'", # possession apostrophe - "€":'€', # this is euro currency - "™":'™', - "•":'•', - '&lquo;':"'", - '&rquo;':"'", - '−':"-", - - '‘':"'", - '’':"'", - '“':'"', - '”':'"', - '…':'...', - '†':'†', - - '²':'²', - '’':"'", - 'œ':'œ', - 'æ':'æ', - '†':'†', + " ": " ", + "&": "&", + # see http://www.cs.tut.fi/~jkorpela/www/windows-chars.html for a useful, if now dated in + # terms of browser support for the proper solutions, info on windows ndash/mdash (150/151) + "–": "–", # convert windows latin-1 extension ndash into a real one + "—": "—", # likewise mdash + "¡": "¡", # inverted exclamation mark + "÷": "÷", # division sign + "è": "è", # this is e-grave + "é": "é", # this is e-acute + "ê": "ê", # this is e-hat + "ë": "ë", # this is e-double-dot + "ß": "ß", + "à": "à", # this is a-grave + "á": "á", # this is a-acute + "â": "â", # this is a-hat as in debacle + "ã": "ã", # this is a-hat as in debacle + "ä": "ä", + "ô": "ô", # this is o-hat + "ö": "ö", # this is o-double-dot + "Ö": "Ö", # this is capital o-double-dot + "ó": "ó", # this is o-acute + "ø": "ø", # this is o-slash + "õ": "õ", # this is o-tilde + "í": "í", # this is i-acute + "î": "î", # this is i-circumflex + "ï": "ï", # this is i-double-dot, as in naive + "ç": "ç", # this is cedilla + "ú": "ú", + "ü": "ü", # this is u-double-dot + "ñ": "ñ", # spanish n as in Senor + "þ": "þ", + "±": "±", # this is +/- symbol + "£": "£", # UK currency + "§": "§", # UK currency + "©": "©", + "·": "·", # middle dot + "°": "°", # this is the degrees + "º": "º", # this is the M ordinal + "®": "®", # this is the degrees + "¶": "¶", # end-paragraph (pi) symbol + "µ": "µ", # this is one quarter symbol + "¼": "¼", # this is one quarter symbol + "½": "½", # this is one half symbol + "¾": "¾", # this is three quarter symbol + "#": "#", # this is hash + "_": "_", # this is underscore symbol + "_": "_", # this is underscore symbol + "'": "'", # possession 
apostrophe + "€": "€", # this is euro currency + "™": "™", + "•": "•", + "&lquo;": "'", + "&rquo;": "'", + "−": "-", + "‘": "'", + "’": "'", + "“": """, + "”": """, + "…": "...", + "†": "†", + "²": "²", + "’": "'", + "œ": "œ", + "æ": "æ", + "†": "†", } entitymaprev = entitymap.values() @@ -253,14 +265,14 @@ def TimeProcessing(time, previoustimearr, bIsDivisionTime, stampurl): def StripAnchorTags(text): raise Exception("I've never called this function, so test it") - abf = re.split('(<[^>]*>)', text) + abf = re.split("(<[^>]*>)", text) - ret = '' + ret = "" for ab in abf: - if re.match(']*>(?i)', ab): + if re.match("]*>(?i)", ab): pass - elif re.match('(?i)', ab): + elif re.match("(?i)", ab): pass else: @@ -270,148 +282,163 @@ def StripAnchorTags(text): def WriteCleanText(fout, text, striphref=True): - text = re.sub('', '', text) - abf = re.split('(<[^>]*>)', text) + text = re.sub("", "", text) + abf = re.split("(<[^>]*>)", text) for ab in abf: # delete comments and links - if re.match(']*?->', ab): + if re.match("]*?->", ab): pass # XXX Differs from pullgluepages version - elif striphref and re.match(']+>(?i)', ab): - anamem = re.match(']+>(?i)", ab): + anamem = re.match("(?i)', ab): + elif striphref and re.match("(?i)", ab): pass # spaces only inside tags - elif re.match('<[^>]*>', ab): - fout.write(re.sub('\s', ' ', ab)) + elif re.match("<[^>]*>", ab): + fout.write(re.sub("\s", " ", ab)) # take out spurious > symbols and dos linefeeds else: - fout.write(re.sub('>|\r', '', ab)) + fout.write(re.sub(">|\r", "", ab)) # Legacy patch system, use patchfilter.py and patchtool now def ApplyFixSubstitutions(text, sdate, fixsubs): for sub in fixsubs: - if sub[3] == 'all' or sub[3] == sdate: + if sub[3] == "all" or sub[3] == sdate: (text, n) = re.subn(sub[0], sub[1], text) if (sub[2] != -1) and (n != sub[2]): print(sub) - raise Exception('wrong number of substitutions %d on %s' % (n, sub[0])) + raise Exception("wrong number of substitutions %d on %s" % (n, sub[0])) return text # this only accepts and tags def StraightenHTMLrecurse(stex, stampurl): # split the text into and and and - qisup = re.search(r'(<(a|i|b|s|small|sup|sub)( href="[^"]*")?>(.*?))(?i)', stex) + qisup = re.search( + r'(<(a|i|b|s|small|sup|sub)( href="[^"]*")?>(.*?))(?i)', stex + ) if qisup: qtagtype = qisup.group(2) - qhref = qisup.group(3) or '' - qtag = ('<%s%s>' % (qtagtype, qhref), '' % qtagtype) + qhref = qisup.group(3) or "" + qtag = ("<%s%s>" % (qtagtype, qhref), "" % qtagtype) if not qisup: qisup = re.search('(<(a) href="([^"]*)">(.*?))(?i)', stex) if qisup: - qtag = ('' % qisup.group(3), '') + qtag = ('' % qisup.group(3), "") if qisup: - sres = StraightenHTMLrecurse(stex[:qisup.start(1)], stampurl) + sres = StraightenHTMLrecurse(stex[: qisup.start(1)], stampurl) sres.append(qtag[0]) sres.extend(StraightenHTMLrecurse(qisup.group(4), stampurl)) sres.append(qtag[1]) - sres.extend(StraightenHTMLrecurse(stex[qisup.end(1):], stampurl)) + sres.extend(StraightenHTMLrecurse(stex[qisup.end(1) :], stampurl)) return sres - sres = re.split('(&[a-z0-9]*?;|&#\d+;|"|\xa3|&|\x01|\x0e|\x14|\x92|\xb0|\xab|\xe9|\xc3\xb8|\xc3\xb1|<[^>]*>|<|>)', stex) + sres = re.split( + '(&[a-z0-9]*?;|&#\d+;|"|\xa3|&|\x01|\x0e|\x14|\x92|\xb0|\xab|\xe9|\xc3\xb8|\xc3\xb1|<[^>]*>|<|>)', + stex, + ) for i in range(len(sres)): - #print "sresi ", sres[i], "\n" - #print "-----------------------------------------------\n" + # print "sresi ", sres[i], "\n" + # print "-----------------------------------------------\n" if not sres[i]: pass - elif re.match('&#[0-9]+;', 
sres[i]) and not re.match('[345][0-9];', sres[i]): + elif re.match("&#[0-9]+;", sres[i]) and not re.match("[345][0-9];", sres[i]): pass - elif sres[i][0] == '&': + elif sres[i][0] == "&": if sres[i] in entitymap: sres[i] = entitymap[sres[i]] elif sres[i] in entitymaprev: pass - elif sres[i] == '—': # special case as entitymap maps it with spaces + elif sres[i] == "—": # special case as entitymap maps it with spaces pass - elif sres[i] in ('"', '&', '<', '>'): + elif sres[i] in (""", "&", "<", ">"): pass - elif sres[i] in ('“', '”'): - sres[i] = '"' + elif sres[i] in ("“", "”"): + sres[i] = """ else: - raise Exception(sres[i] + ' unknown ent') - sres[i] = 'UNKNOWN-ENTITY' + raise Exception(sres[i] + " unknown ent") + sres[i] = "UNKNOWN-ENTITY" elif sres[i] == '"': - sres[i] = '"' + sres[i] = """ # junk chars sometimes get in # NB this only works if the characters are split in the regexp above - elif sres[i] == '\x01': - sres[i] = '' - elif sres[i] == '\x0e': - sres[i] = ' ' - elif sres[i] == '\x14': - sres[i] = ' ' - elif sres[i] == '\x92': + elif sres[i] == "\x01": + sres[i] = "" + elif sres[i] == "\x0e": + sres[i] = " " + elif sres[i] == "\x14": + sres[i] = " " + elif sres[i] == "\x92": sres[i] = "'" - elif sres[i] == '\xa3': - sres[i] = '£' - elif sres[i] == '\xb0': - sres[i] = '°' - elif sres[i] == '\xab': - sres[i] = 'é' - elif sres[i] == '\xe9': - sres[i] = 'é' - elif sres[i] == '\xc3\xb8': - sres[i] = 'ø' - elif sres[i] == '\xc3\xb1': - sres[i] = 'ñ' - - elif re.match('$(?i)', sres[i]): - sres[i] = '' # 'OPEN-i-TAG-OUT-OF-PLACE' 'CLOSE-i-TAG-OUT-OF-PLACE' - - elif re.match('$', sres[i]): # what is this? wrans 2003-05-13 has one - sres[i] = '' + elif sres[i] == "\xa3": + sres[i] = "£" + elif sres[i] == "\xb0": + sres[i] = "°" + elif sres[i] == "\xab": + sres[i] = "é" + elif sres[i] == "\xe9": + sres[i] = "é" + elif sres[i] == "\xc3\xb8": + sres[i] = "ø" + elif sres[i] == "\xc3\xb1": + sres[i] = "ñ" + + elif re.match("$(?i)", sres[i]): + sres[i] = "" # 'OPEN-i-TAG-OUT-OF-PLACE' 'CLOSE-i-TAG-OUT-OF-PLACE' + + elif re.match( + "$", sres[i] + ): # what is this? wrans 2003-05-13 has one + sres[i] = "" # allow brs through - elif re.match('
$(?i)', sres[i]): - sres[i] = '
' + elif re.match("
$(?i)", sres[i]): + sres[i] = "
" # discard garbage that appears in recent today postings - elif re.match('$(?i)', sres[i]): - sres[i] = '' + elif re.match("$(?i)", sres[i]): + sres[i] = "" - elif sres[i][0] == '<' or sres[i][0] == '>': + elif sres[i][0] == "<" or sres[i][0] == ">": print("Part:", sres[i][0]) - print("All:",sres[i]) + print("All:", sres[i]) print("stex:", stex) print("raising") - raise ContextException('tag %s tag out of place in %s' % (sres[i], stex), stamp=stampurl, fragment=stex) + raise ContextException( + "tag %s tag out of place in %s" % (sres[i], stex), + stamp=stampurl, + fragment=stex, + ) return sres # The lookahead assertion (?=
      |
||
|]*>(?i)' -reparts = re.compile('(|(?=]*?>|' + restmatcher + ')') +restmatcher = paratag + "|
        |
||
|]*>(?i)" +reparts = re.compile("(|(?=]*?>|" + restmatcher + ")") + +retable = re.compile("(?i)") +retablestart = re.compile("
      |
|||]*>|]*>$(?i)" +) +reparaempty = re.compile("(?:\s|| )*$(?i)") +reitalif = re.compile("\s*\s*$(?i)") -retable = re.compile('(?i)') -retablestart = re.compile('
      |
|||]*>|]*>$(?i)') -reparaempty = re.compile('(?:\s|| )*$(?i)') -reitalif = re.compile('\s*\s*$(?i)') # Break text into paragraphs. # the result alternates between lists of space types, and strings @@ -423,12 +450,11 @@ def SplitParaSpace(text, stampurl): # list of space objects, list of string spclist = [] - pstring = '' + pstring = "" parts = reparts.split(text) newparts = [] # split up the start bits without end
into component parts for nf in parts: - # a tiny bit of extra splitting up as output if retablestart.match(nf) and not retable.match(nf): newparts.extend(reparts2.split(nf)) @@ -437,11 +463,11 @@ def SplitParaSpace(text, stampurl): # get rid of blank and boring paragraphs if reparaempty.match(nf): - if pstring and re.search('\S', nf): + if pstring and re.search("\S", nf): print(text) - print('---' + pstring) - print('---' + nf) - raise Exception(' it carried across empty para ') + print("---" + pstring) + print("---" + nf) + raise Exception(" it carried across empty para ") continue # list of space type objects @@ -456,11 +482,10 @@ def SplitParaSpace(text, stampurl): print(text) print(spclist) print(pstring) - raise Exception(' double italic in paraspace ') - pstring = '' + raise Exception(" double italic in paraspace ") + pstring = "" continue - # we now have a string of a paragraph which we are putting into the list. # table type @@ -468,7 +493,7 @@ def SplitParaSpace(text, stampurl): if retable.match(nf): if pstring: print(text) - raise Exception(' non-empty preceding string ') + raise Exception(" non-empty preceding string ") pstring = nf bthisparaalone = True @@ -479,21 +504,22 @@ def SplitParaSpace(text, stampurl): else: pstring = lnf.strip() - # check that paragraphs have some text - if re.match('(?:<[^>]*>|\s)*$', pstring): + if re.match("(?:<[^>]*>|\s)*$", pstring): print("\nspclist:", spclist) print("\npstring:", pstring) print("\nthe text:", text[:100]) print("\nnf:", nf) - raise ContextException('no text in paragraph', stamp=stampurl, fragment=pstring) + raise ContextException( + "no text in paragraph", stamp=stampurl, fragment=pstring + ) # check that paragraph spaces aren't only font text, and have something # real in them, unless they are breaks because of tables if not (bprevparaalone or bthisparaalone): bnonfont = False for sl in spclist: - if not re.match(']*>(?i)', sl): + if not re.match("]*>(?i)", sl): bnonfont = True if not bnonfont: print("text:", text) @@ -502,17 +528,20 @@ def SplitParaSpace(text, stampurl): print("----------") print("nf", nf) print("----------") - raise ContextException('font found in middle of paragraph should be a paragraph break or removed', stamp=stampurl, fragment=pstring) + raise ContextException( + "font found in middle of paragraph should be a paragraph break or removed", + stamp=stampurl, + fragment=pstring, + ) bprevparaalone = bthisparaalone - # put the preceding space, then the string into output list res.append(spclist) res.append(pstring) - #print "???%s???" % pstring + # print "???%s???" % pstring - spclist = [ ] - pstring = '' + spclist = [] + pstring = "" # findal spaces into the output list res.append(spclist) @@ -523,27 +552,29 @@ def SplitParaSpace(text, stampurl): # Break text into paragraphs and mark the paragraphs according to their