diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..01b077ab
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,22 @@
+sudo: false
+language: python
+ - "3.4"
+ - TOXENV=py33 TEST_SUITE=clive
+ - TOXENV=py34 TEST_SUITE=clive
+ - TEST_SUITE=validate
+ - bin/travis/setup.sh $TEST_SUITE
+ - bin/travis/dispatch.sh $TEST_SUITE
+ email: false
+ irc:
+ channels: "irc.freenode.net#pyvideo"
+ on_success: always
+ on_failure: always
diff --git a/bin/travis/dispatch.sh b/bin/travis/dispatch.sh
new file mode 100755
index 00000000..f72272be
--- /dev/null
+++ b/bin/travis/dispatch.sh
@@ -0,0 +1,33 @@
+# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+case $SUITE in
+ clive )
+ cd src/ && tox
+ ;;
+ validate )
+ clive-cmd validate data/
+ ;;
+ * )
+ echo "Unknown test suite '$SUITE'."
+ exit 1
+ ;;
diff --git a/bin/travis/setup.sh b/bin/travis/setup.sh
new file mode 100755
index 00000000..5ca3f2a5
--- /dev/null
+++ b/bin/travis/setup.sh
@@ -0,0 +1,27 @@
+# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+pip install -U tox
+case $SUITE in
+ validate )
+ cd src && pip install .
+ ;;
diff --git a/src/.editorconfig b/src/.editorconfig
new file mode 100644
index 00000000..d4a2c440
--- /dev/null
+++ b/src/.editorconfig
@@ -0,0 +1,21 @@
+# http://editorconfig.org
+root = true
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+indent_style = tab
+end_of_line = crlf
+insert_final_newline = false
+indent_style = tab
diff --git a/src/.travis.yml b/src/.travis.yml
new file mode 100644
index 00000000..0d50ac23
--- /dev/null
+++ b/src/.travis.yml
@@ -0,0 +1,17 @@
+# Config file for automatic testing at travis-ci.org
+# This file will be regenerated if you run travis_pypi_setup.py
+language: python
+ - TOXENV=py35
+ - TOXENV=py34
+ - TOXENV=py33
+# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+install: pip install -U tox
+# command to run tests, e.g. python setup.py test
+script: tox
diff --git a/src/AUTHORS.rst b/src/AUTHORS.rst
new file mode 100644
index 00000000..1017fa9d
--- /dev/null
+++ b/src/AUTHORS.rst
@@ -0,0 +1,8 @@
+See ``git log --format="%an" | sort -u``
diff --git a/src/HISTORY.rst b/src/HISTORY.rst
new file mode 100644
index 00000000..5cd1c817
--- /dev/null
+++ b/src/HISTORY.rst
@@ -0,0 +1,8 @@
+0.1.0 (in development)
diff --git a/src/LICENSE b/src/LICENSE
new file mode 100644
index 00000000..e15d0b1b
--- /dev/null
+++ b/src/LICENSE
@@ -0,0 +1,14 @@
+Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
diff --git a/src/MANIFEST.in b/src/MANIFEST.in
new file mode 100644
index 00000000..2bb6bb1c
--- /dev/null
+++ b/src/MANIFEST.in
@@ -0,0 +1,11 @@
+include AUTHORS.rst
+include CONTRIBUTING.rst
+include HISTORY.rst
+include LICENSE
+include README.rst
+recursive-include tests *
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
+recursive-include docs *.rst conf.py Makefile make.bat
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 00000000..c56bc520
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,70 @@
+.PHONY: clean-pyc clean-build docs clean
+import os, webbrowser, sys
+ from urllib import pathname2url
+ from urllib.request import pathname2url
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+ @echo "clean - remove all build, test, coverage and Python artifacts"
+ @echo "clean-build - remove build artifacts"
+ @echo "clean-pyc - remove Python file artifacts"
+ @echo "clean-test - remove test and coverage artifacts"
+ @echo "lint - check style with flake8"
+ @echo "test - run tests quickly with the default Python"
+ @echo "test-all - run tests on every Python version with tox"
+ @echo "coverage - check code coverage quickly with the default Python"
+ @echo "docs - generate Sphinx HTML documentation, including API docs"
+ @echo "install - install the package to the active Python's site-packages"
+clean: clean-build clean-pyc clean-test
+ rm -fr build/
+ rm -fr dist/
+ rm -fr .eggs/
+ find . -name '*.egg-info' -exec rm -fr {} +
+ find . -name '*.egg' -exec rm -f {} +
+ find . -name '*.pyc' -exec rm -f {} +
+ find . -name '*.pyo' -exec rm -f {} +
+ find . -name '*~' -exec rm -f {} +
+ find . -name '__pycache__' -exec rm -fr {} +
+ rm -fr .tox/
+ rm -f .coverage
+ rm -fr htmlcov/
+ flake8 clive tests
+ python setup.py test
+ tox
+ coverage run --source clive setup.py test
+ coverage report -m
+ coverage html
+ $(BROWSER) htmlcov/index.html
+ rm -f docs/clive.rst
+ rm -f docs/modules.rst
+ sphinx-apidoc -o docs/ clive
+ $(MAKE) -C docs clean
+ $(MAKE) -C docs html
+ $(BROWSER) docs/_build/html/index.html
+install: clean
+ python setup.py install
diff --git a/src/README.rst b/src/README.rst
new file mode 100644
index 00000000..e99e4788
--- /dev/null
+++ b/src/README.rst
@@ -0,0 +1,8 @@
+pyvideo-data clive
+Data manipulation tools for pyvideo-data.
+* Free software: AGPLv3
+* Documentation: https://pyvideo-data.readthedocs.org/
diff --git a/src/clive/__init__.py b/src/clive/__init__.py
new file mode 100755
index 00000000..0b9d515d
--- /dev/null
+++ b/src/clive/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+__author__ = ''
+__email__ = ''
+__version__ = '0.1.0'
diff --git a/src/clive/cmdline.py b/src/clive/cmdline.py
new file mode 100644
index 00000000..a37d03b6
--- /dev/null
+++ b/src/clive/cmdline.py
@@ -0,0 +1,87 @@
+# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import sys
+import traceback
+from textwrap import dedent
+import click
+from clive import __version__
+from clive.lib import load_json_data
+from clive.validate import validate_item
+USAGE = '%prog [options] [command] [command-options]'
+VERSION = 'clive ' + __version__
+def click_run():
+ """Entry point for the command line: install the crash reporter, run cli."""
+ # Route any uncaught exception through exception_handler so the user gets
+ # a bug-report template instead of a bare traceback.
+ sys.excepthook = exception_handler
+ cli(obj={})
+def cli():
+ # NOTE(review): click_run() invokes this as cli(obj={}), so a click
+ # decorator presumably wraps this function; none is visible here -- confirm.
+ pass
+@click.argument('paths', nargs=-1, type=click.Path(exists=True))
+def validate(ctx, paths):
+ """Validate every JSON item found under the given paths.
+
+ :arg ctx: object whose ``exit(code=...)`` is called with the final status
+ :arg paths: existing files or directories to load with load_json_data
+
+ :raises click.UsageError: if no paths were given
+
+ Validation errors are written to stderr; the exit code is 1 if any item
+ failed validation and 0 otherwise.
+ """
+ if not paths:
+ raise click.UsageError('No files or directories specified.')
+ # Number of items that failed validation across all paths
+ error_count = 0
+ for path in paths:
+ data = load_json_data(path)
+ print('Looking at %d items...' % len(data))
+ for fn, item in data:
+ try:
+ validate_item(fn, item)
+ except ValueError as ve:
+ # Report the failure and keep going so all errors are shown in one run
+ click.echo('Error: %s:' % fn, err=True)
+ click.echo(ve, err=True)
+ error_count += 1
+ # FIXME: Validate things that need to be unique across the
+ # dataset here.
+ # FIXME: Validate file format? i.e. 2-space indents? Sort order?
+ print('Done!')
+ ctx.exit(code=1 if error_count else 0)
+def exception_handler(exc_type, exc_value, exc_tb):
+ """Print a bug-report template for an uncaught exception.
+
+ Installed as ``sys.excepthook`` by click_run. Prints the issue tracker
+ URL, the clive version, the Python version, the command line and the
+ formatted traceback.
+
+ :arg exc_type: exception class
+ :arg exc_value: exception instance
+ :arg exc_tb: traceback object
+ """
+ click.echo(dedent("""\
+ Oh no! Clive has thrown an error while trying to do stuff. Please write
+ up a bug report with the specifics so that we can fix it.
+ https://github.com/pyvideo/pyvideo-data/issues
+ Here is some information you can copy and paste into the bug report:
+ """))
+ click.echo('---')
+ click.echo('Clive: %s' % repr(__version__))
+ click.echo('Python: %s' % repr(sys.version))
+ click.echo('Command line: %s' % repr(sys.argv))
+ click.echo()
+ click.echo(
+ ''.join(traceback.format_exception(exc_type, exc_value, exc_tb)))
+ click.echo('---')
diff --git a/src/clive/lib.py b/src/clive/lib.py
new file mode 100644
index 00000000..ecf506a4
--- /dev/null
+++ b/src/clive/lib.py
@@ -0,0 +1,65 @@
+# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import json
+import os
+def load_json_data(path):
+    """Parse and return all JSON files found at a path.
+
+    :arg path: a file or directory; directories are walked recursively
+
+    :returns: list of (filename, data) tuples for all .json files, sorted by
+        filename. The list is empty when the path is falsy, does not exist,
+        or is a single file whose name does not end in ``.json``.
+
+    """
+    if not path or not os.path.exists(path):
+        return []
+
+    if os.path.isfile(path):
+        all_files = [path] if path.endswith('.json') else []
+    else:
+        all_files = [
+            os.path.join(root, fn)
+            for root, _dirs, files in os.walk(path)
+            for fn in files
+            if fn.endswith('.json')
+        ]
+
+    data = []
+    for fn in sorted(all_files):
+        # Read as UTF-8 explicitly: the default encoding follows the locale,
+        # so non-ASCII titles would fail to decode under e.g. LANG=C.
+        with open(fn, 'r', encoding='utf-8') as fp:
+            data.append((fn, json.load(fp)))
+    return data
+def save_json_data(data_items):
+    """Write each (filename, data) pair to disk as JSON.
+
+    Every file is overwritten with a 2-space-indented dump whose keys are
+    sorted alphabetically.
+
+    :arg data_items: list of (fn, data) tuples to save
+
+    """
+    for target, payload in data_items:
+        with open(target, 'w') as out:
+            # FIXME: We really want an explicit sorting of the keys and not
+            # sort alphabetically. Maybe switch sort_keys to False and then use
+            # an OrderedDict, build the dicts by hand and then dump?
+            json.dump(payload, out, sort_keys=True, indent=2)
diff --git a/src/clive/validate.py b/src/clive/validate.py
new file mode 100644
index 00000000..4bb4a557
--- /dev/null
+++ b/src/clive/validate.py
@@ -0,0 +1,190 @@
+# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import re
+import time
+class T:
+    """Base validator: knows only whether a value is required."""
+
+    def __init__(self, required=False, *args, **kwargs):
+        # Extra positional and keyword arguments are accepted and ignored.
+        self.required = required
+
+    def validate(self, val):
+        """Raise ValueError if the value is required but is None."""
+        if val is None and self.required:
+            raise ValueError('value required')
+class IntT(T):
+    """Validator for integer values."""
+
+    def validate(self, val):
+        """Check that val is an int (or None when not required).
+
+        :raises ValueError: if a required value is None, or val is not an int
+        """
+        super().validate(val)
+        if val is None:
+            return
+        # bool is a subclass of int, so JSON true/false has to be rejected
+        # explicitly or it would pass as a number.
+        if isinstance(val, bool) or not isinstance(val, int):
+            raise ValueError('value is not a valid int: %r' % val)
+class BoolT(T):
+    """Validator for boolean values."""
+
+    def validate(self, val):
+        """Check that val is a real bool (or None when not required).
+
+        :raises ValueError: if a required value is None, or val is not a bool
+        """
+        super().validate(val)
+        if val is None:
+            return
+        # A membership test against (True, False) compares by equality and
+        # would let 0, 1, 0.0 and 1.0 through; check the type instead.
+        if not isinstance(val, bool):
+            raise ValueError('value is not a valid bool: %r' % val)
+SLUG_RE = re.compile(r'^[a-zA-Z0-9_-]+$')
+
+
+class TextT(T):
+    """Validator for text values, optionally constrained to be a slug.
+
+    The ``markdown`` and ``url`` flags are stored but not checked yet.
+    """
+
+    def __init__(self, required=False, slug=False, markdown=False, url=False,
+                 *args, **kwargs):
+        # Pass ``required`` positionally: combining ``required=required`` with
+        # ``*args`` raises TypeError as soon as any extra positional is given.
+        super().__init__(required, *args, **kwargs)
+        self.markdown = markdown
+        self.slug = slug
+        self.url = url
+
+    def validate(self, val):
+        """Check that val is a str and, if requested, a valid slug.
+
+        :raises ValueError: if a required value is None, val is not a str, or
+            ``slug`` is set and val does not match SLUG_RE
+        """
+        super().validate(val)
+        if val is None:
+            return
+        if not isinstance(val, str):
+            raise ValueError('value is not a valid text value: %r' % val)
+        # FIXME: markdown check here
+        if self.slug and not SLUG_RE.match(val):
+            raise ValueError('value is not a valid slug: %r' % val)
+        # FIXME: url check here
+class DateT(T):
+    """Validator for dates written as ``YYYY-MM-DD`` strings."""
+
+    def validate(self, val):
+        """Check that val parses as a ``YYYY-MM-DD`` date.
+
+        :returns: None when val is None, True when val is a valid date
+
+        :raises ValueError: if a required value is None, or val is not a
+            string in ``YYYY-MM-DD`` format
+        """
+        super().validate(val)
+        if val is None:
+            return
+        try:
+            time.strptime(val, '%Y-%m-%d')
+        except (TypeError, ValueError):
+            # strptime raises TypeError for non-strings (e.g. a JSON number);
+            # report it as ValueError like every other validation failure so
+            # callers catching ValueError do not crash.
+            raise ValueError('value is not date in YYYY-MM-DD format: %r' % val)
+        return True
+class ListOfT(T):
+    """Validator for lists (or tuples) whose items all share one subtype."""
+
+    def __init__(self, subtype, required=False, *args, **kwargs):
+        # Pass ``required`` positionally: combining ``required=required`` with
+        # ``*args`` raises TypeError as soon as any extra positional is given.
+        super().__init__(required, *args, **kwargs)
+        self.subtype = subtype
+
+    def validate(self, val):
+        """Check that val is a list/tuple and validate each item.
+
+        :raises ValueError: if a required value is None, val is not a list or
+            tuple, or any item fails the subtype's validation
+        """
+        super().validate(val)
+        # An optional list may be None, same as the scalar validators.
+        if val is None:
+            return
+        if not isinstance(val, (tuple, list)):
+            raise ValueError('value is not a list: %r' % val)
+        for item in val:
+            self.subtype.validate(item)
+class DictOfT(T):
+    """Validator for dicts with a fixed schema of key -> validator.
+
+    :arg keyvals: dict mapping each allowed key to the validator for its value
+    """
+
+    def __init__(self, keyvals, required=False, *args, **kwargs):
+        # Pass ``required`` positionally: combining ``required=required`` with
+        # ``*args`` raises TypeError as soon as any extra positional is given.
+        super().__init__(required, *args, **kwargs)
+        self.keyvals = keyvals
+        self.all_keys = set(self.keyvals.keys())
+
+    def validate(self, val):
+        """Check that val is a dict that matches the schema.
+
+        :raises ValueError: if val is not a dict, contains unknown keys, lacks
+            a key whose validator is marked required, or any value fails its
+            validator (the message is prefixed with the offending key)
+        """
+        if not isinstance(val, dict):
+            raise ValueError('value is not a dict: %r' % val)
+
+        # Verify all keys are known
+        unknown = set(val.keys()) - self.all_keys
+        if unknown:
+            raise ValueError('unknown keys: %s' % repr(unknown))
+
+        # Verify required keys are present. Validating only the keys that
+        # exist would let a missing required field go unnoticed.
+        missing = sorted(
+            key for key, type_ in self.keyvals.items()
+            if key not in val and getattr(type_, 'required', False)
+        )
+        if missing:
+            raise ValueError('missing required keys: %r' % missing)
+
+        # Verify values
+        for key, item in val.items():
+            try:
+                self.keyvals[key].validate(item)
+            except ValueError as ve:
+                # Prefix the message (not the repr of the exception) with the
+                # key so nested errors read "videos: url: value required".
+                raise ValueError('%s: %s' % (key, ve))
+# Schemas keyed by item type ('video' or 'category'); validate_item picks one
+# and calls its validate() on the parsed JSON data.
+REQS = {
+ 'video': DictOfT({
+ # FIXME: This is a leftover from pyvideo. Do we need this?
+ 'id': IntT(),
+ # FIXME: This could be inferred from the directory.
+ 'category': TextT(required=True),
+ # FIXME: This has to be unique across the data-set. That's tricky.
+ 'slug': TextT(required=True, slug=True),
+ 'title': TextT(required=True),
+ 'summary': TextT(required=True, markdown=True),
+ 'description': TextT(markdown=True),
+ 'quality_notes': TextT(markdown=True),
+ 'language': TextT(required=True),
+ 'copyright_text': TextT(required=True),
+ 'thumbnail_url': TextT(url=True),
+ 'duration': IntT(),
+ 'videos': ListOfT(
+ DictOfT({
+ 'length': IntT(),
+ 'url': TextT(required=True, url=True),
+ # FIXME: This needs thinking.
+ 'type': TextT(required=True)
+ })
+ ),
+ 'source_url': TextT(url=True),
+ 'recorded': DateT(),
+ 'tags': ListOfT(TextT()),
+ 'speakers': ListOfT(TextT()),
+ }),
+ 'category': DictOfT({
+ 'title': TextT(required=True),
+ 'description': TextT(markdown=True),
+ 'url': TextT(url=True),
+ 'start_date': DateT(),
+ # FIXME: This has to be unique across the data-set. Can we just use the
+ # directory name?
+ 'slug': TextT(required=True, slug=True),
+ })
+def validate_item(fn, json_data):
+    """Validate one parsed JSON item against the schema for its type.
+
+    :arg fn: filename the data came from; names ending in ``category.json``
+        select the category schema, everything else the video schema
+    :arg json_data: the parsed JSON data
+
+    :raises ValueError: if the data does not match the schema
+    """
+    # FIXME: This is kind of cheating. Need a better way to distinguish data
+    # types.
+    if fn.endswith('category.json'):
+        schema = REQS['category']
+    else:
+        schema = REQS['video']
+    schema.validate(json_data)
+def validate_items(items):
+    """Validate many items and collect the failures instead of raising.
+
+    :arg items: iterable of (filename, data) tuples
+
+    :returns: list of (filename, error message) tuples, one per invalid item;
+        empty when every item validated
+    """
+    errors = []
+    for fn, data in items:
+        try:
+            # validate_item takes the filename first; it uses it to pick the
+            # schema. Passing only the data raised TypeError on every call.
+            validate_item(fn, data)
+        except ValueError as ve:
+            errors.append((fn, str(ve)))
+    return errors
diff --git a/src/requirements_dev.txt b/src/requirements_dev.txt
new file mode 100644
index 00000000..b525503c
--- /dev/null
+++ b/src/requirements_dev.txt
@@ -0,0 +1,10 @@
+# Docs
+# Code quality
+# Testing
diff --git a/src/setup.cfg b/src/setup.cfg
new file mode 100644
index 00000000..8a22baa6
--- /dev/null
+++ b/src/setup.cfg
@@ -0,0 +1,11 @@
+current_version = 0.1.0
+commit = True
+tag = True
+universal = 1
diff --git a/src/setup.py b/src/setup.py
new file mode 100755
index 00000000..8f30e078
--- /dev/null
+++ b/src/setup.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+ from setuptools import setup
+except ImportError:
+ from distutils.core import setup
+with open('README.rst') as readme_file:
+ readme = readme_file.read()
+with open('HISTORY.rst') as history_file:
+ history = history_file.read()
+requirements = [
+ 'click',
+ name='clive',
+ version='0.1.0',
+ description='Data tools for pyvideo-data.',
+ long_description=readme + '\n\n' + history,
+ url='https://github.com/pyvideo/pyvideo-data',
+ packages=[
+ 'clive',
+ ],
+ package_dir={
+ 'clive': 'clive'
+ },
+ include_package_data=True,
+ install_requires=requirements,
+ license='AGPLv3',
+ zip_safe=False,
+ entry_points="""
+ [console_scripts]
+ clive-cmd=clive.cmdline:click_run
+ """,
+ classifiers=[
+ 'Development Status :: 2 - Pre-Alpha',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
+ 'Natural Language :: English',
+ 'Programming Language :: Python :: 3',
+ 'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: 3.5',
+ ],
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
new file mode 100755
index 00000000..40a96afc
--- /dev/null
+++ b/src/tests/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/src/tests/test_lib.py b/src/tests/test_lib.py
new file mode 100644
index 00000000..f53f86d1
--- /dev/null
+++ b/src/tests/test_lib.py
@@ -0,0 +1,44 @@
+from clive.lib import load_json_data
+class TestLoadJsonFiles:
+    """Tests for load_json_data."""
+
+    def test_bad_paths(self):
+        # Falsy and nonexistent paths all yield an empty list
+        for bad_path in (None, '', '/nonexistent/file'):
+            assert load_json_data(bad_path) == []
+
+    def test_non_json_file(self, tmpdir):
+        # A non-.json file is ignored both directly and via its directory
+        text_file = tmpdir.join('foo.txt')
+        text_file.write('test file')
+
+        assert load_json_data(tmpdir.strpath) == []
+        assert load_json_data(text_file.strpath) == []
+
+    def test_json_file(self, tmpdir):
+        # A .json file is found both directly and via its directory
+        json_file = tmpdir.join('foo.json')
+        json_file.write('{}')
+        expected = [(json_file.strpath, {})]
+
+        assert load_json_data(tmpdir.strpath) == expected
+        assert load_json_data(json_file.strpath) == expected
+
+    def test_directory(self, tmpdir):
+        pycon_dir = tmpdir.join('pycon').mkdir()
+        pycon_first = pycon_dir.join('foo1.json')
+        pycon_first.write('{}')
+        pycon_second = pycon_dir.join('foo2.json')
+        pycon_second.write('{}')
+        djangocon_only = tmpdir.join('djangocon').mkdir().join('foo3.json')
+        djangocon_only.write('{}')
+
+        # Note: djangocon comes first because it's sorted
+        expected = [
+            (djangocon_only.strpath, {}),
+            (pycon_first.strpath, {}),
+            (pycon_second.strpath, {}),
+        ]
+        assert load_json_data(tmpdir.strpath) == expected
diff --git a/src/tests/test_validate.py b/src/tests/test_validate.py
new file mode 100755
index 00000000..74265289
--- /dev/null
+++ b/src/tests/test_validate.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+from clive.validate import validate_item
+# def validate_empty():
+# assert validate_item({})
diff --git a/src/tox.ini b/src/tox.ini
new file mode 100644
index 00000000..2e7a8d36
--- /dev/null
+++ b/src/tox.ini
@@ -0,0 +1,12 @@
+envlist = py33, py34, py35
+setenv =
+ PYTHONPATH = {toxinidir}:{toxinidir}/clive
+commands = python setup.py test
+; If you want to make tox run the tests with the same versions, create a
+; requirements.txt with the pinned versions and uncomment the following lines:
+; deps =
+; -r{toxinidir}/requirements.txt