diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..01b077ab --- /dev/null +++ b/.travis.yml @@ -0,0 +1,22 @@ +sudo: false +language: python +python: + - "3.4" + +env: + - TOXENV=py33 TEST_SUITE=clive + - TOXENV=py34 TEST_SUITE=clive + - TEST_SUITE=validate + +install: + - bin/travis/setup.sh $TEST_SUITE + +script: + - bin/travis/dispatch.sh $TEST_SUITE + +notifications: + email: false + irc: + channels: "irc.freenode.net#pyvideo" + on_success: always + on_failure: always diff --git a/bin/travis/dispatch.sh b/bin/travis/dispatch.sh new file mode 100755 index 00000000..f72272be --- /dev/null +++ b/bin/travis/dispatch.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +SUITE=${1:-all} + +case $SUITE in + clive ) + cd src/ && tox + ;; + + validate ) + clive-cmd validate data/ + ;; + + * ) + echo "Unknown test suite '$SUITE'." 
+ exit 1 + ;; +esac diff --git a/bin/travis/setup.sh b/bin/travis/setup.sh new file mode 100755 index 00000000..5ca3f2a5 --- /dev/null +++ b/bin/travis/setup.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +SUITE=${1:-all} + +pip install -U tox + +case $SUITE in + validate ) + cd src && pip install . + ;; + +esac diff --git a/src/.editorconfig b/src/.editorconfig new file mode 100644 index 00000000..d4a2c440 --- /dev/null +++ b/src/.editorconfig @@ -0,0 +1,21 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab diff --git a/src/.travis.yml b/src/.travis.yml new file mode 100644 index 00000000..0d50ac23 --- /dev/null +++ b/src/.travis.yml @@ -0,0 +1,17 @@ +# Config file for automatic testing at travis-ci.org +# This file will be regenerated if you run travis_pypi_setup.py + +language: python + +env: + - TOXENV=py35 + - TOXENV=py34 + - TOXENV=py33 + +# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors +install: pip install -U tox + +# command to run tests, e.g. 
python setup.py test +script: tox + + diff --git a/src/AUTHORS.rst b/src/AUTHORS.rst new file mode 100644 index 00000000..1017fa9d --- /dev/null +++ b/src/AUTHORS.rst @@ -0,0 +1,8 @@ +======= +Credits +======= + +Contributors +============ + +See ``git log --format="%an" | sort -u`` diff --git a/src/HISTORY.rst b/src/HISTORY.rst new file mode 100644 index 00000000..5cd1c817 --- /dev/null +++ b/src/HISTORY.rst @@ -0,0 +1,8 @@ +======= +History +======= + +0.1.0 (in development) +====================== + +* FIXME diff --git a/src/LICENSE b/src/LICENSE new file mode 100644 index 00000000..e15d0b1b --- /dev/null +++ b/src/LICENSE @@ -0,0 +1,14 @@ +Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. +
+This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
diff --git a/src/MANIFEST.in b/src/MANIFEST.in new file mode 100644 index 00000000..2bb6bb1c --- /dev/null +++ b/src/MANIFEST.in @@ -0,0 +1,11 @@ +include AUTHORS.rst +include CONTRIBUTING.rst +include HISTORY.rst +include LICENSE +include README.rst + +recursive-include tests * +recursive-exclude * __pycache__ +recursive-exclude * *.py[co] + +recursive-include docs *.rst conf.py Makefile make.bat diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 00000000..c56bc520 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,70 @@ +.PHONY: clean-pyc clean-build docs clean +define BROWSER_PYSCRIPT +import os, webbrowser, sys +try: + from urllib import pathname2url +except: + from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +help: + @echo "clean - remove all build, test, coverage and Python artifacts" + @echo "clean-build - remove build artifacts" + @echo "clean-pyc - remove Python file artifacts" + @echo "clean-test - remove test and coverage artifacts" + @echo "lint - check style with flake8" + @echo "test - run tests quickly with the default Python" + @echo "test-all - run tests on every Python version with tox" + @echo "coverage - check code coverage quickly with the default Python" + @echo "docs - generate Sphinx HTML documentation, including API docs" + @echo "install - install the package to the active Python's site-packages" + +clean: clean-build clean-pyc clean-test + +clean-build: + rm -fr build/ + rm -fr dist/ + rm -fr .eggs/ + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '*.egg' -exec rm -f {} + + +clean-pyc: + find . -name '*.pyc' -exec rm -f {} + + find . -name '*.pyo' -exec rm -f {} + + find . -name '*~' -exec rm -f {} + + find . 
-name '__pycache__' -exec rm -fr {} + + +clean-test: + rm -fr .tox/ + rm -f .coverage + rm -fr htmlcov/ + +lint: + flake8 clive tests + +test: + python setup.py test + +test-all: + tox + +coverage: + coverage run --source clive setup.py test + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html + +docs: + rm -f docs/clive.rst + rm -f docs/modules.rst + sphinx-apidoc -o docs/ clive + $(MAKE) -C docs clean + $(MAKE) -C docs html + $(BROWSER) docs/_build/html/index.html + +install: clean + python setup.py install diff --git a/src/README.rst b/src/README.rst new file mode 100644 index 00000000..e99e4788 --- /dev/null +++ b/src/README.rst @@ -0,0 +1,8 @@ +================== +pyvideo-data clive +================== + +Data manipulation tools for pyvideo-data. + +* Free software: AGPLv3 +* Documentation: https://pyvideo-data.readthedocs.org/ diff --git a/src/clive/__init__.py b/src/clive/__init__.py new file mode 100755 index 00000000..0b9d515d --- /dev/null +++ b/src/clive/__init__.py @@ -0,0 +1,18 @@ +# Copyright (C) 2015, 2016 Sheila Miguez, Will Kahn-Greene +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
import sys
import traceback
from textwrap import dedent

import click

from clive import __version__
from clive.lib import load_json_data
from clive.validate import validate_item


USAGE = '%prog [options] [command] [command-options]'
VERSION = 'clive ' + __version__


def click_run():
    """Console-script entry point for ``clive-cmd``.

    Installs :func:`exception_handler` as ``sys.excepthook`` so that
    unexpected errors produce a bug-report template, then hands control to
    the click command group.

    """
    sys.excepthook = exception_handler
    cli(obj={})


@click.group()
def cli():
    """Data manipulation tools for pyvideo-data."""


@cli.command()
@click.argument('paths', nargs=-1, type=click.Path(exists=True))
@click.pass_context
def validate(ctx, paths):
    """Validate the .json files found at PATHS.

    Every problem is reported on stderr. The exit code is 1 if any file
    failed to load or validate and 0 otherwise.

    """
    if not paths:
        raise click.UsageError('No files or directories specified.')

    error_count = 0

    for path in paths:
        try:
            data = load_json_data(path)
        except ValueError as ve:
            # json.load signals malformed JSON with ValueError. That is a
            # data problem to report, not a crash of the tool.
            click.echo('Error: %s:' % path, err=True)
            click.echo(ve, err=True)
            error_count += 1
            continue

        click.echo('Looking at %d items...' % len(data))
        for fn, item in data:
            try:
                validate_item(fn, item)
            except ValueError as ve:
                click.echo('Error: %s:' % fn, err=True)
                click.echo(ve, err=True)
                error_count += 1

        # FIXME: Validate things that need to be unique across the
        # dataset here.

        # FIXME: Validate file format? i.e. 2-space indents? Sort order?

    click.echo('Done!')
    ctx.exit(code=1 if error_count else 0)


def exception_handler(exc_type, exc_value, exc_tb):
    """``sys.excepthook`` replacement that prints a bug-report template.

    The report (versions, command line and traceback) is written to stderr
    so that it does not get mixed into regular command output.

    :arg exc_type: class of the uncaught exception
    :arg exc_value: the uncaught exception instance
    :arg exc_tb: traceback object of the uncaught exception

    """
    click.echo(dedent("""\
    Oh no! Clive has thrown an error while trying to do stuff. Please write
    up a bug report with the specifics so that we can fix it.

    https://github.com/pyvideo/pyvideo-data/issues

    Here is some information you can copy and paste into the bug report:

    """), err=True)
    click.echo('---', err=True)
    click.echo('Clive: %s' % repr(__version__), err=True)
    click.echo('Python: %s' % repr(sys.version), err=True)
    click.echo('Command line: %s' % repr(sys.argv), err=True)
    click.echo(err=True)
    click.echo(
        ''.join(traceback.format_exception(exc_type, exc_value, exc_tb)),
        err=True)
    click.echo('---', err=True)
import json
import os


def load_json_data(path):
    """Parses and returns all video files for a path

    A file is loaded only if its name ends in ``.json``; a directory is
    walked recursively and every ``.json`` file below it is loaded. Files
    are read as UTF-8 instead of the locale's default encoding so that the
    result does not depend on the environment the tool runs in.

    :arg path: a file or directory
    :returns: list of (filename, data) tuples for all .json files, sorted
        by filename; an empty list if ``path`` is falsy or does not exist
    :raises ValueError: if a file does not contain valid JSON

    """
    if not path or not os.path.exists(path):
        return []

    if os.path.isfile(path):
        all_files = [path] if path.endswith('.json') else []
    else:
        all_files = []
        for root, _dirs, files in os.walk(path):
            all_files.extend(
                os.path.join(root, fn) for fn in files if fn.endswith('.json')
            )

    data = []
    for fn in sorted(all_files):
        with open(fn, 'r', encoding='utf-8') as fp:
            data.append((fn, json.load(fp)))

    return data


def save_json_data(data_items):
    """Takes list of (fn, data) tuples and saves them all to disk

    Each file is written as UTF-8 with 2-space indentation and keys sorted
    alphabetically. Existing files are overwritten.

    :arg data_items: list of (fn, data) tuples to save

    """
    for fn, data in data_items:
        with open(fn, 'w', encoding='utf-8') as fp:
            # FIXME: We really want an explicit sorting of the keys and not
            # sort alphabetically. Maybe switch sort_keys to False and then use
            # an OrderedDict, build the dicts by hand and then dump?
            json.dump(data, fp, indent=2, sort_keys=True)
import os
import re
import time


class T:
    """Base class for value validators.

    :arg required: when true, ``None`` is rejected by :meth:`validate`

    Extra positional and keyword arguments are accepted and ignored so
    subclasses can forward whatever they were given.

    """
    def __init__(self, required=False, *args, **kwargs):
        self.required = required

    def validate(self, val):
        """Raise ``ValueError`` if ``val`` is ``None`` but required."""
        if self.required and val is None:
            raise ValueError('value required')


class IntT(T):
    """Validates integers; ``None`` is allowed unless required."""

    def validate(self, val):
        super().validate(val)
        if val is None:
            return

        # bool is a subclass of int, but a JSON true/false is not a number.
        if isinstance(val, bool) or not isinstance(val, int):
            raise ValueError('value is not a valid int: %r' % (val,))


class BoolT(T):
    """Validates booleans; ``None`` is allowed unless required."""

    def validate(self, val):
        super().validate(val)
        if val is None:
            return

        # isinstance rather than ``in (True, False)``: 0 and 1 compare equal
        # to False and True but are not booleans.
        if not isinstance(val, bool):
            raise ValueError('value is not a valid bool: %r' % (val,))


SLUG_RE = re.compile(r'^[a-zA-Z0-9_-]+$')


class TextT(T):
    """Validates strings; ``None`` is allowed unless required.

    :arg required: reject ``None``
    :arg slug: value must match ``SLUG_RE``
    :arg markdown: value is markdown (recorded, not checked yet)
    :arg url: value is a url (recorded, not checked yet)

    """
    def __init__(self, required=False, slug=False, markdown=False, url=False,
                 *args, **kwargs):
        # ``required`` goes positionally so extra positional args cannot
        # collide with it.
        super().__init__(required, *args, **kwargs)
        self.markdown = markdown
        self.slug = slug
        self.url = url

    def validate(self, val):
        super().validate(val)
        if val is None:
            return

        if not isinstance(val, str):
            raise ValueError('value is not a valid text value: %r' % (val,))

        # FIXME: markdown check here

        if self.slug and not SLUG_RE.match(val):
            raise ValueError('value is not a valid slug: %r' % (val,))

        # FIXME: url check here


class DateT(T):
    """Validates ``YYYY-MM-DD`` date strings; ``None`` allowed unless required."""

    def validate(self, val):
        super().validate(val)
        if val is None:
            return

        try:
            time.strptime(val, '%Y-%m-%d')
        except (TypeError, ValueError):
            # strptime raises TypeError for non-string input; report that
            # as a validation failure like any other bad value.
            raise ValueError(
                'value is not date in YYYY-MM-DD format: %r' % (val,))


class ListOfT(T):
    """Validates a list (or tuple) whose items all satisfy ``subtype``.

    :arg subtype: validator applied to every item
    :arg required: reject ``None``

    """
    def __init__(self, subtype, required=False, *args, **kwargs):
        super().__init__(required, *args, **kwargs)
        self.subtype = subtype

    def validate(self, val):
        super().validate(val)
        if val is None:
            # Optional and absent, same as the scalar validators.
            return

        if not isinstance(val, (tuple, list)):
            raise ValueError('value is not a list: %r' % (val,))

        for item in val:
            self.subtype.validate(item)


class DictOfT(T):
    """Validates a dict against a mapping of key -> validator.

    :arg keyvals: dict mapping each allowed key to its validator
    :arg required: when true, ``None`` is reported as a missing value

    A value passes when it is a dict, has no keys outside ``keyvals``, has
    every key whose validator is ``required`` and every present value
    passes its validator. ``None`` is never a valid dict.

    """
    def __init__(self, keyvals, required=False, *args, **kwargs):
        super().__init__(required, *args, **kwargs)
        self.keyvals = keyvals
        self.all_keys = set(self.keyvals.keys())

    def validate(self, val):
        super().validate(val)
        if not isinstance(val, dict):
            raise ValueError('value is not a dict: %r' % (val,))

        # Verify all keys are known
        unknown = set(val.keys()) - self.all_keys
        if unknown:
            raise ValueError(
                'unknown keys: %r' % (sorted(unknown, key=repr),))

        # Verify required keys are present. Validators only ever see values
        # that exist, so an absent key has to be caught here.
        missing = sorted(
            key for key, type_ in self.keyvals.items()
            if type_.required and key not in val
        )
        if missing:
            raise ValueError('missing required keys: %s' % ', '.join(missing))

        # Verify values
        for key, item in val.items():
            try:
                self.keyvals[key].validate(item)
            except ValueError as ve:
                # Prefix the key so nested failures can be located.
                raise ValueError('%s: %s' % (key, ve))


REQS = {
    'video': DictOfT({
        # FIXME: This is a leftover from pyvideo. Do we need this?
        'id': IntT(),

        # FIXME: This could be inferred from the directory.
        'category': TextT(required=True),

        # FIXME: This has to be unique across the data-set. That's tricky.
        'slug': TextT(required=True, slug=True),

        'title': TextT(required=True),
        'summary': TextT(required=True, markdown=True),
        'description': TextT(markdown=True),
        'quality_notes': TextT(markdown=True),
        'language': TextT(required=True),
        'copyright_text': TextT(required=True),
        'thumbnail_url': TextT(url=True),
        'duration': IntT(),
        'videos': ListOfT(
            DictOfT({
                'length': IntT(),
                'url': TextT(required=True, url=True),

                # FIXME: This needs thinking.
                'type': TextT(required=True)
            })
        ),
        'source_url': TextT(url=True),
        'recorded': DateT(),
        'tags': ListOfT(TextT()),
        'speakers': ListOfT(TextT()),
    }),

    'category': DictOfT({
        'title': TextT(required=True),
        'description': TextT(markdown=True),
        'url': TextT(url=True),
        'start_date': DateT(),

        # FIXME: This has to be unique across the data-set. Can we just use the
        # directory name?
        'slug': TextT(required=True, slug=True),
    })
}


def validate_item(fn, json_data):
    """Validate one parsed JSON document against its schema in ``REQS``.

    :arg fn: filename the data came from; a file named exactly
        ``category.json`` is a category, anything else is a video
    :arg json_data: the parsed JSON data
    :raises ValueError: if the data does not satisfy the schema

    """
    # FIXME: This is kind of cheating. Need a better way to distinguish data
    # types.
    # Compare the basename so e.g. "my-category.json" is not taken for a
    # category file.
    is_category = os.path.basename(fn) == 'category.json'
    type_ = 'category' if is_category else 'video'
    REQS[type_].validate(json_data)


def validate_items(items):
    """Validate many documents and collect the failures.

    :arg items: iterable of (filename, data) tuples
    :returns: list of (filename, error message) tuples, one per invalid
        item, in input order; empty if everything is valid

    """
    errors = []
    for fn, data in items:
        try:
            validate_item(fn, data)
        except ValueError as ve:
            errors.append((fn, str(ve)))

    return errors
from clive.lib import load_json_data


class TestLoadJsonFiles:
    """Checks how ``load_json_data`` treats different kinds of paths."""

    def test_bad_paths(self):
        # Missing, empty and nonexistent paths all yield no data.
        for bad_path in (None, '', '/nonexistent/file'):
            assert load_json_data(bad_path) == []

    def test_non_json_file(self, tmpdir):
        text_file = tmpdir.join('foo.txt')
        text_file.write('test file')

        # Ignored both when found via the directory and when named directly.
        for target in (tmpdir.strpath, text_file.strpath):
            assert load_json_data(target) == []

    def test_json_file(self, tmpdir):
        json_file = tmpdir.join('foo.json')
        json_file.write('{}')
        expected = [(json_file.strpath, {})]

        for target in (tmpdir.strpath, json_file.strpath):
            assert load_json_data(target) == expected

    def test_directory(self, tmpdir):
        pycon_dir = tmpdir.join('pycon').mkdir()
        pycon_files = [pycon_dir.join(name) for name in ('foo1.json', 'foo2.json')]
        for json_file in pycon_files:
            json_file.write('{}')

        django_file = tmpdir.join('djangocon').mkdir().join('foo3.json')
        django_file.write('{}')

        # Note: djangocon comes first because the results are sorted
        expected = [(django_file.strpath, {})]
        expected.extend((json_file.strpath, {}) for json_file in pycon_files)

        assert load_json_data(tmpdir.strpath) == expected