diff --git a/.dockerignore b/.dockerignore index a194c65ef..6382f6868 100644 --- a/.dockerignore +++ b/.dockerignore @@ -82,9 +82,8 @@ workflows **/*.zip # Project local configurations -config/weaver.ini -config/wps_processes.yml -config/data_sources.json +!config/*.example +config/* # Old project sources [Bb]in diff --git a/.github/labeler-files.yml b/.github/labeler-files.yml index 1678cfde6..f43619830 100644 --- a/.github/labeler-files.yml +++ b/.github/labeler-files.yml @@ -15,7 +15,7 @@ ci/operations: - Dockerfile* - setup.* - requirements* - - tests/travis-ci/**/* + - tests/smoke/**/* ci/doc: - "*.rst" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..5357a2d1b --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,98 @@ +# run test suites + +name: Tests +on: + - pull_request + - push + +jobs: + # see: https://github.com/fkirc/skip-duplicate-actions + skip_duplicate: + continue-on-error: true + runs-on: ubuntu-latest + outputs: + should_skip: ${{ steps.skip_check.outputs.should_skip }} + steps: + - id: skip_check + uses: fkirc/skip-duplicate-actions@master + with: + concurrent_skipping: "same_content" + skip_after_successful_duplicate: "true" + do_not_skip: '["pull_request", "workflow_dispatch", "schedule"]' + + # see: https://github.com/actions/setup-python + tests: + needs: skip_duplicate + if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }} + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.allow-failure }} + env: + # override make command to install directly in active python + CONDA_COMMAND: "" + services: + # Label used to access the service container + mongodb: + image: mongo:3.4.23 # DockerHub + ports: + - "27017:27017" + strategy: + matrix: + os: [ubuntu-latest] + python-version: ["3.6", "3.7", "3.8"] + allow-failure: [false] + test-case: [test-unit, test-func] + include: + # linter tests + - os: ubuntu-latest + python-version: 3.7 + allow-failure: false + test-case: check + # documentation build + - os: ubuntu-latest + python-version: 3.7 + allow-failure: false + test-case: docs + # coverage test + - os: ubuntu-latest + python-version: 3.7 + allow-failure: false + test-case: coverage + # smoke test of Docker image + - os: ubuntu-latest + python-version: 3.7 # doesn't matter which one (in docker), but match default of repo + allow-failure: false + test-case: test-docker + # EMS end-2-end Workflow tests + - os: ubuntu-latest + python-version: 3.7 + allow-failure: true + test-case: test-workflow + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: "0" + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install Dependencies + run: make install-pkg install-dev version + - name: Display Packages + run: pip freeze + #- name: Setup Environment Variables + # uses: c-py/action-dotenv-to-setenv@v2 + # with: + # env-file: ./ci/weaver.env + - name: Display Environment Variables + run: | + hash -r + env | sort + - name: Run Tests + run: make stop ${{ matrix.test-case }} + - name: Upload coverage report + uses: codecov/codecov-action@v1 + if: ${{ success() && matrix.test-case == 'coverage' }} + with: + files: ./reports/coverage.xml + fail_ci_if_error: true + verbose: true diff --git a/.gitignore b/.gitignore index 47a1835fd..016e385db 100644 --- a/.gitignore +++ b/.gitignore @@ -82,10 +82,5 @@ venv *.zip ./workflow[s] -# project local configurations -config/weaver.ini -config/wps_processes.yml -config/data_sources.json - # old 
project sources [Bb]in diff --git a/.pylintrc b/.pylintrc index 97955bc5d..810eb67c7 100644 --- a/.pylintrc +++ b/.pylintrc @@ -85,7 +85,6 @@ disable=C0111,missing-docstring, W0613,unused-argument, W0622,redefined-builtin, W0640,cell-var-from-loop, - W0703,broad-except, W0706,try-except-raise, W0707,raise-missing-from, W1508,invalid-envvar-default @@ -532,5 +531,4 @@ min-public-methods=0 # Exceptions that will emit a warning when being caught. Defaults to # "BaseException, Exception". -overgeneral-exceptions=BaseException, - Exception +overgeneral-exceptions=BaseException diff --git a/.travis.yml b/.travis.yml index 348c75be6..0a0dc2a16 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ python: - "3.7" # default test python - "2.7" - "3.6" - - "3.8" + - "3.8.3" # enforce minor as it default to 3.8.7 which conda cannot find env: global: - secure: j7fuAPDXdl0SF1My1f0e3hr8mq4idP41jsz98Y1hN30GGOmIputScxIwSjIBi8zFCJ+K4RleMIoWLhnFsMeMD4DapRqDe5YnJJFOyfiyplzEcn4yONYRJf1nzfXBrpoGSEibgr/e3uYK62bkantI+b0pBPNAKbRjbRKKwTKEAxWb1pfaAMEQMB8/Wmvo0XYAZRB96H4q/7Dtd4CaIqh99Fs/9TjaGdNNtDtjwL0AWPXNfamsA8oNW0frNp1lEYpJpRE+PHFiKWZWyrRqjDA/UNU2UE/2H1O53I6tUXGJjORrUGRp1M9Xq/NhC8C4l7KrdpYH7YkYdvRDts3+XmJZ5kCsGB35YLP8DF1yUXIL4ZMskegBsBKqKKgnXGo4uYkaHp0QR2r8v2tdO8q3JWGYLSCgGVu5YkHU6Nlqf98W5LQLYSuZEkLplrk9vISHNS6TI4G3m7huToY1HwOu01hTZHAEhr/ouUxOfu6pQs8ZYVCyj98o65KFlFCRa7qKHc2/lXVbFPaOeQmFYqCGfFMTB0G0DHlsyOJk3QzoTr0HxJlbEwh9XLJOAnGO/TwcOUZndlj0+lTuCTpeBhjHywr+b00HnYtJ3cVnUVjT8TZydG2ATogeKRvOe0m58ug36EUGwbjr0JoN2hHjeFNSdvTUzFFFmRBY9rps47rzEDyRBGc= @@ -34,9 +34,9 @@ services: - docker before_install: # setup config files for travis tests - - source tests/travis-ci/weaver.env - - cp -f tests/travis-ci/data_sources.json config/ - - cp -f tests/travis-ci/wps_processes.yml config/ + - source tests/smoke/weaver.env + - cp -f tests/smoke/data_sources.json config/ + - cp -f tests/smoke/wps_processes.yml config/ - cp -f config/weaver.ini.example config/weaver.ini # for 'make start' when needed # display details - make --version @@ -80,7 +80,7 @@ jobs: - stage: test name: "Extra Tests" script: - - make TESTS='(slow or testbed14) and not functional' test-spec + - make SPEC='(slow or testbed14) and not functional' test-spec - stage: test name: "Windows Test (experimental)" os: windows @@ -91,8 +91,8 @@ jobs: - choco install python3 make - export PATH="/c/Python38:/c/Python38/Scripts:$PATH" - python -m pip install --upgrade pip wheel - - source tests/travis-ci/weaver.env - - cp -f tests/travis-ci/data_sources.json ./ + - source tests/smoke/weaver.env + - cp -f tests/smoke/data_sources.json ./ - make --version - make info - stage: smoke-test diff --git a/CHANGES.rst b/CHANGES.rst index 3171449ec..65fd29020 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,10 +9,37 @@ Changes Changes: -------- - Add support of YAML format for loading ``weaver.data_sources`` definition. +- Pre-install ``Docker`` CLI in ``worker`` image to avoid bad practice of mounting it from the host. +- Adjust WPS request dispatching such that process jobs get executed by ``Celery`` worker as intended + (see `#21 `_ and `#126 `_). +- Move WPS XML endpoint functions under separate ``weaver.wps.utils`` and ``weaver.wps.views`` to remove the need to + constantly handle circular imports issues due to processing related operations that share some code. +- Move core processing of job operation by ``Celery`` worker under ``weaver.processes.execution`` in order to separate + those components from functions specific for producing WPS-REST API responses. 
+- Handle WPS-1/2 requests submitted by GET KVP or POST XML request with ``application/json`` in ``Accept`` header to + return the same body content as if directly calling their corresponding WPS-REST endpoints. +- Remove ``request`` parameter from every database store method since it was not used nor provided most of the time. +- Change all forbidden access responses related to visibility status to return ``403`` instead of ``401``. +- Add more tests for Docker applications and test suite execution with GitHub Actions. +- Add more details in sample configurations and provide an example ``docker-compose.yml`` configuration that defines a + *typical* `Weaver` API / Worker combination with ``docker-proxy`` for sibling container execution. +- Add captured ``stdout`` and ``stderr`` details in the job log following a CWL execution error, when retrievable. +- Document the `WPS` KVP/XML endpoint within the generated OpenAPI specification. +- Disable auto-generation of the ``request_options.yml`` file from the corresponding empty example file and allow the application + to start if no such configuration is provided. +- Remove all Python 2 backward-compatibility references and operations. +- Drop Python 2 and Python 3.5 support. Fixes: ------ -- No change. +- Target ``PyWPS-4.4`` to resolve multiple invalid dependency requirements breaking installed packages over builtin + Python packages and other compatibility fixes + (see `geopython/pywps #568 <https://github.com/geopython/pywps/issues/568>`_). +- Fix retrieval of database connection to avoid warning of ``MongoClient`` opened before fork of processes. +- Fix indirect dependency ``oauthlib`` missing from ``esgf-compute-api`` (``cwt``) package. +- Fix inconsistent ``python`` reference resolution of ``builtin`` applications when executed locally and in tests + (using virtual/conda environment) compared to within the Weaver Docker image (using OS Python). +- Fix many typing definitions.
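To illustrate the ``Accept`` header handling noted in the changes above, here is a minimal sketch (not part of the patch), assuming a locally running Weaver instance at ``http://localhost:4001`` with the default ``weaver.wps_path = /ows/wps`` from ``weaver.ini.example``:

```python
# Minimal sketch: the same WPS-1 KVP request returns XML by default,
# or the equivalent WPS-REST JSON body when 'Accept: application/json' is sent.
# Assumption: a Weaver instance listens on localhost:4001 (as in the smoke-test config).
import requests

WPS_URL = "http://localhost:4001/ows/wps"
params = {"service": "WPS", "request": "GetCapabilities", "version": "1.0.0"}

# default behaviour: classic WPS XML response
xml_resp = requests.get(WPS_URL, params=params)
print(xml_resp.headers.get("Content-Type"))  # expected: an XML media type

# with JSON Accept header: dispatched to the corresponding WPS-REST endpoint
json_resp = requests.get(WPS_URL, params=params, headers={"Accept": "application/json"})
print(json_resp.headers.get("Content-Type"))  # expected: application/json
print(json_resp.json())
```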
`1.14.0 `_ (2021-01-11) ======================================================================== diff --git a/Makefile b/Makefile index 5cf9edf33..cbef57a61 100644 --- a/Makefile +++ b/Makefile @@ -28,26 +28,33 @@ CONDA_ENV_REAL_TARGET_PATH := $(realpath $(CONDA_ENV_PATH)) CONDA_ENV_REAL_ACTIVE_PATH := $(realpath ${CONDA_PREFIX}) # environment already active - use it directly ifneq ("$(CONDA_ENV_REAL_ACTIVE_PATH)", "") - CONDA_ENV_MODE := [using active environment] - CONDA_ENV := $(notdir $(CONDA_ENV_REAL_ACTIVE_PATH)) - CONDA_CMD := + CONDA_ENV_MODE := [using active environment] + CONDA_ENV := $(notdir $(CONDA_ENV_REAL_ACTIVE_PATH)) + CONDA_CMD := endif # environment not active but it exists - activate and use it ifneq ($(CONDA_ENV_REAL_TARGET_PATH), "") - CONDA_ENV := $(notdir $(CONDA_ENV_REAL_TARGET_PATH)) + CONDA_ENV := $(notdir $(CONDA_ENV_REAL_TARGET_PATH)) endif # environment not active and not found - create, activate and use it ifeq ("$(CONDA_ENV)", "") - CONDA_ENV := $(APP_NAME) + CONDA_ENV := $(APP_NAME) endif # update paths for environment activation ifeq ("$(CONDA_ENV_REAL_ACTIVE_PATH)", "") - CONDA_ENV_MODE := [will activate environment] - CONDA_CMD := source "$(CONDA_HOME)/bin/activate" "$(CONDA_ENV)"; + CONDA_ENV_MODE := [will activate environment] + CONDA_CMD := source "$(CONDA_HOME)/bin/activate" "$(CONDA_ENV)"; endif DOWNLOAD_CACHE ?= $(APP_ROOT)/downloads PYTHON_VERSION ?= `python -c 'import platform; print(platform.python_version())'` -PIP_XARGS ?= --use-feature=2020-resolver +PIP_USE_FEATURE := `python -c '\ + import pip; \ + from distutils.version import LooseVersion; \ + print(LooseVersion(pip.__version__) < LooseVersion("21.0"))'` +PIP_XARGS ?= +ifeq ("$(PIP_USE_FEATURE)", "True") + PIP_XARGS := --use-feature=2020-resolver $(PIP_XARGS) +endif # choose conda installer depending on your OS CONDA_URL = https://repo.continuum.io/miniconda @@ -66,7 +73,7 @@ REPORTS_DIR := $(APP_ROOT)/reports .DEFAULT_GOAL := help -## --- Informative targets --- ## +## -- Informative targets ------------------------------------------------------------------------------------------- ## .PHONY: all all: help @@ -86,8 +93,14 @@ _NORMAL := \033[0m .PHONY: help # note: use "\#\#" to escape results that would self-match in this target's search definition help: ## print this help message (default) - @echo "$(_SECTION)=== $(APP_NAME) help ===$(_NORMAL)" - @echo "Please use 'make <target>' where <target> is one of:" + @echo "$(_SECTION)=======================================$(_NORMAL)" + @echo "$(_SECTION) $(APP_NAME) help $(_NORMAL)" + @echo "$(_SECTION)=======================================$(_NORMAL)" + @echo "Please use 'make <target>' where <target> is one of below options." + @echo "" + @echo "NOTE:" + @echo " Targets suffixed '-only' can be called as '<target>' to run setup before their main operation."
+ @echo "" # @grep -E '^[a-zA-Z_-]+:.*?\#\# .*$$' $(MAKEFILE_LIST) \ # | awk 'BEGIN {FS = ":.*?\#\# "}; {printf " $(_TARGET)%-24s$(_NORMAL) %s\n", $$1, $$2}' @grep -E '\#\#.*$$' "$(APP_ROOT)/$(MAKEFILE_NAME)" \ @@ -96,6 +109,9 @@ help: ## print this help message (default) /:/ {printf " $(_TARGET)%-24s$(_NORMAL) %s\n", $$1, $$2} \ ' +.PHONY: targets +targets: help + .PHONY: version version: ## display current version @-echo "$(APP_NAME) version: $(APP_VERSION)" @@ -110,14 +126,14 @@ info: ## display make information @echo " Conda Env Name $(CONDA_ENV)" @echo " Conda Env Path $(CONDA_ENV_REAL_ACTIVE_PATH)" @echo " Conda Binary $(CONDA_BIN)" - @echo " Conda Actication $(CONDA_ENV_MODE)" + @echo " Conda Activation $(CONDA_ENV_MODE)" @echo " Conda Command $(CONDA_CMD)" @echo " Application Name $(APP_NAME)" @echo " Application Root $(APP_ROOT)" - @echo " Donwload Cache $(DOWNLOAD_CACHE)" + @echo " Download Cache $(DOWNLOAD_CACHE)" @echo " Docker Repository $(DOCKER_REPO)" -## -- Conda targets -- ## +## -- Conda targets ------------------------------------------------------------------------------------------------- ## .PHONY: conda-base conda-base: ## obtain and install a missing conda distribution @@ -160,10 +176,10 @@ conda-pinned: conda-env ## pin the conda version .PHONY: conda-env-export conda-env-export: ## export the conda environment - @echo "Exporting conda enviroment..." + @echo "Exporting conda environment..." @test -d $(CONDA_ENV_PATH) && "$(CONDA_BIN)" env export -n $(CONDA_ENV) -f environment.yml -## -- Build targets -- ## +## -- Build targets ------------------------------------------------------------------------------------------------- ## .PHONY: install install: install-all ## alias for 'install-all' target @@ -177,25 +193,25 @@ install-all: install-sys install-pkg install-pip install-dev ## install applica .PHONY: install-dev install-dev: install-pip ## install development and test dependencies @echo "Installing development packages with pip..." - @-bash -c '$(CONDA_CMD) pip install $(PIP_XARGS) -r $(APP_ROOT)/requirements-dev.txt' + @-bash -c '$(CONDA_CMD) pip install $(PIP_XARGS) -r "$(APP_ROOT)/requirements-dev.txt"' @echo "Install with pip complete. Test service with 'make test*' variations." .PHONY: install-pkg install-pkg: install-pip ## install application package dependencies @echo "Installing base packages with pip..." - @-bash -c "$(CONDA_CMD) pip install $(PIP_XARGS) -r $(APP_ROOT)/requirements.txt --no-cache-dir" + @-bash -c "$(CONDA_CMD) pip install $(PIP_XARGS) -r "$(APP_ROOT)/requirements.txt" --no-cache-dir" @echo "Install with pip complete." # don't use 'PIP_XARGS' in this case since extra features could not yet be supported by pip being installed/updated .PHONY: install-sys install-sys: conda-env ## install system dependencies and required installers/runners @echo "Installing system dependencies..." - @bash -c '$(CONDA_CMD) pip install --upgrade -r $(APP_ROOT)/requirements-sys.txt' + @bash -c '$(CONDA_CMD) pip install --upgrade -r "$(APP_ROOT)/requirements-sys.txt"' .PHONY: install-pip install-pip: ## install application as a package to allow import from another python package @echo "Installing package with pip..." - @-bash -c '$(CONDA_CMD) pip install $(PIP_XARGS) $(APP_ROOT)' + @-bash -c '$(CONDA_CMD) pip install $(PIP_XARGS) --upgrade -e "$(APP_ROOT)" --no-cache' @echo "Install with pip complete." 
.PHONY: install-raw @@ -204,7 +220,7 @@ install-raw: ## install without any requirements or dependencies (suppose everyt @-bash -c '$(CONDA_CMD) pip install $(PIP_XARGS) -e "$(APP_ROOT)" --no-deps' @echo "Install package complete." -## -- Cleanup targets -- ## +## -- Cleanup targets ----------------------------------------------------------------------------------------------- ## .PHONY: clean clean: clean-all ## alias for 'clean-all' target @@ -236,7 +252,9 @@ clean-docs: install-dev clean-docs-dirs ## remove documentation artefacts .PHONY: clean-docs-dirs clean-docs-dirs: ## remove documentation artefacts (minimal) @echo "Removing documentation directories..." + @-rm -fr "$(APP_ROOT)/docs/_build" @-rm -fr "$(APP_ROOT)/docs/build" + @-rm -fr "$(APP_ROOT)/docs/source/autoapi" @-rm -fr "$(APP_ROOT)/docs/html" @-rm -fr "$(APP_ROOT)/docs/xml" @@ -253,6 +271,8 @@ clean-test: ## remove files created by tests and coverage analysis @-rm -f "$(APP_ROOT)/.coverage" @-rm -f "$(APP_ROOT)/coverage.*" @-rm -fr "$(APP_ROOT)/coverage" + @-rm -fr "$(REPORTS_DIR)/coverage" + @-rm -fr "$(REPORTS_DIR)/test-*.xml" .PHONY: clean-reports clean-reports: ## remove report files generated by code checks @@ -261,137 +281,235 @@ clean-reports: ## remove report files generated by code checks .PHONY: clean-dist clean-dist: clean ## remove *all* files that are not controlled by 'git' except *.bak and makefile configuration @echo "Cleaning distribution..." - @git diff --quiet HEAD || echo "There are uncommited changes! Not doing 'git clean'..." + @git diff --quiet HEAD || echo "There are uncommitted changes! Not doing 'git clean'..." @-git clean -dfx -e *.bak -e Makefile.config -## -- Testing targets -- ## +## -- Testing targets ----------------------------------------------------------------------------------------------- ## +## -- [variants '-only' without '-only' suffix are also available with pre-install setup] + +# -v: list of test names with PASS/FAIL/SKIP/ERROR/etc. next to it +# -vv: extended collection of stdout/stderr on top of test results +TEST_VERBOSITY ?= -v + +# autogen tests variants with pre-install of dependencies using the '-only' target references +TESTS := unit func workflow online offline no-tb14 spec coverage +TESTS := $(addprefix test-, $(TESTS)) + +$(TESTS): test-%: install-dev test-%-only .PHONY: test -test: clean-test test-all ## alias for 'test-all' target +test: clean-test test-all ## alias for 'test-all' target .PHONY: test-all -test-all: install-dev ## run all tests (including long running tests) +test-all: install-dev test-only ## run all tests (including long running tests) + +.PHONY: test-only +test-only: mkdir-reports ## run all tests but without prior validation of installed dependencies @echo "Running all tests (including slow and online tests)..." - @bash -c "$(CONDA_CMD) pytest tests -v --junitxml $(APP_ROOT)/tests/results.xml" + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + --junitxml "$(REPORTS_DIR)/test-results.xml"' -.PHONY: test-unit -test-unit: install-dev ## run unit tests (skip long running and online tests) +.PHONY: test-unit-only +test-unit-only: mkdir-reports ## run unit tests (skip long running and online tests) @echo "Running tests (skip slow and online tests)..." 
- @bash -c "$(CONDA_CMD) pytest tests -v -m 'not slow and not online and not functional' \ - --junitxml $(APP_ROOT)/tests/results.xml" + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + -m "not slow and not online and not functional" --junitxml "$(REPORTS_DIR)/test-results.xml"' -.PHONY: test-func -test-func: install-dev ## run functional tests (online and usage specific) +.PHONY: test-func-only +test-func-only: mkdir-reports ## run functional tests (online and usage specific) @echo "Running functional tests..." - @bash -c "$(CONDA_CMD) pytest tests -v -m 'functional' --junitxml $(APP_ROOT)/tests/results.xml" + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + -m "functional" --junitxml "$(REPORTS_DIR)/test-results.xml"' + +.PHONY: test-workflow-only +test-workflow-only: mkdir-reports ## run EMS workflow End-2-End tests + @echo "Running workflow tests..." + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + -m "workflow" --junitxml "$(REPORTS_DIR)/test-results.xml"' -.PHONY: test-online -test-online: install-dev ## run online tests (running instance required) +.PHONY: test-online-only +test-online-only: mkdir-reports ## run online tests (running instance required) @echo "Running online tests (running instance required)..." - @bash -c "$(CONDA_CMD) pytest tests -v -m 'online' --junitxml $(APP_ROOT)/tests/results.xml" + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + -m "online" --junitxml "$(REPORTS_DIR)/test-results.xml"' -.PHONY: test-offline -test-offline: install-dev ## run offline tests (not marked as online) +.PHONY: test-offline-only +test-offline-only: mkdir-reports ## run offline tests (not marked as online) @echo "Running offline tests (not marked as online)..." - @bash -c "$(CONDA_CMD) pytest tests -v -m 'not online' --junitxml $(APP_ROOT)/tests/results.xml" + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + -m "not online" --junitxml "$(REPORTS_DIR)/test-results.xml"' -.PHONY: test-no-tb14 -test-no-tb14: install-dev ## run all tests except ones marked for 'Testbed-14' +.PHONY: test-no-tb14-only +test-no-tb14-only: mkdir-reports ## run all tests except ones marked for 'Testbed-14' @echo "Running all tests except ones marked for 'Testbed-14'..." - @bash -c "$(CONDA_CMD) pytest tests -v -m 'not testbed14' --junitxml $(APP_ROOT)/tests/results.xml" + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + -m "not testbed14" --junitxml "$(REPORTS_DIR)/test-results.xml"' -.PHONY: test-spec -test-spec: install-dev ## run tests with custom input specification (pytest format) [make TESTS='' test-spec] +.PHONY: test-spec-only +test-spec-only: mkdir-reports ## run tests with custom specification (pytest format) [make SPEC='' test-spec] @echo "Running custom tests from input specification..." 
- @[ "${TESTS}" ] || ( echo ">> 'TESTS' is not set"; exit 1 ) - @bash -c "$(CONDA_CMD) pytest tests -v -m '${TESTS}' --junitxml $(APP_ROOT)/tests/results.xml" + @[ "${SPEC}" ] || ( echo ">> 'SPEC' is not set"; exit 1 ) + @bash -c '$(CONDA_CMD) pytest tests $(TEST_VERBOSITY) \ + -m "${SPEC}" --junitxml "$(REPORTS_DIR)/test-results.xml"' .PHONY: test-smoke -test-smoke: docker-test ## alias to 'docker-test' executing smoke test of built docker images +test-smoke: docker-test ## alias to 'docker-test' executing smoke test of built docker images -.PHONY: coverage -coverage: mkdir-reports install-dev ## run all tests using coverage analysis +.PHONY: test-docker +test-docker: docker-test ## alias to 'docker-test' execution smoke test of built docker images + +.PHONY: test-coverage-only +test-coverage-only: mkdir-reports ## run all tests using coverage analysis @echo "Running coverage analysis..." @bash -c '$(CONDA_CMD) coverage run -m pytest "$(APP_ROOT)/tests" || true' @bash -c '$(CONDA_CMD) coverage xml --rcfile="$(APP_ROOT)/setup.cfg" -i -o "$(REPORTS_DIR)/coverage.xml"' @bash -c '$(CONDA_CMD) coverage report --rcfile="$(APP_ROOT)/setup.cfg" -i -m' @bash -c '$(CONDA_CMD) coverage html --rcfile="$(APP_ROOT)/setup.cfg" -d "$(REPORTS_DIR)/coverage"' -## -- Static code check targets -- ## +.PHONY: coverage +coverage: test-coverage ## alias to run test with coverage analysis + +## -- Static code check targets ------------------------------------------------------------------------------------- ## +## -- [variants '-only' without '-only' suffix are also available with pre-install setup] + +# autogen check variants with pre-install of dependencies using the '-only' target references +CHECKS := pep8 lint security doc8 links imports +CHECKS := $(addprefix check-, $(CHECKS)) + +$(CHECKS): check-%: install-dev check-%-only .PHONY: mkdir-reports mkdir-reports: @mkdir -p "$(REPORTS_DIR)" .PHONY: check -check: check-all ## alias for 'check-all' target +check: check-all ## alias for 'check-all' target + +.PHONY: check-only +check-only: $(addsuffix -only, $(CHECKS)) .PHONY: check-all -check-all: install-dev check-pep8 check-lint check-imports check-security check-doc8 check-links ## run all code checks +check-all: install-dev $(CHECKS) ## run all code checks -.PHONY: check-pep8 -check-pep8: mkdir-reports install-dev ## run PEP8 code style checks +.PHONY: check-pep8-only +check-pep8-only: mkdir-reports ## run PEP8 code style checks @echo "Running pep8 code style checks..." @-rm -fr "$(REPORTS_DIR)/check-pep8.txt" @bash -c '$(CONDA_CMD) \ flake8 --config="$(APP_ROOT)/setup.cfg" --output-file="$(REPORTS_DIR)/check-pep8.txt" --tee' -.PHONY: check-lint -check-lint: mkdir-reports install-dev ## run linting code style checks +.PHONY: check-lint-only +check-lint-only: mkdir-reports ## run linting code style checks @echo "Running linting code style checks..." @-rm -fr "$(REPORTS_DIR)/check-lint.txt" @bash -c '$(CONDA_CMD) \ pylint \ --load-plugins pylint_quotes \ - --rcfile="$(APP_ROOT)/.pylintrc" "$(APP_ROOT)/weaver" "$(APP_ROOT)/tests" \ + --rcfile="$(APP_ROOT)/.pylintrc" \ --reports y \ + "$(APP_ROOT)/weaver" "$(APP_ROOT)/tests" \ 1> >(tee "$(REPORTS_DIR)/check-lint.txt")' -.PHONY: check-security -check-security: mkdir-reports install-dev ## run security code checks +.PHONY: check-security-only +check-security-only: mkdir-reports ## run security code checks @echo "Running security code checks..." 
@-rm -fr "$(REPORTS_DIR)/check-security.txt" @bash -c '$(CONDA_CMD) \ bandit -v --ini "$(APP_ROOT)/setup.cfg" -r \ 1> >(tee "$(REPORTS_DIR)/check-security.txt")' -.PHONY: check-doc8 -check-doc8: mkdir-reports install-dev ## run doc8 documentation style checks +.PHONY: check-doc8-only +check-doc8-only: mkdir-reports ## run doc8 documentation style checks @echo "Running doc8 doc style checks..." @-rm -fr "$(REPORTS_DIR)/check-doc8.txt" @bash -c '$(CONDA_CMD) \ doc8 "$(APP_ROOT)/docs" \ 1> >(tee "$(REPORTS_DIR)/check-doc8.txt")' -.PHONY: check-links -check-links: install-dev ## check all external links in documentation for integrity +.PHONY: check-links-only +check-links-only: ## check all external links in documentation for integrity @echo "Running link checks on docs..." @bash -c '$(CONDA_CMD) $(MAKE) -C "$(APP_ROOT)/docs" linkcheck' -.PHONY: check-imports -check-imports: mkdir-reports install-dev ## run imports code checks +.PHONY: check-imports-only +check-imports-only: mkdir-reports ## run imports code checks @echo "Running import checks..." @-rm -fr "$(REPORTS_DIR)/check-imports.txt" @bash -c '$(CONDA_CMD) \ isort --check-only --diff --recursive $(APP_ROOT) \ 1> >(tee "$(REPORTS_DIR)/check-imports.txt")' -.PHONY: fix-imports -fix-imports: mkdir-reports install-dev ## apply import code checks corrections +# autogen fix variants with pre-install of dependencies using the '-only' target references +FIXES := imports lint docf +FIXES := $(addprefix fix-, $(FIXES)) + +$(FIXES): fix-%: install-dev fix-%-only + +.PHONY: fix +fix: fix-all ## alias for 'fix-all' target + +.PHONY: fix-only +fix-only: $(addsuffix -only, $(FIXES)) + +.PHONY: fix-all +fix-all: install-dev $(FIXES) ## fix all code check problems automatically + +.PHONY: fix-imports-only +fix-imports-only: mkdir-reports ## apply import code checks corrections @echo "Fixing flagged import checks..." @-rm -fr "$(REPORTS_DIR)/fixed-imports.txt" @bash -c '$(CONDA_CMD) \ isort --recursive $(APP_ROOT) \ 1> >(tee "$(REPORTS_DIR)/fixed-imports.txt")' -## -- Documentation targets -- ## +.PHONY: fix-lint-only +fix-lint-only: mkdir-reports ## fix some PEP8 code style problems automatically + @echo "Fixing PEP8 code style problems..." + @-rm -fr "$(REPORTS_DIR)/fixed-lint.txt" + @bash -c '$(CONDA_CMD) \ + autopep8 -v -j 0 -i -r $(APP_ROOT) \ + 1> >(tee "$(REPORTS_DIR)/fixed-lint.txt")' + +# FIXME: move parameters to setup.cfg when implemented (https://github.com/myint/docformatter/issues/10) +.PHONY: fix-docf-only +fix-docf-only: mkdir-reports ## fix some PEP8 code documentation style problems automatically + @echo "Fixing PEP8 code documentation problems..." + @-rm -fr "$(REPORTS_DIR)/fixed-docf.txt" + @bash -c '$(CONDA_CMD) \ + docformatter \ + --pre-summary-newline \ + --wrap-descriptions 0 \ + --wrap-summaries 120 \ + --make-summary-multi-line \ + --in-place \ + --recursive \ + $(APP_ROOT) \ + 1> >(tee "$(REPORTS_DIR)/fixed-docf.txt")' + +.PHONY: fixme-list-only +fixme-list-only: mkdir-reports ## run linting code style checks + @echo "Listing code that requires fixes..." 
+ @echo '[MISCELLANEOUS]\nnotes=FIXME,TODO,HACK' > "$(REPORTS_DIR)/fixmerc" + @bash -c '$(CONDA_CMD) \ + pylint \ + --disable=all,use-symbolic-message-instead --enable=miscellaneous,W0511 \ + --score n --persistent n \ + --rcfile="$(REPORTS_DIR)/fixmerc" \ + -f colorized \ + "$(APP_ROOT)/weaver" "$(APP_ROOT)/tests" \ + 1> >(tee "$(REPORTS_DIR)/fixme.txt")' + +.PHONY: fixme-list +fixme-list: install-dev fixme-list-only + +## -- Documentation targets ----------------------------------------------------------------------------------------- ## .PHONY: docs docs: install-dev clean-docs ## generate HTML documentation with Sphinx @echo "Generating docs with Sphinx..." @bash -c '$(CONDA_CMD) $(MAKE) -C $@ html' -## -- Versioning targets -- ## +## -- Versioning targets -------------------------------------------------------------------------------------------- ## # Bumpversion 'dry' config # if 'dry' is specified as target, any bumpversion call using 'BUMP_XARGS' will not apply changes @@ -409,7 +527,7 @@ bump: ## bump version using VERSION specified as user input [make VERSION=> 'VERSION' is not set"; exit 1 ) @-bash -c '$(CONDA_CMD) bump2version $(BUMP_XARGS) --new-version "${VERSION}" patch;' -## -- Docker targets -- ## +## -- Docker targets ------------------------------------------------------------------------------------------------ ## .PHONY: docker-info docker-info: ## obtain docker image information @@ -462,13 +580,14 @@ docker-push: docker-push-base docker-push-manager docker-push-worker ## push al # if compose up fails, print the logs and force stop # if compose up succeeds, query weaver to get frontpage response -DOCKER_TEST_COMPOSES := -f "$(APP_ROOT)/tests/travis-ci/docker-compose.smoke-test.yml" +DOCKER_TEST_COMPOSES := -f "$(APP_ROOT)/tests/smoke/docker-compose.smoke-test.yml" .PHONY: docker-test docker-test: docker-build stop ## execute smoke test of the built images (validate that they boots and reply) @echo "Smoke test of built application docker images" docker-compose $(DOCKER_TEST_COMPOSES) up -d sleep 10 ## leave some time to boot - curl localhost:4001 | grep "Weaver Information" || \ + @echo "Pinging Weaver API entrypoint to validate response..." + @curl localhost:4001 | grep "Weaver Information" || \ ( docker-compose $(DOCKER_TEST_COMPOSES) logs weaver worker || true && \ docker-compose $(DOCKER_TEST_COMPOSES) stop; exit 1 ) docker-compose $(DOCKER_TEST_COMPOSES) stop @@ -494,7 +613,7 @@ docker-clean: ## remove all built docker images (only matching current/latest v docker rmi -f "$(APP_NAME):latest" || true docker rmi -f "$(APP_NAME):base" || true -## --- Launchers targets --- ## +## -- Launchers targets --------------------------------------------------------------------------------------------- ## .PHONY: start start: install-run ## start application instance(s) with gunicorn (pserve) diff --git a/README.rst b/README.rst index 552063a95..c0cb8990c 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,9 @@ For more details, see `Configuration`_ section. * - dependencies - | |py_ver| |requires| |pyup| * - build status - - | |travis_latest| |travis_tagged| |readthedocs| |coverage| |codacy| + - | |readthedocs| |coverage| |codacy| + * - tests status + - | |github_latest| |github_tagged| |travis_latest| |travis_tagged| |coverage| |codacy| * - releases - | |version| |commits-since| |license| @@ -55,6 +57,14 @@ For more details, see `Configuration`_ section. :alt: Dependencies Status :target: https://pyup.io/account/repos/github/crim-ca/weaver/ +.. 
|github_latest| image:: https://img.shields.io/github/workflow/status/crim-ca/weaver/Tests/master?label=master + :alt: Github Actions CI Build Status (master branch) + :target: https://github.com/crim-ca/weaver/actions?query=workflow%3ATests+branch%3Amaster + +.. |github_tagged| image:: https://img.shields.io/github/workflow/status/crim-ca/weaver/Tests/1.14.0?label=1.14.0 + :alt: Github Actions CI Build Status (latest tag) + :target: https://github.com/crim-ca/weaver/actions?query=workflow%3ATests+branch%3A1.14.0 + .. |travis_latest| image:: https://img.shields.io/travis/com/crim-ca/weaver/master.svg?label=master :alt: Travis-CI Build Status (master branch) :target: https://travis-ci.com/crim-ca/weaver diff --git a/config/data_sources.json.example b/config/data_sources.json.example deleted file mode 100644 index 3ca602412..000000000 --- a/config/data_sources.json.example +++ /dev/null @@ -1,18 +0,0 @@ -{ - "crim": { - "netloc":"ogc-ades.crim.ca", - "ades":"https://ogc-ades.crim.ca/weaver" - }, - "localhost": { - "netloc":"localhost", - "ades":"https://localhost:4001", - "default":"true" - }, - "opensearchdefault": { - "collection_id": "", - "accept_schemes": ["http", "https"], - "rootdir": "", - "ades": "http://localhost:4001", - "osdd_url": "http://example.com/opensearchdescription.xml" - } -} diff --git a/config/data_sources.yml.example b/config/data_sources.yml.example new file mode 100644 index 000000000..6b6fb4330 --- /dev/null +++ b/config/data_sources.yml.example @@ -0,0 +1,35 @@ +# List Data-Source known locations such that Weaver configured in EMS mode can dispatch processes execution to +# corresponding ADES when input data references match the provided locations. +# +# For the expected Schema Definition, see module: +# weaver.processes.sources +# +# NOTE: +# This configuration can be formatted in YAML or JSON at your convenience. +# +example: + # since this is not the default (see localhost), + # only data matching that location will be forwarded to corresponding ADES + netloc: "example-data.com" + ades: "https://example.com/ADES" + +localhost: + # default is define here, so any unmatched data-source location will fallback to this ADES + # since that default is 'localhost', default in this case will indicate "run it locally" + # another ADES location could be set as default to dispatch unknown data-source executions to that specific instance + netloc: "localhost" + ades: "https://localhost:4001" + default: true + +opensearchdefault: + # data-sources that require OpenSearch capabilities require more configuration details + # this applies to processes that employ OpenSearch query definitions to define process inputs + # see details and examples: + # https://pavics-weaver.readthedocs.io/en/latest/processes.html#opensearch-data-source + # tests/json_examples/opensearch_process.json + # tests/json_examples/eoimage_inputs_example.json + ades: "http://localhost:4001" + collection_id: "" + accept_schemes: ["http", "https"] + rootdir: "" + osdd_url: "http://example.com/opensearchdescription.xml" diff --git a/config/weaver.ini.example b/config/weaver.ini.example index eb9ea1f70..b251bcb3f 100644 --- a/config/weaver.ini.example +++ b/config/weaver.ini.example @@ -1,4 +1,4 @@ -# NOTE: This configuration file is employed by Travis-CI smoke test to immediately identify any problematic setting. +# NOTE: This configuration file is employed by Docker smoke test to immediately identify any problematic setting. 
### # app configuration @@ -34,6 +34,11 @@ mongodb.db_name = weaver weaver.configuration = ems weaver.url = http://localhost:4001 +# --- Weaver Logging --- +# When provided, this value will override every weaver-specific level defined in other section logging configuration +# Otherwise, their individual levels are employed as provided by other logging sections from this configuration file +weaver.log_level = + # --- Weaver requests extension flags --- # SSL verification should be enabled for secured connections # setting is available for convenience, debug purposes and local environments @@ -56,7 +61,7 @@ weaver.wps = true weaver.wps_url = weaver.wps_path = /ows/wps weaver.wps_output = true -weaver.wps_output_dir = /tmp +weaver.wps_output_dir = /tmp/weaver/wps-outputs weaver.wps_output_url = weaver.wps_output_path = /wpsoutputs weaver.wps_output_s3_bucket = @@ -109,8 +114,8 @@ weaver.wps_email_notify_template_default = # additional processes to load at startup (see 'wps_processes.yml.example') weaver.wps_processes_file = wps_processes.yml # known remote ADES for processes redirection based on data-sources when using EMS configuration -# (see 'data_sources.json.example' and 'weaver.processes.sources' for more details) -weaver.data_sources = data_sources.json +# (see 'data_sources.yml.example' and 'weaver.processes.sources' for more details) +weaver.data_sources = data_sources.yml # comma-separated list of key=value options to add to settings weaver.extra_options = diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index 9faa29fa1..ea9fc971b 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -20,10 +20,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates \ netbase \ gcc \ - python3-dev \ - && rm -rf /var/lib/apt/lists/* \ - && pip install --no-cache-dir --upgrade pip setuptools \ - && pip install --no-cache-dir -e ${APP_DIR} + git \ + && pip install --no-cache-dir --upgrade -r requirements-sys.txt \ + && pip install --no-cache-dir -r requirements.txt \ + && pip install --no-cache-dir -e ${APP_DIR} \ + && apt-get remove -y \ + gcc \ + git \ + && rm -rf /var/lib/apt/lists/* # install package COPY ./ ${APP_DIR} diff --git a/docker/Dockerfile-worker b/docker/Dockerfile-worker index 3e547108d..a5fd31fbf 100644 --- a/docker/Dockerfile-worker +++ b/docker/Dockerfile-worker @@ -1,5 +1,21 @@ FROM weaver:base LABEL description.short="Weaver Worker" +RUN apt-get update && apt-get install -y --no-install-recommends \ + apt-transport-https \ + curl \ + gnupg \ + gnupg-agent \ + software-properties-common \ + # NOTE: Only 'worker' image should be using docker, 'manager' is only for API. + && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ + && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu bionic stable" \ + && apt update \ + # NOTE: + # Only install CLI package, 'docker-ce' and 'containerd.io' not required as they should be provided by host. + # Docker sibliing execution is expected. See 'docker/docker-compose.yml.example' for details. 
+ && apt install --no-install-recommends docker-ce-cli \ + && rm -rf /var/lib/apt/lists/* + # run app CMD celery worker -A pyramid_celery.celery_app --ini "${APP_CONFIG_DIR}/weaver.ini" diff --git a/docker/docker-compose.yml.example b/docker/docker-compose.yml.example new file mode 100644 index 000000000..136f61379 --- /dev/null +++ b/docker/docker-compose.yml.example @@ -0,0 +1,126 @@ +version: "3.4" + +x-logging: + &default-logging + driver: "json-file" + options: + max-size: "200k" + max-file: "10" + +services: + + weaver: + image: pavics/weaver:latest-manager + container_name: weaver-api + ports: + - "4001:4001" + environment: + HOSTNAME: localhost + FORWARDED_ALLOW_IPS: "*" + # ensure other referenced services are created beforehand to avoid booting timing issues + depends_on: + - worker + links: + - mongodb + volumes: + # Weaver configuration + - ./components/weaver/config/weaver/weaver.ini:/opt/local/src/weaver/config/weaver.ini + - ./components/weaver/config/weaver/data_sources.yml:/opt/local/src/weaver/config/data_sources.yml + - ./components/weaver/config/weaver/request_options.yml:/opt/local/src/weaver/config/request_options.yml + - ./components/weaver/config/weaver/wps_processes.yml:/opt/local/src/weaver/config/wps_processes.yml + # WARNING: see detail in 'worker' definition + - /tmp/weaver/wps-outputs:/tmp/weaver/wps-outputs + networks: + - default + restart: always + logging: *default-logging + + worker: + image: pavics/weaver:latest-worker + container_name: weaver-worker + environment: + # WARNING: + # Only the Worker should have access to docker to be able to execute docker run, pull, etc. + # Allowed commands will depend of provided Docker API permissions in 'docker-proxy' + DOCKER_HOST: "tcp://docker-proxy:2375" + networks: + # WARNING: + # Only the Worker should be able to connect to docker-proxy network to obtain access to Docker API. + # This avoids Weaver-API to be able to run Docker commands directly. + # Furthermore, only the Worker has the 'docker-cli' preinstalled. + - docker-proxy + links: + - mongodb + volumes: + # Weaver configuration + - ./config/weaver.ini:/opt/local/src/weaver/config/weaver.ini + - ./config/data_sources.yml:/opt/local/src/weaver/config/data_sources.yml + - ./config/request_options.yml:/opt/local/src/weaver/config/request_options.yml + # WARNING: + # If you need to provide some credentials to provide access to protected docker repositories + - ~/.docker/config.json:/root/.docker/config.json + # NOTE: + # Working and output directories must be mounted as exact same path (e.g.: /some/path:/some/path) + # to avoid mismatching locations between container-side path resolution of cwltool stagedir/tmpdir/outdir + # and corresponding server-side (host) runtime docker directories since Docker Application Packages will + # not be running inside this docker (not docker-in-docker), but next to it (sibling-dockers). + # Mapping of paths is needed because some part of the code runs inside Weaver/Worker containers, but actual + # execution of the children Application Package of CWL processes calls 'docker run' from the host machine + # and will expect to find the same directory locations. + - /tmp/weaver/wps-outputs:/tmp/weaver/wps-outputs + - /tmp/weaver/wps-workdir:/tmp/weaver/wps-workdir + restart: always + logging: *default-logging + + docker-proxy: + # | Giving access to your Docker socket could mean giving root access to your host, or even to your whole swarm, + # | but some services require hooking into that socket to react to events, etc. 
+ # | Using this proxy lets you block anything you consider those services should not do. + # https://github.com/Tecnativa/docker-socket-proxy + # Other references: + # https://stackoverflow.com/a/47293798/5936364 + # https://docs.docker.com/engine/api/ + image: tecnativa/docker-socket-proxy + container_name: docker-proxy + environment: + # Docker API permissions + # WARNING: + # These variables control which operations the docker-proxy allows from other containers. + # See above references and make sure you understand what you enable. Potential security risk. + - SERVICES=0 + - TASKS=0 + - NETWORKS=0 + - NODES=0 + - INFO=1 + - EXEC=1 + - IMAGES=1 + - VOLUMES=1 + - CONTAINERS=1 + - POST=1 + # access to docker for sibling execution + volumes: + - /var/run/docker.sock:/var/run/docker.sock + # WARNING: + # Never expose this container's port to a public network. + # Other containers that require docker (weaver-worker) should communicate only through 'docker-proxy' network. + networks: + - docker-proxy + logging: *default-logging + # NOTE: Remote swarm as needed (requires more Docker API permissions above) + #deploy: + # placement: + # constraints: [node.role == manager] + + mongodb: + image: mongo:3.4.0 + container_name: mongodb + volumes: + - /data/mongodb_persist:/data/db + # MongoDB crash with permission denied errors if the command is not overridden like this + command: bash -c 'chown -R mongodb:mongodb /data && chmod -R 755 /data && mongod' + restart: always + logging: *default-logging + +networks: + docker-proxy: + driver: bridge diff --git a/docs/.gitignore b/docs/.gitignore index 378eac25d..18a0f924a 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1 +1,3 @@ +_build build +source/autoapi diff --git a/docs/source/conf.py b/docs/source/conf.py index 2905f69f2..1ba3a5445 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -419,8 +419,6 @@ def doc_redirect_include(file_path): "https://pavics-weaver.readthedocs.io/en/latest/api.html", # ignore requires.io which just fails periodically - not critical link "https://requires.io/github/crim-ca/weaver/.*", - # FIXME: tmp disable due to Retry-After header for rate-limiting by Github not respected - # (see: https://github.com/sphinx-doc/sphinx/issues/7388) "https://github.com/crim-ca/weaver/*", # limit only our repo so others are still checked ] diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index e07320e0d..39c863e3f 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -229,7 +229,7 @@ Configuration of Data Sources .. todo:: complete docs -`data_sources.json.example`_ +`data_sources.yml.example`_ Configuration of WPS Processes diff --git a/docs/source/references.rst b/docs/source/references.rst index 19a4da3a0..b0f1ccdc3 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -81,7 +81,7 @@ .. |weaver-config| replace:: ``weaver/config`` .. _weaver-config: ../../../config .. _weaver.ini.example: ../../../config/weaver.ini.example -.. _data_sources.json.example: ../../../config/data_sources.json.example +.. _data_sources.yml.example: ../../../config/data_sources.yml.example .. _wps_processes.yml.example: ../../../config/wps_processes.yml.example .. _request_options.yml.example: ../../../config/request_options.yml.example .. 
_Dockerfile-manager: ../../../docker/Dockerfile-manager diff --git a/requirements-dev.txt b/requirements-dev.txt index 0cc607477..3c11517fb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,7 +5,6 @@ astroid>=2.4 bandit bump2version codacy-coverage -contextlib2; python_version < "3" coverage doc8>=0.8.1 flake8 diff --git a/requirements.txt b/requirements.txt index 88f0b7d94..5d2bd95e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ alembic -backports.tempfile; python_version < "3" # AWS support (S3 buckets) boto3 # celery 5 to be released in near future @@ -11,39 +10,40 @@ cornice cornice_swagger>=0.7.0 # cwltool Python 2 support dropped on 2.x # enforce Python 3 with version >=2 to update existing 1.x installations -cwltool>=1.0.20180820141117,<2; python_version < "3" # FIXME: avoid error invalid get_listing import location, # remove properly as per https://github.com/crim-ca/weaver/issues/154 -cwltool>=2,<=3.0.20200324120055; python_version >= "3" +cwltool>=2,<=3.0.20200324120055 duration -flufl.enum; python_version < "3" +git+https://github.com/ESGF/esgf-compute-api.git@v2.3.7#egg=esgf-compute-api # gunicorn >20 breaks some config.ini loading parameters (paste) # it is also only available for Python >=3.5 -gunicorn>=19.10,<20; python_version < "3" # use pserve to continue supporting config.ini with paste settings -gunicorn>=20.0.4; python_version >= "3.5" +gunicorn>=20.0.4 jsonschema>=3.0.1 lxml mako -owslib<0.19.0; python_version < "3" -owslib>=0.19.2; python_version >= "3" +# esgf-compute-api (cwt) needs oauthlib but doesn't add it in their requirements +oauthlib +owslib>=0.19.2 pymongo pyramid>=1.7.3 pyramid_celery pyramid_mako +python-dateutil pytz -pywps>=4.2.4 +# no viable pywps version with Python>3.5 dependencies +# use '4.2.4' plus a few commits that provide fix, but not yet released +# FIXME: https://github.com/geopython/pywps/issues/568 +## git+https://github.com/fmigneault/pywps.git@799fb14f31533630ce08c171f844294730861b1a#egg=pywps +git+https://github.com/geopython/pywps.git@pywps-4.4#egg=pywps pyyaml>=5.2 requests requests_file # let cwltool define ruamel.yaml version (<=0.16.5) # ensure minimal 0.15.78 to solve install issue (python 3.8) # (https://bitbucket.org/ruamel/yaml/issues/261/error-while-installing-ruamelyaml-setuppy) -# let cwltool fully define the version if python 2 is used -ruamel.yaml>=0.15.78,<=0.16.5; python_version >= "3" -ruamel.yaml<=0.16.5; python_version < "3" +ruamel.yaml>=0.15.78,<=0.16.5 shapely -six simplejson urlmatch xmltodict diff --git a/setup.cfg b/setup.cfg index 66c84040e..b151acf1e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,21 +5,21 @@ tag = True tag_name = {new_version} [bumpversion:file:CHANGES.rst] -search = +search = `Unreleased `_ (latest) ======================================================================== -replace = +replace = `Unreleased `_ (latest) ======================================================================== - + Changes: -------- - No change. - + Fixes: ------ - No change. 
- + `{new_version} `_ ({now:%%Y-%%m-%%d}) ======================================================================== @@ -40,14 +40,15 @@ search = LABEL version="{current_version}" replace = LABEL version="{new_version}" [tool:pytest] -addopts = - --strict +addopts = + --strict-markers --tb=native weaver/ python_files = test_*.py -markers = +markers = testbed14: mark test as 'testbed14' validation functional: mark test as functionality validation + workflow: mark test as workflow execution (E2E) online: mark test to need internet connection slow: mark test to be slow @@ -67,9 +68,9 @@ exclude = *.egg-info,build,dist,env,tests,./tests,test_* targets = . [flake8] -ignore = E126,E226,E402,F401,W504 +ignore = E126,E226,E402,F401,W503,W504 max-line-length = 120 -exclude = +exclude = src, .git, __pycache__, @@ -82,7 +83,7 @@ exclude = [doc8] max-line-length = 120 -ignore-path = docs/build +ignore-path = docs/build,docs/source/autoapi [pylint] @@ -90,13 +91,13 @@ ignore-path = docs/build branch = true source = ./ include = weaver/* -omit = +omit = setup.py docs/* tests/* [coverage:report] -exclude_lines = +exclude_lines = pragma: no cover raise AssertionError raise NotImplementedError diff --git a/setup.py b/setup.py index f7d28fbbe..74eebf3c1 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,9 @@ # pylint: disable=C0413,wrong-import-order from weaver import __meta__ # isort:skip # noqa: E402 -requirements = [line.strip() for line in open("requirements.txt")] +requirements = {line.strip() for line in open("requirements.txt")} +links = {line for line in requirements if "git+https" in line or "@" in line} +requirements = requirements - links setup(name=__meta__.__name__, version=__meta__.__version__, @@ -48,10 +50,8 @@ zip_safe=False, test_suite="tests", python_requires=">=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4", - install_requires=requirements, - dependency_links=[ - "git+https://github.com/ESGF/esgf-compute-api.git@v2.1.0#egg=cwt" - ], + install_requires=list(requirements), + dependency_links=list(links), entry_points={ "paste.app_factory": [ "main = {}:main".format(__meta__.__name__) diff --git a/tests/compat.py b/tests/compat.py deleted file mode 100644 index e7ef51b3a..000000000 --- a/tests/compat.py +++ /dev/null @@ -1,9 +0,0 @@ -# pylint: disable=W0611,unused-import -try: - import six - if six.PY2: - import contextlib2 as contextlib # noqa - else: - import contextlib -except ImportError: - raise diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index d9964b1df..ed141e9fc 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -1,39 +1,22 @@ +import contextlib import json import os import tempfile -import unittest -from time import sleep -import mock -import pyramid.testing import pytest -import six -from tests.compat import contextlib -from tests.utils import ( - get_settings_from_testapp, - get_test_weaver_app, - get_test_weaver_config, - mocked_execute_process, - mocked_sub_requests, - setup_config_with_celery, - setup_config_with_mongodb, - setup_config_with_pywps -) -from weaver.database import get_db +from tests.functional.utils import WpsPackageConfigBase +from tests.utils import get_settings_from_testapp, mocked_execute_process, mocked_sub_requests +from weaver.execute import EXECUTE_TRANSMISSION_MODE_REFERENCE from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF from weaver.processes.builtin import register_builtin_processes -from weaver.status import JOB_STATUS_CATEGORIES, 
STATUS_CATEGORY_RUNNING, STATUS_SUCCEEDED @pytest.mark.functional -class BuiltinAppTest(unittest.TestCase): +class BuiltinAppTest(WpsPackageConfigBase): @classmethod def setUpClass(cls): - cls.json_headers = {"Accept": CONTENT_TYPE_APP_JSON, "Content-Type": CONTENT_TYPE_APP_JSON} - - def setUp(self): - settings = { + cls.settings = { "weaver.wps": True, "weaver.wps_output": True, "weaver.wps_output_path": "/wpsoutputs", @@ -41,18 +24,13 @@ def setUp(self): "weaver.wps_path": "/ows/wps", "weaver.wps_restapi_path": "/", } - config = setup_config_with_mongodb(settings=settings) - config = setup_config_with_pywps(config) - config = setup_config_with_celery(config) - config = get_test_weaver_config(config) - self.app = get_test_weaver_app(config=config, settings=settings) - db = get_db(config) - with mock.patch("weaver.processes.builtin.get_db", return_value=db): - db._stores = {} # ensure reset of process store to register builtin processes from scratch - register_builtin_processes(config) + cls.json_headers = {"Accept": CONTENT_TYPE_APP_JSON, "Content-Type": CONTENT_TYPE_APP_JSON} + super(BuiltinAppTest, cls).setUpClass() - def tearDown(self): - pyramid.testing.tearDown() + def setUp(self): + # register builtin processes from scratch to have clean state + self.process_store.clear_processes() + register_builtin_processes(self.settings) def test_jsonarray2netcdf_describe(self): resp = self.app.get("/processes/jsonarray2netcdf", headers=self.json_headers) @@ -77,9 +55,11 @@ def test_jsonarray2netcdf_describe(self): def test_jsonarray2netcdf_execute(self): dirname = tempfile.gettempdir() nc_data = "Hello NetCDF!" - with contextlib.ExitStack() as stack_files: - tmp_ncdf = stack_files.enter_context(tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".nc")) - tmp_json = stack_files.enter_context(tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".json")) + with contextlib.ExitStack() as stack_exec: + tmp_ncdf = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".nc") + tmp_json = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".json") + tmp_ncdf = stack_exec.enter_context(tmp_ncdf) # noqa + tmp_json = stack_exec.enter_context(tmp_json) # noqa tmp_ncdf.write(nc_data) tmp_ncdf.seek(0) tmp_json.write(json.dumps(["file://{}".format(os.path.join(dirname, tmp_ncdf.name))])) @@ -88,37 +68,27 @@ def test_jsonarray2netcdf_execute(self): "mode": "async", "response": "document", "inputs": [{"id": "input", "href": os.path.join(dirname, tmp_json.name)}], - "outputs": [{"id": "output", "transmissionMode": "reference"}], + "outputs": [{"id": "output", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}], } - with contextlib.ExitStack() as stack_proc: - for process in mocked_execute_process(): - stack_proc.enter_context(process) - path = "/processes/jsonarray2netcdf/jobs" - resp = mocked_sub_requests(self.app, "post_json", path, params=data, headers=self.json_headers) - assert resp.status_code == 201 - assert resp.content_type in CONTENT_TYPE_APP_JSON - job_url = resp.json["location"] - nc_path = None - for delay in range(5): - sleep(delay) - resp = self.app.get(job_url, headers=self.json_headers) - if resp.status_code == 200: - if resp.json["status"] in JOB_STATUS_CATEGORIES[STATUS_CATEGORY_RUNNING]: - continue - assert resp.json["status"] == STATUS_SUCCEEDED, \ - "Process execution failed. 
Response body:\n{}".format(resp.json) - resp = self.app.get("{}/result".format(job_url), headers=self.json_headers) - assert resp.status_code == 200 - assert resp.json["outputs"][0]["id"] == "output" - nc_path = resp.json["outputs"][0]["href"] - break - assert isinstance(nc_path, six.string_types) and len(nc_path) - settings = get_settings_from_testapp(self.app) - wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path")) - nc_real_path = nc_path.replace(wps_out, settings.get("weaver.wps_output_dir")) - assert nc_path.startswith(wps_out) - assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1] - assert os.path.isfile(nc_real_path) - with open(nc_real_path, "r") as f: - assert f.read() == nc_data + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) + path = "/processes/jsonarray2netcdf/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, + data=data, headers=self.json_headers, only_local=True) + + assert resp.status_code == 201, "Error: {}".format(resp.json) + assert resp.content_type in CONTENT_TYPE_APP_JSON + job_url = resp.json["location"] + results = self.monitor_job(job_url) + assert results["outputs"][0]["id"] == "output" + nc_path = results["outputs"][0]["href"] + assert isinstance(nc_path, str) and len(nc_path) + settings = get_settings_from_testapp(self.app) + wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path")) + nc_real_path = nc_path.replace(wps_out, settings.get("weaver.wps_output_dir")) + assert nc_path.startswith(wps_out) + assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1] + assert os.path.isfile(nc_real_path) + with open(nc_real_path, "r") as f: + assert f.read() == nc_data diff --git a/tests/functional/test_docker_app.py b/tests/functional/test_docker_app.py new file mode 100644 index 000000000..64f60052b --- /dev/null +++ b/tests/functional/test_docker_app.py @@ -0,0 +1,263 @@ +import contextlib +import os +import tempfile + +import lxml.etree +import pytest +from owslib.wps import ComplexDataInput, WPSExecution + +from tests.functional.utils import WpsPackageConfigBase +from tests.utils import mocked_execute_process, mocked_sub_requests +from weaver.execute import EXECUTE_MODE_ASYNC, EXECUTE_RESPONSE_DOCUMENT, EXECUTE_TRANSMISSION_MODE_REFERENCE +from weaver.formats import CONTENT_TYPE_ANY_XML, CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_XML, CONTENT_TYPE_TEXT_PLAIN +from weaver.processes.wps_package import CWL_REQUIREMENT_APP_DOCKER +from weaver.utils import get_any_value, str2bytes +from weaver.wps.utils import get_wps_url + + +@pytest.mark.functional +class WpsPackageDockerAppTest(WpsPackageConfigBase): + @classmethod + def setUpClass(cls): + cls.settings = { + "weaver.url": "http://localhost", + "weaver.wps": True, + "weaver.wps_output": True, + "weaver.wps_output_path": "/wpsoutputs", + "weaver.wps_output_dir": "/tmp", # nosec: B108 # don't care hardcoded for test + "weaver.wps_path": "/ows/wps", + "weaver.wps_restapi_path": "/", + } + super(WpsPackageDockerAppTest, cls).setUpClass() + cls.out_key = "output" + # use default file generated by Weaver/CWL + # command 'cat' within docker application will dump file contents to standard output captured by it + cls.out_file = "stdout.log" + cls.process_id = cls.__name__ + cls.deploy_docker_process() + + @classmethod + def deploy_docker_process(cls): + cwl = { + "cwlVersion": "v1.0", + "class": "CommandLineTool", + "baseCommand": "cat", + "requirements": { + CWL_REQUIREMENT_APP_DOCKER: 
{ + "dockerPull": "debian:stretch-slim" + } + }, + "inputs": [ + {"id": "file", "type": "File", "inputBinding": {"position": 1}}, + ], + "outputs": [ + {"id": cls.out_key, "type": "File", "outputBinding": {"glob": cls.out_file}}, + ] + } + body = { + "processDescription": { + "process": {"id": cls.process_id} + }, + "deploymentProfileName": "http://www.opengis.net/profiles/eoc/dockerizedApplication", + "executionUnit": [{"unit": cwl}], + } + info = cls.deploy_process(body) + return info + + def validate_outputs(self, job_id, result_payload, result_file_content): + # check that output is HTTP reference to file + output_values = {out["id"]: get_any_value(out) for out in result_payload["outputs"]} + assert len(output_values) == 1 + wps_uuid = self.job_store.fetch_by_id(job_id).wps_id + wps_out_path = "{}{}".format(self.settings["weaver.url"], self.settings["weaver.wps_output_path"]) + wps_output = "{}/{}/{}".format(wps_out_path, wps_uuid, self.out_file) + assert output_values[self.out_key] == wps_output + + # check that actual output file was created in expected location along with XML job status + wps_outdir = self.settings["weaver.wps_output_dir"] + wps_out_file = os.path.join(wps_outdir, job_id, self.out_file) + assert not os.path.exists(os.path.join(wps_outdir, self.out_file)), \ + "File is expected to be created in sub-directory of Job ID, not directly in WPS output directory." + # job log, XML status and output directory can be retrieved with both Job UUID and underlying WPS UUID reference + assert os.path.isfile(os.path.join(wps_outdir, "{}.log".format(wps_uuid))) + assert os.path.isfile(os.path.join(wps_outdir, "{}.xml".format(wps_uuid))) + assert os.path.isfile(os.path.join(wps_outdir, wps_uuid, self.out_file)) + assert os.path.isfile(os.path.join(wps_outdir, "{}.log".format(job_id))) + assert os.path.isfile(os.path.join(wps_outdir, "{}.xml".format(job_id))) + assert os.path.isfile(wps_out_file) + + # validate content + with open(wps_out_file) as res_file: + assert res_file.read() == result_file_content + + def test_execute_wps_rest_resp_json(self): + """ + Test validates that basic Docker application runs successfully, fetching the reference as needed. + + The job execution is launched using the WPS-REST endpoint for this test. + Both the request body and response content are JSON. + + .. 
seealso:: + - :meth:`test_execute_wps_kvp_get_resp_xml` + - :meth:`test_execute_wps_kvp_get_resp_json` + - :meth:`test_execute_wps_xml_post_resp_xml` + - :meth:`test_execute_wps_xml_post_resp_json` + """ + + test_content = "Test file in Docker - WPS-REST job endpoint" + with contextlib.ExitStack() as stack_exec: + # setup + dir_name = tempfile.gettempdir() + tmp_path = tempfile.NamedTemporaryFile(dir=dir_name, mode="w", suffix=".txt") + tmp_file = stack_exec.enter_context(tmp_path) # noqa + tmp_file.write(test_content) + tmp_file.seek(0) + exec_body = { + "mode": EXECUTE_MODE_ASYNC, + "response": EXECUTE_RESPONSE_DOCUMENT, + "inputs": [ + {"id": "file", "href": tmp_file.name}, + ], + "outputs": [ + {"id": self.out_key, "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}, + ] + } + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) + + # execute + proc_url = "/processes/{}/jobs".format(self.process_id) + resp = mocked_sub_requests(self.app, "post_json", proc_url, + data=exec_body, headers=self.json_headers, only_local=True) + assert resp.status_code in [200, 201], "Failed with: [{}]\nReason:\n{}".format(resp.status_code, resp.json) + status_url = resp.json["location"] + job_id = resp.json["jobID"] + + # job monitoring + result = self.monitor_job(status_url) + + self.validate_outputs(job_id, result, test_content) + + def wps_execute(self, version, accept): + wps_url = get_wps_url(self.settings) + if version == "1.0.0": + test_content = "Test file in Docker - WPS KVP" + wps_method = "GET" + elif version == "2.0.0": + test_content = "Test file in Docker - WPS XML" + wps_method = "POST" + else: + raise ValueError("Invalid WPS version: {}".format(version)) + test_content += " {} request - Accept {}".format(wps_method, accept.split("/")[-1].upper()) + + with contextlib.ExitStack() as stack_exec: + # setup + dir_name = tempfile.gettempdir() + tmp_path = tempfile.NamedTemporaryFile(dir=dir_name, mode="w", suffix=".txt") + tmp_file = stack_exec.enter_context(tmp_path) # noqa + tmp_file.write(test_content) + tmp_file.seek(0) + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) + + # execute + if version == "1.0.0": + wps_inputs = ["file={}@mimeType={}".format(tmp_file.name, CONTENT_TYPE_TEXT_PLAIN)] + wps_params = { + "service": "WPS", + "request": "Execute", + "version": version, + "identifier": self.process_id, + "DataInputs": wps_inputs, + } + wps_headers = {"Accept": accept} + wps_data = None + else: + wps_inputs = [("file", ComplexDataInput(tmp_file.name, mimeType=CONTENT_TYPE_TEXT_PLAIN))] + wps_outputs = [(self.out_key, True)] # as reference + wps_exec = WPSExecution(version=version, url=wps_url) + wps_req = wps_exec.buildRequest(self.process_id, wps_inputs, wps_outputs) + wps_data = lxml.etree.tostring(wps_req) + wps_headers = {"Accept": accept, "Content-Type": CONTENT_TYPE_APP_XML} + wps_params = None + resp = mocked_sub_requests(self.app, wps_method, wps_url, + params=wps_params, data=wps_data, headers=wps_headers, only_local=True) + assert resp.status_code in [200, 201], \ + "Failed with: [{}]\nTest: [{}]\nReason:\n{}".format(resp.status_code, test_content, resp.text) + + # parse response status + if accept == CONTENT_TYPE_APP_XML: + assert resp.content_type in CONTENT_TYPE_ANY_XML, test_content + xml = lxml.etree.fromstring(str2bytes(resp.text)) + status_url = xml.get("statusLocation") + job_id = status_url.split("/")[-1] + elif accept == CONTENT_TYPE_APP_JSON: + assert resp.content_type == CONTENT_TYPE_APP_JSON, 
test_content + status_url = resp.json["location"] + job_id = resp.json["jobID"] + assert status_url + assert job_id + + # job monitoring + result = self.monitor_job(status_url) + + self.validate_outputs(job_id, result, test_content) + + def test_execute_wps_kvp_get_resp_xml(self): + """ + Test validates that basic Docker application runs successfully, fetching the reference as needed. + + The job is launched using the WPS Execute request with Key-Value Pairs (KVP) and GET method. + The request is done with query parameters, and replies by default with response XML content. + + .. seealso:: + - :meth:`test_execute_wps_rest_resp_json` + - :meth:`test_execute_wps_kvp_get_resp_json` + - :meth:`test_execute_wps_xml_post_resp_xml` + - :meth:`test_execute_wps_xml_post_resp_json` + """ + self.wps_execute("1.0.0", CONTENT_TYPE_APP_XML) + + def test_execute_wps_kvp_get_resp_json(self): + """ + Test validates that basic Docker application runs successfully, fetching the reference as needed. + + Does the same operation as :meth:`test_execute_wps_kvp_get_resp_xml`, but use ``Accept`` header of JSON + which should return a response with the same contents as if called directly via WPS-REST endpoint. + + .. seealso:: + - :meth:`test_execute_wps_rest_resp_json` + - :meth:`test_execute_wps_kvp_get_resp_xml` + - :meth:`test_execute_wps_xml_post_resp_xml` + - :meth:`test_execute_wps_xml_post_resp_json` + """ + self.wps_execute("1.0.0", CONTENT_TYPE_APP_JSON) + + def test_execute_wps_xml_post_resp_xml(self): + """ + Test validates that basic Docker application runs successfully, fetching the reference as needed. + + The job is launched using the WPS Execute request with POST request method and XML content. + + .. seealso:: + - :meth:`test_execute_wps_rest_resp_json` + - :meth:`test_execute_wps_kvp_get_resp_xml` + - :meth:`test_execute_wps_kvp_get_resp_json` + - :meth:`test_execute_wps_xml_post_resp_json` + """ + self.wps_execute("2.0.0", CONTENT_TYPE_APP_XML) + + def test_execute_wps_xml_post_resp_json(self): + """ + Test validates that basic Docker application runs successfully, fetching the reference as needed. + + Does the same operation as :meth:`test_execute_wps_xml_post_resp_xml`, but use ``Accept`` header of JSON + which should return a response with the same contents as if called directly via WPS-REST endpoint. + + .. 
seealso:: + - :meth:`test_execute_wps_rest_resp_json` + - :meth:`test_execute_wps_kvp_get_resp_xml` + - :meth:`test_execute_wps_kvp_get_resp_json` + - :meth:`test_execute_wps_xml_post_resp_json` + """ + self.wps_execute("2.0.0", CONTENT_TYPE_APP_JSON) diff --git a/tests/functional/test_ems_end2end.py b/tests/functional/test_ems_end2end.py index 632f88b98..874a65283 100644 --- a/tests/functional/test_ems_end2end.py +++ b/tests/functional/test_ems_end2end.py @@ -5,13 +5,13 @@ from copy import deepcopy from typing import TYPE_CHECKING from unittest import TestCase +from urllib.parse import urlparse import mock import pytest from pyramid import testing from pyramid.httpexceptions import HTTPCreated, HTTPNotFound, HTTPOk, HTTPUnauthorized from pyramid.settings import asbool -from six.moves.urllib.parse import urlparse # use 'Web' prefix to avoid pytest to pick up these classes and throw warnings from webtest import TestApp as WebTestApp @@ -34,13 +34,13 @@ from weaver.wps_restapi.utils import get_wps_restapi_base_url if TYPE_CHECKING: - from weaver.typedefs import AnyResponseType, CookiesType, HeadersType, JSON, SettingsType # noqa: F401 - from typing import AnyStr, Dict, Optional, Any, Tuple, Iterable, Callable, Union # noqa: F401 + from weaver.typedefs import AnyResponseType, CookiesType, HeadersType, JSON, SettingsType + from typing import Dict, Optional, Any, Tuple, Iterable, Callable, Union class ProcessInfo(object): def __init__(self, process_id, test_id=None, deploy_payload=None, execute_payload=None): - # type: (AnyStr, Optional[AnyStr], Optional[JSON], Optional[JSON]) -> None + # type: (str, Optional[str], Optional[JSON], Optional[JSON]) -> None self.id = process_id self.test_id = test_id self.deploy_payload = deploy_payload @@ -50,6 +50,7 @@ def __init__(self, process_id, test_id=None, deploy_payload=None, execute_payloa # pylint: disable=C0103,invalid-name @pytest.mark.slow @pytest.mark.functional +@pytest.mark.workflow @pytest.mark.skipif(condition=not len(str(os.getenv("WEAVER_TEST_SERVER_HOSTNAME", ""))), reason="Test server not defined!") class End2EndEMSTestCase(TestCase): @@ -57,18 +58,18 @@ class End2EndEMSTestCase(TestCase): Runs an end-2-end test procedure on weaver configured as EMS located on specified `WEAVER_TEST_SERVER_HOSTNAME`. 
""" __settings__ = None - test_processes_info = dict() # type: Dict[AnyStr, ProcessInfo] + test_processes_info = dict() # type: Dict[str, ProcessInfo] headers = { "Accept": CONTENT_TYPE_APP_JSON, "Content-Type": CONTENT_TYPE_APP_JSON, } # type: HeadersType cookies = dict() # type: CookiesType app = None # type: Optional[WebTestApp] - logger_result_dir = None # type: Optional[AnyStr] - logger_separator_calls = "" # type: AnyStr - logger_separator_steps = "" # type: AnyStr - logger_separator_tests = "" # type: AnyStr - logger_separator_cases = "" # type: AnyStr + logger_result_dir = None # type: Optional[str] + logger_separator_calls = "" # type: str + logger_separator_steps = "" # type: str + logger_separator_tests = "" # type: str + logger_separator_cases = "" # type: str logger_level = logging.INFO # type: int logger_enabled = True # type: bool logger = None # type: Optional[logging.Logger] @@ -77,8 +78,8 @@ class End2EndEMSTestCase(TestCase): logger_field_indent = 2 # type: int log_full_trace = True # type: bool - WEAVER_URL = None # type: Optional[AnyStr] - WEAVER_RESTAPI_URL = None # type: Optional[AnyStr] + WEAVER_URL = None # type: Optional[str] + WEAVER_RESTAPI_URL = None # type: Optional[str] @staticmethod def mock_get_data_source_from_url(data_url): @@ -215,7 +216,7 @@ def get_http_auth_code(cls, unprotected_code=HTTPOk.code): @classmethod def get_test_process(cls, process_id): - # type: (AnyStr) -> ProcessInfo + # type: (str) -> ProcessInfo return cls.test_processes_info.get(process_id) @classmethod @@ -288,7 +289,7 @@ def setup_test_processes(cls): @classmethod def retrieve_process_info(cls, process_id): - # type: (AnyStr) -> ProcessInfo + # type: (str) -> ProcessInfo base = os.getenv("TEST_GITHUB_SOURCE_URL", "https://raw.githubusercontent.com/crim-ca/testbed14/master/application-packages") deploy_path = "{base}/{proc}/DeployProcess_{proc}.json".format(base=base, proc=process_id) @@ -308,7 +309,7 @@ def retrieve_process_info(cls, process_id): @classmethod def retrieve_payload(cls, url): - # type: (AnyStr) -> Dict + # type: (str) -> Dict local_path = os.path.join(os.path.dirname(__file__), "application-packages", url.split("/")[-1]) try: # Try to find it locally, then fallback to remote @@ -327,7 +328,7 @@ def retrieve_payload(cls, url): @classmethod def get_test_process_id(cls, real_process_id): - # type: (AnyStr) -> AnyStr + # type: (str) -> str return "{}_{}".format(cls.__name__, real_process_id) @classmethod @@ -358,7 +359,7 @@ def clear_test_processes(cls, headers=None, cookies=None): @classmethod def login(cls, username, password, force_magpie=False): - # type: (AnyStr, AnyStr, bool) -> Tuple[HeadersType, CookiesType] + # type: (str, str, bool) -> Tuple[HeadersType, CookiesType] """ Login using WSO2 or Magpie according to ``WEAVER_TEST_PROTECTED_ENABLED`` to retrieve session cookies. 
@@ -411,17 +412,17 @@ def user_headers_cookies(cls, credentials, force_magpie=False): @classmethod def get_indent(cls, indent_level): - # type: (int) -> AnyStr + # type: (int) -> str return " " * cls.logger_field_indent * indent_level @classmethod def indent(cls, field, indent_level): - # type: (AnyStr, int) -> AnyStr + # type: (str, int) -> str return cls.get_indent(indent_level) + field @classmethod def log_json_format(cls, payload, indent_level): - # type: (AnyStr, int) -> AnyStr + # type: (str, int) -> str """Logs an indented string representation of a JSON payload according to settings.""" sub_indent = cls.get_indent(indent_level if cls.logger_json_indent else 0) log_payload = "\n" if cls.logger_json_indent else "" + json.dumps(payload, indent=cls.logger_json_indent) @@ -441,7 +442,7 @@ def log_dict_format(cls, dictionary, indent_level): @classmethod def request(cls, method, url, ignore_errors=False, force_requests=False, log_enabled=True, **kw): - # type: (AnyStr, AnyStr, bool, bool, bool, Optional[Any]) -> AnyResponseType + # type: (str, str, bool, bool, bool, Optional[Any]) -> AnyResponseType """ Executes the request, but following any server prior redirects as needed. Also prepares JSON body and obvious error handling according to a given status code. @@ -541,7 +542,7 @@ def request(cls, method, url, ignore_errors=False, force_requests=False, log_ena @classmethod def assert_response(cls, response, status=None, message=""): - # type: (AnyResponseType, Optional[Union[int, Iterable[int]]], AnyStr) -> None + # type: (AnyResponseType, Optional[Union[int, Iterable[int]]], str) -> None """Tests a response for expected status and raises an error if not matching.""" code = response.status_code reason = getattr(response, "reason", "") @@ -561,7 +562,7 @@ def assert_response(cls, response, status=None, message=""): @classmethod def assert_test(cls, assert_test, message=None, title="Test Assertion Failed"): - # type: (Callable[[], bool], Optional[AnyStr], AnyStr) -> None + # type: (Callable[[], bool], Optional[str], str) -> None """Tests a callable for assertion and logs the message if it fails, then re-raises to terminate execution.""" try: assert assert_test(), message @@ -762,7 +763,7 @@ def test_workflow_flood_detection(self): [self.PROCESS_STACKER_ID, self.PROCESS_SFS_ID]) def workflow_runner(self, test_workflow_id, test_application_ids, log_full_trace=False): - # type: (AnyStr, Iterable[AnyStr], bool) -> None + # type: (str, Iterable[str], bool) -> None """Simplify test for demonstration purpose""" # test will log basic information @@ -809,7 +810,7 @@ def workflow_runner(self, test_workflow_id, test_application_ids, log_full_trace self.validate_test_job_execution(job_location, None, None) def validate_test_job_execution(self, job_location_url, user_headers=None, user_cookies=None): - # type: (AnyStr, Optional[HeadersType], Optional[CookiesType]) -> None + # type: (str, Optional[HeadersType], Optional[CookiesType]) -> None """ Validates that the job is stated, running, and polls it until completed successfully. Then validates that results are accessible (no data integrity check). 
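For reference, the polling pattern exercised by ``validate_test_job_execution`` above (and by ``monitor_job`` in ``tests/functional/utils.py`` further below) reduces to the following sketch: poll the job status endpoint until it leaves the running states, then fetch ``{status_url}/result``. The job URL, timing values and literal status strings are illustrative assumptions rather than values taken from the test suite::

    import time

    import requests

    # placeholder: the "Location" header returned by the Execute request of a real Weaver/EMS instance
    job_status_url = "https://weaver.example.com/processes/my-process/jobs/1234"
    json_headers = {"Accept": "application/json"}

    def poll_job(status_url, timeout=60, delta=2):
        """Poll the job status until success, failure, or timeout, then return the result body."""
        while timeout > 0:
            body = requests.get(status_url, headers=json_headers).json()
            if body["status"] == "succeeded":
                # results are exposed on a separate endpoint once the job has completed
                return requests.get("{}/result".format(status_url), headers=json_headers).json()
            if body["status"] not in ("accepted", "running"):
                raise AssertionError("Job did not succeed: {}".format(body))
            time.sleep(delta)
            timeout -= delta
        raise AssertionError("Job did not complete within the allowed time.")

    # outputs = poll_job(job_status_url)["outputs"]
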
diff --git a/tests/functional/test_wps_app.py b/tests/functional/test_wps_app.py index 4031470f4..cbc73e800 100644 --- a/tests/functional/test_wps_app.py +++ b/tests/functional/test_wps_app.py @@ -6,6 +6,7 @@ * http://webtest.pythonpaste.org/en/latest/ * http://docs.pylonsproject.org/projects/pyramid/en/latest/narr/testing.html """ +import contextlib import unittest import pyramid.testing @@ -16,12 +17,13 @@ from tests.utils import ( get_test_weaver_app, get_test_weaver_config, + mocked_execute_process, setup_config_with_celery, setup_config_with_mongodb, setup_config_with_pywps, setup_mongodb_processstore ) -from weaver.formats import CONTENT_TYPE_ANY_XML +from weaver.formats import CONTENT_TYPE_ANY_XML, CONTENT_TYPE_APP_XML from weaver.processes.wps_default import HelloWPS from weaver.processes.wps_testing import WpsTestProcess from weaver.utils import str2bytes @@ -114,27 +116,44 @@ def test_describeprocess_filtered_processes_by_visibility(self): assert resp.content_type in CONTENT_TYPE_ANY_XML resp.mustcontain("Unknown process") - def test_execute_allowed(self): + def test_execute_allowed_demo(self): template = "service=wps&request=execute&version=1.0.0&identifier={}&datainputs=name=tux" params = template.format(HelloWPS.identifier) url = self.make_url(params) - resp = self.app.get(url) - assert resp.status_code == 200 + with contextlib.ExitStack() as stack_exec: + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) + resp = self.app.get(url) + assert resp.status_code == 200 # FIXME: replace by 202 Accepted (?) https://github.com/crim-ca/weaver/issues/14 assert resp.content_type in CONTENT_TYPE_ANY_XML - status = "PyWPS Process {} finished".format(HelloWPS.title) - resp.mustcontain(status) + resp.mustcontain("PyWPS Process {} finished" - .format(self.process_public.title)) + resp.mustcontain("Unknown process") + with contextlib.ExitStack() as stack_exec: + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) + resp = self.app.get(url, headers=headers, expect_errors=True) + assert resp.status_code == 403 + assert resp.content_type in CONTENT_TYPE_ANY_XML, "Error Response: {}".format(resp.text) + resp.mustcontain("") + err_desc = "Process with ID '{}' is not accessible.".format(self.process_private.identifier) + resp.mustcontain("{}".format(err_desc)) diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 6cd11c3fe..15b346a06 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -10,32 +10,22 @@ import contextlib import logging import os -import time -import unittest -from copy import deepcopy import colander import pytest -import six from pyramid.httpexceptions import HTTPBadRequest from tests import resources +from tests.functional.utils import WpsPackageConfigBase from tests.utils import ( MOCK_AWS_REGION, - get_test_weaver_app, - get_test_weaver_config, mocked_aws_credentials, mocked_aws_s3, mocked_aws_s3_bucket_test_file, mocked_execute_process, - mocked_sub_requests, - setup_config_with_celery, - setup_config_with_mongodb, - setup_config_with_pywps, - setup_mongodb_jobstore, - setup_mongodb_processstore + mocked_sub_requests ) -from weaver.execute import EXECUTE_MODE_ASYNC, EXECUTE_RESPONSE_DOCUMENT +from weaver.execute import EXECUTE_MODE_ASYNC, EXECUTE_RESPONSE_DOCUMENT, EXECUTE_TRANSMISSION_MODE_REFERENCE from weaver.formats import ( CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF, @@ -48,9 +38,7 @@ get_cwl_file_format ) from 
weaver.processes.constants import CWL_REQUIREMENT_APP_BUILTIN -from weaver.status import STATUS_RUNNING, STATUS_SUCCEEDED from weaver.utils import get_any_value -from weaver.visibility import VISIBILITY_PUBLIC EDAM_PLAIN = EDAM_NAMESPACE + ":" + EDAM_MAPPING[CONTENT_TYPE_TEXT_PLAIN] EDAM_NETCDF = EDAM_NAMESPACE + ":" + EDAM_MAPPING[CONTENT_TYPE_APP_NETCDF] @@ -66,46 +54,6 @@ LOGGER = logging.getLogger(__name__) -@pytest.mark.functional -class WpsPackageConfigBase(unittest.TestCase): - json_headers = {"Accept": CONTENT_TYPE_APP_JSON, "Content-Type": CONTENT_TYPE_APP_JSON} - settings = {} - - def __init__(self, *args, **kwargs): - # won't run this as a test suite, only its derived classes - setattr(self, "__test__", self is WpsPackageConfigBase) - super(WpsPackageConfigBase, self).__init__(*args, **kwargs) - - @classmethod - def setUpClass(cls): - config = setup_config_with_mongodb(settings=cls.settings) - config = setup_config_with_pywps(config) - config = setup_config_with_celery(config) - config = get_test_weaver_config(config) - setup_mongodb_processstore(config) # force reset - cls.job_store = setup_mongodb_jobstore(config) - cls.app = get_test_weaver_app(config=config, settings=cls.settings) - - def deploy_process(self, payload): - """ - Deploys a process with :paramref:`payload`. - - :returns: resulting tuple of ``(process-description, package)`` JSON responses. - """ - resp = mocked_sub_requests(self.app, "post_json", "/processes", params=payload, headers=self.json_headers) - assert resp.status_code == 200 # TODO: status should be 201 when properly modified to match API conformance - path = resp.json["processSummary"]["processDescriptionURL"] - body = {"value": VISIBILITY_PUBLIC} - resp = self.app.put_json("{}/visibility".format(path), params=body, headers=self.json_headers) - assert resp.status_code == 200 - info = [] - for pkg_url in [path, "{}/package".format(path)]: - resp = self.app.get(pkg_url, headers=self.json_headers) - assert resp.status_code == 200 - info.append(deepcopy(resp.json)) - return info - - @pytest.mark.functional class WpsPackageAppTest(WpsPackageConfigBase): @classmethod @@ -1303,7 +1251,7 @@ def test_enum_array_and_multi_format_inputs_from_wps_xml_reference(self): assert pkg["inputs"][0]["type"]["items"]["type"] == "enum" assert isinstance(pkg["inputs"][0]["type"]["items"]["symbols"], list) assert len(pkg["inputs"][0]["type"]["items"]["symbols"]) == 220 - assert all(isinstance(s, six.string_types) for s in pkg["inputs"][0]["type"]["items"]["symbols"]) + assert all(isinstance(s, str) for s in pkg["inputs"][0]["type"]["items"]["symbols"]) assert pkg["inputs"][1]["id"] == "mosaic" assert pkg["inputs"][1]["default"] == "null" assert "format" not in pkg["inputs"][1] @@ -1313,7 +1261,7 @@ def test_enum_array_and_multi_format_inputs_from_wps_xml_reference(self): assert pkg["inputs"][2]["type"]["type"] == "array" assert pkg["inputs"][2]["type"]["items"] == "File" # FIXME: TAR cannot be resolved in the CWL context (not official, disable mapping to GZIP) - # this makes all formats to not be resolved (see code: wps_package._any2cwl_io) + # this makes all formats to not be resolved (see code: wps_package.any2cwl_io) # (see issue: https://github.com/crim-ca/weaver/issues/50) assert "format" not in pkg["inputs"][2], \ "CWL formats should all be dropped because (x-tar) cannot be resolved to an existing schema reference" @@ -1447,34 +1395,23 @@ def test_execute_application_package_process_with_bucket(self): {"id": "input_with_s3", "href": test_bucket_ref}, ], "outputs": [ - 
{"id": "output_from_http", "transmissionMode": "reference"}, - {"id": "output_from_s3", "transmissionMode": "reference"}, + {"id": "output_from_http", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}, + {"id": "output_from_s3", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}, ] } - with contextlib.ExitStack() as stack_proc: - for process in mocked_execute_process(): - stack_proc.enter_context(process) + with contextlib.ExitStack() as stack_exec: + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) proc_url = "/processes/{}/jobs".format(self._testMethodName) resp = mocked_sub_requests(self.app, "post_json", proc_url, - params=exec_body, headers=self.json_headers, only_local=True) + data=exec_body, headers=self.json_headers, only_local=True) assert resp.status_code in [200, 201], "Failed with: [{}]\nReason:\n{}".format(resp.status_code, resp.json) status_url = resp.json["location"] job_id = resp.json["jobID"] - monitor_timeout = 60 - time.sleep(1) # small delay to ensure process started - while monitor_timeout >= 0: - resp = self.app.get(status_url, headers=self.json_headers) - assert resp.status_code == 200 - assert resp.json["status"] in [STATUS_RUNNING, STATUS_SUCCEEDED] - if resp.json["status"] == STATUS_SUCCEEDED: - break - time.sleep(2) - assert resp.json["status"] == STATUS_SUCCEEDED - resp = self.app.get("{}/result".format(status_url), headers=self.json_headers) - assert resp.status_code == 200 + result = self.monitor_job(status_url) # check that outputs are S3 bucket references - output_values = {out["id"]: get_any_value(out) for out in resp.json["outputs"]} + output_values = {out["id"]: get_any_value(out) for out in result["outputs"]} output_bucket = self.settings["weaver.wps_output_s3_bucket"] wps_uuid = self.job_store.fetch_by_id(job_id).wps_id for out_key, out_file in [("output_from_s3", input_file_s3), ("output_from_http", input_file_http)]: diff --git a/tests/functional/utils.py b/tests/functional/utils.py new file mode 100644 index 000000000..76b525dc7 --- /dev/null +++ b/tests/functional/utils.py @@ -0,0 +1,110 @@ +import time +import unittest +from copy import deepcopy +from typing import TYPE_CHECKING + +import pyramid.testing +import pytest + +from tests.utils import ( + get_test_weaver_app, + get_test_weaver_config, + mocked_sub_requests, + setup_config_with_celery, + setup_config_with_mongodb, + setup_config_with_pywps, + setup_mongodb_jobstore, + setup_mongodb_processstore +) +from weaver.database import get_db +from weaver.formats import CONTENT_TYPE_APP_JSON +from weaver.status import STATUS_RUNNING, STATUS_SUCCEEDED +from weaver.visibility import VISIBILITY_PUBLIC + +if TYPE_CHECKING: + from typing import Optional + from weaver.typedefs import JSON, SettingsType + + +@pytest.mark.functional +class WpsPackageConfigBase(unittest.TestCase): + json_headers = {"Accept": CONTENT_TYPE_APP_JSON, "Content-Type": CONTENT_TYPE_APP_JSON} + monitor_timeout = 30 + monitor_delta = 1 + settings = {} # type: SettingsType + + def __init__(self, *args, **kwargs): + # won't run this as a test suite, only its derived classes + setattr(self, "__test__", self is WpsPackageConfigBase) + super(WpsPackageConfigBase, self).__init__(*args, **kwargs) + + @classmethod + def setUpClass(cls): + config = setup_config_with_mongodb(settings=cls.settings) + config = setup_config_with_pywps(config) + config = setup_config_with_celery(config) + config = get_test_weaver_config(config) + cls.process_store = setup_mongodb_processstore(config) # force reset + 
cls.job_store = setup_mongodb_jobstore(config) + cls.app = get_test_weaver_app(config=config, settings=cls.settings) + cls.db = get_db(config) + cls.config = config + cls.settings.update(cls.config.registry.settings) # back propagate changes + + @classmethod + def tearDownClass(cls): + pyramid.testing.tearDown() + + @classmethod + def deploy_process(cls, payload): + # type: (JSON) -> JSON + """ + Deploys a process with :paramref:`payload`. + + :returns: resulting tuple of ``(process-description, package)`` JSON responses. + """ + resp = mocked_sub_requests(cls.app, "post_json", "/processes", data=payload, headers=cls.json_headers) + # TODO: status should be 201 when properly modified to match API conformance + assert resp.status_code == 200, "Expected successful deployment.\nError:\n{}".format(resp.text) + path = resp.json["processSummary"]["processDescriptionURL"] + body = {"value": VISIBILITY_PUBLIC} + resp = cls.app.put_json("{}/visibility".format(path), params=body, headers=cls.json_headers) + assert resp.status_code == 200, "Expected successful visibility.\nError:\n{}".format(resp.text) + info = [] + for pkg_url in [path, "{}/package".format(path)]: + resp = cls.app.get(pkg_url, headers=cls.json_headers) + assert resp.status_code == 200 + info.append(deepcopy(resp.json)) + return info + + def monitor_job(self, status_url, timeout=None, delta=None): + # type: (str, Optional[int], Optional[int]) -> JSON + """ + Job polling of status URL until completion or timeout. + + :return: result of the successful job + :raises AssertionError: when job fails or took too long to complete. + """ + def check_job_status(_resp, running=False): + body = _resp.json + statuses = [STATUS_RUNNING, STATUS_SUCCEEDED] if running else [STATUS_SUCCEEDED] + assert _resp.status_code == 200, "Process execution failed. Response body:\n{}".format(body) + assert body["status"] in statuses, "Error job info:\n{}".format(body) + return body["status"] == STATUS_SUCCEEDED + + time.sleep(1) # small delay to ensure process execution had a change to start before monitoring + left = timeout or self.monitor_timeout + delta = delta or self.monitor_delta + once = True + resp = None + while left >= 0 or once: + resp = self.app.get(status_url, headers=self.json_headers) + if check_job_status(resp, running=True): + break + time.sleep(delta) + once = False + left -= delta + check_job_status(resp) + resp = self.app.get("{}/result".format(status_url), headers=self.json_headers) + assert resp.status_code == 200, "Error job info:\n{}".format(resp.json) + return resp.json diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py new file mode 100644 index 000000000..0799b47f3 --- /dev/null +++ b/tests/processes/test_convert.py @@ -0,0 +1,411 @@ +""" +Unit tests of functions within :mod:`weaver.processes.convert`. 
+""" +from copy import deepcopy + +import pytest +from pywps.inout.formats import Format +from pywps.inout.literaltypes import AnyValue +from pywps.validator.mode import MODE + +from weaver.exceptions import PackageTypeError +from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF, CONTENT_TYPE_APP_XML, CONTENT_TYPE_TEXT_PLAIN +from weaver.processes.constants import WPS_LITERAL +from weaver.processes.convert import _are_different_and_set # noqa: W0212 +from weaver.processes.convert import ( + DEFAULT_FORMAT, + is_cwl_array_type, + is_cwl_enum_type, + json2wps_datatype, + merge_io_formats +) +from weaver.utils import null + + +class ObjectWithEqProperty(object): + """Dummy object for some test evaluations.""" + _prop = "prop" + + def __init__(self, prop="prop"): + self._prop = prop + + @property + def some_property(self): + return self._prop + + def __eq__(self, other): + return self.some_property == other.some_property + + +def test_are_different_and_set_both_set(): + assert _are_different_and_set(1, 2) is True + assert _are_different_and_set(1, 1) is False + assert _are_different_and_set({"a": 1}, {"a": 2}) is True + assert _are_different_and_set({"a": 1}, {"a": 1}) is False + assert _are_different_and_set({"a": 1, "b": 2}, {"a": 1}) is True + assert _are_different_and_set(ObjectWithEqProperty(), ObjectWithEqProperty()) is False + assert _are_different_and_set(ObjectWithEqProperty("a"), ObjectWithEqProperty("b")) is True + + +def test_are_different_and_set_similar_str_formats(): + assert _are_different_and_set(b"something", u"something") is False + assert _are_different_and_set(u"something", u"something") is False + assert _are_different_and_set(b"something", b"something") is False + assert _are_different_and_set(b"something", u"else") is True + assert _are_different_and_set(u"something", u"else") is True + assert _are_different_and_set(b"something", b"else") is True + + +def test_are_different_and_set_both_null(): + assert _are_different_and_set(null, null) is False + + +def test_are_different_and_set_single_null(): + """ + Tests that equality check is correctly handled when a single item amongst the two is ``null``. + This was identified as problematic is case when the checked and set item implements ``__eq__`` and expects a + property to exist, which is not the case for the second item being ``null``. 
+ """ + + item = ObjectWithEqProperty() + assert _are_different_and_set(item, null) is False + assert _are_different_and_set(null, item) is False + + +def test_json2wps_datatype(): + # pylint: disable=C0326,bad-whitespace + test_cases = [ + ("float", {"type": WPS_LITERAL, "data_type": "float"}), # noqa: E241 + ("integer", {"type": WPS_LITERAL, "data_type": "integer"}), # noqa: E241 + ("integer", {"type": WPS_LITERAL, "data_type": "int"}), # noqa: E241 + ("boolean", {"type": WPS_LITERAL, "data_type": "boolean"}), # noqa: E241 + ("boolean", {"type": WPS_LITERAL, "data_type": "bool"}), # noqa: E241 + ("string", {"type": WPS_LITERAL, "data_type": "string"}), # noqa: E241 + ("float", {"type": WPS_LITERAL, "default": 1.0}), # noqa: E241 + ("integer", {"type": WPS_LITERAL, "default": 1}), # noqa: E241 + ("boolean", {"type": WPS_LITERAL, "default": True}), # noqa: E241 + ("string", {"type": WPS_LITERAL, "default": "1"}), # noqa: E241 + ("float", {"type": WPS_LITERAL, "supported_values": [1.0, 2.0]}), # noqa: E241 + ("integer", {"type": WPS_LITERAL, "supported_values": [1, 2]}), # noqa: E241 + ("boolean", {"type": WPS_LITERAL, "supported_values": [True, False]}), # noqa: E241 + ("string", {"type": WPS_LITERAL, "supported_values": ["yes", "no"]}), # noqa: E241 + ("float", {"data_type": "float"}), # noqa: E241 + ("integer", {"data_type": "integer"}), # noqa: E241 + ("integer", {"data_type": "int"}), # noqa: E241 + ("boolean", {"data_type": "boolean"}), # noqa: E241 + ("boolean", {"data_type": "bool"}), # noqa: E241 + ("string", {"data_type": "string"}), # noqa: E241 + ] + + for expect, test_io in test_cases: + copy_io = deepcopy(test_io) # can get modified by function + assert json2wps_datatype(test_io) == expect, "Failed for [{}]".format(copy_io) + + +def testis_cwl_array_type_explicit_invalid_item(): + io_info = { + "name": "test", + "type": { + "type": "array", + "items": "unknown-type-item" + } + } + with pytest.raises(PackageTypeError): + is_cwl_array_type(io_info) + + +def testis_cwl_array_type_shorthand_invalid_item(): + """ + In case of shorthand syntax, because type is only a string, it shouldn't raise. + Type is returned as is and value validation is left to later calls. 
+ """ + io_info = { + "name": "test", + "type": "unknown[]" + } + try: + res = is_cwl_array_type(io_info) + assert res[0] is False + assert res[1] == "unknown[]" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + except PackageTypeError: + pytest.fail("should not raise an error in this case") + + +def testis_cwl_array_type_not_array(): + io_info = { + "name": "test", + "type": "float", + } + res = is_cwl_array_type(io_info) + assert res[0] is False + assert res[1] == "float" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def testis_cwl_array_type_simple_enum(): + io_info = { + "name": "test", + "type": "enum", + "symbols": ["a", "b", "c"] + } + res = is_cwl_array_type(io_info) + assert res[0] is False + assert res[1] == "enum" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def testis_cwl_array_type_explicit_base(): + io_info = { + "name": "test", + "type": { + "type": "array", + "items": "string" + } + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def testis_cwl_array_type_explicit_enum(): + io_info = { + "name": "test", + "type": { + "type": "array", + "items": { + "type": "enum", + "symbols": ["a", "b", "c"] + } + } + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.SIMPLE + assert res[3] == ["a", "b", "c"] + + +def testis_cwl_array_type_shorthand_base(): + io_info = { + "name": "test", + "type": "string[]", + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def testis_cwl_array_type_shorthand_enum(): + io_info = { + "name": "test", + "type": "enum[]", + "symbols": ["a", "b", "c"] + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.SIMPLE + assert res[3] == ["a", "b", "c"] + + +def testis_cwl_array_type_explicit_optional_not_array(): + io_info = { + "name": "test", + "type": ["null", "float"], + } + res = is_cwl_array_type(io_info) + assert res[0] is False + assert res[1] == "float" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def testis_cwl_array_type_explicit_optional_simple_enum(): + io_info = { + "name": "test", + "type": ["null", "enum"], + "symbols": ["a", "b", "c"] + } + res = is_cwl_array_type(io_info) + assert res[0] is False + assert res[1] == "enum" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def testis_cwl_array_type_explicit_optional_explicit_base(): + io_info = { + "name": "test", + "type": [ + "null", + {"type": "array", "items": "string"} + ] + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def testis_cwl_array_type_explicit_optional_explicit_enum(): + io_info = { + "name": "test", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "enum", + "symbols": ["a", "b", "c"] + } + } + ] + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.SIMPLE + assert res[3] == ["a", "b", "c"] + + +def testis_cwl_array_type_explicit_optional_shorthand_base(): + io_info = { + "name": "test", + "type": ["null", "string[]"] + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.NONE + assert res[3] == AnyValue + + +def 
testis_cwl_array_type_explicit_optional_shorthand_enum(): + io_info = { + "name": "test", + "type": ["null", "enum[]"], + "symbols": ["a", "b", "c"] + } + res = is_cwl_array_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.SIMPLE + assert res[3] == ["a", "b", "c"] + + +def testis_cwl_enum_type_string(): + io_info = { + "name": "test", + "type": { + "type": "enum", + "symbols": ["a", "b", "c"] + } + } + res = is_cwl_enum_type(io_info) + assert res[0] is True + assert res[1] == "string" + assert res[2] == MODE.SIMPLE + assert res[3] == ["a", "b", "c"] + + +def testis_cwl_enum_type_float(): + io_info = { + "name": "test", + "type": { + "type": "enum", + "symbols": [1.9, 2.8, 3.7] + } + } + res = is_cwl_enum_type(io_info) + assert res[0] is True + assert res[1] == "float" + assert res[2] == MODE.SIMPLE + assert res[3] == [1.9, 2.8, 3.7] + + +def testis_cwl_enum_type_int(): + io_info = { + "name": "test", + "type": { + "type": "enum", + "symbols": [1, 2, 3] + } + } + res = is_cwl_enum_type(io_info) + assert res[0] is True + assert res[1] == "int" + assert res[2] == MODE.SIMPLE + assert res[3] == [1, 2, 3] + + +def assert_formats_equal_any_order(format_result, format_expect): + assert len(format_result) == len(format_expect), "Expected formats sizes mismatch" + for r_fmt in format_result: + for e_fmt in format_expect: + if r_fmt.json == e_fmt.json: + format_expect.remove(e_fmt) + break + assert not format_expect, "Not all expected formats matched {}".format([fmt.json for fmt in format_expect]) + + +def test_merge_io_formats_no_wps(): + wps_fmt = [] + cwl_fmt = [DEFAULT_FORMAT] + res_fmt = merge_io_formats(wps_fmt, cwl_fmt) + assert isinstance(res_fmt, list) + assert len(res_fmt) == 1 + assert res_fmt[0] is DEFAULT_FORMAT + + +def test_merge_io_formats_with_wps_and_default_cwl(): + wps_fmt = [Format(CONTENT_TYPE_APP_NETCDF)] + cwl_fmt = [DEFAULT_FORMAT] + res_fmt = merge_io_formats(wps_fmt, cwl_fmt) + assert isinstance(res_fmt, list) + assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_NETCDF)]) + + +def test_merge_io_formats_both_wps_and_cwl(): + wps_fmt = [Format(CONTENT_TYPE_APP_NETCDF)] + cwl_fmt = [Format(CONTENT_TYPE_APP_JSON)] + res_fmt = merge_io_formats(wps_fmt, cwl_fmt) + assert isinstance(res_fmt, list) + assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_NETCDF), Format(CONTENT_TYPE_APP_JSON)]) + + +def test_merge_io_formats_wps_complements_cwl(): + wps_fmt = [Format(CONTENT_TYPE_APP_JSON, encoding="utf-8")] + cwl_fmt = [Format(CONTENT_TYPE_APP_JSON)] + res_fmt = merge_io_formats(wps_fmt, cwl_fmt) + assert isinstance(res_fmt, list) + assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_JSON, encoding="utf-8")]) + + +def test_merge_io_formats_wps_overlaps_cwl(): + wps_fmt = [ + Format(CONTENT_TYPE_APP_JSON, encoding="utf-8"), # complements CWL details + Format(CONTENT_TYPE_APP_NETCDF), # duplicated in CWL (but different index) + Format(CONTENT_TYPE_TEXT_PLAIN) # extra (but not default) + ] + cwl_fmt = [ + Format(CONTENT_TYPE_APP_JSON), # overridden by WPS version + Format(CONTENT_TYPE_APP_XML), # extra preserved + Format(CONTENT_TYPE_APP_NETCDF), # duplicated with WPS, merged + ] + res_fmt = merge_io_formats(wps_fmt, cwl_fmt) + assert isinstance(res_fmt, list) + assert_formats_equal_any_order(res_fmt, [ + Format(CONTENT_TYPE_APP_JSON, encoding="utf-8"), + Format(CONTENT_TYPE_APP_NETCDF), + Format(CONTENT_TYPE_APP_XML), + Format(CONTENT_TYPE_TEXT_PLAIN), + ]) diff --git a/tests/processes/test_wps_package.py 
b/tests/processes/test_wps_package.py index 879971f41..4240770f6 100644 --- a/tests/processes/test_wps_package.py +++ b/tests/processes/test_wps_package.py @@ -8,75 +8,12 @@ from collections import OrderedDict from copy import deepcopy -import pytest from pytest import fail from pywps.app import WPSRequest -from pywps.inout.formats import Format -from pywps.inout.literaltypes import AnyValue -from pywps.validator.mode import MODE from weaver.datatype import Process -from weaver.exceptions import PackageTypeError -from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF, CONTENT_TYPE_APP_XML, CONTENT_TYPE_TEXT_PLAIN -from weaver.processes.constants import WPS_LITERAL -from weaver.processes.wps_package import _are_different_and_set # noqa: W0212 from weaver.processes.wps_package import _get_package_ordered_io # noqa: W0212 -from weaver.processes.wps_package import _is_cwl_array_type # noqa: W0212 -from weaver.processes.wps_package import _is_cwl_enum_type # noqa: W0212 -from weaver.processes.wps_package import _json2wps_datatype # noqa: W0212 -from weaver.processes.wps_package import _merge_io_formats # noqa: W0212 -from weaver.processes.wps_package import DEFAULT_FORMAT, WpsPackage -from weaver.utils import null - - -class ObjectWithEqProperty(object): - """Dummy object for some test evaluations.""" - _prop = "prop" - - def __init__(self, prop="prop"): - self._prop = prop - - @property - def some_property(self): - return self._prop - - def __eq__(self, other): - return self.some_property == other.some_property - - -def test_are_different_and_set_both_set(): - assert _are_different_and_set(1, 2) is True - assert _are_different_and_set(1, 1) is False - assert _are_different_and_set({"a": 1}, {"a": 2}) is True - assert _are_different_and_set({"a": 1}, {"a": 1}) is False - assert _are_different_and_set({"a": 1, "b": 2}, {"a": 1}) is True - assert _are_different_and_set(ObjectWithEqProperty(), ObjectWithEqProperty()) is False - assert _are_different_and_set(ObjectWithEqProperty("a"), ObjectWithEqProperty("b")) is True - - -def test_are_different_and_set_similar_str_formats(): - assert _are_different_and_set(b"something", u"something") is False - assert _are_different_and_set(u"something", u"something") is False - assert _are_different_and_set(b"something", b"something") is False - assert _are_different_and_set(b"something", u"else") is True - assert _are_different_and_set(u"something", u"else") is True - assert _are_different_and_set(b"something", b"else") is True - - -def test_are_different_and_set_both_null(): - assert _are_different_and_set(null, null) is False - - -def test_are_different_and_set_single_null(): - """ - Tests that equality check is correctly handled when a single item amongst the two is ``null``. - This was identified as problematic is case when the checked and set item implements ``__eq__`` and expects a - property to exist, which is not the case for the second item being ``null``. 
- """ - - item = ObjectWithEqProperty() - assert _are_different_and_set(item, null) is False - assert _are_different_and_set(null, item) is False +from weaver.processes.wps_package import WpsPackage def test_get_package_ordered_io_with_builtin_dict_and_hints(): @@ -167,345 +104,6 @@ def test_get_package_ordered_io_with_list(): assert result == expected_result -def test_json2wps_datatype(): - # pylint: disable=C0326,bad-whitespace - test_cases = [ - ("float", {"type": WPS_LITERAL, "data_type": "float"}), # noqa: E241 - ("integer", {"type": WPS_LITERAL, "data_type": "integer"}), # noqa: E241 - ("integer", {"type": WPS_LITERAL, "data_type": "int"}), # noqa: E241 - ("boolean", {"type": WPS_LITERAL, "data_type": "boolean"}), # noqa: E241 - ("boolean", {"type": WPS_LITERAL, "data_type": "bool"}), # noqa: E241 - ("string", {"type": WPS_LITERAL, "data_type": "string"}), # noqa: E241 - ("float", {"type": WPS_LITERAL, "default": 1.0}), # noqa: E241 - ("integer", {"type": WPS_LITERAL, "default": 1}), # noqa: E241 - ("boolean", {"type": WPS_LITERAL, "default": True}), # noqa: E241 - ("string", {"type": WPS_LITERAL, "default": "1"}), # noqa: E241 - ("float", {"type": WPS_LITERAL, "supported_values": [1.0, 2.0]}), # noqa: E241 - ("integer", {"type": WPS_LITERAL, "supported_values": [1, 2]}), # noqa: E241 - ("boolean", {"type": WPS_LITERAL, "supported_values": [True, False]}), # noqa: E241 - ("string", {"type": WPS_LITERAL, "supported_values": ["yes", "no"]}), # noqa: E241 - ("float", {"data_type": "float"}), # noqa: E241 - ("integer", {"data_type": "integer"}), # noqa: E241 - ("integer", {"data_type": "int"}), # noqa: E241 - ("boolean", {"data_type": "boolean"}), # noqa: E241 - ("boolean", {"data_type": "bool"}), # noqa: E241 - ("string", {"data_type": "string"}), # noqa: E241 - ] - - for expect, test_io in test_cases: - copy_io = deepcopy(test_io) # can get modified by function - assert _json2wps_datatype(test_io) == expect, "Failed for [{}]".format(copy_io) - - -def test_is_cwl_array_type_explicit_invalid_item(): - io_info = { - "name": "test", - "type": { - "type": "array", - "items": "unknown-type-item" - } - } - with pytest.raises(PackageTypeError): - _is_cwl_array_type(io_info) - - -def test_is_cwl_array_type_shorthand_invalid_item(): - """ - In case of shorthand syntax, because type is only a string, it shouldn't raise. - Type is returned as is and value validation is left to later calls. 
- """ - io_info = { - "name": "test", - "type": "unknown[]" - } - try: - res = _is_cwl_array_type(io_info) - assert res[0] is False - assert res[1] == "unknown[]" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - except PackageTypeError: - pytest.fail("should not raise an error in this case") - - -def test_is_cwl_array_type_not_array(): - io_info = { - "name": "test", - "type": "float", - } - res = _is_cwl_array_type(io_info) - assert res[0] is False - assert res[1] == "float" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def test_is_cwl_array_type_simple_enum(): - io_info = { - "name": "test", - "type": "enum", - "symbols": ["a", "b", "c"] - } - res = _is_cwl_array_type(io_info) - assert res[0] is False - assert res[1] == "enum" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def test_is_cwl_array_type_explicit_base(): - io_info = { - "name": "test", - "type": { - "type": "array", - "items": "string" - } - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def test_is_cwl_array_type_explicit_enum(): - io_info = { - "name": "test", - "type": { - "type": "array", - "items": { - "type": "enum", - "symbols": ["a", "b", "c"] - } - } - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.SIMPLE - assert res[3] == ["a", "b", "c"] - - -def test_is_cwl_array_type_shorthand_base(): - io_info = { - "name": "test", - "type": "string[]", - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def test_is_cwl_array_type_shorthand_enum(): - io_info = { - "name": "test", - "type": "enum[]", - "symbols": ["a", "b", "c"] - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.SIMPLE - assert res[3] == ["a", "b", "c"] - - -def test_is_cwl_array_type_explicit_optional_not_array(): - io_info = { - "name": "test", - "type": ["null", "float"], - } - res = _is_cwl_array_type(io_info) - assert res[0] is False - assert res[1] == "float" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def test_is_cwl_array_type_explicit_optional_simple_enum(): - io_info = { - "name": "test", - "type": ["null", "enum"], - "symbols": ["a", "b", "c"] - } - res = _is_cwl_array_type(io_info) - assert res[0] is False - assert res[1] == "enum" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def test_is_cwl_array_type_explicit_optional_explicit_base(): - io_info = { - "name": "test", - "type": [ - "null", - {"type": "array", "items": "string"} - ] - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def test_is_cwl_array_type_explicit_optional_explicit_enum(): - io_info = { - "name": "test", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "enum", - "symbols": ["a", "b", "c"] - } - } - ] - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.SIMPLE - assert res[3] == ["a", "b", "c"] - - -def test_is_cwl_array_type_explicit_optional_shorthand_base(): - io_info = { - "name": "test", - "type": ["null", "string[]"] - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.NONE - assert res[3] == AnyValue - - -def 
test_is_cwl_array_type_explicit_optional_shorthand_enum(): - io_info = { - "name": "test", - "type": ["null", "enum[]"], - "symbols": ["a", "b", "c"] - } - res = _is_cwl_array_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.SIMPLE - assert res[3] == ["a", "b", "c"] - - -def test_is_cwl_enum_type_string(): - io_info = { - "name": "test", - "type": { - "type": "enum", - "symbols": ["a", "b", "c"] - } - } - res = _is_cwl_enum_type(io_info) - assert res[0] is True - assert res[1] == "string" - assert res[2] == MODE.SIMPLE - assert res[3] == ["a", "b", "c"] - - -def test_is_cwl_enum_type_float(): - io_info = { - "name": "test", - "type": { - "type": "enum", - "symbols": [1.9, 2.8, 3.7] - } - } - res = _is_cwl_enum_type(io_info) - assert res[0] is True - assert res[1] == "float" - assert res[2] == MODE.SIMPLE - assert res[3] == [1.9, 2.8, 3.7] - - -def test_is_cwl_enum_type_int(): - io_info = { - "name": "test", - "type": { - "type": "enum", - "symbols": [1, 2, 3] - } - } - res = _is_cwl_enum_type(io_info) - assert res[0] is True - assert res[1] == "int" - assert res[2] == MODE.SIMPLE - assert res[3] == [1, 2, 3] - - -def assert_formats_equal_any_order(format_result, format_expect): - assert len(format_result) == len(format_expect), "Expected formats sizes mismatch" - for r_fmt in format_result: - for e_fmt in format_expect: - if r_fmt.json == e_fmt.json: - format_expect.remove(e_fmt) - break - assert not format_expect, "Not all expected formats matched {}".format([fmt.json for fmt in format_expect]) - - -def test_merge_io_formats_no_wps(): - wps_fmt = [] - cwl_fmt = [DEFAULT_FORMAT] - res_fmt = _merge_io_formats(wps_fmt, cwl_fmt) - assert isinstance(res_fmt, list) - assert len(res_fmt) == 1 - assert res_fmt[0] is DEFAULT_FORMAT - - -def test_merge_io_formats_with_wps_and_default_cwl(): - wps_fmt = [Format(CONTENT_TYPE_APP_NETCDF)] - cwl_fmt = [DEFAULT_FORMAT] - res_fmt = _merge_io_formats(wps_fmt, cwl_fmt) - assert isinstance(res_fmt, list) - assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_NETCDF)]) - - -def test_merge_io_formats_both_wps_and_cwl(): - wps_fmt = [Format(CONTENT_TYPE_APP_NETCDF)] - cwl_fmt = [Format(CONTENT_TYPE_APP_JSON)] - res_fmt = _merge_io_formats(wps_fmt, cwl_fmt) - assert isinstance(res_fmt, list) - assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_NETCDF), Format(CONTENT_TYPE_APP_JSON)]) - - -def test_merge_io_formats_wps_complements_cwl(): - wps_fmt = [Format(CONTENT_TYPE_APP_JSON, encoding="utf-8")] - cwl_fmt = [Format(CONTENT_TYPE_APP_JSON)] - res_fmt = _merge_io_formats(wps_fmt, cwl_fmt) - assert isinstance(res_fmt, list) - assert_formats_equal_any_order(res_fmt, [Format(CONTENT_TYPE_APP_JSON, encoding="utf-8")]) - - -def test_merge_io_formats_wps_overlaps_cwl(): - wps_fmt = [ - Format(CONTENT_TYPE_APP_JSON, encoding="utf-8"), # complements CWL details - Format(CONTENT_TYPE_APP_NETCDF), # duplicated in CWL (but different index) - Format(CONTENT_TYPE_TEXT_PLAIN) # extra (but not default) - ] - cwl_fmt = [ - Format(CONTENT_TYPE_APP_JSON), # overridden by WPS version - Format(CONTENT_TYPE_APP_XML), # extra preserved - Format(CONTENT_TYPE_APP_NETCDF), # duplicated with WPS, merged - ] - res_fmt = _merge_io_formats(wps_fmt, cwl_fmt) - assert isinstance(res_fmt, list) - assert_formats_equal_any_order(res_fmt, [ - Format(CONTENT_TYPE_APP_JSON, encoding="utf-8"), - Format(CONTENT_TYPE_APP_NETCDF), - Format(CONTENT_TYPE_APP_XML), - Format(CONTENT_TYPE_TEXT_PLAIN), - ]) - - def 
test_stdout_stderr_logging_for_commandline_tool_success(): """ Execute a process and assert that stdout is correctly logged to log file. diff --git a/tests/travis-ci/data_sources.json b/tests/smoke/data_sources.json similarity index 100% rename from tests/travis-ci/data_sources.json rename to tests/smoke/data_sources.json diff --git a/tests/travis-ci/docker-compose.smoke-test.yml b/tests/smoke/docker-compose.smoke-test.yml similarity index 100% rename from tests/travis-ci/docker-compose.smoke-test.yml rename to tests/smoke/docker-compose.smoke-test.yml diff --git a/tests/travis-ci/weaver.env b/tests/smoke/weaver.env similarity index 58% rename from tests/travis-ci/weaver.env rename to tests/smoke/weaver.env index e4dcaa612..dede56d2b 100644 --- a/tests/travis-ci/weaver.env +++ b/tests/smoke/weaver.env @@ -1,3 +1,3 @@ WEAVER_TEST_DB_HOST=127.0.0.1 WEAVER_TEST_DB_PORT=27017 -WEAVER_TEST_DB_NAME=weaver-travis-test +WEAVER_TEST_DB_NAME=weaver-smoke-test diff --git a/tests/travis-ci/wps_processes.yml b/tests/smoke/wps_processes.yml similarity index 100% rename from tests/travis-ci/wps_processes.yml rename to tests/smoke/wps_processes.yml diff --git a/tests/test_formats.py b/tests/test_formats.py index e927a1bb9..f72c5ffcc 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -1,7 +1,6 @@ import os import mock -import six from pyramid.httpexceptions import HTTPOk, HTTPRequestTimeout from pyramid.response import Response from pywps.inout.formats import Format @@ -13,7 +12,8 @@ def test_get_extension(): assert f.get_extension(f.CONTENT_TYPE_APP_JSON) == ".json" # basic assert f.get_extension(f.CONTENT_TYPE_APP_JSON + "; charset=UTF-8") == ".json" # ignore extra parameters - assert f.get_extension(f.CONTENT_TYPE_APP_GEOJSON) == ".geojson" # pywps definition + assert f.get_extension(f.CONTENT_TYPE_APP_GEOJSON) == ".geojson" # pywps <4.4 definition + assert f.get_extension(f.CONTENT_TYPE_APP_VDN_GEOJSON) == ".geojson" # pywps>=4.4 definition assert f.get_extension(f.CONTENT_TYPE_IMAGE_GEOTIFF) == ".tiff" # pywps definition assert f.get_extension("application/x-custom") == ".custom" assert f.get_extension("application/unknown") == ".unknown" @@ -52,7 +52,7 @@ def test_get_cwl_file_format_reference(): for ns, mime_type in tests: res = f.get_cwl_file_format(mime_type, make_reference=True) ns_name, ns_url = list(ns.items())[0] - assert isinstance(res, six.string_types) + assert isinstance(res, str) assert res.startswith(ns_url) tested.remove(ns_name) assert len(tested) == 0, "test did not evaluate every namespace variation" @@ -82,7 +82,7 @@ def test_get_cwl_file_format_retry_attempts(): """Verifies that failing request will not immediately fail the MIME-type validation.""" codes = {"codes": [HTTPOk.code, HTTPRequestTimeout.code]} # note: used in reverse order (pop) - def mock_request_extra(*args, **kwargs): # noqa: E811 + def mock_request_extra(*_, **__): m_resp = Response() m_resp.status_code = codes["codes"].pop() return m_resp @@ -95,10 +95,10 @@ def mock_request_extra(*args, **kwargs): # noqa: E811 def test_get_cwl_file_format_retry_fallback_urlopen(): """Verifies that failing request because of critical error still validate the MIME-type using the fallback.""" - def mock_connect_error(*args, **kwargs): # noqa: E811 + def mock_connect_error(*_, **__): raise ConnectionError() - def mock_urlopen(*args, **kwargs): # noqa: E811 + def mock_urlopen(*_, **__): return HTTPOk() with mock.patch("requests.Session.request", side_effect=mock_connect_error) as mocked_request: diff --git 
a/tests/test_notify.py b/tests/test_notify.py index 0d1c7568d..2594dd0ff 100644 --- a/tests/test_notify.py +++ b/tests/test_notify.py @@ -1,6 +1,6 @@ import pytest -from weaver.wps_restapi.jobs.notify import encrypt_email +from notify import encrypt_email def test_encrypt_email_valid(): diff --git a/tests/test_opensearch.py b/tests/test_opensearch.py index 52b5e0ac6..b2f6d22ce 100644 --- a/tests/test_opensearch.py +++ b/tests/test_opensearch.py @@ -1,18 +1,18 @@ +import contextlib import json import os import unittest from collections import deque from copy import deepcopy from pprint import pformat +from urllib.parse import parse_qsl, urlparse import mock import pytest from pyramid import testing from pyramid.testing import DummyRequest from pywps.inout.inputs import LiteralInput -from six.moves.urllib.parse import parse_qsl, urlparse -from tests.compat import contextlib from tests.utils import setup_mongodb_processstore from weaver.datatype import Process from weaver.processes import opensearch diff --git a/tests/test_utils.py b/tests/test_utils.py index 8ffd4d6eb..bff3e6b92 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,11 +1,13 @@ # pylint: disable=C0103,invalid-name +import contextlib import inspect import json import os import shutil import tempfile from typing import Type +from urllib.parse import urlparse import mock import pytest @@ -22,13 +24,19 @@ from pywps.response.status import WPS_STATUS from requests import Response from requests.exceptions import HTTPError as RequestsHTTPError -from six.moves.urllib.parse import urlparse -from tests.compat import contextlib from tests.utils import mocked_aws_credentials, mocked_aws_s3, mocked_aws_s3_bucket_test_file, mocked_file_response from weaver import status, utils from weaver.utils import _NullType # noqa: W0212 -from weaver.utils import fetch_file, get_request_options, get_ssl_verify_option, make_dirs, null, request_extra +from weaver.utils import ( + fetch_file, + get_path_kvp, + get_request_options, + get_ssl_verify_option, + make_dirs, + null, + request_extra +) def test_null_operators(): @@ -533,3 +541,8 @@ def test_fetch_file_remote_s3_bucket(): assert os.path.isfile(result) with open(result, mode="r") as test_file: assert test_file.read() == test_file_data + + +def test_get_path_kvp(): + res = get_path_kvp("http://localhost", test1="value1", test2=["sub1", "sub2"]) + assert res == "http://localhost?test1=value1&test2=sub1,sub2" diff --git a/tests/utils.py b/tests/utils.py index a970cb5d6..4f2bc934a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,10 +1,12 @@ """ Utility methods for various TestCase setup operations. 
""" +import contextlib import os import tempfile import uuid import warnings +from configparser import ConfigParser from inspect import isclass from typing import TYPE_CHECKING @@ -12,43 +14,38 @@ import mock import moto import pyramid_celery -import six from pyramid import testing from pyramid.config import Configurator from pyramid.httpexceptions import HTTPException, HTTPNotFound, HTTPUnprocessableEntity from pyramid.registry import Registry from requests import Response -from six.moves.configparser import ConfigParser from webtest import TestApp -from tests.compat import contextlib from weaver.config import WEAVER_CONFIGURATION_DEFAULT, WEAVER_DEFAULT_INI_CONFIG, get_weaver_config_file from weaver.database import get_db from weaver.datatype import Service from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_TEXT_XML from weaver.store.mongodb import MongodbJobStore, MongodbProcessStore, MongodbServiceStore -from weaver.utils import get_url_without_query, get_weaver_url, null +from weaver.utils import get_path_kvp, get_url_without_query, get_weaver_url, null from weaver.warning import MissingParameterWarning, UnsupportedOperationWarning -from weaver.wps_restapi.processes.processes import execute_process if TYPE_CHECKING: import botocore.client # noqa - from weaver.typedefs import ( # noqa: F401 - Any, AnyResponseType, AnyStr, Callable, List, Optional, SettingsType, Type, Union - ) + + from weaver.typedefs import Any, AnyResponseType, Callable, List, Optional, SettingsType, Type, Union MOCK_AWS_REGION = "us-central-1" def ignore_warning_regex(func, warning_message_regex, warning_categories=DeprecationWarning): - # type: (Callable, Union[AnyStr, List[AnyStr]], Union[Type[Warning], List[Type[Warning]]]) -> Callable + # type: (Callable, Union[str, List[str]], Union[Type[Warning], List[Type[Warning]]]) -> Callable """Wrapper that eliminates any warning matching ``warning_regex`` during testing logging. **NOTE**: Wrapper should be applied on method (not directly on :class:`unittest.TestCase` as it can disable the whole test suite. 
""" - if isinstance(warning_message_regex, six.string_types): + if isinstance(warning_message_regex, str): warning_message_regex = [warning_message_regex] if not isinstance(warning_message_regex, list): raise NotImplementedError("Argument 'warning_message_regex' must be a string or a list of string.") @@ -80,7 +77,7 @@ def ignore_wps_warnings(func): def get_settings_from_config_ini(config_ini_path=None, ini_section_name="app:main"): - # type: (Optional[AnyStr], AnyStr) -> SettingsType + # type: (Optional[str], str) -> SettingsType parser = ConfigParser() parser.read([get_weaver_config_file(config_ini_path, WEAVER_DEFAULT_INI_CONFIG)]) settings = dict(parser.items(ini_section_name)) @@ -123,11 +120,12 @@ def setup_mongodb_processstore(config=None): # type: (Optional[Configurator]) -> MongodbProcessStore """Setup store using mongodb, will be enforced if not configured properly.""" config = setup_config_with_mongodb(config) - store = get_db(config).get_store(MongodbProcessStore) + db = get_db(config) + store = db.get_store(MongodbProcessStore) store.clear_processes() # store must be recreated after clear because processes are added automatically on __init__ - get_db(config)._stores.pop(MongodbProcessStore.type) # noqa: W0212 - store = get_db(config).get_store(MongodbProcessStore) + db.reset_store(MongodbProcessStore.type) + store = db.get_store(MongodbProcessStore) return store @@ -186,6 +184,9 @@ def get_test_weaver_config(config=None, settings=None): config = setup_config_from_settings(settings=settings) if "weaver.configuration" not in config.registry.settings: config.registry.settings["weaver.configuration"] = WEAVER_CONFIGURATION_DEFAULT + # set default log level for tests to ease debugging failing test cases + if not config.registry.settings.get("weaver.log_level"): + config.registry.settings["weaver.log_level"] = "DEBUG" if "weaver.url" not in config.registry.settings: config.registry.settings["weaver.url"] = "https://localhost" # ignore example config files that would be auto-generated when missing @@ -213,7 +214,7 @@ def get_settings_from_testapp(testapp): def get_setting(env_var_name, app=None, setting_name=None): - # type: (AnyStr, Optional[TestApp], Optional[AnyStr]) -> Any + # type: (str, Optional[TestApp], Optional[str]) -> Any val = os.getenv(env_var_name, null) if val != null: return val @@ -245,7 +246,7 @@ def init_weaver_service(registry): def mocked_file_response(path, url): - # type: (AnyStr, AnyStr) -> Union[Response, HTTPException] + # type: (str, str) -> Union[Response, HTTPException] """ Generates a mocked response from the provided file path, and represented as if coming from the specified URL. @@ -278,7 +279,7 @@ def read(self, chuck_size=None): # noqa: E811 def mocked_sub_requests(app, function, *args, only_local=False, **kwargs): - # type: (TestApp, AnyStr, *Any, bool, **Any) -> AnyResponseType + # type: (TestApp, str, *Any, bool, **Any) -> AnyResponseType """ Executes ``app.function(*args, **kwargs)`` with a mock of every underlying :func:`requests.request` call to relay their execution to the :class:`webTest.TestApp`. @@ -297,6 +298,30 @@ def mocked_sub_requests(app, function, *args, only_local=False, **kwargs): from requests.sessions import Session as RealSession real_request = RealSession.request + def _parse_for_app_req(method, url, **req_kwargs): + """ + WebTest application employs ``params`` instead of ``data``/``json``. + Actual query parameters must be pre-appended to ``url``. 
+ """ + method = method.lower() + url = req_kwargs.pop("base_url", url) + body = req_kwargs.pop("data", None) + query = req_kwargs.pop("query", None) + params = req_kwargs.pop("params", {}) + if query: + url += ("" if query.startswith("?") else "?") + query + elif params: + if isinstance(params, str): + url += ("" if params.startswith("?") else "?") + params + else: + url = get_path_kvp(url, **params) + req_kwargs["params"] = body + # remove unsupported parameters that cannot be passed down to TestApp + for key in ["timeout", "cert", "auth", "ssl_verify", "verify", "language"]: + req_kwargs.pop(key, None) + req = getattr(app, method) + return url, req, req_kwargs + def mocked_app_request(method, url=None, **req_kwargs): """ Request corresponding to :func:`requests.request` that instead gets executed by :class:`webTest.TestApp`, @@ -309,15 +334,9 @@ def mocked_app_request(method, url=None, **req_kwargs): with RealSession() as session: return real_request(session, method, url, **req_kwargs) - method = method.lower() - headers = req_kwargs.get("headers") - req = getattr(app, method) - url = req_kwargs.get("base_url", url) - query = req_kwargs.get("params") - if query: - url = url + "?" + query + url, func, req_kwargs = _parse_for_app_req(method, url, **req_kwargs) if not url.startswith("mock://"): - resp = req(url, params=req_kwargs.get("data"), headers=headers, expect_errors=True) + resp = func(url, expect_errors=True, **req_kwargs) setattr(resp, "content", resp.body) else: path = get_url_without_query(url.replace("mock://", "")) @@ -328,29 +347,33 @@ def mocked_app_request(method, url=None, **req_kwargs): stack.enter_context(mock.patch("requests.request", side_effect=mocked_app_request)) stack.enter_context(mock.patch("requests.Session.request", side_effect=mocked_app_request)) stack.enter_context(mock.patch("requests.sessions.Session.request", side_effect=mocked_app_request)) - request_func = getattr(app, function) + req_url, req_func, kwargs = _parse_for_app_req(function, *args, **kwargs) kwargs.setdefault("expect_errors", True) - return request_func(*args, **kwargs) + return req_func(req_url, **kwargs) def mocked_execute_process(): """ - Provides a mock to call :func:`weaver.wps_restapi.processes.processes.execute_process` safely within - a test employing a :class:`webTest.TestApp` without a running ``Celery`` app. + Provides a mock to call :func:`weaver.processes.execution.execute_process` safely within a test employing + :class:`webTest.TestApp` without a running ``Celery`` app. + This avoids connection error from ``Celery`` during a job execution request. - Bypasses the ``execute_process.delay`` call by directly invoking the ``execute_process``. + Bypasses ``execute_process.delay`` call by directly invoking the ``execute_process``. - **Note**: since ``delay`` and ``Celery`` are bypassed, the process execution becomes blocking (not asynchronous). + .. note:: + Since ``delay`` and ``Celery`` are bypassed, the process execution becomes blocking (not asynchronous). .. seealso:: - :func:`mocked_process_job_runner` to completely skip process execution. - :func:`setup_config_with_celery` """ + from weaver.processes.execution import execute_process as real_execute_process + class MockTask(object): """ - Mocks call ``self.request.id`` in :func:`weaver.wps_restapi.processes.processes.execute_process` and - call ``result.id`` in :func:`weaver.wps_restapi.processes.processes.submit_job_handler`. 
+ Mocks call ``self.request.id`` in :func:`weaver.processes.execution.execute_process` and + call ``result.id`` in :func:`weaver.processes.execution.submit_job_handler`. """ _id = str(uuid.uuid4()) @@ -360,27 +383,27 @@ def id(self): task = MockTask() - def mock_execute_process(job_id, url, headers, notification_email): - execute_process(job_id, url, headers, notification_email) + def mock_execute_process(job_id, url, headers): + real_execute_process(job_id, url, headers) return task return ( - mock.patch("weaver.wps_restapi.processes.processes.execute_process.delay", side_effect=mock_execute_process), + mock.patch("weaver.processes.execution.execute_process.delay", side_effect=mock_execute_process), mock.patch("celery.app.task.Context", return_value=task) ) def mocked_process_job_runner(job_task_id="mocked-job-id"): """ - Provides a mock that will no execute the process execution when call during job creation. + Provides a mock that will bypass execution of the process when called during job submission. .. seealso:: - - :func:`mocked_execute_process` to still execute the process, but without `Celery` connection. + - :func:`mocked_execute_process` to still execute the process, but directly instead of within ``Celery`` worker. """ result = mock.MagicMock() result.id = job_task_id return ( - mock.patch("weaver.wps_restapi.processes.processes.execute_process.delay", return_value=result), + mock.patch("weaver.processes.execution.execute_process.delay", return_value=result), ) @@ -428,7 +451,7 @@ def wrapped(*args, **kwargs): def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test file inside test S3 bucket"): - # type: (AnyStr,AnyStr, AnyStr) -> AnyStr + # type: (str,str, str) -> str """ Generates a test file reference from dummy data that will be uploaded to the specified S3 bucket name using the provided file key. 
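
The hunks above refactor the shared test helpers in ``tests/utils.py``; the sketch below shows how they are typically combined, mirroring patterns that appear later in this diff (``tests/wps_restapi/test_jobs.py`` and ``tests/wps_restapi/test_processes.py``). It is only a minimal illustration, not code from the changeset: ``demo-process``, the execute body, and the exact call signatures of ``setup_config_with_mongodb()`` / ``get_test_weaver_app(config=...)`` are assumptions, since those helpers' full signatures are not shown in this excerpt. ``mocked_execute_process`` patches ``execute_process.delay`` so the job executes synchronously without a ``Celery`` worker, and ``get_path_kvp`` builds the query string that previously required the per-class ``add_params`` helper::

    import contextlib

    from tests.utils import get_test_weaver_app, mocked_execute_process, setup_config_with_mongodb
    from weaver.utils import get_path_kvp
    from weaver.wps_restapi.swagger_definitions import jobs_short_uri


    def test_execute_and_list_jobs_sketch():
        # assumed setup, mirroring the test classes further below;
        # "demo-process" stands in for an already deployed, publicly visible process
        config = setup_config_with_mongodb()
        app = get_test_weaver_app(config=config)
        headers = {"Accept": "application/json", "Content-Type": "application/json"}
        body = {"mode": "async", "inputs": [], "outputs": []}  # placeholder execute body, not a validated schema

        # patch 'execute_process.delay' so the job runs synchronously instead of through a Celery worker
        with contextlib.ExitStack() as stack:
            for patch in mocked_execute_process():
                stack.enter_context(patch)
            resp = app.post_json("/processes/demo-process/jobs", params=body, headers=headers)
            assert resp.status_code == 201

        # get_path_kvp() replaces the removed ad-hoc 'add_params' helper:
        # it produces something like "<path>?detail=true&process=demo-process" (list values joined with commas)
        path = get_path_kvp(jobs_short_uri, detail="true", process="demo-process")
        resp = app.get(path, headers=headers)
        assert resp.status_code == 200

The same ``contextlib.ExitStack`` idiom is reused throughout the updated tests whenever several ``mock.patch`` objects returned by a helper must be active at once.
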
diff --git a/tests/wps/__init__.py b/tests/wps/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/wps/test_utils.py b/tests/wps/test_utils.py new file mode 100644 index 000000000..db6e6927d --- /dev/null +++ b/tests/wps/test_utils.py @@ -0,0 +1,14 @@ +import mock + +from weaver.wps.utils import set_wps_language + + +def test_set_wps_language(): + wps = mock.Mock() + languages = mock.Mock() + wps.languages = languages + languages.default = "en-US" + languages.supported = ["en-US", "fr-CA"] + + set_wps_language(wps, "ru, fr;q=0.5") + assert wps.language == "fr-CA" diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index e6dbcaf50..2283e59e2 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1,17 +1,16 @@ +import contextlib import json import unittest import warnings from collections import OrderedDict -from typing import TYPE_CHECKING, AnyStr, List, Tuple, Union +from typing import TYPE_CHECKING import mock import pyramid.testing import pytest -import six import webtest from owslib.wps import WebProcessingService -from tests.compat import contextlib from tests.utils import ( mocked_process_job_runner, setup_config_with_mongodb, @@ -30,14 +29,17 @@ STATUS_FAILED, STATUS_SUCCEEDED ) +from weaver.utils import get_path_kvp from weaver.visibility import VISIBILITY_PRIVATE, VISIBILITY_PUBLIC from weaver.warning import TimeZoneInfoAlreadySetWarning from weaver.wps_restapi.swagger_definitions import jobs_full_uri, jobs_short_uri, process_jobs_uri if TYPE_CHECKING: - # pylint: disable=W0611,unused-import - from owslib.wps import Process as ProcessOWSWPS # noqa: F401 - from pywps.app import Process as ProcessPyWPS # noqa: F401 + from typing import Iterable, List, Tuple, Union + + from owslib.wps import Process as ProcessOWSWPS + from pywps.app import Process as ProcessPyWPS + # pylint: disable=C0103,invalid-name,E1101,no-member MockPatch = mock._patch # noqa: W0212 @@ -146,7 +148,7 @@ def get_job_request_auth_mock(self, user_id): @staticmethod def get_job_remote_service_mock(processes): - # type: (List[Union[ProcessPyWPS, ProcessOWSWPS]]) -> Tuple[MockPatch] + # type: (List[Union[ProcessPyWPS, ProcessOWSWPS]]) -> Iterable[MockPatch] mock_processes = mock.PropertyMock mock_processes.return_value = processes return tuple([ @@ -157,16 +159,16 @@ def get_job_remote_service_mock(processes): @staticmethod def check_job_format(job): assert isinstance(job, dict) - assert "jobID" in job and isinstance(job["jobID"], six.string_types) - assert "status" in job and isinstance(job["status"], six.string_types) - assert "message" in job and isinstance(job["message"], six.string_types) + assert "jobID" in job and isinstance(job["jobID"], str) + assert "status" in job and isinstance(job["status"], str) + assert "message" in job and isinstance(job["message"], str) assert "percentCompleted" in job and isinstance(job["percentCompleted"], int) - assert "logs" in job and isinstance(job["logs"], six.string_types) + assert "logs" in job and isinstance(job["logs"], str) assert job["status"] in JOB_STATUS_VALUES if job["status"] == STATUS_SUCCEEDED: - assert "result" in job and isinstance(job["result"], six.string_types) + assert "result" in job and isinstance(job["result"], str) elif job["status"] == STATUS_FAILED: - assert "exceptions" in job and isinstance(job["exceptions"], six.string_types) + assert "exceptions" in job and isinstance(job["exceptions"], str) @staticmethod def check_basic_jobs_info(response): @@ -181,7 +183,7 @@ def 
check_basic_jobs_info(response): @staticmethod def check_basic_jobs_grouped_info(response, groups): - if isinstance(groups, six.string_types): + if isinstance(groups, str): groups = [groups] assert response.status_code == 200 assert response.content_type == CONTENT_TYPE_APP_JSON @@ -201,26 +203,22 @@ def check_basic_jobs_grouped_info(response, groups): total += grouped_jobs["count"] assert total == response.json["total"] - @staticmethod - def add_params(path, **kwargs): - return path + "?" + "&".join("{}={}".format(k, v) for k, v in kwargs.items()) - def test_get_jobs_normal_paged(self): resp = self.app.get(jobs_short_uri, headers=self.json_headers) self.check_basic_jobs_info(resp) for job_id in resp.json["jobs"]: - assert isinstance(job_id, six.string_types) + assert isinstance(job_id, str) for detail in ("false", 0, "False", "no", "None", "null", None, ""): - path = self.add_params(jobs_short_uri, detail=detail) + path = get_path_kvp(jobs_short_uri, detail=detail) resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_info(resp) for job_id in resp.json["jobs"]: - assert isinstance(job_id, six.string_types) + assert isinstance(job_id, str) def test_get_jobs_detail_paged(self): for detail in ("true", 1, "True", "yes"): - path = self.add_params(jobs_short_uri, detail=detail) + path = get_path_kvp(jobs_short_uri, detail=detail) resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_info(resp) for job in resp.json["jobs"]: @@ -229,17 +227,17 @@ def test_get_jobs_detail_paged(self): def test_get_jobs_normal_grouped(self): for detail in ("false", 0, "False", "no"): groups = ["process", "service"] - path = self.add_params(jobs_short_uri, detail=detail, groups=",".join(groups)) + path = get_path_kvp(jobs_short_uri, detail=detail, groups=groups) resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_grouped_info(resp, groups=groups) for grouped_jobs in resp.json["groups"]: for job in grouped_jobs["jobs"]: - assert isinstance(job, six.string_types) + assert isinstance(job, str) def test_get_jobs_detail_grouped(self): for detail in ("true", 1, "True", "yes"): groups = ["process", "service"] - path = self.add_params(jobs_short_uri, detail=detail, groups=",".join(groups)) + path = get_path_kvp(jobs_short_uri, detail=detail, groups=groups) resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_grouped_info(resp, groups=groups) for grouped_jobs in resp.json["groups"]: @@ -247,7 +245,7 @@ def test_get_jobs_detail_grouped(self): self.check_job_format(job) def test_get_jobs_valid_grouping_by_process(self): - path = self.add_params(jobs_short_uri, detail="false", groups="process") + path = get_path_kvp(jobs_short_uri, detail="false", groups="process") resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_grouped_info(resp, groups="process") @@ -274,7 +272,7 @@ def test_get_jobs_valid_grouping_by_process(self): pytest.fail("Unknown job grouping 'process' value not expected.") def test_get_jobs_valid_grouping_by_service(self): - path = self.add_params(jobs_short_uri, detail="false", groups="service") + path = get_path_kvp(jobs_short_uri, detail="false", groups="service") resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_grouped_info(resp, groups="service") @@ -324,7 +322,7 @@ def test_get_jobs_by_encrypted_email(self): assert job.notification_email != email and job.notification_email is not None assert int(job.notification_email, 16) != 0 # email should be encrypted with hex string - 
path = self.add_params(jobs_short_uri, detail="true", notification_email=email) + path = get_path_kvp(jobs_short_uri, detail="true", notification_email=email) resp = self.app.get(path, headers=self.json_headers) assert resp.status_code == 200 assert resp.content_type == CONTENT_TYPE_APP_JSON @@ -332,14 +330,14 @@ def test_get_jobs_by_encrypted_email(self): assert resp.json["jobs"][0]["jobID"] == job_id def test_get_jobs_process_in_query_normal(self): - path = self.add_params(jobs_short_uri, process=self.job_info[0].process) + path = get_path_kvp(jobs_short_uri, process=self.job_info[0].process) resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_info(resp) assert self.job_info[0].id in resp.json["jobs"], self.message_with_jobs_mapping("expected in") assert self.job_info[1].id not in resp.json["jobs"], self.message_with_jobs_mapping("expected not in") def test_get_jobs_process_in_query_detail(self): - path = self.add_params(jobs_short_uri, process=self.job_info[0].process, detail="true") + path = get_path_kvp(jobs_short_uri, process=self.job_info[0].process, detail="true") resp = self.app.get(path, headers=self.json_headers) self.check_basic_jobs_info(resp) job_ids = [j["jobID"] for j in resp.json["jobs"]] @@ -368,7 +366,7 @@ def test_get_jobs_process_unknown_in_path(self): assert resp.content_type == CONTENT_TYPE_APP_JSON def test_get_jobs_process_unknown_in_query(self): - path = self.add_params(jobs_short_uri, process="unknown-process-id") + path = get_path_kvp(jobs_short_uri, process="unknown-process-id") resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 404 assert resp.content_type == CONTENT_TYPE_APP_JSON @@ -380,7 +378,7 @@ def test_get_jobs_private_process_unauthorized_in_path(self): assert resp.content_type == CONTENT_TYPE_APP_JSON def test_get_jobs_private_process_not_returned_in_query(self): - path = self.add_params(jobs_short_uri, process=self.process_private.identifier) + path = get_path_kvp(jobs_short_uri, process=self.process_private.identifier) resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 401 assert resp.content_type == CONTENT_TYPE_APP_JSON @@ -392,7 +390,7 @@ def test_get_jobs_service_and_process_unknown_in_path(self): assert resp.content_type == CONTENT_TYPE_APP_JSON def test_get_jobs_service_and_process_unknown_in_query(self): - path = self.add_params(jobs_short_uri, service="unknown-service-id", process="unknown-process-id") + path = get_path_kvp(jobs_short_uri, service="unknown-service-id", process="unknown-process-id") resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 404 assert resp.content_type == CONTENT_TYPE_APP_JSON @@ -404,9 +402,9 @@ def test_get_jobs_private_service_public_process_unauthorized_in_path(self): assert resp.content_type == CONTENT_TYPE_APP_JSON def test_get_jobs_private_service_public_process_unauthorized_in_query(self): - path = self.add_params(jobs_short_uri, - service=self.service_private.name, - process=self.process_public.identifier) + path = get_path_kvp(jobs_short_uri, + service=self.service_private.name, + process=self.process_public.identifier) resp = self.app.get(path, headers=self.json_headers, expect_errors=True) assert resp.status_code == 401 assert resp.content_type == CONTENT_TYPE_APP_JSON @@ -417,9 +415,9 @@ def test_get_jobs_public_service_private_process_unauthorized_in_query(self): it is up to the remote service to hide private processes if the process is 
visible, the a job can be executed and it is automatically considered public """ - path = self.add_params(jobs_short_uri, - service=self.service_public.name, - process=self.process_private.identifier) + path = get_path_kvp(jobs_short_uri, + service=self.service_public.name, + process=self.process_private.identifier) with contextlib.ExitStack() as stack: for runner in self.get_job_remote_service_mock([self.process_private]): # process visible on remote stack.enter_context(runner) @@ -433,9 +431,9 @@ def test_get_jobs_public_service_no_processes(self): it is up to the remote service to hide private processes if the process is invisible, no job should have been executed nor can be fetched """ - path = self.add_params(jobs_short_uri, - service=self.service_public.name, - process=self.process_private.identifier) + path = get_path_kvp(jobs_short_uri, + service=self.service_public.name, + process=self.process_private.identifier) with contextlib.ExitStack() as stack: for job in self.get_job_remote_service_mock([]): # process invisible (not returned by remote) stack.enter_context(job) @@ -457,10 +455,10 @@ def test_get_jobs_public_with_access_and_request_user(self): editor1_private_jobs = list(filter(lambda j: VISIBILITY_PRIVATE in j.access, editor1_all_jobs)) public_jobs = list(filter(lambda j: VISIBILITY_PUBLIC in j.access, self.job_info)) - def filter_process(jobs): + def filter_process(jobs): # type: (Iterable[Job]) -> List[Job] return list(filter(lambda j: j.process == self.process_public.identifier, jobs)) - def filter_service(jobs): + def filter_service(jobs): # type: (Iterable[Job]) -> List[Job] return list(filter(lambda j: j.service == self.service_public.name, jobs)) # test variations of [paths, query, user-id, expected-job-ids] @@ -497,14 +495,15 @@ def filter_service(jobs): (uri_provider_jobs, VISIBILITY_PUBLIC, self.user_editor1_id, filter_service(editor1_public_jobs)), # noqa: E241,E501 (uri_provider_jobs, VISIBILITY_PUBLIC, self.user_admin_id, filter_service(self.job_info)), # noqa: E241,E501 - ] # type: List[Tuple[AnyStr, AnyStr, Union[None, int], List[AnyStr]]] + ] # type: List[Tuple[str, str, Union[None, int], List[Job]]] for i, (path, access, user_id, expected_jobs) in enumerate(path_jobs_user_req_tests): - patches = self.get_job_request_auth_mock(user_id) + self.get_job_remote_service_mock([self.process_public]) with contextlib.ExitStack() as stack: - for patch in patches: + for patch in self.get_job_request_auth_mock(user_id): + stack.enter_context(patch) + for patch in self.get_job_remote_service_mock([self.process_public]): stack.enter_context(patch) - test = self.add_params(path, access=access) if access else path + test = get_path_kvp(path, access=access) if access else path resp = self.app.get(test, headers=self.json_headers) self.check_basic_jobs_info(resp) job_ids = [job.id for job in expected_jobs] diff --git a/tests/wps_restapi/test_processes.py b/tests/wps_restapi/test_processes.py index fe72f6308..45fb622fa 100644 --- a/tests/wps_restapi/test_processes.py +++ b/tests/wps_restapi/test_processes.py @@ -1,18 +1,17 @@ +import contextlib import os import unittest from copy import deepcopy import colander -import mock import pyramid.testing import pytest import responses -import six import webtest -from tests.compat import contextlib from tests.utils import ( get_test_weaver_app, + mocked_execute_process, mocked_process_job_runner, mocked_process_package, setup_config_with_mongodb, @@ -33,9 +32,8 @@ from weaver.status import STATUS_ACCEPTED from weaver.utils import 
fully_qualified_name, ows_context_href from weaver.visibility import VISIBILITY_PRIVATE, VISIBILITY_PUBLIC -from weaver.wps import get_wps_url +from weaver.wps.utils import get_wps_url from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.processes.processes import set_wps_language # simulated remote server with remote processes (mocked with `responses` package) TEST_REMOTE_SERVER_URL = "https://remote-server.com" @@ -155,9 +153,9 @@ def test_get_processes(self): assert resp.content_type == CONTENT_TYPE_APP_JSON assert "processes" in resp.json and isinstance(resp.json["processes"], list) and len(resp.json["processes"]) > 0 for process in resp.json["processes"]: - assert "id" in process and isinstance(process["id"], six.string_types) - assert "title" in process and isinstance(process["title"], six.string_types) - assert "version" in process and isinstance(process["version"], six.string_types) + assert "id" in process and isinstance(process["id"], str) + assert "title" in process and isinstance(process["title"], str) + assert "version" in process and isinstance(process["version"], str) assert "keywords" in process and isinstance(process["keywords"], list) assert "metadata" in process and isinstance(process["metadata"], list) @@ -199,7 +197,7 @@ def test_describe_process_visibility_public(self): def test_describe_process_visibility_private(self): uri = "/processes/{}".format(self.process_private.identifier) resp = self.app.get(uri, headers=self.json_headers, expect_errors=True) - assert resp.status_code == 401 + assert resp.status_code == 403 assert resp.content_type == CONTENT_TYPE_APP_JSON def test_deploy_process_success(self): @@ -290,7 +288,7 @@ def test_deploy_process_default_endpoint_wps1(self): weaver_wps_path = get_wps_url(self.config.registry.settings) process_wps_endpoint = self.process_store.fetch_by_id(process_name).processEndpointWPS1 - assert isinstance(process_wps_endpoint, six.string_types) and len(process_wps_endpoint) + assert isinstance(process_wps_endpoint, str) and len(process_wps_endpoint) assert process_wps_endpoint == weaver_wps_path @staticmethod @@ -444,7 +442,7 @@ def test_deploy_process_WPS3_DescribeProcess_executionUnit(self): def test_delete_process_success(self): uri = "/processes/{}".format(self.process_public.identifier) resp = self.app.delete_json(uri, headers=self.json_headers) - assert resp.status_code == 200 + assert resp.status_code == 200, "Error: {}".format(resp.text) assert resp.content_type == CONTENT_TYPE_APP_JSON assert resp.json["identifier"] == self.process_public.identifier assert isinstance(resp.json["undeploymentDone"], bool) and resp.json["undeploymentDone"] @@ -454,19 +452,19 @@ def test_delete_process_success(self): def test_delete_process_not_accessible(self): uri = "/processes/{}".format(self.process_private.identifier) resp = self.app.delete_json(uri, headers=self.json_headers, expect_errors=True) - assert resp.status_code == 401 + assert resp.status_code == 403, "Error: {}".format(resp.text) assert resp.content_type == CONTENT_TYPE_APP_JSON def test_delete_process_not_found(self): uri = "/processes/{}".format(self.fully_qualified_test_process_name()) resp = self.app.delete_json(uri, headers=self.json_headers, expect_errors=True) - assert resp.status_code == 404 + assert resp.status_code == 404, "Error: {}".format(resp.text) assert resp.content_type == CONTENT_TYPE_APP_JSON def test_delete_process_bad_name(self): uri = "/processes/{}".format(self.fully_qualified_test_process_name() + "...") resp = 
self.app.delete_json(uri, headers=self.json_headers, expect_errors=True) - assert resp.status_code == 400 + assert resp.status_code == 400, "Error: {}".format(resp.text) assert resp.content_type == CONTENT_TYPE_APP_JSON def test_execute_process_success(self): @@ -479,7 +477,7 @@ def test_execute_process_success(self): for exe in mock_execute: stack.enter_context(exe) resp = self.app.post_json(uri, params=data, headers=self.json_headers) - assert resp.status_code == 201 + assert resp.status_code == 201, "Error: {}".format(resp.text) assert resp.content_type == CONTENT_TYPE_APP_JSON assert resp.json["location"].endswith(resp.json["jobID"]) assert resp.headers["Location"] == resp.json["location"] @@ -502,7 +500,7 @@ def test_execute_process_language(self): headers = self.json_headers.copy() headers["Accept-Language"] = "fr-CA" resp = self.app.post_json(uri, params=data, headers=headers) - assert resp.status_code == 201 + assert resp.status_code == 201, "Error: {}".format(resp.text) try: job = self.job_store.fetch_by_id(resp.json["jobID"]) except JobNotFound: @@ -510,16 +508,6 @@ def test_execute_process_language(self): assert job.id == resp.json["jobID"] assert job.accept_language == "fr-CA" - def test_set_wps_language(self): # noqa - wps = mock.Mock() - languages = mock.Mock() - wps.languages = languages - languages.default = "en-US" - languages.supported = ["en-US", "fr-CA"] - - set_wps_language(wps, "ru, fr;q=0.5") - assert wps.language == "fr-CA" - def test_execute_process_no_json_body(self): uri = "/processes/{}/jobs".format(self.process_public.identifier) resp = self.app.post_json(uri, headers=self.json_headers, expect_errors=True) @@ -606,7 +594,10 @@ def test_execute_process_transmission_mode_value_not_supported(self): execute_data = self.get_process_execute_template(fully_qualified_name(self)) execute_data["outputs"][0]["transmissionMode"] = EXECUTE_TRANSMISSION_MODE_VALUE uri = "/processes/{}/jobs".format(self.process_public.identifier) - resp = self.app.post_json(uri, params=execute_data, headers=self.json_headers, expect_errors=True) + with contextlib.ExitStack() as stack_exec: + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) + resp = self.app.post_json(uri, params=execute_data, headers=self.json_headers, expect_errors=True) assert resp.status_code == 501 assert resp.content_type == CONTENT_TYPE_APP_JSON @@ -614,17 +605,20 @@ def test_execute_process_not_visible(self): uri = "/processes/{}/jobs".format(self.process_private.identifier) data = self.get_process_execute_template() resp = self.app.post_json(uri, params=data, headers=self.json_headers, expect_errors=True) - assert resp.status_code == 401 + assert resp.status_code == 403 assert resp.content_type == CONTENT_TYPE_APP_JSON - def test_get_process_visibility_success(self): - for wps_process in [self.process_private, self.process_public]: + def test_get_process_visibility_expected_response(self): + for http_code, wps_process in [(403, self.process_private), (200, self.process_public)]: process = self.process_store.fetch_by_id(wps_process.identifier) uri = "/processes/{}/visibility".format(process.identifier) - resp = self.app.get(uri, headers=self.json_headers) - assert resp.status_code == 200 + resp = self.app.get(uri, headers=self.json_headers, expect_errors=True) + assert resp.status_code == http_code assert resp.content_type == CONTENT_TYPE_APP_JSON - assert resp.json["value"] == process.visibility + if http_code == 200: + assert resp.json["value"] == process.visibility + else: + assert 
"value" not in resp.json def test_get_process_visibility_not_found(self): uri = "/processes/{}/visibility".format(self.fully_qualified_test_process_name()) @@ -639,7 +633,7 @@ def test_set_process_visibility_success(self): # validate cannot be found before resp = self.app.get(uri_describe, headers=self.json_headers, expect_errors=True) - assert resp.status_code == 401 + assert resp.status_code == 403 # make public data = {"value": VISIBILITY_PUBLIC} @@ -662,7 +656,7 @@ def test_set_process_visibility_success(self): # validate cannot be found anymore resp = self.app.get(uri_describe, headers=self.json_headers, expect_errors=True) - assert resp.status_code == 401 + assert resp.status_code == 403 def test_set_process_visibility_bad_formats(self): uri = "/processes/{}/visibility".format(self.process_private.identifier) diff --git a/weaver/__init__.py b/weaver/__init__.py index 740fb037d..9ad4ee1c1 100644 --- a/weaver/__init__.py +++ b/weaver/__init__.py @@ -14,9 +14,6 @@ sys.path.insert(0, WEAVER_ROOT_DIR) sys.path.insert(0, WEAVER_MODULE_DIR) -# provide standard package version location -from __meta__ import __version__ # noqa: E402,F401 # isort:skip # pylint: disable=C0413 - def main(global_config, **settings): import weaver.app @@ -24,5 +21,15 @@ def main(global_config, **settings): def includeme(config): - LOGGER.info("Adding Weaver Package") - config.include("weaver.app") + LOGGER.info("Adding Weaver") + config.include("weaver.config") + config.include("weaver.database") + config.include("weaver.processes") + config.include("weaver.wps") + config.include("weaver.wps_restapi") + config.include("weaver.tweens") + # must be after views includes, + # otherwise can cause sporadic conflicts + config.include("cornice") + config.include("cornice_swagger") + config.include("pyramid_mako") diff --git a/weaver/app.py b/weaver/app.py index 27b0ea9b6..8d236ab84 100644 --- a/weaver/app.py +++ b/weaver/app.py @@ -9,28 +9,20 @@ import yaml from pyramid.config import Configurator +from weaver import __meta__ from weaver.config import WEAVER_DEFAULT_REQUEST_OPTIONS_CONFIG, get_weaver_config_file, get_weaver_configuration from weaver.processes.builtin import register_builtin_processes from weaver.processes.utils import register_wps_processes_from_config -from weaver.utils import get_settings, parse_extra_options +from weaver.utils import get_settings, parse_extra_options, setup_loggers LOGGER = logging.getLogger(__name__) -def includeme(config): - LOGGER.info("Adding Web Application") - config.include("weaver.config") - config.include("weaver.database") - config.include("weaver.wps") - config.include("weaver.wps_restapi") - config.include("weaver.processes") - config.include("weaver.tweens") - - def main(global_config, **settings): """ Creates a Pyramid WSGI application for Weaver. 
""" + setup_loggers(settings) LOGGER.info("Initiating weaver application") # validate and fix configuration @@ -40,10 +32,12 @@ def main(global_config, **settings): # Parse extra_options and add each of them in the settings dict settings.update(parse_extra_options(settings.get("weaver.extra_options", ""))) - # load requests options - req_file = get_weaver_config_file(settings.get("weaver.request_options", ""), WEAVER_DEFAULT_REQUEST_OPTIONS_CONFIG) - with open(req_file, "r") as f: - settings.update({"weaver.request_options": yaml.safe_load(f)}) + # load requests options if found, otherwise skip + req_file = get_weaver_config_file(settings.get("weaver.request_options", ""), + WEAVER_DEFAULT_REQUEST_OPTIONS_CONFIG, generate_default_from_example=False) + if req_file: + with open(req_file, "r") as f: + settings.update({"weaver.request_options": yaml.safe_load(f)}) local_config = Configurator(settings=settings) if global_config.get("__file__") is not None: diff --git a/weaver/config.py b/weaver/config.py index 4d66ceeb5..a2bb1a047 100644 --- a/weaver/config.py +++ b/weaver/config.py @@ -9,7 +9,7 @@ from weaver.utils import get_settings if TYPE_CHECKING: - from weaver.typedefs import AnyStr, AnySettingsContainer # noqa: F401 + from weaver.typedefs import AnySettingsContainer LOGGER = logging.getLogger(__name__) @@ -23,7 +23,7 @@ ]) WEAVER_DEFAULT_INI_CONFIG = "weaver.ini" -WEAVER_DEFAULT_DATA_SOURCES_CONFIG = "data_sources.json" +WEAVER_DEFAULT_DATA_SOURCES_CONFIG = "data_sources.yml" WEAVER_DEFAULT_REQUEST_OPTIONS_CONFIG = "request_options.yml" WEAVER_DEFAULT_WPS_PROCESSES_CONFIG = "wps_processes.yml" WEAVER_DEFAULT_CONFIGS = frozenset([ @@ -35,7 +35,7 @@ def get_weaver_configuration(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str """Obtains the defined operation configuration mode. :returns: one value amongst :py:data:`weaver.config.WEAVER_CONFIGURATIONS`. @@ -52,7 +52,7 @@ def get_weaver_configuration(container): def get_weaver_config_file(file_path, default_config_file, generate_default_from_example=True): - # type: (AnyStr, AnyStr, bool) -> AnyStr + # type: (str, str, bool) -> str """Validates that the specified configuration file can be found, or falls back to the default one. Handles 'relative' paths for settings in ``WEAVER_DEFAULT_INI_CONFIG`` referring to other configuration files. @@ -66,7 +66,7 @@ def get_weaver_config_file(file_path, default_config_file, generate_default_from :param file_path: path to a configuration file (can be relative if resolvable or matching a default file name) :param default_config_file: one of :py:data:`WEAVER_DEFAULT_CONFIGS`. :param generate_default_from_example: enable fallback copy of default configuration file from corresponding example. - :returns: absolue path of the resolved file. + :returns: absolute path of the resolved file. 
""" if default_config_file not in WEAVER_DEFAULT_CONFIGS: raise ValueError("Invalid default configuration file [{}] is not one of {}" diff --git a/weaver/database/__init__.py b/weaver/database/__init__.py index 1cd255cb6..c695a6834 100644 --- a/weaver/database/__init__.py +++ b/weaver/database/__init__.py @@ -1,6 +1,8 @@ import logging from typing import TYPE_CHECKING +from pyramid.registry import Registry +from pyramid.request import Request from pyramid.settings import asbool from weaver.database.mongodb import MongoDatabase @@ -8,15 +10,25 @@ LOGGER = logging.getLogger(__name__) if TYPE_CHECKING: - from weaver.typedefs import AnyDatabaseContainer # noqa: F401 + from weaver.typedefs import AnySettingsContainer def get_db(container, reset_connection=False): - # type: (AnyDatabaseContainer, bool) -> MongoDatabase - registry = get_registry(container) + # type: (AnySettingsContainer, bool) -> MongoDatabase + """ + Obtains the database connection from configured application settings. + + If :paramref:`reset_connection` is ``True``, the :paramref:`container` must be the application :class:`Registry` or + any container that can retrieve it to accomplish reference reset. Otherwise, any settings container can be provided. + """ + registry = get_registry(container, nothrow=True) + if not reset_connection and registry and registry.db: + return registry.db + database = MongoDatabase(container) if reset_connection: - registry.db = MongoDatabase(registry, reset_connection=reset_connection) - return registry.db + registry = get_registry(container) + registry.db = database + return database def includeme(config): @@ -26,11 +38,8 @@ def includeme(config): return LOGGER.info("Adding database...") - config.registry.db = MongoDatabase(config.registry) def _add_db(request): - db = request.registry.db - # if db_url.username and db_url.password: - # db.authenticate(db_url.username, db_url.password) - return db + return MongoDatabase(request.registry) + config.add_request_method(_add_db, "db", reify=True) diff --git a/weaver/database/base.py b/weaver/database/base.py index 91440ce63..786b91837 100644 --- a/weaver/database/base.py +++ b/weaver/database/base.py @@ -1,14 +1,61 @@ +import abc from typing import TYPE_CHECKING +from weaver.store.base import StoreInterface + if TYPE_CHECKING: - from weaver.typedefs import AnySettingsContainer + from weaver.typedefs import AnySettingsContainer, JSON, Type, Union + + StoreSelector = Union[Type[StoreInterface], StoreInterface, str] -class DatabaseInterface(object): +class DatabaseInterface(metaclass=abc.ABCMeta): """Return the unique identifier of db type matching settings.""" __slots__ = ["type"] - def __init__(self, container): # noqa: E811 + def __init__(self, _): # type: (AnySettingsContainer) -> None if not self.type: # pylint: disable=E1101,no-member raise NotImplementedError("Database 'type' must be overridden in inheriting class.") + + @staticmethod + def _get_store_type(store_type): + # type: (StoreSelector) -> str + if isinstance(store_type, StoreInterface): + return store_type.type + if isinstance(store_type, type) and issubclass(store_type, StoreInterface): + return store_type.type + if isinstance(store_type, str): + return store_type + raise TypeError("Unsupported store type selector: [{}] ({})".format(store_type, type(store_type))) + + @abc.abstractmethod + def get_store(self, store_type, *store_args, **store_kwargs): + raise NotImplementedError + + @abc.abstractmethod + def reset_store(self, store_type): + # type: (StoreSelector) -> None + raise 
NotImplementedError + + @abc.abstractmethod + def get_session(self): + raise NotImplementedError + + @abc.abstractmethod + def get_information(self): + # type: (...) -> JSON + """ + :returns: {'version': version, 'type': db_type} + """ + raise NotImplementedError + + @abc.abstractmethod + def is_ready(self): + # type: (...) -> bool + raise NotImplementedError + + @abc.abstractmethod + def run_migration(self): + # type: (...) -> None + raise NotImplementedError diff --git a/weaver/database/mongodb.py b/weaver/database/mongodb.py index 62d57ca3f..6aaf9f61b 100644 --- a/weaver/database/mongodb.py +++ b/weaver/database/mongodb.py @@ -17,9 +17,10 @@ from weaver.utils import get_settings if TYPE_CHECKING: - from weaver.typedefs import AnySettingsContainer, JSON # noqa: F401 - from typing import Any, AnyStr, Optional, Union # noqa: F401 - from pymongo.database import Database # noqa: F401 + from typing import Any, Optional, Type, Union + from pymongo.database import Database + from weaver.typedefs import AnySettingsContainer, JSON + from weaver.database.base import StoreSelector # pylint: disable=C0103,invalid-name MongoDB = None # type: Optional[Database] @@ -33,7 +34,16 @@ if TYPE_CHECKING: # pylint: disable=E0601,used-before-assignment - AnyStoreType = Union[MongodbStores] # noqa: F401 + AnyMongodbStore = Union[MongodbStores] + AnyMongodbStoreType = Union[ + StoreSelector, + AnyMongodbStore, + Type[MongodbServiceStore], + Type[MongodbProcessStore], + Type[MongodbJobStore], + Type[MongodbQuoteStore], + Type[MongodbBillStore], + ] class MongoDatabase(DatabaseInterface): @@ -42,19 +52,19 @@ class MongoDatabase(DatabaseInterface): _stores = None type = "mongodb" - def __init__(self, registry, reset_connection=False): - # type: (AnySettingsContainer, bool) -> None - super(MongoDatabase, self).__init__(registry) - self._database = get_mongodb_engine(registry, reset_connection) - self._settings = get_settings(registry) + def __init__(self, container): + # type: (AnySettingsContainer) -> None + super(MongoDatabase, self).__init__(container) + self._database = get_mongodb_engine(container) + self._settings = get_settings(container) self._stores = dict() - def is_ready(self): - # type: (...) -> bool - return self._database is not None and self._settings is not None + def reset_store(self, store_type): + store_type = self._get_store_type(store_type) + return self._stores.pop(store_type, None) def get_store(self, store_type, *store_args, **store_kwargs): - # type: (Union[AnyStr, StoreInterface, MongodbStores], *Any, **Any) -> AnyStoreType + # type: (Union[str, Type[StoreInterface], AnyMongodbStoreType], *Any, **Any) -> AnyMongodbStore """ Retrieve a store from the database. @@ -62,8 +72,7 @@ def get_store(self, store_type, *store_args, **store_kwargs): :param store_args: additional arguments to pass down to the store. :param store_kwargs: additional keyword arguments to pass down to the store. """ - if isinstance(store_type, StoreInterface) or issubclass(store_type, StoreInterface): - store_type = store_type.type + store_type = self._get_store_type(store_type) for store in MongodbStores: if store.type == store_type: @@ -90,33 +99,32 @@ def get_information(self): db_version = result["version_num"] return {"version": db_version, "type": self.type} + def is_ready(self): + # type: (...) -> bool + return self._database is not None and self._settings is not None + def run_migration(self): # type: (...) 
-> None warnings.warn("Not implemented {}.run_migration implementation.".format(self.type)) -def get_mongodb_connection(container, reset_connection=False): - # type: (AnySettingsContainer, bool) -> Database +def get_mongodb_connection(container): + # type: (AnySettingsContainer) -> Database """Obtains the basic database connection from settings.""" - global MongoDB # pylint: disable=W0603,global-statement - if reset_connection: - MongoDB = None - if not MongoDB: - settings = get_settings(container) - settings_default = [("mongodb.host", "localhost"), ("mongodb.port", 27017), ("mongodb.db_name", "weaver")] - for setting, default in settings_default: - if settings.get(setting, None) is None: - warnings.warn("Setting '{}' not defined in registry, using default [{}].".format(setting, default)) - settings[setting] = default - client = pymongo.MongoClient(settings["mongodb.host"], int(settings["mongodb.port"])) - MongoDB = client[settings["mongodb.db_name"]] - return MongoDB - - -def get_mongodb_engine(container, reset_connection=False): - # type: (AnySettingsContainer, bool) -> Database + settings = get_settings(container) + settings_default = [("mongodb.host", "localhost"), ("mongodb.port", 27017), ("mongodb.db_name", "weaver")] + for setting, default in settings_default: + if settings.get(setting, None) is None: + warnings.warn("Setting '{}' not defined in registry, using default [{}].".format(setting, default)) + settings[setting] = default + client = pymongo.MongoClient(settings["mongodb.host"], int(settings["mongodb.port"]), connect=False) + return client[settings["mongodb.db_name"]] + + +def get_mongodb_engine(container): + # type: (AnySettingsContainer) -> Database """Obtains the database with configuration ready for usage.""" - db = get_mongodb_connection(container, reset_connection) + db = get_mongodb_connection(container) db.services.create_index("name", unique=True) db.services.create_index("url", unique=True) db.processes.create_index("identifier", unique=True) diff --git a/weaver/datatype.py b/weaver/datatype.py index 603f9e044..a4e5a921d 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -7,12 +7,14 @@ from logging import ERROR, INFO, Logger, getLevelName, getLogger from typing import TYPE_CHECKING -import six -from dateutil.parser import parse as dt_parse # noqa +import lxml.etree +from dateutil.parser import parse as dt_parse from owslib.wps import WPSException from pywps import Process as ProcessWPS from weaver.exceptions import ProcessInstanceError +from weaver.processes.convert import ows2json_io_FIXME # FIXME: duplicate functions +from weaver.processes.convert import get_field, ows2json_io, wps2json_io from weaver.processes.types import PROCESS_APPLICATION, PROCESS_BUILTIN, PROCESS_TEST, PROCESS_WORKFLOW, PROCESS_WPS from weaver.status import ( JOB_STATUS_CATEGORIES, @@ -29,8 +31,8 @@ from weaver.wps_restapi.utils import get_wps_restapi_base_url if TYPE_CHECKING: - from weaver.typedefs import AnySettingsContainer, Number, CWL, JSON # noqa: F401 - from typing import Any, AnyStr, Dict, List, Optional, Union # noqa: F401 + from weaver.typedefs import AnySettingsContainer, Number, CWL, JSON + from typing import Any, Dict, List, Optional, Union LOGGER = getLogger(__name__) @@ -62,11 +64,11 @@ def __getattr__(self, item): raise AttributeError("Can't get attribute '{}'.".format(item)) def __str__(self): - # type: () -> AnyStr + # type: () -> str return "{0} <{1}>".format(type(self).__name__, self.id) def __repr__(self): - # type: () -> AnyStr + # type: () -> str cls = 
type(self) repr_ = dict.__repr__(self) return "{0}.{1} ({2})".format(cls.__module__, cls.__name__, repr_) @@ -91,7 +93,7 @@ def json(self): raise NotImplementedError("Method 'json' must be defined for JSON request item representation.") def params(self): - # type: () -> Dict[AnyStr, Any] + # type: () -> Dict[str, Any] """ Obtain the internal data representation for storage. @@ -168,19 +170,19 @@ def __init__(self, *args, **kwargs): super(Job, self).__init__(*args, **kwargs) if "task_id" not in self: raise TypeError("Parameter 'task_id' is required for '{}' creation.".format(type(self))) - if not isinstance(self.id, six.string_types): + if not isinstance(self.id, str): raise TypeError("Type 'str' is required for '{}.id'".format(type(self))) def _get_log_msg(self, msg=None): - # type: (Optional[AnyStr]) -> AnyStr + # type: (Optional[str]) -> str if not msg: msg = self.status_message return get_job_log_msg(duration=self.duration_str, progress=self.progress, status=self.status, message=msg) def save_log(self, - errors=None, # type: Optional[Union[AnyStr, List[WPSException]]] + errors=None, # type: Optional[Union[str, List[WPSException]]] logger=None, # type: Optional[Logger] - message=None, # type: Optional[AnyStr] + message=None, # type: Optional[str] level=INFO, # type: int ): # type: (...) -> None """ @@ -201,7 +203,7 @@ def save_log(self, .. note:: The job object is updated with the log but still requires to be pushed to database to actually persist it. """ - if isinstance(errors, six.string_types): + if isinstance(errors, str): log_msg = [(ERROR, self._get_log_msg(message))] self.exceptions.append(errors) elif isinstance(errors, list): @@ -226,7 +228,7 @@ def save_log(self, @property def id(self): - # type: () -> AnyStr + # type: () -> str """Job UUID to retrieve the details from storage.""" job_id = self.get("id") if not job_id: @@ -236,20 +238,20 @@ def id(self): @property def task_id(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] """Reference Task UUID attributed by the ``Celery`` worker that monitors and executes this job.""" return self.get("task_id", None) @task_id.setter def task_id(self, task_id): - # type: (AnyStr) -> None - if not isinstance(task_id, six.string_types): + # type: (str) -> None + if not isinstance(task_id, str): raise TypeError("Type 'str' is required for '{}.task_id'".format(type(self))) self["task_id"] = task_id @property def wps_id(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] """Reference WPS Request/Response UUID attributed by the executed ``PyWPS`` process. This UUID matches the status-location, log and output directory of the WPS process. @@ -263,14 +265,14 @@ def wps_id(self): @wps_id.setter def wps_id(self, wps_id): - # type: (AnyStr) -> None - if not isinstance(wps_id, six.string_types): + # type: (str) -> None + if not isinstance(wps_id, str): raise TypeError("Type 'str' is required for '{}.wps_id'".format(type(self))) self["wps_id"] = wps_id @property def service(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] """Service identifier of the corresponding remote process. .. 
seealso:: @@ -280,14 +282,14 @@ def service(self): @service.setter def service(self, service): - # type: (Optional[AnyStr]) -> None - if not isinstance(service, six.string_types) or service is None: + # type: (Optional[str]) -> None + if not isinstance(service, str) or service is None: raise TypeError("Type 'str' is required for '{}.service'".format(type(self))) self["service"] = service @property def process(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] """Process identifier of the corresponding remote process. .. seealso:: @@ -297,19 +299,19 @@ def process(self): @process.setter def process(self, process): - # type: (Optional[AnyStr]) -> None - if not isinstance(process, six.string_types) or process is None: + # type: (Optional[str]) -> None + if not isinstance(process, str) or process is None: raise TypeError("Type 'str' is required for '{}.process'".format(type(self))) self["process"] = process def _get_inputs(self): - # type: () -> List[Optional[Dict[AnyStr, Any]]] + # type: () -> List[Optional[Dict[str, Any]]] if self.get("inputs") is None: self["inputs"] = list() return self["inputs"] def _set_inputs(self, inputs): - # type: (List[Optional[Dict[AnyStr, Any]]]) -> None + # type: (List[Optional[Dict[str, Any]]]) -> None if not isinstance(inputs, list): raise TypeError("Type 'list' is required for '{}.inputs'".format(type(self))) self["inputs"] = inputs @@ -319,27 +321,27 @@ def _set_inputs(self, inputs): @property def user_id(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("user_id", None) @user_id.setter def user_id(self, user_id): - # type: (Optional[AnyStr]) -> None + # type: (Optional[str]) -> None if not isinstance(user_id, int) or user_id is None: raise TypeError("Type 'int' is required for '{}.user_id'".format(type(self))) self["user_id"] = user_id @property def status(self): - # type: () -> AnyStr + # type: () -> str return self.get("status", STATUS_UNKNOWN) @status.setter def status(self, status): - # type: (AnyStr) -> None + # type: (str) -> None if status == "accepted" and self.status == "running": LOGGER.debug(traceback.extract_stack()) - if not isinstance(status, six.string_types): + if not isinstance(status, str): raise TypeError("Type 'str' is required for '{}.status'".format(type(self))) if status not in JOB_STATUS_VALUES: raise ValueError("Status '{0}' is not valid for '{1}.status', must be one of {2!s}'" @@ -348,51 +350,51 @@ def status(self, status): @property def status_message(self): - # type: () -> AnyStr + # type: () -> str return self.get("status_message", "no message") @status_message.setter def status_message(self, message): - # type: (Optional[AnyStr]) -> None + # type: (Optional[str]) -> None if message is None: return - if not isinstance(message, six.string_types): + if not isinstance(message, str): raise TypeError("Type 'str' is required for '{}.status_message'".format(type(self))) self["status_message"] = message @property def status_location(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("status_location", None) @status_location.setter def status_location(self, location_url): - # type: (Optional[AnyStr]) -> None - if not isinstance(location_url, six.string_types) or location_url is None: + # type: (Optional[str]) -> None + if not isinstance(location_url, str) or location_url is None: raise TypeError("Type 'str' is required for '{}.status_location'".format(type(self))) self["status_location"] = location_url @property def notification_email(self): - # 
type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("notification_email") @notification_email.setter def notification_email(self, email): - # type: (Optional[Union[AnyStr]]) -> None - if not isinstance(email, six.string_types): + # type: (Optional[Union[str]]) -> None + if not isinstance(email, str): raise TypeError("Type 'str' is required for '{}.notification_email'".format(type(self))) self["notification_email"] = email @property def accept_language(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("accept_language") @accept_language.setter def accept_language(self, language): - # type: (Optional[Union[AnyStr]]) -> None - if not isinstance(language, six.string_types): + # type: (Optional[Union[str]]) -> None + if not isinstance(language, str): raise TypeError("Type 'str' is required for '{}.accept_language'".format(type(self))) self["accept_language"] = language @@ -449,7 +451,7 @@ def duration(self): @property def duration_str(self): - # type: () -> AnyStr + # type: () -> str return str(self.duration).split(".")[0] @property @@ -467,13 +469,13 @@ def progress(self, progress): self["progress"] = progress def _get_results(self): - # type: () -> List[Optional[Dict[AnyStr, Any]]] + # type: () -> List[Optional[Dict[str, Any]]] if self.get("results") is None: self["results"] = list() return self["results"] def _set_results(self, results): - # type: (List[Optional[Dict[AnyStr, Any]]]) -> None + # type: (List[Optional[Dict[str, Any]]]) -> None if not isinstance(results, list): raise TypeError("Type 'list' is required for '{}.results'".format(type(self))) self["results"] = results @@ -482,13 +484,13 @@ def _set_results(self, results): results = property(_get_results, _set_results) def _get_exceptions(self): - # type: () -> List[Optional[Dict[AnyStr, AnyStr]]] + # type: () -> List[Optional[Dict[str, str]]] if self.get("exceptions") is None: self["exceptions"] = list() return self["exceptions"] def _set_exceptions(self, exceptions): - # type: (List[Optional[Dict[AnyStr, AnyStr]]]) -> None + # type: (List[Optional[Dict[str, str]]]) -> None if not isinstance(exceptions, list): raise TypeError("Type 'list' is required for '{}.exceptions'".format(type(self))) self["exceptions"] = exceptions @@ -497,13 +499,13 @@ def _set_exceptions(self, exceptions): exceptions = property(_get_exceptions, _set_exceptions) def _get_logs(self): - # type: () -> List[Dict[AnyStr, AnyStr]] + # type: () -> List[Dict[str, str]] if self.get("logs") is None: self["logs"] = list() return self["logs"] def _set_logs(self, logs): - # type: (List[Dict[AnyStr, AnyStr]]) -> None + # type: (List[Dict[str, str]]) -> None if not isinstance(logs, list): raise TypeError("Type 'list' is required for '{}.logs'".format(type(self))) self["logs"] = logs @@ -512,13 +514,13 @@ def _set_logs(self, logs): logs = property(_get_logs, _set_logs) def _get_tags(self): - # type: () -> List[Optional[AnyStr]] + # type: () -> List[Optional[str]] if self.get("tags") is None: self["tags"] = list() return self["tags"] def _set_tags(self, tags): - # type: (List[Optional[AnyStr]]) -> None + # type: (List[Optional[str]]) -> None if not isinstance(tags, list): raise TypeError("Type 'list' is required for '{}.tags'".format(type(self))) self["tags"] = tags @@ -528,15 +530,15 @@ def _set_tags(self, tags): @property def access(self): - # type: () -> AnyStr + # type: () -> str """Job visibility access from execution.""" return self.get("access", VISIBILITY_PRIVATE) @access.setter def access(self, visibility): 
- # type: (AnyStr) -> None + # type: (str) -> None """Job visibility access from execution.""" - if not isinstance(visibility, six.string_types): + if not isinstance(visibility, str): raise TypeError("Type 'str' is required for '{}.access'".format(type(self))) if visibility not in VISIBILITY_VALUES: raise ValueError("Invalid 'visibility' value specified for '{}.access'".format(type(self))) @@ -544,26 +546,30 @@ def access(self, visibility): @property def request(self): - # type: () -> Optional[AnyStr] - """XML request for WPS execution submission as string.""" + # type: () -> Optional[str] + """XML request for WPS execution submission as string (binary).""" return self.get("request", None) @request.setter def request(self, request): - # type: (Optional[AnyStr]) -> None - """XML request for WPS execution submission as string.""" + # type: (Optional[str]) -> None + """XML request for WPS execution submission as string (binary).""" + if isinstance(request, lxml.etree._Element): # noqa + request = lxml.etree.tostring(request) self["request"] = request @property def response(self): - # type: () -> Optional[AnyStr] - """XML status response from WPS execution submission as string.""" + # type: () -> Optional[str] + """XML status response from WPS execution submission as string (binary).""" return self.get("response", None) @response.setter def response(self, response): - # type: (Optional[AnyStr]) -> None - """XML status response from WPS execution submission as string.""" + # type: (Optional[str]) -> None + """XML status response from WPS execution submission as string (binary).""" + if isinstance(response, lxml.etree._Element): # noqa + response = lxml.etree.tostring(response) self["response"] = response def _job_url(self, settings): @@ -602,7 +608,7 @@ def json(self, container=None): # pylint: disable=W0221,arguments-differ return sd.JobStatusInfo().deserialize(job_json) def params(self): - # type: () -> Dict[AnyStr, Any] + # type: () -> Dict[str, Any] return { "id": self.id, "task_id": self.task_id, @@ -650,77 +656,77 @@ def __init__(self, *args, **kwargs): @property def id(self): - # type: () -> AnyStr + # type: () -> str return self["id"] @property def identifier(self): - # type: () -> AnyStr + # type: () -> str return self.id @identifier.setter def identifier(self, value): - # type: (AnyStr) -> None + # type: (str) -> None self["id"] = value @property def title(self): - # type: () -> AnyStr + # type: () -> str return self.get("title", self.id) @property def abstract(self): - # type: () -> AnyStr + # type: () -> str return self.get("abstract", "") @property def keywords(self): - # type: () -> List[AnyStr] + # type: () -> List[str] return self.get("keywords", []) @property def metadata(self): - # type: () -> List[AnyStr] + # type: () -> List[str] return self.get("metadata", []) @property def version(self): - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("version") @property def inputs(self): - # type: () -> Optional[List[Dict[AnyStr, Any]]] + # type: () -> Optional[List[Dict[str, Any]]] return self.get("inputs") @property def outputs(self): - # type: () -> Optional[List[Dict[AnyStr, Any]]] + # type: () -> Optional[List[Dict[str, Any]]] return self.get("outputs") @property def jobControlOptions(self): # noqa: N802 - # type: () -> Optional[List[AnyStr]] + # type: () -> Optional[List[str]] return self.get("jobControlOptions") @property def outputTransmission(self): # noqa: N802 - # type: () -> Optional[List[AnyStr]] + # type: () -> Optional[List[str]] return 
self.get("outputTransmission") @property def processDescriptionURL(self): # noqa: N802 - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("processDescriptionURL") @property def processEndpointWPS1(self): # noqa: N802 - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("processEndpointWPS1") @property def executeEndpoint(self): # noqa: N802 - # type: () -> Optional[AnyStr] + # type: () -> Optional[str] return self.get("executeEndpoint") @property @@ -731,7 +737,7 @@ def owsContext(self): # noqa: N802 # wps, workflow, etc. @property def type(self): - # type: () -> AnyStr + # type: () -> str return self.get("type", PROCESS_WPS) @property @@ -804,13 +810,13 @@ def _decode(obj): @property def visibility(self): - # type: () -> AnyStr + # type: () -> str return self.get("visibility", VISIBILITY_PRIVATE) @visibility.setter def visibility(self, visibility): - # type: (AnyStr) -> None - if not isinstance(visibility, six.string_types): + # type: (str) -> None + if not isinstance(visibility, str): raise TypeError("Type 'str' is required for '{}.visibility'".format(type(self))) if visibility not in VISIBILITY_VALUES: raise ValueError("Status '{0}' is not valid for '{1}.visibility, must be one of {2!s}'" @@ -818,7 +824,7 @@ def visibility(self, visibility): self["visibility"] = visibility def params(self): - # type: () -> Dict[AnyStr, Any] + # type: () -> Dict[str, Any] return { "identifier": self.identifier, "title": self.title, @@ -842,7 +848,7 @@ def params(self): @property def params_wps(self): - # type: () -> Dict[AnyStr, Any] + # type: () -> Dict[str, Any] """Values applicable to PyWPS Process ``__init__``""" return { "identifier": self.identifier, @@ -880,20 +886,48 @@ def process_summary(self): def from_wps(wps_process, **extra_params): # type: (ProcessWPS, **Any) -> Process """ - Converts a PyWPS Process into a :class:`weaver.datatype.Process` using provided parameters. + Converts a :mod:`pywps` Process into a :class:`weaver.datatype.Process` using provided parameters. """ - # import here to avoid circular dependencies - from weaver.processes.wps_package import _wps2json_io # noqa: W0212 - assert isinstance(wps_process, ProcessWPS) process = wps_process.json process_type = getattr(wps_process, "type", wps_process.identifier) process.update({"type": process_type, "package": None, "reference": None, - "inputs": [_wps2json_io(i) for i in wps_process.inputs], - "outputs": [_wps2json_io(o) for o in wps_process.outputs]}) + "inputs": [wps2json_io(i) for i in wps_process.inputs], + "outputs": [wps2json_io(o) for o in wps_process.outputs]}) process.update(**extra_params) return Process(process) + @staticmethod + def from_ows(service, process, container): + # type: (Service, ProcessWPS, AnySettingsContainer) -> Process + """ + Converts a :mod:`owslib.wps` Process to local storage :class:`weaver.datatype.Process`. 
+ """ + wps_url = get_wps_restapi_base_url(container) + if wps_url == service.url: + provider_url = wps_url # local weaver process, using WPS-XML endpoint + else: + provider_url = "{}/providers/{}".format(wps_url, service.get("name")) + describe_process_url = "{}/processes/{}".format(provider_url, process.identifier) + execute_process_url = "{describe_url}/jobs".format(describe_url=describe_process_url) + + # FIXME: should use common function + inputs = [ows2json_io_FIXME(ows_input) for ows_input in get_field(process, "dataInputs", default=[])] + outputs = [ows2json_io(ows_output) for ows_output in get_field(process, "processOutputs", default=[])] + + return Process( + id=process.identifier, + title=get_field(process, "title", default=process.identifier, search_variations=True), + abstract=get_field(process, "abstract", default=None, search_variations=True), + inputs=inputs, + outputs=outputs, + url=describe_process_url, + processEndpointWPS1=service.get("url"), + processDescriptionURL=describe_process_url, + executeEndpoint=execute_process_url, + package=None, + ) + def wps(self): # type: () -> ProcessWPS @@ -928,11 +962,11 @@ def __init__(self, *args, **kwargs): super(Quote, self).__init__(*args, **kwargs) if "process" not in self: raise TypeError("Field 'Quote.process' is required") - if not isinstance(self.get("process"), six.string_types): + if not isinstance(self.get("process"), str): raise ValueError("Field 'Quote.process' must be a string.") if "user" not in self: raise TypeError("Field 'Quote.user' is required") - if not isinstance(self.get("user"), six.string_types): + if not isinstance(self.get("user"), str): raise ValueError("Field 'Quote.user' must be a string.") if "price" not in self: raise TypeError("Field 'Quote.price' is required") @@ -940,7 +974,7 @@ def __init__(self, *args, **kwargs): raise ValueError("Field 'Quote.price' must be a float number.") if "currency" not in self: raise TypeError("Field 'Quote.currency' is required") - if not isinstance(self.get("currency"), six.string_types) or len(self.get("currency")) != 3: + if not isinstance(self.get("currency"), str) or len(self.get("currency")) != 3: raise ValueError("Field 'Quote.currency' must be an ISO-4217 currency string code.") if "created" not in self: self["created"] = now() @@ -1028,7 +1062,7 @@ def steps(self): return self.get("steps", []) def params(self): - # type: () -> Dict[AnyStr, Any] + # type: () -> Dict[str, Any] return { "id": self.id, "price": self.price, @@ -1061,15 +1095,15 @@ def __init__(self, *args, **kwargs): super(Bill, self).__init__(*args, **kwargs) if "quote" not in self: raise TypeError("Field 'Bill.quote' is required") - if not isinstance(self.get("quote"), six.string_types): + if not isinstance(self.get("quote"), str): raise ValueError("Field 'Bill.quote' must be a string.") if "job" not in self: raise TypeError("Field 'Bill.job' is required") - if not isinstance(self.get("job"), six.string_types): + if not isinstance(self.get("job"), str): raise ValueError("Field 'Bill.job' must be a string.") if "user" not in self: raise TypeError("Field 'Bill.user' is required") - if not isinstance(self.get("user"), six.string_types): + if not isinstance(self.get("user"), str): raise ValueError("Field 'Bill.user' must be a string.") if "price" not in self: raise TypeError("Field 'Bill.price' is required") @@ -1077,7 +1111,7 @@ def __init__(self, *args, **kwargs): raise ValueError("Field 'Bill.price' must be a float number.") if "currency" not in self: raise TypeError("Field 'Bill.currency' is 
required") - if not isinstance(self.get("currency"), six.string_types) or len(self.get("currency")) != 3: + if not isinstance(self.get("currency"), str) or len(self.get("currency")) != 3: raise ValueError("Field 'Bill.currency' must be an ISO-4217 currency string code.") if "created" not in self: self["created"] = now() @@ -1134,7 +1168,7 @@ def description(self): return self.get("description") def params(self): - # type: () -> Dict[AnyStr, Any] + # type: () -> Dict[str, Any] return { "id": self.id, "user": self.user, diff --git a/weaver/exceptions.py b/weaver/exceptions.py index 7cae6f172..a4915804c 100644 --- a/weaver/exceptions.py +++ b/weaver/exceptions.py @@ -1,83 +1,119 @@ """ -Errors raised during the weaver flow. +Errors raised during the Weaver flow. + +Some of these error inherit from :class:`weaver.owsexceptions.OWSException` and their derived classes to allow +:mod:`pywps` to automatically understand and render those exception if raised by an underlying :mod:`weaver` operation. """ +import functools import logging -from functools import wraps from typing import TYPE_CHECKING -from pyramid.httpexceptions import HTTPException, HTTPInternalServerError +from pyramid.httpexceptions import ( + HTTPBadRequest, + HTTPException, + HTTPForbidden, + HTTPInternalServerError, + HTTPNotFound, + HTTPUnprocessableEntity +) from pyramid.request import Request as PyramidRequest from pyramid.testing import DummyRequest from requests import Request as RequestsRequest +from werkzeug.wrappers import Request as WerkzeugRequest + +from weaver.formats import CONTENT_TYPE_TEXT_XML +from weaver.owsexceptions import ( + OWSAccessForbidden, + OWSException, + OWSInvalidParameterValue, + OWSMissingParameterValue, + OWSNoApplicableCode, + OWSNotFound +) LOGGER = logging.getLogger(__name__) - if TYPE_CHECKING: - from typing import Any, AnyStr, Callable, Type + from typing import Any, Callable, Type class WeaverException(Exception): """Base class of exceptions defined by :mod:`weaver` package.""" + code = 500 + title = "Internal Server Error" + detail = message = comment = explanation = "Unknown error" -class InvalidIdentifierValue(WeaverException, ValueError): +class InvalidIdentifierValue(WeaverException, ValueError, HTTPBadRequest, OWSInvalidParameterValue): """ - Error indicating that an id to be employed for following operations + Error indicating that an ID to be employed for following operations is not considered as valid to allow further processed or usage. """ + code = 400 + locator = "identifier" + +class MissingIdentifierValue(WeaverException, ValueError, HTTPBadRequest, OWSMissingParameterValue): + """ + Error indicating that an ID to be employed for following operations + was missing and cannot continue further processing or usage. + """ + code = 400 + locator = "identifier" -class ServiceException(WeaverException): + +class ServiceException(WeaverException, OWSException): """Base exception related to a :class:`weaver.datatype.Service`.""" + locator = "service" -class ServiceNotAccessible(ServiceException): +class ServiceNotAccessible(ServiceException, HTTPForbidden, OWSAccessForbidden): """ Error indicating that a WPS service exists but is not visible to retrieve from the storage backend of an instance of :class:`weaver.store.ServiceStore`. """ -class ServiceNotFound(ServiceException): +class ServiceNotFound(ServiceException, HTTPNotFound, OWSNotFound): """ Error indicating that an OWS service could not be read from the storage backend by an instance of :class:`weaver.store.ServiceStore`. 
""" -class ServiceRegistrationError(ServiceException): +class ServiceRegistrationError(ServiceException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that an OWS service could not be registered in the storage backend by an instance of :class:`weaver.store.ServiceStore`. """ -class ProcessException(WeaverException): +class ProcessException(WeaverException, OWSException): """Base exception related to a :class:`weaver.datatype.Process`.""" + locator = "process" -class ProcessNotAccessible(ProcessException): +class ProcessNotAccessible(ProcessException, HTTPForbidden, OWSAccessForbidden): """ Error indicating that a local WPS process exists but is not visible to retrieve from the storage backend of an instance of :class:`weaver.store.ProcessStore`. """ -class ProcessNotFound(ProcessException): +class ProcessNotFound(ProcessException, HTTPNotFound, OWSNotFound): """ Error indicating that a local WPS process could not be read from the storage backend by an instance of :class:`weaver.store.ProcessStore`. """ -class ProcessRegistrationError(ProcessException): +class ProcessRegistrationError(ProcessException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that a WPS process could not be registered in the storage backend by an instance of :class:`weaver.store.ProcessStore`. """ -class ProcessInstanceError(ProcessException): +class ProcessInstanceError(ProcessException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that the process instance passed is not supported with storage backend by an instance of :class:`weaver.store.ProcessStore`. @@ -86,23 +122,24 @@ class ProcessInstanceError(ProcessException): class JobException(WeaverException): """Base exception related to a :class:`weaver.datatype.Job`.""" + locator = "job" -class JobNotFound(JobException): +class JobNotFound(JobException, HTTPNotFound, OWSNotFound): """ Error indicating that a job could not be read from the storage backend by an instance of :class:`weaver.store.JobStore`. """ -class JobRegistrationError(JobException): +class JobRegistrationError(JobException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that a job could not be registered in the storage backend by an instance of :class:`weaver.store.JobStore`. """ -class JobUpdateError(JobException): +class JobUpdateError(JobException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that a job could not be updated in the storage backend by an instance of :class:`weaver.store.JobStore`. @@ -111,37 +148,38 @@ class JobUpdateError(JobException): class PackageException(WeaverException): """Base exception related to a :class:`weaver.processes.wps_package.Package`.""" + locator = "package" -class PackageTypeError(PackageException): +class PackageTypeError(PackageException, HTTPUnprocessableEntity): """ Error indicating that an instance of :class:`weaver.processes.wps_package.WpsPackage` could not properly parse input/output type(s) for package deployment or execution. """ -class PackageRegistrationError(PackageException): +class PackageRegistrationError(PackageException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that an instance of :class:`weaver.processes.wps_package.WpsPackage` could not properly be registered for package deployment because of invalid prerequisite. 
""" -class PackageExecutionError(PackageException): +class PackageExecutionError(PackageException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that an instance of :class:`weaver.processes.wps_package.WpsPackage` could not properly execute the package using provided inputs and package definition. """ -class PackageNotFound(PackageException): +class PackageNotFound(PackageException, HTTPNotFound, OWSNotFound): """ Error indicating that an instance of :class:`weaver.processes.wps_package.WpsPackage` could not properly retrieve the package definition using provided references. """ -class PayloadNotFound(PackageException): +class PayloadNotFound(PackageException, HTTPNotFound, OWSNotFound): """ Error indicating that an instance of :class:`weaver.processes.wps_package.WpsPackage` could not properly retrieve the package deploy payload using provided references. @@ -150,23 +188,24 @@ class PayloadNotFound(PackageException): class QuoteException(WeaverException): """Base exception related to a :class:`weaver.datatype.Quote`.""" + locator = "quote" -class QuoteNotFound(QuoteException): +class QuoteNotFound(QuoteException, HTTPNotFound, OWSNotFound): """ Error indicating that a quote could not be read from the storage backend by an instance of :class:`weaver.store.QuoteStore`. """ -class QuoteRegistrationError(QuoteException): +class QuoteRegistrationError(QuoteException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that a quote could not be registered in the storage backend by an instance of :class:`weaver.store.QuoteStore`. """ -class QuoteInstanceError(QuoteException): +class QuoteInstanceError(QuoteException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that a given object doesn't correspond to an expected instance of :class:`weaver.datatype.Quote`. @@ -175,32 +214,81 @@ class QuoteInstanceError(QuoteException): class BillException(WeaverException): """Base exception related to a :class:`weaver.datatype.Bill`.""" + locator = "bill" -class BillNotFound(BillException): +class BillNotFound(BillException, HTTPNotFound, OWSNotFound): """ Error indicating that a bill could not be read from the storage backend by an instance of :class:`weaver.store.BillStore`. """ -class BillRegistrationError(BillException): +class BillRegistrationError(BillException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that a bill could not be registered in the storage backend by an instance of :class:`weaver.store.BillStore`. """ -class BillInstanceError(BillException): +class BillInstanceError(BillException, HTTPInternalServerError, OWSNoApplicableCode): """ Error indicating that a given object doesn't correspond to an expected instance of :class:`weaver.datatype.Bill`. """ +# FIXME: +# https://github.com/crim-ca/weaver/issues/215 +# define common Exception classes that won't require this type of conversion +def handle_known_exceptions(function): + # type: (Callable[[Any, Any], Any]) -> Callable + """ + Decorator that catches lower-level raised exception that are known to :mod:`weaver` but not by :mod:`pywps`. + + .. seealso:: + :class:`weaver.wps.service.WorkerService` + + Without prior handling of known internal exception, :mod:`pywps` generates by default ``500`` internal server + error response since it doesn't know how to interpret more specific exceptions defined in :mod:`weaver`. + + The decorator simply returns the known exception such that :func:`weaver.tweens.ows_response_tween` can later + handle it appropriately. 
Exceptions derived from :exception:`weaver.owsexceptions.OWSException` are employed since + they themselves have base references to :mod:`pywps.exceptions` classes that the service can understand. + + .. warning:: + In :mod:`pywps`, ``HTTPException`` refers to :exception:`werkzeug.exceptions.HTTPException` while in + :mod:`weaver`, it is :exception:`pyramid.httpexceptions.HTTPException`. They both offer similar interfaces and + functionalities (headers, body, status-code, etc.), but they are not intercepted in the same try/except blocks. + """ + + @functools.wraps(function) + def wrapped(*_, **__): + try: + return function(*_, **__) + except (WeaverException, OWSException, HTTPException) as exc: + if isinstance(exc, WeaverException) and not isinstance(exc, OWSException): + return OWSNoApplicableCode(str(exc), locator="service", content_type=CONTENT_TYPE_TEXT_XML) + if isinstance(exc, HTTPException): + # override default pre-generated plain text content-type such that + # resulting exception generates the response content with requested accept or XML by default + exc.headers.setdefault("Accept", CONTENT_TYPE_TEXT_XML) + exc.headers.pop("Content-Type", None) + if isinstance(exc, HTTPNotFound): + exc = OWSNotFound(str(exc), locator="service", status=exc) + elif isinstance(exc, HTTPForbidden): + exc = OWSAccessForbidden(str(exc), locator="service", status=exc) + else: + exc = OWSException(str(exc), locator="service", status=exc) + return exc # return to avoid raising, raise would be caught by parent pywps call wrapping 'function' + # any other unknown exception by weaver will be raised here as normal, and pywps should repackage them as 500 + + return wrapped + + def log_unhandled_exceptions(logger=LOGGER, message="Unhandled exception occurred.", exception=Exception, force=False, require_http=True, is_request=True): - # type: (logging.Logger, AnyStr, Type[Exception], bool, bool, bool) -> Callable + # type: (logging.Logger, str, Type[Exception], bool, bool, bool) -> Callable """ Decorator that will raise ``exception`` with specified ``message`` if an exception is caught while execution the wrapped function, after logging relevant details about the caught exception with ``logger``. @@ -216,8 +304,6 @@ def log_unhandled_exceptions(logger=LOGGER, message="Unhandled exception occurre :raises exception: if an *unknown* exception was caught (or forced) during the decorated function's execution. :raises Exception: original exception if it is *known*. 
""" - from weaver.owsexceptions import OWSException # avoid circular import error - known_exceptions = [WeaverException] known_http_exceptions = [HTTPException, OWSException] if require_http: @@ -228,12 +314,13 @@ def log_unhandled_exceptions(logger=LOGGER, message="Unhandled exception occurre def wrap(function): # type: (Callable[[Any, Any], Any]) -> Callable - @wraps(function) + @functools.wraps(function) def call(*args, **kwargs): try: # handle input arguments that are extended by various pyramid operations if is_request: - while len(args) and not isinstance(args[0], (RequestsRequest, PyramidRequest, DummyRequest)): + any_request_type = (RequestsRequest, PyramidRequest, DummyRequest, WerkzeugRequest) + while len(args) and not isinstance(args[0], any_request_type): args = args[1:] return function(*args, **kwargs) except Exception as exc: @@ -245,7 +332,7 @@ def call(*args, **kwargs): # unless specified to log any type, raise only known exceptions if force or not isinstance(exc, known_exceptions): setattr(exception, handle, True) # mark as handled - setattr(exception, "error", exc) # make original exception available through new one raised + setattr(exception, "cause", exc) # make original exception available through new one raised logger.exception("%s%s[%r]", message, (" " if message else "") + "Exception: ", exc) raise exception(message) raise exc diff --git a/weaver/formats.py b/weaver/formats.py index 2c4eab7b6..78c0f1db1 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -1,17 +1,17 @@ import os from typing import TYPE_CHECKING +from urllib.error import HTTPError +from urllib.request import urlopen from pyramid.httpexceptions import HTTPNotFound, HTTPOk from pywps.inout.formats import FORMATS, Format from requests.exceptions import ConnectionError -from six.moves.urllib.error import HTTPError -from six.moves.urllib.request import urlopen from weaver.utils import request_extra if TYPE_CHECKING: - from weaver.typedefs import JSON # noqa: F401 - from typing import AnyStr, Dict, Optional, Tuple, Union # noqa: F401 + from weaver.typedefs import JSON + from typing import Dict, Optional, Tuple, Union # Content-Types # MIME-type nomenclature: @@ -29,7 +29,8 @@ CONTENT_TYPE_TEXT_PLAIN = "text/plain" CONTENT_TYPE_APP_PDF = "application/pdf" CONTENT_TYPE_APP_JSON = "application/json" -CONTENT_TYPE_APP_GEOJSON = "application/vnd.geo+json" +CONTENT_TYPE_APP_GEOJSON = "application/geo+json" +CONTENT_TYPE_APP_VDN_GEOJSON = "application/vnd.geo+json" CONTENT_TYPE_APP_XML = "application/xml" CONTENT_TYPE_IMAGE_GEOTIFF = "image/tiff; subtype=geotiff" CONTENT_TYPE_TEXT_XML = "text/xml" @@ -38,17 +39,18 @@ # explicit mime-type to extension when not literally written in item after '/' (excluding 'x-' prefix) _CONTENT_TYPE_EXTENSION_MAPPING = { + CONTENT_TYPE_APP_VDN_GEOJSON: ".geojson", # pywps 4.4 default extension without vdn prefix CONTENT_TYPE_APP_NETCDF: ".nc", CONTENT_TYPE_APP_GZIP: ".gz", CONTENT_TYPE_APP_TAR_GZ: ".tar.gz", CONTENT_TYPE_APP_YAML: ".yml", CONTENT_TYPE_ANY: ".*", # any for glob -} # type: Dict[AnyStr, AnyStr] +} # type: Dict[str, str] # extend with all known pywps formats _CONTENT_TYPE_FORMAT_MAPPING = { # content-types here are fully defined with extra parameters (e.g.: geotiff as subtype of tiff) fmt.mime_type: fmt for _, fmt in FORMATS._asdict().items() # noqa: W0212 -} # type: Dict[AnyStr, Format] +} # type: Dict[str, Format] _CONTENT_TYPE_EXTENSION_MAPPING.update({ ctype: fmt.extension for ctype, fmt in _CONTENT_TYPE_FORMAT_MAPPING.items() # noqa: W0212 }) @@ -81,16 
+83,28 @@ } FORMAT_NAMESPACES = frozenset([IANA_NAMESPACE, EDAM_NAMESPACE]) +# renderers output formats for OpenAPI generation +WPS_VERSION_100 = "1.0.0" +WPS_VERSION_200 = "2.0.0" +OUTPUT_FORMAT_JSON = "json" +OUTPUT_FORMAT_XML = "xml" +OUTPUT_FORMATS = { + WPS_VERSION_100: OUTPUT_FORMAT_XML, + WPS_VERSION_200: OUTPUT_FORMAT_JSON, + CONTENT_TYPE_APP_XML: OUTPUT_FORMAT_XML, + CONTENT_TYPE_APP_JSON: OUTPUT_FORMAT_JSON, +} + def get_format(mime_type): - # type: (AnyStr) -> Format + # type: (str) -> Format """Obtains a :class:`Format` with predefined extension and encoding details from known MIME-types.""" ctype = clean_mime_type_format(mime_type, strip_parameters=True) return _CONTENT_TYPE_FORMAT_MAPPING.get(mime_type, Format(ctype, extension=get_extension(ctype))) def get_extension(mime_type): - # type: (AnyStr) -> AnyStr + # type: (str) -> str """ Retrieves the extension corresponding to :paramref:`mime_type` if explicitly defined, or by parsing it. """ @@ -105,7 +119,7 @@ def get_extension(mime_type): def get_cwl_file_format(mime_type, make_reference=False, must_exist=True, allow_synonym=True): - # type: (AnyStr, bool, bool, bool) -> Union[Tuple[Optional[JSON], Optional[AnyStr]], Optional[AnyStr]] + # type: (str, bool, bool, bool) -> Union[Tuple[Optional[JSON], Optional[str]], Optional[str]] """ Obtains the corresponding `IANA`/`EDAM` ``format`` value to be applied under a `CWL` I/O ``File`` from the :paramref:`mime_type` (`Content-Type` header) using the first matched one. @@ -186,7 +200,7 @@ def _request_extra_various(_mime_type): def clean_mime_type_format(mime_type, suffix_subtype=False, strip_parameters=False): - # type: (AnyStr, bool, bool) -> AnyStr + # type: (str, bool, bool) -> str """ Removes any additional namespace key or URL from :paramref:`mime_type` so that it corresponds to the generic representation (e.g.: ``application/json``) instead of the ``:`` mapping variant used diff --git a/weaver/wps_restapi/jobs/notify.py b/weaver/notify.py similarity index 97% rename from weaver/wps_restapi/jobs/notify.py rename to weaver/notify.py index 82054613d..8522419b8 100644 --- a/weaver/wps_restapi/jobs/notify.py +++ b/weaver/notify.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import binascii import hashlib import logging @@ -7,7 +5,6 @@ import smtplib from typing import TYPE_CHECKING -import six from mako.template import Template from pyramid.settings import asbool @@ -122,7 +119,7 @@ def notify_job_complete(job, to_email_recipient, container): def encrypt_email(email, settings): - if not email or not isinstance(email, six.string_types): + if not email or not isinstance(email, str): raise TypeError("Invalid email: {!s}".format(email)) LOGGER.debug("Job email setup.") try: diff --git a/weaver/owsexceptions.py b/weaver/owsexceptions.py index 12d9fbc77..1ef15c72f 100644 --- a/weaver/owsexceptions.py +++ b/weaver/owsexceptions.py @@ -1,28 +1,31 @@ """ -OWSExceptions are based on pyramid.httpexceptions. +OWSExceptions definitions. -See also: https://github.com/geopython/pywps/blob/master/pywps/exceptions.py +Exceptions are based on :mod:`pyramid.httpexceptions` and :mod:`pywps.exceptions` to handle more cases where they can +be caught whether the running process is via :mod:`weaver` or through :mod:`pywps` service. + +Furthermore, interrelation with :mod:`weaver.exceptions` classes (with base +:exception:`weaver.exceptions.WeaverException`) also employ specific :exception:`OWSExceptions` definitions to provide +specific error details. 
""" import json import warnings from string import Template -from typing import TYPE_CHECKING, AnyStr +from typing import TYPE_CHECKING -import six -from pyramid.compat import text_type from pyramid.httpexceptions import ( HTTPBadRequest, HTTPException, + HTTPForbidden, HTTPInternalServerError, HTTPNotAcceptable, HTTPNotFound, HTTPNotImplemented, - HTTPOk, - HTTPUnauthorized + HTTPOk ) from pyramid.interfaces import IExceptionResponse from pyramid.response import Response -from webob import html_escape as _html_escape +from pywps.exceptions import InvalidParameterValue, MissingParameterValue, NoApplicableCode from webob.acceptparse import create_accept_header from zope.interface import implementer @@ -31,7 +34,7 @@ from weaver.warning import MissingParameterWarning, UnsupportedOperationWarning if TYPE_CHECKING: - from weaver.typedefs import JSON, SettingsType # noqa: F401 + from weaver.typedefs import JSON, SettingsType @implementer(IExceptionResponse) @@ -57,7 +60,7 @@ def __init__(self, detail=None, value=None, **kw): status = kw.pop("status", None) if isinstance(status, type) and issubclass(status, HTTPException): status = status().status - elif isinstance(status, six.class_types): + elif isinstance(status, str): try: int(status.split()[0]) except Exception: @@ -70,6 +73,7 @@ def __init__(self, detail=None, value=None, **kw): Exception.__init__(self, detail) self.message = detail or self.explanation self.content_type = CONTENT_TYPE_TEXT_XML + value = kw.get("locator", value) if value: self.locator = value @@ -83,7 +87,7 @@ def __repr__(self): @staticmethod def json_formatter(status, body, title, environ): # noqa: F811 - # type: (AnyStr, AnyStr, AnyStr, SettingsType) -> JSON + # type: (str, str, str, SettingsType) -> JSON body = clean_json_text_body(body) return {"description": body, "code": int(status.split()[0]), "status": status, "title": title} @@ -106,9 +110,15 @@ class JsonPageTemplate(object): def __init__(self, excobj): self.excobj = excobj - def substitute(self, code, locator, message): # noqa: W0613 - return json.dumps(self.excobj.json_formatter( - status=self.excobj.status, body=message, title=None, environ=environ)) + def substitute(self, code, locator, message): + status = self.excobj.status + data = self.excobj.json_formatter(status=status, body=message, title=None, environ=environ) + data["exception"] = { + "code": code or "", + "locator": locator or "", + "message": message or "", + } + return json.dumps(data) page_template = JsonPageTemplate(self) @@ -117,12 +127,12 @@ def substitute(self, code, locator, message): # noqa: W0613 page_template = self.page_template args = { - "code": _html_escape(self.code), - "locator": _html_escape(self.locator), - "message": _html_escape(self.message or ""), + "code": self.code, + "locator": self.locator, + "message": self.message or "", } page = page_template.substitute(**args) - if isinstance(page, text_type): + if isinstance(page, str): page = page.encode(self.charset if self.charset else "UTF-8") self.app_iter = [page] self.body = page @@ -147,17 +157,19 @@ def __call__(self, environ, start_response): class OWSAccessForbidden(OWSException): - locator = "AccessUnauthorized" - explanation = "Access to this service is unauthorized." + code = "AccessForbidden" + locator = "" + explanation = "Access to this service is forbidden." 
def __init__(self, *args, **kwargs): - kwargs["status"] = HTTPUnauthorized + kwargs["status"] = HTTPForbidden super(OWSAccessForbidden, self).__init__(*args, **kwargs) class OWSNotFound(OWSException): - locator = "NotFound" - explanation = "This resource does not exist." + code = "NotFound" + locator = "" + explanation = "Resource does not exist." def __init__(self, *args, **kwargs): kwargs["status"] = HTTPNotFound @@ -165,27 +177,28 @@ def __init__(self, *args, **kwargs): class OWSNotAcceptable(OWSException): - locator = "NotAcceptable" - explanation = "Access to this service failed." + code = "NotAcceptable" + locator = "" + explanation = "Cannot produce requested Accept format." def __init__(self, *args, **kwargs): kwargs["status"] = HTTPNotAcceptable super(OWSNotAcceptable, self).__init__(*args, **kwargs) -class OWSNoApplicableCode(OWSException): +class OWSNoApplicableCode(OWSException, NoApplicableCode): """WPS Bad Request Exception""" code = "NoApplicableCode" locator = "" - explanation = "Parameter value is missing" + explanation = "Undefined error" def __init__(self, *args, **kwargs): - kwargs["status"] = HTTPBadRequest + kwargs["status"] = HTTPInternalServerError super(OWSNoApplicableCode, self).__init__(*args, **kwargs) warnings.warn(self.message, UnsupportedOperationWarning) -class OWSMissingParameterValue(OWSException): +class OWSMissingParameterValue(OWSException, MissingParameterValue): """MissingParameterValue WPS Exception""" code = "MissingParameterValue" locator = "" @@ -197,14 +210,14 @@ def __init__(self, *args, **kwargs): warnings.warn(self.message, MissingParameterWarning) -class OWSInvalidParameterValue(OWSException): +class OWSInvalidParameterValue(OWSException, InvalidParameterValue): """InvalidParameterValue WPS Exception""" code = "InvalidParameterValue" locator = "" explanation = "Parameter value is not acceptable." 
def __init__(self, *args, **kwargs): - kwargs["status"] = HTTPNotAcceptable + kwargs["status"] = HTTPBadRequest super(OWSInvalidParameterValue, self).__init__(*args, **kwargs) warnings.warn(self.message, UnsupportedOperationWarning) diff --git a/weaver/processes/builtin/__init__.py b/weaver/processes/builtin/__init__.py index 21d978c74..ea1cfef74 100644 --- a/weaver/processes/builtin/__init__.py +++ b/weaver/processes/builtin/__init__.py @@ -4,7 +4,6 @@ from string import Template from typing import TYPE_CHECKING -import six from cwltool.command_line_tool import CommandLineTool from cwltool.docker import DockerCommandLineJob from cwltool.job import CommandLineJob, JobBase @@ -21,13 +20,13 @@ from weaver.store.base import StoreProcesses from weaver.utils import clean_json_text_body, ows_context_href from weaver.visibility import VISIBILITY_PUBLIC -from weaver.wps import get_wps_url +from weaver.wps.utils import get_wps_url from weaver.wps_restapi.utils import get_wps_restapi_base_url if TYPE_CHECKING: - from weaver.typedefs import AnyDatabaseContainer, CWL # noqa: F401 - from cwltool.context import RuntimeContext # noqa: F401 - from typing import Any, AnyStr, Dict, Type, Union # noqa: F401 + from weaver.typedefs import AnySettingsContainer, CWL + from cwltool.context import RuntimeContext + from typing import Any, Dict, Type, Union LOGGER = logging.getLogger(__name__) @@ -39,7 +38,7 @@ def _get_builtin_reference_mapping(root): - # type: (AnyStr) -> Dict[AnyStr, AnyStr] + # type: (str) -> Dict[str, str] """Generates a mapping of `reference` to actual ``builtin`` package file path.""" builtin_names = [_pkg for _pkg in os.listdir(root) if os.path.splitext(_pkg)[-1].replace(".", "") in PACKAGE_EXTENSIONS] @@ -47,7 +46,7 @@ def _get_builtin_reference_mapping(root): def _get_builtin_metadata(process_id, process_path, meta_field, clean=False): - # type: (AnyStr, AnyStr, AnyStr, bool) -> Union[AnyStr, None] + # type: (str, str, str, bool) -> Union[str, None] """ Retrieves the ``builtin`` process ``meta_field`` from its definition if it exists. 
""" @@ -56,7 +55,7 @@ def _get_builtin_metadata(process_id, process_path, meta_field, clean=False): try: mod = import_module("{}.{}".format(__name__, process_id)) meta = getattr(mod, meta_field, None) - if meta and isinstance(meta, six.string_types): + if meta and isinstance(meta, str): return clean_json_text_body(meta) if clean else meta except ImportError: pass @@ -64,20 +63,20 @@ def _get_builtin_metadata(process_id, process_path, meta_field, clean=False): def _replace_template(pkg, var, val): - # type: (CWL, AnyStr, AnyStr) -> CWL - if isinstance(pkg, six.string_types): + # type: (CWL, str, str) -> CWL + if isinstance(pkg, str): return Template(pkg).safe_substitute({var: val}) - for k in pkg: + for k in pkg: # type: str if isinstance(pkg[k], list): for i, _ in enumerate(pkg[k]): pkg[k][i] = _replace_template(pkg[k][i], var, val) - elif isinstance(pkg[k], (dict, six.string_types)): + elif isinstance(pkg[k], (dict, str)): pkg[k] = _replace_template(pkg[k], var, val) return pkg def _get_builtin_package(process_id, package): - # type: (AnyStr, CWL) -> CWL + # type: (str, CWL) -> CWL """ Updates the `CWL` with following requirements to allow running a ``PROCESS_BUILTIN``: - add `hints` section with ``CWL_REQUIREMENT_APP_BUILTIN`` @@ -94,7 +93,7 @@ def _get_builtin_package(process_id, package): def register_builtin_processes(container): - # type: (AnyDatabaseContainer) -> None + # type: (AnySettingsContainer) -> None """Registers every ``builtin`` CWL package to the processes database. CWL definitions must be located within the :mod:`weaver.processes.builtin` module. diff --git a/weaver/processes/builtin/file2string_array.cwl b/weaver/processes/builtin/file2string_array.cwl index dfcb60aaa..65a238365 100644 --- a/weaver/processes/builtin/file2string_array.cwl +++ b/weaver/processes/builtin/file2string_array.cwl @@ -2,8 +2,8 @@ cwlVersion: v1.0 class: CommandLineTool # target the installed python pointing to weaver conda env to allow imports -baseCommand: $WEAVER_ROOT_DIR/bin/python -arguments: ["$WEAVER_ROOT_DIR/weaver/processes/builtin/file2string_array.py", "-o", $(runtime.outdir)] +baseCommand: ${WEAVER_ROOT_DIR}/bin/python +arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/file2string_array.py", "-o", $(runtime.outdir)] inputs: input: type: File diff --git a/weaver/processes/builtin/file2string_array.py b/weaver/processes/builtin/file2string_array.py index 50625658c..280c11451 100644 --- a/weaver/processes/builtin/file2string_array.py +++ b/weaver/processes/builtin/file2string_array.py @@ -1,4 +1,5 @@ -__doc__ = """ +#!/usr/bin/env python +""" Transforms a file input into JSON file containing an array of file references as value. """ import argparse @@ -6,7 +7,6 @@ import logging import os import sys -from typing import AnyStr CUR_DIR = os.path.abspath(os.path.dirname(__file__)) @@ -21,7 +21,7 @@ def main(input_file, output_dir): - # type: (argparse.FileType, AnyStr) -> None + # type: (argparse.FileType, str) -> None LOGGER.info( "Got arguments: input_file=%s output_dir=%s", input_file, output_dir ) diff --git a/weaver/processes/builtin/jsonarray2netcdf.py b/weaver/processes/builtin/jsonarray2netcdf.py index 1ab9fcc61..aef8756d9 100644 --- a/weaver/processes/builtin/jsonarray2netcdf.py +++ b/weaver/processes/builtin/jsonarray2netcdf.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python """ Extracts and fetches NetCDF files from a JSON file containing an URL string array, and provides them on the output directory. 
@@ -7,15 +8,7 @@ import logging import os import sys -from typing import Any, AnyStr - -import six -from six.moves.urllib.parse import urlparse - -if six.PY3: - from tempfile import TemporaryDirectory -else: - from backports.tempfile import TemporaryDirectory # pylint: disable=E0611 # noqa # py2 +from tempfile import TemporaryDirectory CUR_DIR = os.path.abspath(os.path.dirname(__file__)) sys.path.insert(0, CUR_DIR) @@ -24,7 +17,7 @@ # place weaver specific imports after sys path fixing to ensure they are found from external call # pylint: disable=C0413,wrong-import-order -from weaver.formats import get_extension, CONTENT_TYPE_APP_NETCDF # isort:skip # noqa: E402 +from weaver.processes.builtin.utils import is_netcdf_url # isort:skip # noqa: E402 from weaver.utils import fetch_file # isort:skip # noqa: E402 PACKAGE_NAME = os.path.split(os.path.splitext(__file__)[0])[-1] @@ -35,22 +28,13 @@ LOGGER.setLevel(logging.INFO) # process details -__version__ = "1.0" +__version__ = "1.1" __title__ = "JSON array to NetCDF" __abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative -def _is_netcdf_url(url): - # type: (Any) -> bool - if not isinstance(url, six.string_types): - return False - if urlparse(url).scheme == "": - return False - return os.path.splitext(url)[-1] == get_extension(CONTENT_TYPE_APP_NETCDF) - - def j2n(json_reference, output_dir): - # type: (AnyStr, AnyStr) -> None + # type: (str, str) -> None LOGGER.info("Process '%s' execution starting...", PACKAGE_NAME) LOGGER.debug("Process '%s' output directory: [%s].", PACKAGE_NAME, output_dir) try: @@ -62,7 +46,7 @@ def j2n(json_reference, output_dir): LOGGER.debug("Reading JSON file: [%s]", json_path) with open(json_path) as json_file: json_content = json.load(json_file) - if not isinstance(json_content, list) or any(not _is_netcdf_url(f) for f in json_content): + if not isinstance(json_content, list) or any(not is_netcdf_url(f) for f in json_content): LOGGER.error("Invalid JSON: [%s]", json_content) raise ValueError("Invalid JSON file format, expected a plain array of NetCDF file URL strings.") LOGGER.debug("Parsing JSON file references.") diff --git a/weaver/processes/builtin/metalink2netcdf.py b/weaver/processes/builtin/metalink2netcdf.py index a0da4a1eb..80ae83f57 100644 --- a/weaver/processes/builtin/metalink2netcdf.py +++ b/weaver/processes/builtin/metalink2netcdf.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python """ Extracts and fetches NetCDF files from a Metalink file containing an URL, and outputs the NetCDF file at a given index of the list. 
@@ -6,16 +7,10 @@ import logging import os import sys -from typing import AnyStr +from tempfile import TemporaryDirectory -import six from lxml import etree -if six.PY3: - from tempfile import TemporaryDirectory -else: - from backports.tempfile import TemporaryDirectory # pylint: disable=E0611 # noqa # py2 - CUR_DIR = os.path.abspath(os.path.dirname(__file__)) sys.path.insert(0, CUR_DIR) # root to allow 'from weaver import <...>' @@ -39,7 +34,7 @@ def m2n(metalink_reference, index, output_dir): - # type: (AnyStr, int, AnyStr) -> None + # type: (str, int, str) -> None LOGGER.info( "Got arguments: metalink_reference=%s index=%s output_dir=%s", metalink_reference, index, output_dir ) diff --git a/weaver/processes/builtin/utils.py b/weaver/processes/builtin/utils.py index 59ccfea96..2985df6da 100644 --- a/weaver/processes/builtin/utils.py +++ b/weaver/processes/builtin/utils.py @@ -2,14 +2,12 @@ from typing import Any from urllib.parse import urlparse -import six - from weaver.formats import CONTENT_TYPE_APP_NETCDF, get_extension -def _is_netcdf_url(url): +def is_netcdf_url(url): # type: (Any) -> bool - if not isinstance(url, six.string_types): + if not isinstance(url, str): return False if urlparse(url).scheme == "": return False diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py new file mode 100644 index 000000000..8c58b0242 --- /dev/null +++ b/weaver/processes/convert.py @@ -0,0 +1,1355 @@ +""" +Conversion functions between corresponding data structures. +""" +import json +import logging +import sys +from collections import Hashable, OrderedDict # pylint: disable=E0611,no-name-in-module # moved to .abc in Python 3 +from copy import deepcopy +from tempfile import TemporaryDirectory +from typing import TYPE_CHECKING +from urllib.parse import urlparse + +import lxml.etree +from owslib.wps import ( + ComplexData, + Input as OWS_Input_Type, + Metadata as OWS_Metadata, + Output as OWS_Output_Type, + WebProcessingService, + is_reference +) +from pywps import Process as ProcessWPS +from pywps.app.Common import Metadata as WPS_Metadata +from pywps.inout import BoundingBoxInput, BoundingBoxOutput, ComplexInput, ComplexOutput, LiteralInput, LiteralOutput +from pywps.inout.basic import BasicIO +from pywps.inout.formats import Format +from pywps.inout.literaltypes import ALLOWEDVALUETYPE, AllowedValue, AnyValue +# FIXME: #211 (range): pywps.inout.literaltypes.RANGECLOSURETYPE +from pywps.validator.mode import MODE + +from weaver.exceptions import PackageTypeError +from weaver.execute import ( + EXECUTE_MODE_ASYNC, + EXECUTE_RESPONSE_DOCUMENT, + EXECUTE_TRANSMISSION_MODE_REFERENCE, + EXECUTE_TRANSMISSION_MODE_VALUE +) +from weaver.formats import ( + CONTENT_TYPE_ANY, + CONTENT_TYPE_APP_JSON, + CONTENT_TYPE_TEXT_PLAIN, + get_cwl_file_format, + get_extension, + get_format +) +from weaver.processes.constants import ( + CWL_REQUIREMENT_APP_WPS1, + WPS_BOUNDINGBOX, + WPS_COMPLEX, + WPS_COMPLEX_DATA, + WPS_INPUT, + WPS_LITERAL, + WPS_OUTPUT, + WPS_REFERENCE +) +from weaver.utils import bytes2str, fetch_file, get_any_id, get_url_without_query, null, str2bytes + +if TYPE_CHECKING: + from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union + + from cwltool.process import Process as ProcessCWL + from pywps.app import WPSRequest + from pywps.response.execute import ExecuteResponse + from owslib.wps import Process as ProcessOWS, WPSExecution + from requests.models import Response + + from weaver.datatype import Job + from weaver.status import AnyStatusType + from 
weaver.typedefs import ( + AnyKey, + AnySettingsContainer, + AnyValueType, + CWL, + JSON, + Number, + TypedDict, + XML + ) + + # typing shortcuts + # pylint: disable=C0103,invalid-name + WPS_Input_Type = Union[LiteralInput, ComplexInput, BoundingBoxInput] + WPS_Output_Type = Union[LiteralOutput, ComplexOutput, BoundingBoxOutput] + WPS_IO_Type = Union[WPS_Input_Type, WPS_Output_Type] + OWS_IO_Type = Union[OWS_Input_Type, OWS_Output_Type] + JSON_IO_Type = JSON + CWL_Input_Type = TypedDict("CWL_Input_Type", {"id": str, "type": str}, total=False) + CWL_Output_Type = TypedDict("CWL_Output_Type", {"id": str, "type": str}, total=False) + CWL_IO_Type = Union[CWL_Input_Type, CWL_Output_Type] + PKG_IO_Type = Union[JSON_IO_Type, WPS_IO_Type] + ANY_IO_Type = Union[CWL_IO_Type, JSON_IO_Type, WPS_IO_Type, OWS_IO_Type] + ANY_Format_Type = Union[Dict[str, Optional[str]], Format] + ANY_Metadata_Type = Union[OWS_Metadata, WPS_Metadata, Dict[str, str]] + +# CWL package types and extensions +PACKAGE_BASE_TYPES = frozenset(["string", "boolean", "float", "int", "integer", "long", "double"]) +PACKAGE_LITERAL_TYPES = frozenset(list(PACKAGE_BASE_TYPES) + ["null", "Any"]) +PACKAGE_COMPLEX_TYPES = frozenset(["File", "Directory"]) +PACKAGE_ARRAY_BASE = "array" +PACKAGE_ARRAY_MAX_SIZE = sys.maxsize # pywps doesn't allow None, so use max size # FIXME: unbounded (weaver #165) +PACKAGE_CUSTOM_TYPES = frozenset(["enum"]) # can be anything, but support "enum" which is more common +PACKAGE_ARRAY_ITEMS = frozenset(list(PACKAGE_BASE_TYPES) + list(PACKAGE_COMPLEX_TYPES) + list(PACKAGE_CUSTOM_TYPES)) +PACKAGE_ARRAY_TYPES = frozenset(["{}[]".format(item) for item in PACKAGE_ARRAY_ITEMS]) + +# WPS object attribute -> all possible *other* naming variations +WPS_FIELD_MAPPING = { + "identifier": ["Identifier", "ID", "id", "Id"], + "title": ["Title", "Label", "label"], + "abstract": ["Abstract"], + "metadata": ["Metadata"], + "keywords": ["Keywords"], + "allowed_values": ["AllowedValues", "allowedValues", "allowedvalues", "Allowed_Values", "Allowedvalues"], + "allowed_collections": ["AllowedCollections", "allowedCollections", "allowedcollections", "Allowed_Collections", + "Allowedcollections"], + "default": ["default_value", "defaultValue", "DefaultValue", "Default", "data_format"], + "supported_values": ["SupportedValues", "supportedValues", "supportedvalues", "Supported_Values"], + "supported_formats": ["SupportedFormats", "supportedFormats", "supportedformats", "Supported_Formats", "formats"], + "additional_parameters": ["AdditionalParameters", "additionalParameters", "additionalparameters", + "Additional_Parameters"], + "type": ["Type", "data_type", "dataType", "DataType", "Data_Type"], + "min_occurs": ["minOccurs", "MinOccurs", "Min_Occurs", "minoccurs"], + "max_occurs": ["maxOccurs", "MaxOccurs", "Max_Occurs", "maxoccurs"], + "mime_type": ["mimeType", "MimeType", "mime-type", "Mime-Type", "MIME-Type", "mimetype"], + "encoding": ["Encoding"], + "href": ["url", "link", "reference"], +} +# WPS fields that contain a structure corresponding to `Format` object +# - keys must match `WPS_FIELD_MAPPING` keys +# - fields are placed in order of relevance (prefer explicit format, then supported, and defaults as last resort) +WPS_FIELD_FORMAT = ["formats", "supported_formats", "supported_values", "default"] + +# WPS 'type' string variations employed to indicate a Complex (file) I/O by different libraries +# for literal types, see 'any2cwl_literal_datatype' and 'any2wps_literal_datatype' functions +WPS_COMPLEX_TYPES = [WPS_COMPLEX, 
WPS_COMPLEX_DATA, WPS_REFERENCE] + +# WPS 'type' string of all combinations (type of data / library implementation) +WPS_ALL_TYPES = [WPS_LITERAL, WPS_BOUNDINGBOX] + WPS_COMPLEX_TYPES + +# default format if missing (minimal requirement of one) +DEFAULT_FORMAT = Format(mime_type=CONTENT_TYPE_TEXT_PLAIN) +DEFAULT_FORMAT_MISSING = "__DEFAULT_FORMAT_MISSING__" +setattr(DEFAULT_FORMAT, DEFAULT_FORMAT_MISSING, True) + +LOGGER = logging.getLogger(__name__) + + +def complex2json(data): + # type: (Union[ComplexData, Any]) -> Union[JSON, Any] + """ + Obtains the JSON representation of a :class:`ComplexData` or simply return the unmatched type. + """ + if not isinstance(data, ComplexData): + return data + return { + "mimeType": data.mimeType, + "encoding": data.encoding, + "schema": data.schema, + } + + +def metadata2json(meta, force=False): + # type: (Union[ANY_Metadata_Type, Any], bool) -> Union[JSON, Any] + """ + Obtains the JSON representation of a :class:`OWS_Metadata` or :class:`pywps.app.Common.Metadata`. + Otherwise, simply return the unmatched type. + If requested, can enforce parsing a dictionary for the corresponding keys. + """ + if not force and not isinstance(meta, (OWS_Metadata, WPS_Metadata)): + return meta + return { + "href": get_field(meta, "href", search_variations=True, default=None), + "title": get_field(meta, "title", search_variations=True, default=None), + "role": get_field(meta, "role", search_variations=True, default=None), + } + + +def ows2json_field(ows_field): + # type: (Union[ComplexData, OWS_Metadata, AnyValueType]) -> Union[JSON, AnyValueType] + """ + Obtains the JSON or raw value from an :mod:`owslib.wps` I/O field. + """ + if isinstance(ows_field, ComplexData): + return complex2json(ows_field) + if isinstance(ows_field, OWS_Metadata): + return metadata2json(ows_field) + return ows_field + + +def ows2json_io(ows_io): + # type: (OWS_IO_Type) -> JSON_IO_Type + """ + Converts I/O from :mod:`owslib.wps` to JSON. 
+ """ + + json_io = dict() + for field in WPS_FIELD_MAPPING: + value = get_field(ows_io, field, search_variations=True) + # preserve numeric values (ex: "minOccurs"=0) as actual parameters + # ignore undefined values represented by `null`, empty list, or empty string + if value or value in [0, 0.0]: + if isinstance(value, list): + # complex data is converted as is + # metadata converted and preserved if it results into a minimally valid definition (otherwise dropped) + json_io[field] = [ + complex2json(v) if isinstance(v, ComplexData) else + metadata2json(v) if isinstance(v, OWS_Metadata) else v + for v in value if not isinstance(v, OWS_Metadata) or v.url is not None + ] + elif isinstance(value, ComplexData): + json_io[field] = complex2json(value) + elif isinstance(value, OWS_Metadata): + json_io[field] = metadata2json(value) + else: + json_io[field] = value + + # add 'format' if missing, derived from other variants + if "formats" not in json_io: + fmt_val = get_field(json_io, "supported_values") + if fmt_val and json_io.get("type") == WPS_COMPLEX_DATA: + json_io["formats"] = json_io.pop("supported_values") + else: + # search for format fields directly specified in I/O body + for field in WPS_FIELD_FORMAT: + fmt = get_field(json_io, field, search_variations=True) + if not fmt: + continue + if isinstance(fmt, dict): + fmt = [fmt] + fmt = filter(lambda f: isinstance(f, dict), fmt) + if not isinstance(json_io.get("formats"), list): + json_io["formats"] = list() + for var_fmt in fmt: + # add it only if not exclusively provided by a previous variant + json_fmt_items = [j_fmt.items() for j_fmt in json_io["formats"]] + if any(all(var_item in items for var_item in var_fmt.items()) for items in json_fmt_items): + continue + json_io["formats"].append(var_fmt) + + return json_io + + +# FIXME: duplicate operation of 'ows2json_io', but slightly different result (camel case vs snake case) +# resolve and combine into single function +# resulting fields should conform to OGC WPS-REST bindings specifications: +# https://raw.githubusercontent.com/opengeospatial/ogcapi-processes/master/core/openapi/schemas/inputDescription.yaml +# (https://github.com/crim-ca/weaver/issues/211) +def ows2json_io_FIXME(ows_io): # pylint: disable=C0103 + # type: (OWS_IO_Type) -> JSON_IO_Type + default_format = {"mimeType": CONTENT_TYPE_TEXT_PLAIN} + if isinstance(ows_io, OWS_Input_Type): + return dict( + id=getattr(ows_io, "identifier", ""), + title=getattr(ows_io, "title", ""), + abstract=getattr(ows_io, "abstract", ""), + minOccurs=str(getattr(ows_io, "minOccurs", 0)), + maxOccurs=str(getattr(ows_io, "maxOccurs", 0)), + dataType=ows_io.dataType, + defaultValue=ows2json_field(getattr(ows_io, "defaultValue", None)), + allowedValues=[ows2json_field(dataValue) for dataValue in getattr(ows_io, "allowedValues", [])], + supportedValues=[ows2json_field(dataValue) for dataValue in getattr(ows_io, "supportedValues", [])], + formats=[ows2json_field(value) for value in getattr(ows_io, "supportedValues", [default_format])], + ) + if isinstance(ows_io, OWS_Output_Type): + return dict( + id=getattr(ows_io, "identifier", ""), + title=getattr(ows_io, "title", ""), + abstract=getattr(ows_io, "abstract", ""), + dataType=ows_io.dataType, + defaultValue=ows2json_field(getattr(ows_io, "defaultValue", None)), + formats=[ows2json_field(value) for value in getattr(ows_io, "supportedValues", [default_format])], + ) + raise PackageTypeError("Unsupported OWS-WPS I/O type: {}".format(type(ows_io))) + + +# FIXME: duplicate of 'ows2json_io' specific to 
output, with some extra util JSON unwrapping
+def ows2json_output(output, process_description, container=None):
+    # type: (OWS_Output_Type, ProcessOWS, Optional[AnySettingsContainer]) -> JSON
+    """
+    Utility method to jsonify an output element from a WPS1 process description.
+
+    When a reference JSON output is specified and it refers to a file that contains an array of URL
+    references (used to simulate a multiple-output), this specific output gets expanded to contain both
+    the original URL ``reference`` field and the loaded URL list under the ``data`` field for easier
+    access from the response body.
+    """
+
+    if not output.dataType:
+        for process_output in getattr(process_description, "processOutputs", []):
+            if getattr(process_output, "identifier", "") == output.identifier:
+                output.dataType = process_output.dataType
+                break
+
+    json_output = dict(identifier=output.identifier,
+                       title=output.title,
+                       dataType=output.dataType)
+
+    # WPS standard v1.0.0 specifies that either a reference or a data field has to be provided
+    if output.reference:
+        json_output["reference"] = output.reference
+
+        # Handle special case where we have a reference to a json array containing dataset references
+        # Avoid a reference to a reference by directly fetching the dataset references
+        json_array = _get_multi_json_references(output, container)
+        if json_array and all(str(ref).startswith("http") for ref in json_array):
+            json_output["data"] = json_array
+    else:
+        # WPS standard v1.0.0 specifies that the Output data field has zero or one value
+        json_output["data"] = output.data[0] if output.data else None
+
+    if json_output["dataType"] == WPS_COMPLEX_DATA:
+        json_output["mimeType"] = output.mimeType
+
+    return json_output
+
+
+# FIXME: support metalink unwrapping (weaver #25)
+# FIXME: reuse functions
+#   definitely can be improved and simplified with 'fetch_file' function
+#   then return parsed contents from that file
+def _get_multi_json_references(output, container):
+    # type: (OWS_Output_Type, Optional[AnySettingsContainer]) -> Optional[List[JSON]]
+    """
+    Since the WPS standard does not allow returning multiple values for a single output,
+    a lot of processes actually return a JSON array containing references to these outputs.
+
+    Because the multi-output references are contained within this JSON file, it is not very convenient to retrieve
+    the list of URLs as one always needs to open and read the file to get them. The goal of this function is to
+    detect this particular format and expand the references to make them quickly available in the job output
+    response.
+
+    :return:
+        Array of HTTP(S) references if the specified output is effectively a JSON array of references,
+        ``None`` otherwise.
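+    For example, a process that generated two files could return a single JSON output whose referenced
+    file contains the following array (illustrative URLs only)::
+
+        ["https://example.com/outputs/file-1.nc", "https://example.com/outputs/file-2.nc"]
+
+    in which case this function returns that list directly instead of requiring the caller to fetch and
+    parse the referenced file.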
+ """ + # Check for the json datatype and mime-type + if output.dataType == WPS_COMPLEX_DATA and output.mimeType == CONTENT_TYPE_APP_JSON: + try: + # If the json data is referenced read it's content + if output.reference: + with TemporaryDirectory() as tmp_dir: + file_path = fetch_file(output.reference, tmp_dir, settings=container) + with open(file_path, "r") as tmp_file: + json_data_str = tmp_file.read() + # Else get the data directly + else: + # process output data are append into a list and + # WPS standard v1.0.0 specify that Output data field has zero or one value + if not output.data: + return None + json_data_str = output.data[0] + + # Load the actual json dict + json_data = json.loads(json_data_str) + except Exception as exc: # pylint: disable=W0703 + LOGGER.debug("Failed retrieval of JSON output file for multi-reference unwrapping", exc_info=exc) + return None + if isinstance(json_data, list): + return None if any(not is_reference(data_value) for data_value in json_data) else json_data + return None + + +def any2cwl_io(wps_io, io_select): + # type: (Union[JSON_IO_Type, WPS_IO_Type, OWS_IO_Type], str) -> Tuple[CWL_IO_Type, Dict[str, str]] + """ + Converts a `WPS`-like I/O to `CWL` corresponding I/O. + Because of `CWL` I/O of type `File` with `format` field, the applicable namespace is also returned. + + :returns: converted I/O and namespace dictionary with corresponding format references as required + """ + def _get_cwl_fmt_details(wps_fmt): + # type: (ANY_Format_Type) -> Union[Tuple[Tuple[str, str], str, str], Tuple[None, None, None]] + _wps_io_fmt = get_field(wps_fmt, "mime_type", search_variations=True) + if not _wps_io_fmt: + return None, None, None + _cwl_io_ext = get_extension(_wps_io_fmt) + _cwl_io_ref, _cwl_io_fmt = get_cwl_file_format(_wps_io_fmt, must_exist=True, allow_synonym=False) + return _cwl_io_ref, _cwl_io_fmt, _cwl_io_ext + + wps_io_type = get_field(wps_io, "type", search_variations=True) + wps_io_id = get_field(wps_io, "identifier", search_variations=True) + cwl_ns = dict() + cwl_io = {"id": wps_io_id} # type: CWL_IO_Type + if wps_io_type not in WPS_COMPLEX_TYPES: + cwl_io_type = any2cwl_literal_datatype(wps_io_type) + wps_allow = get_field(wps_io, "allowed_values", search_variations=True) + if isinstance(wps_allow, list) and len(wps_allow) > 0: + cwl_io["type"] = {"type": "enum", "symbols": wps_allow} + else: + cwl_io["type"] = cwl_io_type + # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51) + else: + cwl_io_fmt = None + cwl_io_ext = CONTENT_TYPE_ANY + cwl_io["type"] = "File" + + # inputs are allowed to define multiple 'supported' formats + # outputs are allowed to define only one 'applied' format + for field in WPS_FIELD_FORMAT: + fmt = get_field(wps_io, field, search_variations=True) + if isinstance(fmt, dict): + cwl_io_ref, cwl_io_fmt, cwl_io_ext = _get_cwl_fmt_details(fmt) + if cwl_io_ref and cwl_io_fmt: + cwl_ns.update(cwl_io_ref) + break + if isinstance(fmt, list): + if len(fmt) == 1: + cwl_io_ref, cwl_io_fmt, cwl_io_ext = _get_cwl_fmt_details(fmt[0]) + if cwl_io_ref and cwl_io_fmt: + cwl_ns.update(cwl_io_ref) + break + if io_select == WPS_OUTPUT and len(fmt) > 1: + break # don't use any format because we cannot enforce one + cwl_ns_multi = {} + cwl_fmt_multi = [] + for fmt_i in fmt: + # FIXME: (?) 
+ # when multiple formats are specified, but at least one schema/namespace reference can't be found, + # we must drop all since that unknown format is still allowed but cannot be validated + # avoid potential validation error if that format was the one provided during execute... + # (see: https://github.com/crim-ca/weaver/issues/50) + cwl_io_ref_i, cwl_io_fmt_i, _ = _get_cwl_fmt_details(fmt_i) + if cwl_io_ref_i and cwl_io_fmt_i: + cwl_ns_multi.update(cwl_io_ref_i) + cwl_fmt_multi.append(cwl_io_fmt_i) + else: + # reset all since at least one format could not be mapped to an official schema + cwl_ns_multi = {} + cwl_fmt_multi = None + break + cwl_io_fmt = cwl_fmt_multi # all formats or none of them + cwl_ns.update(cwl_ns_multi) + break + if cwl_io_fmt: + cwl_io["format"] = cwl_io_fmt + # for backward compatibility with deployed processes, consider text/plan as 'any' for glob pattern + cwl_io_txt = get_extension(CONTENT_TYPE_TEXT_PLAIN) + if cwl_io_ext == cwl_io_txt: + cwl_io_any = get_extension(CONTENT_TYPE_ANY) + LOGGER.warning("Replacing '%s' [%s] to generic '%s' [%s] glob pattern. " + "More explicit format could be considered for %s '%s'.", + CONTENT_TYPE_TEXT_PLAIN, cwl_io_txt, CONTENT_TYPE_ANY, cwl_io_any, io_select, wps_io_id) + cwl_io_ext = cwl_io_any + if io_select == WPS_OUTPUT: + # FIXME: (?) how to specify the 'name' part of the glob (using the "id" value for now) + cwl_io["outputBinding"] = { + "glob": "{}{}".format(wps_io_id, cwl_io_ext) + } + + if io_select == WPS_INPUT: + wps_default = get_field(wps_io, "default", search_variations=True) + wps_min_occ = get_field(wps_io, "min_occurs", search_variations=True) + # field 'default' must correspond to a fallback "value", not a default "format" + if (wps_default != null and not isinstance(wps_default, dict)) or wps_min_occ in [0, "0"]: + cwl_io["default"] = wps_default or "null" + + wps_max_occ = get_field(wps_io, "max_occurs", search_variations=True) + if wps_max_occ != null and wps_max_occ > 1: + cwl_io["type"] = { + "type": "array", + "items": cwl_io["type"] + } + + return cwl_io, cwl_ns + + +def xml_wps2cwl(wps_process_response): + # type: (Response) -> Tuple[CWL, JSON] + """ + Converts a `WPS-1 ProcessDescription XML` tree structure to an equivalent `WPS-3 Process JSON` and builds the + associated `CWL` package in conformance to :ref:`weaver.processes.wps_package.CWL_REQUIREMENT_APP_WPS1`. + + :param wps_process_response: valid response (XML, 200) from a `WPS-1 ProcessDescription`. 
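+    The resulting package is a minimal ``CommandLineTool`` whose ``hints`` point back to the original
+    WPS-1 service, roughly of the form (illustrative values only)::
+
+        {"cwlVersion": "v1.0", "class": "CommandLineTool",
+         "hints": {CWL_REQUIREMENT_APP_WPS1: {"provider": "https://example.com/wps", "process": "process-id"}},
+         "inputs": [...], "outputs": [...]}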
+ """ + def _tag_name(_xml): + # type: (Union[XML, str]) -> str + """Obtains ``tag`` from a ``{namespace}Tag`` `XML` element.""" + if hasattr(_xml, "tag"): + _xml = _xml.tag + return _xml.split("}")[-1].lower() + + # look for `XML` structure starting at `ProcessDescription` (WPS-1) + xml_resp = lxml.etree.fromstring(str2bytes(wps_process_response.content)) + xml_wps_process = xml_resp.xpath("//ProcessDescription") # type: List[XML] + if not len(xml_wps_process) == 1: + raise ValueError("Could not retrieve a valid 'ProcessDescription' from WPS-1 response.") + process_id = None + for sub_xml in xml_wps_process[0]: + tag = _tag_name(sub_xml) + if tag == "identifier": + process_id = sub_xml.text + break + if not process_id: + raise ValueError("Could not find a match for 'ProcessDescription.identifier' from WPS-1 response.") + + # transform WPS-1 -> WPS-3 + wps = WebProcessingService(wps_process_response.url) + wps_service_url = urlparse(wps_process_response.url) + if wps.provider: + wps_service_name = wps.provider.name + else: + wps_service_name = wps_service_url.hostname + process_info = OrderedDict([ + ("identifier", "{}_{}".format(wps_service_name, process_id)), + ("keywords", [wps_service_name]), + ]) + wps_process = wps.describeprocess(process_id, xml=wps_process_response.content) + for field in ["title", "abstract"]: + process_info[field] = get_field(wps_process, field, search_variations=True) + if wps_process.metadata: + process_info["metadata"] = [] + for meta in wps_process.metadata: + process_info["metadata"].append({"href": meta.url, "title": meta.title, "role": meta.role}) + process_info["inputs"] = [] # type: List[JSON] + process_info["outputs"] = [] # type: List[JSON] + for wps_in in wps_process.dataInputs: # type: OWS_Input_Type + process_info["inputs"].append(ows2json_io(wps_in)) + for wps_out in wps_process.processOutputs: # type: OWS_Output_Type + process_info["outputs"].append(ows2json_io(wps_out)) + + # generate CWL for WPS-1 using parsed WPS-3 + cwl_package = OrderedDict([ + ("cwlVersion", "v1.0"), + ("class", "CommandLineTool"), + ("hints", { + CWL_REQUIREMENT_APP_WPS1: { + "provider": get_url_without_query(wps_service_url), + "process": process_id, + }}), + ]) + for io_select in [WPS_INPUT, WPS_OUTPUT]: + io_section = "{}s".format(io_select) + cwl_package[io_section] = list() + for wps_io in process_info[io_section]: + cwl_io, cwl_ns = any2cwl_io(wps_io, io_select) + cwl_package[io_section].append(cwl_io) + if cwl_ns: + if "$namespaces" not in cwl_package: + cwl_package["$namespaces"] = dict() + cwl_package["$namespaces"].update(cwl_ns) + + return cwl_package, process_info + + +def is_cwl_array_type(io_info): + # type: (CWL_IO_Type) -> Tuple[bool, str, MODE, Union[AnyValue, List[Any]]] + """Verifies if the specified I/O corresponds to one of various CWL array type definitions. + + returns ``tuple(is_array, io_type, io_mode, io_allow)`` where: + - ``is_array``: specifies if the I/O is of array type. + - ``io_type``: array element type if ``is_array`` is True, type of ``io_info`` otherwise. + - ``io_mode``: validation mode to be applied if sub-element requires it, defaults to ``MODE.NONE``. + - ``io_allow``: validation values to be applied if sub-element requires it, defaults to ``AnyValue``. + :raises PackageTypeError: if the array element doesn't have the required values and valid format. 
+ """ + # use mapping to allow sub-function updates + io_return = { + "array": False, + "allow": AnyValue, + "type": io_info["type"], + "mode": MODE.NONE, + } + + def _update_if_sub_enum(_io_item): + # type: (CWL_IO_Type) -> bool + """ + Updates the ``io_return`` parameters if ``io_item`` evaluates to a valid ``enum`` type. + Parameter ``io_item`` should correspond to the ``items`` field of an array I/O definition. + Simple pass-through if the array item is not an ``enum``. + """ + _is_enum, _enum_type, _enum_mode, _enum_allow = is_cwl_enum_type({"type": _io_item}) + if _is_enum: + LOGGER.debug("I/O [%s] parsed as 'array' with sub-item as 'enum'", io_info["name"]) + io_return["type"] = _enum_type + io_return["mode"] = _enum_mode + io_return["allow"] = _enum_allow + return _is_enum + + # optional I/O could be an array of '["null", ""]' with "" being any of the formats parsed after + # is it the literal representation instead of the shorthand with '?' + if isinstance(io_info["type"], list) and any(sub_type == "null" for sub_type in io_info["type"]): + # we can ignore the optional indication in this case because it doesn't impact following parsing + io_return["type"] = list(filter(lambda sub_type: sub_type != "null", io_info["type"]))[0] + + # array type conversion when defined as '{"type": "array", "items": ""}' + # validate against 'Hashable' instead of 'dict' since 'OrderedDict'/'CommentedMap' can fail 'isinstance()' + if not isinstance(io_return["type"], str) and not isinstance(io_return["type"], Hashable) \ + and "items" in io_return["type"] and "type" in io_return["type"]: + io_type = dict(io_return["type"]) # make hashable to allow comparison + if io_type["type"] != PACKAGE_ARRAY_BASE: + raise PackageTypeError("Unsupported I/O 'array' definition: '{}'.".format(repr(io_info))) + # parse enum in case we got an array of allowed symbols + is_enum = _update_if_sub_enum(io_type["items"]) + if not is_enum: + io_return["type"] = io_type["items"] + if io_return["type"] not in PACKAGE_ARRAY_ITEMS: + raise PackageTypeError("Unsupported I/O 'array' definition: '{}'.".format(repr(io_info))) + LOGGER.debug("I/O [%s] parsed as 'array' with nested dict notation", io_info["name"]) + io_return["array"] = True + # array type conversion when defined as string '[]' + elif isinstance(io_return["type"], str) and io_return["type"] in PACKAGE_ARRAY_TYPES: + io_return["type"] = io_return["type"][:-2] # remove '[]' + if io_return["type"] in PACKAGE_CUSTOM_TYPES: + # parse 'enum[]' for array of allowed symbols, provide expected structure for sub-item parsing + io_item = deepcopy(io_info) + io_item["type"] = io_return["type"] # override corrected type without '[]' + _update_if_sub_enum(io_item) + if io_return["type"] not in PACKAGE_ARRAY_ITEMS: + raise PackageTypeError("Unsupported I/O 'array' definition: '{}'.".format(repr(io_info))) + LOGGER.debug("I/O [%s] parsed as 'array' with shorthand '[]' notation", io_info["name"]) + io_return["array"] = True + return io_return["array"], io_return["type"], io_return["mode"], io_return["allow"] + + +def is_cwl_enum_type(io_info): + # type: (CWL_IO_Type) -> Tuple[bool, str, int, Union[List[str], None]] + """Verifies if the specified I/O corresponds to a CWL enum definition. + + returns ``tuple(is_enum, io_type, io_allow)`` where: + - ``is_enum``: specifies if the I/O is of enum type. + - ``io_type``: enum base type if ``is_enum=True``, type of ``io_info`` otherwise. + - ``io_mode``: validation mode to be applied if input requires it, defaults to ``MODE.NONE``. 
+ - ``io_allow``: validation values of the enum. + :raises PackageTypeError: if the enum doesn't have the required parameters and valid format. + """ + io_type = io_info["type"] + if not isinstance(io_type, dict) or "type" not in io_type or io_type["type"] not in PACKAGE_CUSTOM_TYPES: + return False, io_type, MODE.NONE, None + + if "symbols" not in io_type: + raise PackageTypeError("Unsupported I/O 'enum' definition: '{!r}'.".format(io_info)) + io_allow = io_type["symbols"] + if not isinstance(io_allow, list) or len(io_allow) < 1: + raise PackageTypeError("Invalid I/O 'enum.symbols' definition: '{!r}'.".format(io_info)) + + # validate matching types in allowed symbols and convert to supported CWL type + first_allow = io_allow[0] + for io_i in io_allow: + if type(io_i) is not type(first_allow): + raise PackageTypeError("Ambiguous types in I/O 'enum.symbols' definition: '{!r}'.".format(io_info)) + if isinstance(first_allow, str): + io_type = "string" + elif isinstance(first_allow, float): + io_type = "float" + elif isinstance(first_allow, int): + io_type = "int" + else: + raise PackageTypeError("Unsupported I/O 'enum' base type: `{!s}`, from definition: `{!r}`." + .format(type(first_allow), io_info)) + + # allowed value validator mode must be set for input + return True, io_type, MODE.SIMPLE, io_allow + + +def cwl2wps_io(io_info, io_select): + # type:(CWL_IO_Type, str) -> WPS_IO_Type + """Converts input/output parameters from CWL types to WPS types. + + :param io_info: parsed IO of a CWL file + :param io_select: ``WPS_INPUT`` or ``WPS_OUTPUT`` to specify desired WPS type conversion. + :returns: corresponding IO in WPS format + """ + is_input = False + is_output = False + # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51) + if io_select == WPS_INPUT: + is_input = True + io_literal = LiteralInput # type: Union[Type[LiteralInput], Type[LiteralOutput]] + io_complex = ComplexInput # type: Union[Type[ComplexInput], Type[ComplexOutput]] + # io_bbox = BoundingBoxInput # type: Union[Type[BoundingBoxInput], Type[BoundingBoxOutput]] + elif io_select == WPS_OUTPUT: + is_output = True + io_literal = LiteralOutput # type: Union[Type[LiteralInput], Type[LiteralOutput]] + io_complex = ComplexOutput # type: Union[Type[ComplexInput], Type[ComplexOutput]] + # io_bbox = BoundingBoxOutput # type: Union[Type[BoundingBoxInput], Type[BoundingBoxOutput]] + else: + raise PackageTypeError("Unsupported I/O info definition: '{!r}' with '{}'.".format(io_info, io_select)) + + io_name = io_info["name"] + io_type = io_info["type"] + io_min_occurs = 1 + io_max_occurs = 1 + + # obtain real type if "default" or shorthand "?" was in CWL, which defines "type" as `["null", ]` + if isinstance(io_type, list) and "null" in io_type: + if not len(io_type) == 2: + raise PackageTypeError("Unsupported I/O type parsing for info: '{!r}' with '{}'." 
+ .format(io_info, io_select)) + LOGGER.debug("I/O parsed for 'default'") + io_type = io_type[1] if io_type[0] == "null" else io_type[0] + io_info["type"] = io_type + io_min_occurs = 0 # I/O can be omitted since default value exists + + # convert array types + is_array, array_elem, io_mode, io_allow = is_cwl_array_type(io_info) + if is_array: + LOGGER.debug("I/O parsed for 'array'") + io_type = array_elem + io_max_occurs = PACKAGE_ARRAY_MAX_SIZE + + # convert enum types + is_enum, enum_type, enum_mode, enum_allow = is_cwl_enum_type(io_info) + if is_enum: + LOGGER.debug("I/O parsed for 'enum'") + io_type = enum_type + io_allow = enum_allow + io_mode = enum_mode + + # debug info for unhandled types conversion + if not isinstance(io_type, str): + LOGGER.debug("is_array: [%s]", repr(is_array)) + LOGGER.debug("array_elem: [%s]", repr(array_elem)) + LOGGER.debug("is_enum: [%s]", repr(is_enum)) + LOGGER.debug("enum_type: [%s]", repr(enum_type)) + LOGGER.debug("enum_allow: [%s]", repr(enum_allow)) + LOGGER.debug("io_info: [%s]", repr(io_info)) + LOGGER.debug("io_type: [%s]", repr(io_type)) + LOGGER.debug("type(io_type): [%s]", type(io_type)) + raise TypeError("I/O type has not been properly decoded. Should be a string, got: '{!r}'".format(io_type)) + + # literal types + if is_enum or io_type in PACKAGE_LITERAL_TYPES: + if io_type == "Any": + io_type = "anyvalue" + if io_type == "null": + io_type = "novalue" + if io_type in ["int", "integer", "long"]: + io_type = "integer" + if io_type in ["float", "double"]: + io_type = "float" + # keywords commonly used by I/O + kw = { + "identifier": io_name, + "title": io_info.get("label", ""), + "abstract": io_info.get("doc", ""), + "data_type": io_type, + "mode": io_mode, + } + if is_input: + # avoid storing 'AnyValue' which become more problematic than + # anything later on when CWL/WPS merging is attempted + if io_allow is not AnyValue: + kw["allowed_values"] = io_allow + kw["default"] = io_info.get("default", None) + kw["min_occurs"] = io_min_occurs + kw["max_occurs"] = io_max_occurs + return io_literal(**kw) + # complex types + else: + # keywords commonly used by I/O + kw = { + "identifier": io_name, + "title": io_info.get("label", io_name), + "abstract": io_info.get("doc", ""), + } + if "format" in io_info: + io_formats = [io_info["format"]] if isinstance(io_info["format"], str) else io_info["format"] + kw["supported_formats"] = [get_format(fmt) for fmt in io_formats] + kw["mode"] = MODE.SIMPLE # only validate the extension (not file contents) + else: + # we need to minimally add 1 format, otherwise empty list is evaluated as None by pywps + # when "supported_formats" is None, the process's json property raises because of it cannot iterate formats + kw["supported_formats"] = [DEFAULT_FORMAT] + kw["mode"] = MODE.NONE # don't validate anything as default is only raw text + if is_output: + if io_type == "Directory": + kw["as_reference"] = True + if io_type == "File": + has_contents = io_info.get("contents") is not None + kw["as_reference"] = not has_contents + else: + # note: + # value of 'data_format' is identified as 'default' input format if specified with `Format` + # otherwise, `None` makes it automatically use the first one available in 'supported_formats' + kw["data_format"] = get_field(io_info, "data_format") + kw["data_format"] = json2wps_field(kw["data_format"], "supported_formats") if kw["data_format"] else None + kw.update({ + "min_occurs": io_min_occurs, + "max_occurs": io_max_occurs, + }) + return io_complex(**kw) + + +def 
any2cwl_literal_datatype(io_type): + # type: (str) -> Union[str, Type[null]] + """ + Solves common literal data-type names to supported ones for `CWL`. + """ + if io_type in ["string", "date", "time", "dateTime", "anyURI"]: + return "string" + if io_type in ["scale", "angle", "float", "double"]: + return "float" + if io_type in ["integer", "long", "positiveInteger", "nonNegativeInteger"]: + return "int" + if io_type in ["bool", "boolean"]: + return "boolean" + LOGGER.warning("Could not identify a CWL literal data type with [%s].", io_type) + return null + + +def any2wps_literal_datatype(io_type, is_value): + # type: (AnyValueType, bool) -> Union[str, Type[null]] + """ + Solves common literal data-type names to supported ones for `WPS`. + Verification is accomplished by name when ``is_value=False``, otherwise with python ``type`` when ``is_value=True``. + """ + if isinstance(io_type, str): + if not is_value: + if io_type in ["date", "time", "dateTime", "anyURI"]: + return "string" + if io_type in ["scale", "angle", "float", "double"]: + return "float" + if io_type in ["int", "integer", "long", "positiveInteger", "nonNegativeInteger"]: + return "integer" + if io_type in ["bool", "boolean"]: + return "boolean" + return "string" + if is_value and isinstance(io_type, bool): + return "boolean" + if is_value and isinstance(io_type, int): + return "integer" + if is_value and isinstance(io_type, float): + return "float" + return null + + +def json2wps_datatype(io_info): + # type: (JSON_IO_Type) -> str + """ + Guesses the literal data-type from I/O JSON information in order to allow creation of the corresponding I/O WPS. + Defaults to ``string`` if no suitable guess can be accomplished. + """ + io_type = get_field(io_info, "type", search_variations=False, pop_found=True) + if str(io_type).lower() == WPS_LITERAL: + io_type = null + io_guesses = [ + (io_type, False), + (get_field(io_info, "type", search_variations=True), False), + (get_field(io_info, "default", search_variations=True), True), + (get_field(io_info, "allowed_values", search_variations=True), True), + (get_field(io_info, "supported_values", search_variations=True), True) + ] + for io_guess, is_value in io_guesses: + if io_type: + break + if isinstance(io_guess, list) and len(io_guess): + io_guess = io_guess[0] + io_type = any2wps_literal_datatype(io_guess, is_value) + if not isinstance(io_type, str): + LOGGER.warning("Failed literal data-type guess, using default 'string' for I/O [%s].", + get_field(io_info, "identifier", search_variations=True)) + return "string" + return io_type + + +def json2wps_field(field_info, field_category): + # type: (JSON_IO_Type, str) -> Any + """ + Converts an I/O field from a JSON literal data, list, or dictionary to corresponding WPS types. + + :param field_info: literal data or information container describing the type to be generated. + :param field_category: one of ``WPS_FIELD_MAPPING`` keys to indicate how to parse ``field_info``. 
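+    For instance, under these categories, a format given as a dictionary or an allowed value given as a
+    string would be converted as follows (illustrative values only)::
+
+        json2wps_field({"mime_type": "application/json"}, "supported_formats")  # -> Format("application/json")
+        json2wps_field("sum", "allowed_values")                                 # -> AllowedValue(value="sum")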
+ """ + if field_category == "allowed_values": + if isinstance(field_info, AllowedValue): + return field_info + if isinstance(field_info, dict): + field_info.pop("type", None) + return AllowedValue(**field_info) + if isinstance(field_info, str): + return AllowedValue(value=field_info, allowed_type=ALLOWEDVALUETYPE.VALUE) + if isinstance(field_info, list): + return AllowedValue(minval=min(field_info), maxval=max(field_info), allowed_type=ALLOWEDVALUETYPE.RANGE) + elif field_category == "supported_formats": + if isinstance(field_info, dict): + return Format(**field_info) + if isinstance(field_info, str): + return Format(field_info) + elif field_category == "metadata": + if isinstance(field_info, WPS_Metadata): + return field_info + if isinstance(field_info, dict): + return WPS_Metadata(**metadata2json(field_info, force=True)) + if isinstance(field_info, str): + return WPS_Metadata(field_info) + elif field_category == "keywords" and isinstance(field_info, list): + return field_info + elif field_category in ["identifier", "title", "abstract"] and isinstance(field_info, str): + return field_info + LOGGER.warning("Field of type '%s' not handled as known WPS field.", field_category) + return None + + +def json2wps_io(io_info, io_select): + # type: (JSON_IO_Type, Union[WPS_INPUT, WPS_OUTPUT]) -> WPS_IO_Type + """Converts an I/O from a JSON dict to PyWPS types. + + :param io_info: I/O in JSON dict format. + :param io_select: ``WPS_INPUT`` or ``WPS_OUTPUT`` to specify desired WPS type conversion. + :return: corresponding I/O in WPS format. + """ + + io_info["identifier"] = get_field(io_info, "identifier", search_variations=True, pop_found=True) + + rename = { + "formats": "supported_formats", + "minOccurs": "min_occurs", + "maxOccurs": "max_occurs", + } + remove = [ + "id", + "workdir", + "any_value", + "data_format", + "data", + "file", + "mimetype", + "encoding", + "schema", + "asreference", + "additionalParameters", + ] + replace_values = {"unbounded": PACKAGE_ARRAY_MAX_SIZE} + + transform_json(io_info, rename=rename, remove=remove, replace_values=replace_values) + + # convert allowed value objects + values = get_field(io_info, "allowed_values", search_variations=True, pop_found=True) + if values is not null: + if isinstance(values, list) and len(values) > 0: + io_info["allowed_values"] = list() + for allow_value in values: + io_info["allowed_values"].append(json2wps_field(allow_value, "allowed_values")) + else: + io_info["allowed_values"] = AnyValue # noqa + + # convert supported format objects + formats = get_field(io_info, "supported_formats", search_variations=True, pop_found=True) + if formats is not null: + for fmt in formats: + fmt["mime_type"] = get_field(fmt, "mime_type", search_variations=True, pop_found=True) + fmt.pop("maximumMegabytes", None) + # define the 'default' with 'data_format' to be used if explicitly specified from the payload + if fmt.pop("default", None) is True: + if get_field(io_info, "data_format") != null: # if set by previous 'fmt' + raise PackageTypeError("Cannot have multiple 'default' formats simultaneously.") + # use 'data_format' instead of 'default' to avoid overwriting a potential 'default' value + # field 'data_format' is mapped as 'default' format + io_info["data_format"] = json2wps_field(fmt, "supported_formats") + io_info["supported_formats"] = [json2wps_field(fmt, "supported_formats") for fmt in formats] + + # convert metadata objects + metadata = get_field(io_info, "metadata", search_variations=True, pop_found=True) + if metadata is not null: + 
io_info["metadata"] = [json2wps_field(meta, "metadata") for meta in metadata] + + # convert literal fields specified as is + for field in ["identifier", "title", "abstract", "keywords"]: + value = get_field(io_info, field, search_variations=True, pop_found=True) + if value is not null: + io_info[field] = json2wps_field(value, field) + + # convert by type, add missing required arguments and + # remove additional arguments according to each case + io_type = io_info.pop("type", WPS_COMPLEX) # only ComplexData doesn't have "type" + # attempt to identify defined data-type directly in 'type' field instead of 'data_type' + if io_type not in WPS_ALL_TYPES: + io_type_guess = any2wps_literal_datatype(io_type, is_value=False) + if io_type_guess is not null: + io_type = WPS_LITERAL + io_info["data_type"] = io_type_guess + if io_select == WPS_INPUT: + if io_type in WPS_COMPLEX_TYPES: + io_info.pop("data_type", None) + if "supported_formats" not in io_info: + io_info["supported_formats"] = [DEFAULT_FORMAT] + if ("max_occurs", "unbounded") in io_info.items(): + io_info["max_occurs"] = PACKAGE_ARRAY_MAX_SIZE + io_info.pop("supported_values", None) + return ComplexInput(**io_info) + if io_type == WPS_BOUNDINGBOX: + io_info.pop("supported_formats", None) + io_info.pop("supportedCRS", None) + return BoundingBoxInput(**io_info) + if io_type == WPS_LITERAL: + io_info.pop("data_format", None) + io_info.pop("supported_formats", None) + io_info.pop("literalDataDomains", None) + io_info["data_type"] = json2wps_datatype(io_info) + return LiteralInput(**io_info) + elif io_select == WPS_OUTPUT: + io_info.pop("min_occurs", None) + io_info.pop("max_occurs", None) + io_info.pop("allowed_values", None) + io_info.pop("data_format", None) + io_info.pop("default", None) + if io_type in WPS_COMPLEX_TYPES: + io_info.pop("supported_values", None) + return ComplexOutput(**io_info) + if io_type == WPS_BOUNDINGBOX: + io_info.pop("supported_formats", None) + return BoundingBoxOutput(**io_info) + if io_type == WPS_LITERAL: + io_info.pop("supported_formats", None) + io_info["data_type"] = json2wps_datatype(io_info) + return LiteralOutput(**io_info) + raise PackageTypeError("Unknown conversion from dict to WPS type (type={0}, mode={1}).".format(io_type, io_select)) + + +def wps2json_io(io_wps): + # type: (WPS_IO_Type) -> JSON_IO_Type + """Converts a PyWPS I/O into a dictionary based version with keys corresponding to standard names (WPS 2.0).""" + + if not isinstance(io_wps, BasicIO): + raise PackageTypeError("Invalid type, expected 'BasicIO', got: [{0!r}] '{1!r}'".format(type(io_wps), io_wps)) + if not hasattr(io_wps, "json"): + raise PackageTypeError("Invalid type definition expected to have a 'json' property.") + + io_wps_json = io_wps.json # noqa + + rename = { + u"identifier": u"id", + u"supported_formats": u"formats", + u"mime_type": u"mimeType", + u"min_occurs": u"minOccurs", + u"max_occurs": u"maxOccurs", + } + replace_values = { + PACKAGE_ARRAY_MAX_SIZE: "unbounded", + } + replace_func = { + "maxOccurs": str, + "minOccurs": str, + } + + transform_json(io_wps_json, rename=rename, replace_values=replace_values, replace_func=replace_func) + + # in some cases (Complex I/O), 'as_reference=True' causes "type" to be overwritten, revert it back + if "type" in io_wps_json and io_wps_json["type"] == WPS_REFERENCE: + io_wps_json["type"] = WPS_COMPLEX + + # minimum requirement of 1 format object which defines mime-type + if io_wps_json["type"] == WPS_COMPLEX: + # FIXME: should we store 'None' in db instead of empty string when missing 
"encoding", "schema", etc. ? + if "formats" not in io_wps_json or not len(io_wps_json["formats"]): + io_wps_json["formats"] = [DEFAULT_FORMAT.json] + for io_format in io_wps_json["formats"]: + transform_json(io_format, rename=rename, replace_values=replace_values, replace_func=replace_func) + + # set 'default' format if it matches perfectly, or if only mime-type matches and it is the only available one + # (this avoid 'encoding' possibly not matching due to CWL not providing this information) + io_default = get_field(io_wps_json, "default", search_variations=True) + for io_format in io_wps_json["formats"]: + io_format["default"] = (io_default != null and is_equal_formats(io_format, io_default)) + if io_default and len(io_wps_json["formats"]) == 1 and not io_wps_json["formats"][0]["default"]: + io_default_mime_type = get_field(io_default, "mime_type", search_variations=True) + io_single_fmt_mime_type = get_field(io_wps_json["formats"][0], "mime_type", search_variations=True) + io_wps_json["formats"][0]["default"] = (io_default_mime_type == io_single_fmt_mime_type) + + return io_wps_json + + +def wps2json_job_payload(wps_request, wps_process): + # type: (WPSRequest, ProcessWPS) -> JSON + """ + Converts the input and output values of a :mod:`pywps` WPS ``Execute`` request to corresponding WPS-REST job. + + The inputs and outputs must be parsed from XML POST payload or KVP GET query parameters, and converted to data + container defined by :mod:`pywps` based on the process definition. + """ + data = { + "inputs": [], + "outputs": [], + "response": EXECUTE_RESPONSE_DOCUMENT, + "mode": EXECUTE_MODE_ASYNC, + } + multi_inputs = list(wps_request.inputs.values()) + for input_list in multi_inputs: + iid = get_any_id(input_list[0]) + for input_value in input_list: + input_data = input_value.get("data") + input_href = input_value.get("href") + if input_data: + data["inputs"].append({"id": iid, "data": input_data}) + elif input_href: + data["inputs"].append({"id": iid, "href": input_href}) + output_ids = list(wps_request.outputs) + for output in wps_process.outputs: + oid = output.identifier + as_ref = isinstance(output, ComplexOutput) + if oid not in output_ids: + data_output = {"identifier": oid, "asReference": str(as_ref).lower()} + else: + data_output = wps_request.outputs[oid] + if as_ref: + data_output["transmissionMode"] = EXECUTE_TRANSMISSION_MODE_REFERENCE + else: + data_output["transmissionMode"] = EXECUTE_TRANSMISSION_MODE_VALUE + data_output["id"] = oid + data["outputs"].append(data_output) + return data + + +def get_field(io_object, field, search_variations=False, pop_found=False, default=null): + # type: (Union[ANY_IO_Type, ANY_Format_Type], str, bool, bool, Any) -> Any + """ + Gets a field by name from various I/O object types. + + Default value is :py:data:`null` used for most situations to differentiate from + literal ``None`` which is often used as default for parameters. The :class:`NullType` + allows to explicitly tell that there was 'no field' and not 'no value' in existing + field. If you provided another value, it will be returned if not found within + the input object. + + :returns: matched value (including search variations if enabled), or ``default``. 
+ """ + if isinstance(io_object, dict): + value = io_object.get(field, null) + if value is not null: + if pop_found: + io_object.pop(field) + return value + else: + value = getattr(io_object, field, null) + if value is not null: + return value + if search_variations and field in WPS_FIELD_MAPPING: + for var in WPS_FIELD_MAPPING[field]: + value = get_field(io_object, var, pop_found=pop_found) + if value is not null: + return value + return default + + +def set_field(io_object, field, value, force=False): + # type: (Union[ANY_IO_Type, ANY_Format_Type], str, Any, bool) -> None + """ + Sets a field by name into various I/O object types. + Field value is set only if not ``null`` to avoid inserting data considered `invalid`. + If ``force=True``, verification of ``null`` value is ignored. + """ + if value is not null or force: + if isinstance(io_object, dict): + io_object[field] = value + return + setattr(io_object, field, value) + + +def _are_different_and_set(item1, item2): + # type: (Any, Any) -> bool + """ + Compares two value representations and returns ``True`` only if both are not ``null``, are of same ``type`` and + of different representative value. By "representative", we consider here the visual representation of byte/unicode + strings to support XML/JSON and Python 2/3 implementations. Other non string-like types are verified with + literal (usual) equality method. + """ + if item1 is null or item2 is null: + return False + try: + # Note: + # Calling ``==`` will result in one defined item's type ``__eq__`` method calling a property to validate + # equality with the second. When compared to a ``null``, ``None`` or differently type'd second item, the + # missing property on the second item could raise and ``AssertionError`` depending on the ``__eq__`` + # implementation (eg: ``Format`` checking for ``item.mime_type``, etc.). + equal = item1 == item2 + except AttributeError: + return False + if equal: + return False + # Note: check for both (str, bytes) for any python implementation that modifies its value + type1 = str if isinstance(item1, (str, bytes)) else type(item1) + type2 = str if isinstance(item2, (str, bytes)) else type(item2) + if type1 is str and type2 is str: + return bytes2str(item1) != bytes2str(item2) + return True + + +def is_equal_formats(format1, format2): + # type: (Union[Format, JSON], Union[Format, JSON]) -> bool + """Verifies for matching formats.""" + mime_type1 = get_field(format1, "mime_type", search_variations=True) + mime_type2 = get_field(format2, "mime_type", search_variations=True) + encoding1 = get_field(format1, "encoding", search_variations=True) + encoding2 = get_field(format2, "encoding", search_variations=True) + if mime_type1 == mime_type2 and encoding1 == encoding2 and \ + all(f != null for f in [mime_type1, mime_type2, encoding1, encoding2]): + return True + return False + + +def merge_io_formats(wps_formats, cwl_formats): + # type: (List[ANY_Format_Type], List[ANY_Format_Type]) -> List[ANY_Format_Type] + """ + Merges I/O format definitions by matching ``mime-type`` field. + In case of conflict, preserve the WPS version which can be more detailed (for example, by specifying ``encoding``). + + Verifies if ``DEFAULT_FORMAT_MISSING`` was written to a single `CWL` format caused by a lack of any value + provided as input. In this case, *only* `WPS` formats are kept. + + In the event that ``DEFAULT_FORMAT_MISSING`` was written to the `CWL` formats and that no `WPS` format was + specified, the :py:data:`DEFAULT_FORMAT` is returned. 
+
+    :raises PackageTypeError: if inputs are invalid format lists
+    """
+    if not (isinstance(wps_formats, (list, tuple, set)) and isinstance(cwl_formats, (list, tuple, set))):
+        raise PackageTypeError("Cannot merge formats definitions with invalid lists.")
+    if not len(wps_formats):
+        wps_formats = [DEFAULT_FORMAT]
+    if len(cwl_formats) == 1 and get_field(cwl_formats[0], DEFAULT_FORMAT_MISSING) is True:
+        return wps_formats
+
+    formats = []
+    cwl_fmt_dict = OrderedDict((get_field(fmt, "mime_type", search_variations=True), fmt) for fmt in cwl_formats)
+    wps_fmt_dict = OrderedDict((get_field(fmt, "mime_type", search_variations=True), fmt) for fmt in wps_formats)
+    for cwl_fmt in cwl_fmt_dict:
+        if cwl_fmt in wps_fmt_dict:
+            formats.append(wps_fmt_dict[cwl_fmt])
+        else:
+            formats.append(cwl_fmt_dict[cwl_fmt])
+    wps_fmt_only = set(wps_fmt_dict) - set(cwl_fmt_dict)
+    for wps_fmt in wps_fmt_only:
+        formats.append(wps_fmt_dict[wps_fmt])
+    return formats
+
+
+def merge_package_io(wps_io_list, cwl_io_list, io_select):
+    # type: (List[ANY_IO_Type], List[WPS_IO_Type], Union[WPS_INPUT, WPS_OUTPUT]) -> List[WPS_IO_Type]
+    """
+    Updates the I/O definitions to use for process creation and returned by ``GetCapabilities``/``DescribeProcess``.
+    If WPS I/O definitions were provided during deployment, updates the `CWL-to-WPS` converted I/O with the
+    complementary WPS I/O details. Otherwise, provides the minimum field requirements that can be retrieved
+    from the CWL definitions.
+
+    Removes any deployment WPS I/O definitions that don't match any CWL I/O by ID.
+    Adds missing deployment WPS I/O definitions using expected CWL I/O IDs.
+
+    :param wps_io_list: list of WPS I/O (as json) passed during process deployment.
+    :param cwl_io_list: list of CWL I/O converted to WPS-like I/O for counter-validation.
+    :param io_select: ``WPS_INPUT`` or ``WPS_OUTPUT`` to specify desired WPS type conversion.
+    :returns: list of validated/updated WPS I/O for the process matching CWL I/O requirements.
+ """ + if not isinstance(cwl_io_list, list): + raise PackageTypeError("CWL I/O definitions must be provided, empty list if none required.") + if not wps_io_list: + wps_io_list = list() + wps_io_dict = OrderedDict((get_field(wps_io, "identifier", search_variations=True), deepcopy(wps_io)) + for wps_io in wps_io_list) + cwl_io_dict = OrderedDict((get_field(cwl_io, "identifier", search_variations=True), deepcopy(cwl_io)) + for cwl_io in cwl_io_list) + missing_io_list = [cwl_io for cwl_io in cwl_io_dict if cwl_io not in wps_io_dict] # preserve ordering + updated_io_list = list() + + # WPS I/O by id not matching any converted CWL->WPS I/O are discarded + # otherwise, evaluate provided WPS I/O definitions and find potential new information to be merged + for cwl_id in cwl_io_dict: + cwl_io = cwl_io_dict[cwl_id] + updated_io_list.append(cwl_io) + if cwl_id in missing_io_list: + continue # missing WPS I/O are inferred only using CWL->WPS definitions + + # enforce expected CWL->WPS I/O required parameters + cwl_io_json = cwl_io.json + wps_io_json = wps_io_dict[cwl_id] + cwl_identifier = get_field(cwl_io_json, "identifier", search_variations=True) + cwl_title = get_field(wps_io_json, "title", search_variations=True) + wps_io_json.update({ + "identifier": cwl_identifier, + "title": cwl_title if cwl_title is not null else cwl_identifier + }) + # apply type if WPS deploy definition was partial but can be retrieved from CWL + wps_io_json.setdefault("type", get_field(cwl_io_json, "type", search_variations=True)) + + # fill missing WPS min/max occurs in 'provided' json to avoid overwriting resolved CWL values by WPS default '1' + # with 'default' field, this default '1' causes erroneous result when 'min_occurs' should be "0" + # with 'array' type, this default '1' causes erroneous result when 'max_occurs' should be "unbounded" + cwl_min_occurs = get_field(cwl_io_json, "min_occurs", search_variations=True) + cwl_max_occurs = get_field(cwl_io_json, "max_occurs", search_variations=True) + wps_min_occurs = get_field(wps_io_json, "min_occurs", search_variations=True) + wps_max_occurs = get_field(wps_io_json, "max_occurs", search_variations=True) + if wps_min_occurs == null and cwl_min_occurs != null: + wps_io_json["min_occurs"] = cwl_min_occurs + if wps_max_occurs == null and cwl_max_occurs != null: + wps_io_json["max_occurs"] = cwl_max_occurs + wps_io = json2wps_io(wps_io_json, io_select) + + # retrieve any complementing fields (metadata, keywords, etc.) 
passed as WPS input + # additionally enforce 'default' format defined by 'data_format' to keep value specified by WPS if applicable + # (see function 'json2wps_io' for detail) + for field_type in list(WPS_FIELD_MAPPING) + ["data_format"]: + cwl_field = get_field(cwl_io, field_type) + wps_field = get_field(wps_io, field_type) + # override provided formats if different (keep WPS), or if CWL->WPS was missing but is provided by WPS + if _are_different_and_set(wps_field, cwl_field) or (wps_field is not null and cwl_field is null): + # list of formats are updated by comparing format items since information can be partially complementary + if field_type in ["supported_formats"]: + wps_field = merge_io_formats(wps_field, cwl_field) + # default 'data_format' must be one of the 'supported_formats' + # avoid setting something invalid in this case, or it will cause problem after + # note: 'supported_formats' must have been processed before + if field_type == "data_format": + wps_fmts = get_field(updated_io_list[-1], "supported_formats", search_variations=False, default=[]) + if wps_field not in wps_fmts: + continue + set_field(updated_io_list[-1], field_type, wps_field) + return updated_io_list + + +def transform_json(json_data, # type: ANY_IO_Type + rename=None, # type: Optional[Dict[AnyKey, Any]] + remove=None, # type: Optional[List[AnyKey]] + add=None, # type: Optional[Dict[AnyKey, Any]] + replace_values=None, # type: Optional[Dict[AnyKey, Any]] + replace_func=None, # type: Optional[Dict[AnyKey, Callable[[Any], Any]]] + ): # type: (...) -> ANY_IO_Type + """ + Transforms the input json_data with different methods. + The transformations are applied in the same order as the arguments. + """ + rename = rename or {} + remove = remove or [] + add = add or {} + replace_values = replace_values or {} + replace_func = replace_func or {} + + # rename + for k, v in rename.items(): + if k in json_data: + json_data[v] = json_data.pop(k) + + # remove + for r_k in remove: + json_data.pop(r_k, None) + + # add + for k, v in add.items(): + json_data[k] = v + + # replace values + for key, value in json_data.items(): + for old_value, new_value in replace_values.items(): + if value == old_value: + json_data[key] = new_value + + # replace with function call + for k, func in replace_func.items(): + if k in json_data: + json_data[k] = func(json_data[k]) + + # also rename if the type of the value is a list of dicts + for key, value in json_data.items(): + if isinstance(value, list): + for nested_item in value: + if isinstance(nested_item, dict): + for k, v in rename.items(): + if k in nested_item: + nested_item[v] = nested_item.pop(k) + for k, func in replace_func.items(): + if k in nested_item: + nested_item[k] = func(nested_item[k]) + return json_data diff --git a/weaver/processes/esgf_process.py b/weaver/processes/esgf_process.py index 5c8a3ef84..64d0752f0 100644 --- a/weaver/processes/esgf_process.py +++ b/weaver/processes/esgf_process.py @@ -3,7 +3,7 @@ from collections import defaultdict from typing import TYPE_CHECKING, Optional -import cwt +import cwt # noqa # package: esgf-compute-api from weaver.processes.wps1_process import Wps1Process from weaver.status import STATUS_FAILED, STATUS_RUNNING, STATUS_SUCCEEDED @@ -11,7 +11,7 @@ if TYPE_CHECKING: from weaver.typedefs import JSON - from typing import AnyStr, Dict, List, Tuple + from typing import Dict, List, Tuple LAST_PERCENT_REGEX = re.compile(r".+ (\d{1,3})$") @@ -42,7 +42,7 @@ class ESGFProcess(Wps1Process): required_inputs = (InputNames.VARIABLE, ) def 
execute(self, workflow_inputs, out_dir, expected_outputs): - # type: (JSON, AnyStr, Dict[AnyStr, AnyStr]) -> None + # type: (JSON, str, Dict[str, str]) -> None """Execute an ESGF process from cwl inputs""" self._check_required_inputs(workflow_inputs) @@ -201,7 +201,7 @@ def update_history(): return esgf_process.succeeded def _process_results(self, esgf_process, output_dir, expected_outputs): - # type: (cwt.Process, AnyStr, Dict[AnyStr, AnyStr]) -> None + # type: (cwt.Process, str, Dict[str, str]) -> None """Process the result of the execution""" if not esgf_process.succeeded: message = "Process failed." diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py new file mode 100644 index 000000000..ba9619068 --- /dev/null +++ b/weaver/processes/execution.py @@ -0,0 +1,423 @@ +import logging +import os +from time import sleep +from typing import TYPE_CHECKING + +import colander +from celery.utils.log import get_task_logger +from owslib.util import clean_ows_url +from owslib.wps import ComplexDataInput, WebProcessingService +from pyramid.httpexceptions import HTTPBadRequest, HTTPNotImplemented +from pyramid_celery import celery_app as app + +from weaver.database import get_db +from weaver.datatype import Process, Service +from weaver.execute import ( + EXECUTE_MODE_ASYNC, + EXECUTE_MODE_AUTO, + EXECUTE_MODE_SYNC, + EXECUTE_RESPONSE_DOCUMENT, + EXECUTE_TRANSMISSION_MODE_OPTIONS +) +from weaver.formats import CONTENT_TYPE_APP_JSON +from weaver.notify import encrypt_email, notify_job_complete +from weaver.owsexceptions import OWSNoApplicableCode +from weaver.processes import wps_package +from weaver.processes.constants import WPS_COMPLEX_DATA +from weaver.processes.convert import ows2json_output +from weaver.processes.types import PROCESS_WORKFLOW +from weaver.status import STATUS_ACCEPTED, STATUS_FAILED, STATUS_STARTED, STATUS_SUCCEEDED, map_status +from weaver.store.base import StoreJobs +from weaver.utils import ( + get_any_id, + get_any_value, + get_cookie_headers, + get_settings, + get_ssl_verify_option, + raise_on_xml_exception, + wait_secs +) +from weaver.visibility import VISIBILITY_PUBLIC +from weaver.wps.utils import ( + check_wps_status, + get_wps_local_status_location, + get_wps_output_path, + get_wps_output_url, + load_pywps_config, + set_wps_language +) +from weaver.wps_restapi import swagger_definitions as sd +from weaver.wps_restapi.utils import get_wps_restapi_base_url + +LOGGER = logging.getLogger(__name__) +if TYPE_CHECKING: + from typing import List, Optional, Union + from pyramid.request import Request + from weaver.datatype import Job + from weaver.typedefs import HeaderCookiesType, JSON, SettingsType + +# job process execution progress +JOB_PROGRESS_SETUP = 1 +JOB_PROGRESS_DESCRIBE = 2 +JOB_PROGRESS_GET_INPUTS = 4 +JOB_PROGRESS_GET_OUTPUTS = 6 +JOB_PROGRESS_EXECUTE_REQUEST = 8 +JOB_PROGRESS_EXECUTE_STATUS_LOCATION = 10 +JOB_PROGRESS_EXECUTE_MONITOR_START = 15 +JOB_PROGRESS_EXECUTE_MONITOR_LOOP = 20 +JOB_PROGRESS_EXECUTE_MONITOR_ERROR = 85 +JOB_PROGRESS_EXECUTE_MONITOR_END = 90 +JOB_PROGRESS_NOTIFY = 95 +JOB_PROGRESS_DONE = 100 + + +@app.task(bind=True) +def execute_process(self, job_id, url, headers=None): + from weaver.wps.service import get_pywps_service + + LOGGER.debug("Job execute process called.") + settings = get_settings(app) + task_logger = get_task_logger(__name__) + load_pywps_config(settings) + + task_logger.debug("Job task setup.") + + # reset the connection because we are in a forked celery process + db = get_db(app, reset_connection=True) + 
store = db.get_store(StoreJobs) + + job = store.fetch_by_id(job_id) + job.task_id = self.request.id + job.progress = JOB_PROGRESS_SETUP + job.save_log(logger=task_logger, message="Job task setup completed.") + job = store.update_job(job) + + try: + try: + job.progress = JOB_PROGRESS_DESCRIBE + job.save_log(logger=task_logger, message="Execute WPS request for process [{!s}]".format(job.process)) + ssl_verify = get_ssl_verify_option("get", url, settings=settings) + wps = WebProcessingService(url=url, headers=get_cookie_headers(headers), verify=ssl_verify) + set_wps_language(wps, accept_language=job.accept_language) + raise_on_xml_exception(wps._capabilities) # noqa + except Exception as ex: + raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex))) + try: + process = wps.describeprocess(job.process) + except Exception as ex: + raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex))) + + # prepare inputs + job.progress = JOB_PROGRESS_GET_INPUTS + job.save_log(logger=task_logger, message="Fetching job input definitions.") + complex_inputs = [] + for process_input in process.dataInputs: + if WPS_COMPLEX_DATA in process_input.dataType: + complex_inputs.append(process_input.identifier) + + try: + wps_inputs = list() + for process_input in job.inputs: + input_id = get_any_id(process_input) + process_value = get_any_value(process_input) + # in case of array inputs, must repeat (id,value) + input_values = process_value if isinstance(process_value, list) else [process_value] + + # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file:// + input_values = [val[7:] if str(val).startswith("file://") else val for val in input_values] + + # need to use ComplexDataInput structure for complex input + # need to use literal String for anything else than complex + # TODO: BoundingBox not supported + wps_inputs.extend([ + (input_id, ComplexDataInput(input_value) if input_id in complex_inputs else str(input_value)) + for input_value in input_values]) + except KeyError: + wps_inputs = [] + + # prepare outputs + job.progress = JOB_PROGRESS_GET_OUTPUTS + job.save_log(logger=task_logger, message="Fetching job output definitions.") + wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs] + + mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC + job.progress = JOB_PROGRESS_EXECUTE_REQUEST + job.save_log(logger=task_logger, message="Starting job process execution.") + job.save_log(logger=task_logger, + message="Following updates could take a while until the Application Package answers...") + + wps_worker = get_pywps_service(environ=settings, is_worker=True) + execution = wps_worker.execute_job(job.process, wps_inputs=wps_inputs, wps_outputs=wps_outputs, + mode=mode, job_uuid=job.id) + if not execution.process and execution.errors: + raise execution.errors[0] + + # adjust status location + wps_status_path = get_wps_local_status_location(execution.statusLocation, settings) + job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION + LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path) + if not wps_status_path.startswith("http") and not os.path.isfile(wps_status_path): + LOGGER.warning("WPS status location not resolved to local path: [%s]", wps_status_path) + job.save_log(logger=task_logger, level=logging.DEBUG, + message="Updated job status location: [{}].".format(wps_status_path)) + + job.status = map_status(STATUS_STARTED) + 
job.status_message = execution.statusMessage or "{} initiation done.".format(str(job)) + job.status_location = wps_status_path + job.request = execution.request + job.response = execution.response + job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START + job.save_log(logger=task_logger, message="Starting monitoring of job execution.") + job = store.update_job(job) + + max_retries = 5 + num_retries = 0 + run_step = 0 + while execution.isNotComplete() or run_step == 0: + if num_retries >= max_retries: + raise Exception("Could not read status document after {} retries. Giving up.".format(max_retries)) + try: + # NOTE: + # Don't actually log anything here until process is completed (success or fail) so that underlying + # WPS execution logs can be inserted within the current job log and appear continuously. + # Only update internal job fields in case they get referenced elsewhere. + job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP + execution = check_wps_status(location=wps_status_path, settings=settings, + sleep_secs=wait_secs(run_step)) + job_msg = (execution.statusMessage or "").strip() + job.response = execution.response + job.status = map_status(execution.getStatus()) + job.status_message = "Job execution monitoring (progress: {}%, status: {})."\ + .format(execution.percentCompleted, job_msg or "n/a") + # job.save_log(logger=task_logger) + # job = store.update_job(job) + + if execution.isComplete(): + job.mark_finished() + job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END + msg_progress = " (status: {})".format(job_msg) if job_msg else "" + if execution.isSucceded(): + job.status = map_status(STATUS_SUCCEEDED) + job.status_message = "Job succeeded{}.".format(msg_progress) + wps_package.retrieve_package_job_log(execution, job) + job.save_log(logger=task_logger) + job_results = [ows2json_output(output, process, settings) + for output in execution.processOutputs] + job.results = make_results_relative(job_results, settings) + else: + task_logger.debug("Job failed.") + job.status_message = "Job failed{}.".format(msg_progress) + wps_package.retrieve_package_job_log(execution, job) + job.save_log(errors=execution.errors, logger=task_logger) + task_logger.debug("Mapping Job references with generated WPS locations.") + map_locations(job, settings) + + except Exception as exc: + num_retries += 1 + task_logger.debug("Exception raised: %s", repr(exc)) + job.status_message = "Could not read status XML document for {!s}. Trying again...".format(job) + job.save_log(errors=execution.errors, logger=task_logger) + sleep(1) + else: + # job.status_message = "Update {}...".format(str(job)) + # job.save_log(logger=task_logger) + num_retries = 0 + run_step += 1 + finally: + job = store.update_job(job) + + except Exception as exc: + LOGGER.exception("Failed running [%s]", job) + job.status = map_status(STATUS_FAILED) + job.status_message = "Failed to run {!s}.".format(job) + job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR + exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__) + errors = "{0}: {1!s}".format(exception_class, exc) + job.save_log(errors=errors, logger=task_logger) + finally: + job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END + job.status_message = "Job {}.".format(job.status) + job.save_log(logger=task_logger) + + # Send email if requested + if job.notification_email is not None: + job.progress = JOB_PROGRESS_NOTIFY + try: + notify_job_complete(job, job.notification_email, settings) + message = "Notification email sent successfully." 
+ job.save_log(logger=task_logger, message=message) + except Exception as exc: + exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__) + exception = "{0}: {1!s}".format(exception_class, exc) + message = "Couldn't send notification email ({})".format(exception) + job.save_log(errors=message, logger=task_logger, message=message) + + job.progress = JOB_PROGRESS_DONE + job.save_log(logger=task_logger, message="Job task complete.") + job = store.update_job(job) + + return job.status + + +def make_results_relative(results, settings): + # type: (List[JSON], SettingsType) -> List[JSON] + """ + Redefines job results to be saved in database as relative paths to output directory configured in PyWPS + (i.e.: relative to ``weaver.wps_output_dir``). + + This allows us to easily adjust the exposed result HTTP path according to server configuration + (i.e.: relative to ``weaver.wps_output_path`` and/or ``weaver.wps_output_url``) and it also avoid rewriting + the whole database job results if the setting is changed later on. + """ + wps_url = get_wps_output_url(settings) + wps_path = get_wps_output_path(settings) + for res in results: + ref = res.get("reference") + if isinstance(ref, str) and ref: + if ref.startswith(wps_url): + ref = ref.replace(wps_url, "", 1) + if ref.startswith(wps_path): + ref = ref.replace(wps_path, "", 1) + res["reference"] = ref + return results + + +def map_locations(job, settings): + # type: (Job, SettingsType) -> None + """ + Generates symlink references from the Job UUID to PyWPS UUID results (outputs directory, status and log locations). + Update the Job's WPS ID if applicable (job executed locally). + Assumes that all results are located under the same reference UUID. + """ + local_path = get_wps_local_status_location(job.status_location, settings) + if not local_path: + LOGGER.debug("Not possible to map Job to WPS locations.") + return + base_dir, status_xml = os.path.split(local_path) + job.wps_id = os.path.splitext(status_xml)[0] + wps_loc = os.path.join(base_dir, job.wps_id) + job_loc = os.path.join(base_dir, job.id) + if wps_loc == job_loc: + LOGGER.debug("Job already refers to WPS locations.") + return + for loc_ext in ["", ".log", ".xml"]: + wps_ref = wps_loc + loc_ext + job_ref = job_loc + loc_ext + if os.path.exists(wps_ref): # possible that there are no results (e.g.: failed job) + os.symlink(wps_ref, job_ref) + + +def submit_job(request, reference, tags=None): + # type: (Request, Union[Service, Process], Optional[List[str]]) -> JSON + """ + Generates the job submission from details retrieved in the request. + + .. seealso:: + :func:`submit_job_handler` to provide elements pre-extracted from requests or from other parsing. + """ + # validate body with expected JSON content and schema + if CONTENT_TYPE_APP_JSON not in request.content_type: + raise HTTPBadRequest("Request 'Content-Type' header other than '{}' not supported." + .format(CONTENT_TYPE_APP_JSON)) + try: + json_body = request.json_body + except Exception as ex: + raise HTTPBadRequest("Invalid JSON body cannot be decoded for job submission. 
[{}]".format(ex)) + provider_id = None # None OK if local + process_id = None # None OK if remote, but can be found as well if available from WPS-REST path + tags = tags or [] + if isinstance(reference, Process): + service_url = reference.processEndpointWPS1 + process_id = reference.id + visibility = reference.visibility + is_workflow = reference.type == PROCESS_WORKFLOW + is_local = True + tags += "local" + elif isinstance(reference, Service): + service_url = reference.url + provider_id = reference.id + process_id = request.matchdict.get("process_id") + visibility = VISIBILITY_PUBLIC + is_workflow = False + is_local = False + tags += "remote" + else: + LOGGER.error("Expected process/service, got: %s", type(reference)) + raise TypeError("Invalid process or service reference to execute job.") + tags = request.params.get("tags", "").split(",") + tags + user = request.authenticated_userid + lang = request.accept_language.header_value + headers = dict(request.headers) + settings = get_settings(request) + return submit_job_handler(json_body, settings, service_url, provider_id, process_id, is_workflow, is_local, + visibility, language=lang, auth=headers, tags=tags, user=user) + + +# FIXME: this should not be necessary if schema validators correctly implement OneOf(values) +def _validate_job_parameters(json_body): + """ + Tests supported parameters not automatically validated by colander deserialize. + """ + if json_body["mode"] not in [EXECUTE_MODE_ASYNC, EXECUTE_MODE_AUTO]: + raise HTTPNotImplemented(detail="Execution mode '{}' not supported.".format(json_body["mode"])) + + if json_body["response"] != EXECUTE_RESPONSE_DOCUMENT: + raise HTTPNotImplemented(detail="Execution response type '{}' not supported.".format(json_body["response"])) + + for job_output in json_body["outputs"]: + mode = job_output["transmissionMode"] + if mode not in EXECUTE_TRANSMISSION_MODE_OPTIONS: + raise HTTPNotImplemented(detail="Execute transmissionMode '{}' not supported.".format(mode)) + + +def submit_job_handler(payload, # type: JSON + settings, # type: SettingsType + service_url, # type: str + provider_id=None, # type: Optional[str] + process_id=None, # type: str + is_workflow=False, # type: bool + is_local=True, # type: bool + visibility=None, # type: Optional[str] + language=None, # type: Optional[str] + auth=None, # type: Optional[HeaderCookiesType] + tags=None, # type: Optional[List[str]] + user=None, # type: Optional[int] + ): # type: (...) -> JSON + """ + Submits the job to the Celery worker with provided parameters. + + Assumes that parameters have been pre-fetched and validated, except for the input payload. 
+ """ + try: + json_body = sd.Execute().deserialize(payload) + except colander.Invalid as ex: + raise HTTPBadRequest("Invalid schema: [{}]".format(str(ex))) + + # TODO: remove when all parameter variations are supported + _validate_job_parameters(json_body) + + is_execute_async = json_body["mode"] != EXECUTE_MODE_SYNC # convert auto to async + notification_email = json_body.get("notification_email") + encrypted_email = encrypt_email(notification_email, settings) if notification_email else None + + store = get_db(settings).get_store(StoreJobs) + job = store.save_job(task_id=STATUS_ACCEPTED, process=process_id, service=provider_id, + inputs=json_body.get("inputs"), is_local=is_local, is_workflow=is_workflow, + access=visibility, user_id=user, execute_async=is_execute_async, custom_tags=tags, + notification_email=encrypted_email, accept_language=language) + result = execute_process.delay(job_id=job.id, url=clean_ows_url(service_url), headers=auth) + LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) + + # local/provider process location + location_base = "/providers/{provider_id}".format(provider_id=provider_id) if provider_id else "" + location = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format( + base_url=get_wps_restapi_base_url(settings), + location_base=location_base, + process_id=process_id, + job_id=job.id) + body_data = { + "jobID": job.id, + "status": map_status(STATUS_ACCEPTED), + "location": location + } + return body_data diff --git a/weaver/processes/opensearch.py b/weaver/processes/opensearch.py index a4f66e77d..11968bbee 100644 --- a/weaver/processes/opensearch.py +++ b/weaver/processes/opensearch.py @@ -2,12 +2,12 @@ from collections import deque from copy import deepcopy from typing import TYPE_CHECKING +from urllib.parse import parse_qsl, urlparse import lxml.etree import shapely.wkt from pyramid.httpexceptions import HTTPOk from pyramid.settings import asbool -from six.moves.urllib.parse import parse_qsl, urlparse from weaver.formats import CONTENT_TYPE_TEXT_PLAIN from weaver.processes.constants import ( @@ -22,8 +22,8 @@ from weaver.utils import get_any_id, request_extra if TYPE_CHECKING: - from weaver.typedefs import AnySettingsContainer, XML # noqa: F401 - from typing import AnyStr, Deque, Dict, Iterable, List, Optional, Tuple # noqa: F401 + from weaver.typedefs import AnySettingsContainer, XML + from typing import Deque, Dict, Iterable, List, Optional, Tuple LOGGER = logging.getLogger("PACKAGE") @@ -261,7 +261,7 @@ def _query_features_paginated(self, params): response = request_extra("get", base_url, params=query_params, intervals=list(range(1, 5)), allowed_codes=[HTTPOk.code], settings=self.settings) - if not response.status_code == 200: + if response.status_code != 200: break json_body = response.json() features = json_body.get("features", []) @@ -279,7 +279,7 @@ def _query_features_paginated(self, params): start_index += n_received_features def query_datasets(self, params, accept_schemes, accept_mime_types): - # type: (Dict, Tuple, List) -> Iterable[AnyStr] + # type: (Dict, Tuple, List) -> Iterable[str] """ Loop on every opensearch result feature and yield url matching required mime-type and scheme. 
Log a warning if a feature cannot yield a valid url (either no compatible mime-type or scheme) diff --git a/weaver/processes/sources.py b/weaver/processes/sources.py index 0fb8d9127..21b80243a 100644 --- a/weaver/processes/sources.py +++ b/weaver/processes/sources.py @@ -1,10 +1,10 @@ import os from typing import TYPE_CHECKING +from urllib.parse import urlparse import yaml from pyramid.settings import asbool from pyramid_celery import celery_app as app -from six.moves.urllib.parse import urlparse from weaver import WEAVER_ROOT_DIR from weaver.config import WEAVER_DEFAULT_DATA_SOURCES_CONFIG, get_weaver_config_file @@ -18,8 +18,8 @@ DATA_SOURCES = {} """Data sources configuration. -Unless explicitly overridden, the configuration will be loaded from file as specified by -``weaver.data_sources`` setting. Following JSON schema format is expected (corresponding YAML also supported): +Unless explicitly overridden, the configuration will be loaded from file as specified by``weaver.data_sources`` setting. +Following JSON schema format is expected (corresponding YAML also supported): .. code-block:: json diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index 88b6afca2..79d67dfa0 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -1,27 +1,24 @@ -import json import logging -import os import warnings from copy import deepcopy from distutils.version import LooseVersion from typing import TYPE_CHECKING +from urllib.parse import parse_qs, urlparse import colander -import six import yaml -from owslib.wps import WebProcessingService, is_reference +from owslib.wps import WebProcessingService from pyramid.httpexceptions import ( HTTPBadRequest, HTTPConflict, + HTTPCreated, HTTPException, + HTTPForbidden, HTTPNotFound, HTTPOk, HTTPUnprocessableEntity ) from pyramid.settings import asbool -from six.moves.urllib.error import URLError -from six.moves.urllib.parse import parse_qs, urlparse -from six.moves.urllib.request import urlopen from weaver.config import ( WEAVER_CONFIGURATION_EMS, @@ -30,106 +27,75 @@ get_weaver_configuration ) from weaver.database import get_db -from weaver.datatype import Process as ProcessDB, Service +from weaver.datatype import Process, Service from weaver.exceptions import ( InvalidIdentifierValue, + MissingIdentifierValue, PackageNotFound, PackageRegistrationError, PackageTypeError, + ProcessNotAccessible, ProcessNotFound, ProcessRegistrationError, ServiceNotFound, log_unhandled_exceptions ) -from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_TEXT_PLAIN -from weaver.processes.constants import WPS_COMPLEX_DATA from weaver.processes.types import PROCESS_APPLICATION, PROCESS_WORKFLOW from weaver.store.base import StoreProcesses, StoreServices from weaver.utils import get_sane_name, get_settings, get_url_without_query from weaver.visibility import VISIBILITY_PRIVATE, VISIBILITY_PUBLIC -from weaver.wps import get_wps_output_dir from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.utils import get_wps_restapi_base_url -if TYPE_CHECKING: - from weaver.typedefs import AnyContainer, AnySettingsContainer, FileSystemPathType, JSON, Number - from typing import Any, AnyStr, List, Optional, Tuple, Union - from pywps import Process as ProcessWPS - import owslib.wps LOGGER = logging.getLogger(__name__) +if TYPE_CHECKING: + from typing import List, Optional, Tuple, Union + from pyramid.request import Request -def _get_data(output): - # type: (owslib.wps.Output) -> Optional[Any] - """ - Extract the data from the output 
value. - """ - # process output data are append into a list and - # WPS standard v1.0.0 specify that Output data field has zero or one value - if output.data: - return output.data[0] - return None + from weaver.typedefs import AnyContainer, AnySettingsContainer, FileSystemPathType, JSON, Number, SettingsType -def _read_reference(url): - # type: (AnyStr) -> Optional[AnyStr] +# FIXME: +# https://github.com/crim-ca/weaver/issues/215 +# define common Exception classes that won't require this type of conversion +def get_process(process_id=None, request=None, settings=None, store=None): + # type: (Optional[str], Optional[Request], Optional[SettingsType], Optional[StoreProcesses]) -> Process """ - Read a reference HTTP(S) URL and return the content. - """ - if not isinstance(url, six.string_types): - return None - if not url.lower().startswith("http"): - LOGGER.warning("URL reading not allowed because of potentially insecure scheme: [%s]", url) - return None - try: - return urlopen(url).read() # nosec: B310 - except URLError: - return None + Obtain the specified process and validate information, returning appropriate HTTP error if invalid. + Process identifier must be provided from either the request path definition or literal ID. + Database must be retrievable from either the request, underlying settings, or direct store reference. -def _get_multi_json_references(output, container): - # type: (owslib.wps.Output, Optional[AnySettingsContainer]) -> Optional[List[JSON]] + Different parameter combinations are intended to be used as needed or more appropriate, such that redundant + operations can be reduced where some objects are already fetched from previous operations. """ - Since WPS standard does not allow to return multiple values for a single output, - a lot of process actually return a json array containing references to these outputs. + if process_id is None and request is not None: + process_id = request.matchdict.get("process_id") + if store is None: + store = get_db(settings or request).get_store(StoreProcesses) + try: + process = store.fetch_by_id(process_id, visibility=VISIBILITY_PUBLIC) + return process + except (InvalidIdentifierValue, MissingIdentifierValue) as ex: + raise HTTPBadRequest(str(ex)) + except ProcessNotAccessible: + raise HTTPForbidden("Process with ID '{!s}' is not accessible.".format(process_id)) + except ProcessNotFound: + raise HTTPNotFound("Process with ID '{!s}' does not exist.".format(process_id)) + except colander.Invalid as ex: + raise HTTPBadRequest("Invalid schema:\n[{0!r}].".format(ex)) - Because the multi-output references are contained within this JSON file, it is not very convenient to retrieve - the list of URLs as one always needs to open and read the file to get them. This function goal is to detect this - particular format and expand the references to make them quickly available in the job output response. - :return: - Array of HTTP(S) references if the specified output is effectively a JSON containing that, ``None`` otherwise. 
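# A small usage sketch of the 'get_process' helper defined above (the view name is hypothetical):
# any of the redundant parameter combinations can be supplied, and store/validation errors are
# already converted to HTTPBadRequest / HTTPForbidden / HTTPNotFound by the helper itself.
def describe_process_view(request):
    process = get_process(request=request)  # 'process_id' resolved from request.matchdict
    ...  # build the process description response from 'process'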
+def get_job_submission_response(body): + # type: (JSON) -> HTTPCreated """ - # Check for the json datatype and mime-type - if output.dataType == WPS_COMPLEX_DATA and output.mimeType == CONTENT_TYPE_APP_JSON: - try: - # If the json data is referenced read it's content - if output.reference: - out_ref = output.reference - if container: - if out_ref.startswith("file://"): - out_ref = out_ref[7:] - if out_ref.startswith("/"): - wps_out_dir = get_wps_output_dir(container) - out_ref = os.path.join(wps_out_dir, out_ref) - if not os.path.isfile(out_ref): - out_ref = output.reference - json_data_str = _read_reference(out_ref) - # Else get the data directly - else: - json_data_str = _get_data(output) + Generates the successful response from contents returned by job submission process. - # Load the actual json dict - json_data = json.loads(json_data_str) - except Exception: - return None - - if isinstance(json_data, list): - for data_value in json_data: - if not is_reference(data_value): - return None - return json_data - return None + .. seealso:: + :func:`weaver.processes.execution.submit_job` + """ + return HTTPCreated(location=body["location"], json=body) def map_progress(progress, range_min, range_max): @@ -138,96 +104,6 @@ def map_progress(progress, range_min, range_max): return max(range_min, min(range_max, range_min + (progress * (range_max - range_min)) / 100)) -def jsonify_output(output, process_description, container=None): - # type: (owslib.wps.Output, owslib.wps.Process, Optional[AnySettingsContainer]) -> JSON - """ - Utility method to jsonify an output element from a WPS1 process description. - - In the case that a reference JSON output is specified and that it refers to a file that contains an array list of - URL references to simulate a multiple-output, this specific output gets expanded to contain both the original - URL ``reference`` field and the loaded URL list under ``data`` field for easier access from the response body. - """ - - if not output.dataType: - for process_output in getattr(process_description, "processOutputs", []): - if getattr(process_output, "identifier", "") == output.identifier: - output.dataType = process_output.dataType - break - - json_output = dict(identifier=output.identifier, - title=output.title, - dataType=output.dataType) - - # WPS standard v1.0.0 specify that either a reference or a data field has to be provided - if output.reference: - json_output["reference"] = output.reference - - # Handle special case where we have a reference to a json array containing dataset reference - # Avoid reference to reference by fetching directly the dataset references - json_array = _get_multi_json_references(output, container) - if json_array and all(str(ref).startswith("http") for ref in json_array): - json_output["data"] = json_array - else: - # WPS standard v1.0.0 specify that Output data field has Zero or one value - json_output["data"] = output.data[0] if output.data else None - - if json_output["dataType"] == WPS_COMPLEX_DATA: - json_output["mimeType"] = output.mimeType - - return json_output - - -def convert_process_wps_to_db(service, process, container): - # type: (Service, ProcessWPS, AnySettingsContainer) -> ProcessDB - """ - Converts an owslib WPS Process to local storage Process. 
- """ - from weaver.processes.wps_package import complex2json as jsonify_value - - describe_process_url = "{base_url}/providers/{provider_id}/processes/{process_id}".format( - base_url=get_wps_restapi_base_url(container), - provider_id=service.get("name"), - process_id=process.identifier) - execute_process_url = "{describe_url}/jobs".format(describe_url=describe_process_url) - - default_format = {"mimeType": CONTENT_TYPE_TEXT_PLAIN} - inputs = [dict( - id=getattr(dataInput, "identifier", ""), - title=getattr(dataInput, "title", ""), - abstract=getattr(dataInput, "abstract", ""), - minOccurs=str(getattr(dataInput, "minOccurs", 0)), - maxOccurs=str(getattr(dataInput, "maxOccurs", 0)), - dataType=dataInput.dataType, - defaultValue=jsonify_value(getattr(dataInput, "defaultValue", None)), - allowedValues=[jsonify_value(dataValue) for dataValue in getattr(dataInput, "allowedValues", [])], - supportedValues=[jsonify_value(dataValue) for dataValue in getattr(dataInput, "supportedValues", [])], - formats=[jsonify_value(dataValue) for dataValue in getattr(dataInput, "supportedValues", [default_format])], - ) for dataInput in getattr(process, "dataInputs", [])] - - outputs = [dict( - id=getattr(processOutput, "identifier", ""), - title=getattr(processOutput, "title", ""), - abstract=getattr(processOutput, "abstract", ""), - dataType=processOutput.dataType, - defaultValue=jsonify_value(getattr(processOutput, "defaultValue", None)), - formats=[jsonify_value(dataValue) for dataValue in getattr(processOutput, "supportedValues", [default_format])], - ) for processOutput in getattr(process, "processOutputs", [])] - - return ProcessDB( - id=process.identifier, - label=getattr(process, "title", ""), - title=getattr(process, "title", ""), - abstract=getattr(process, "abstract", ""), - inputs=inputs, - outputs=outputs, - url=describe_process_url, - processEndpointWPS1=service.get("url"), - processDescriptionURL=describe_process_url, - executeEndpoint=execute_process_url, - package=None, - ) - - @log_unhandled_exceptions(logger=LOGGER, message="Unhandled error occurred during parsing of deploy payload.", is_request=False) def _check_deploy(payload): @@ -340,7 +216,7 @@ def deploy_process_from_payload(payload, container): try: store = get_db(container).get_store(StoreProcesses) - saved_process = store.save_process(ProcessDB(process_info), overwrite=False) + saved_process = store.save_process(Process(process_info), overwrite=False) except ProcessRegistrationError as ex: raise HTTPConflict(detail=str(ex)) except ValueError as ex: @@ -352,7 +228,7 @@ def deploy_process_from_payload(payload, container): def parse_wps_process_config(config_entry): - # type: (Union[JSON, AnyStr]) -> Tuple[AnyStr, AnyStr, List[AnyStr], bool] + # type: (Union[JSON, str]) -> Tuple[str, str, List[str], bool] """ Parses the available WPS provider or process entry to retrieve its relevant information. 
@@ -364,7 +240,7 @@ def parse_wps_process_config(config_entry): svc_name = config_entry.get("name") svc_proc = config_entry.get("id", []) svc_vis = asbool(config_entry.get("visible", False)) - elif isinstance(config_entry, six.string_types): + elif isinstance(config_entry, str): svc_url = config_entry svc_name = None svc_proc = [] @@ -375,8 +251,8 @@ def parse_wps_process_config(config_entry): qs_p = parse_qs(url_p.query) svc_url = get_url_without_query(url_p) svc_name = svc_name or get_sane_name(url_p.hostname) - svc_proc = svc_proc or qs_p.get("identifier", []) - if not isinstance(svc_name, six.string_types): + svc_proc = svc_proc or qs_p.get("identifier", []) # noqa + if not isinstance(svc_name, str): raise ValueError("Invalid service value: [{!s}].".format(svc_name)) if not isinstance(svc_proc, list): raise ValueError("Invalid process value: [{!s}].".format(svc_proc)) diff --git a/weaver/processes/wps1_process.py b/weaver/processes/wps1_process.py index 85984ae8f..4666528f9 100644 --- a/weaver/processes/wps1_process.py +++ b/weaver/processes/wps1_process.py @@ -1,6 +1,6 @@ import logging from time import sleep -from typing import TYPE_CHECKING, AnyStr +from typing import TYPE_CHECKING from owslib.wps import ComplexDataInput, WebProcessingService @@ -8,7 +8,8 @@ from weaver.execute import EXECUTE_MODE_ASYNC from weaver.owsexceptions import OWSNoApplicableCode from weaver.processes.constants import WPS_COMPLEX_DATA -from weaver.processes.utils import jsonify_output, map_progress +from weaver.processes.convert import ows2json_output +from weaver.processes.utils import map_progress from weaver.processes.wps_process_base import WpsProcessInterface from weaver.utils import ( get_any_id, @@ -19,11 +20,11 @@ request_extra, wait_secs ) -from weaver.wps import check_wps_status +from weaver.wps.utils import check_wps_status if TYPE_CHECKING: - from weaver.typedefs import UpdateStatusPartialFunction # noqa: F401 - from pywps.app import WPSRequest # noqa: F401 + from pywps.app import WPSRequest + from weaver.typedefs import UpdateStatusPartialFunction LOGGER = logging.getLogger(__name__) @@ -36,8 +37,8 @@ class Wps1Process(WpsProcessInterface): def __init__(self, - provider, # type: AnyStr - process, # type: AnyStr + provider, # type: str + process, # type: str request, # type: WPSRequest update_status, # type: UpdateStatusPartialFunction ): @@ -125,7 +126,7 @@ def execute(self, workflow_inputs, out_dir, expected_outputs): if num_retries >= max_retries: raise Exception("Could not read status document after {} retries. 
Giving up.".format(max_retries)) try: - execution = check_wps_status(url=execution.statusLocation, verify=self.verify, + execution = check_wps_status(location=execution.statusLocation, verify=self.verify, sleep_secs=wait_secs(run_step)) job_id = execution.statusLocation.replace(".xml", "").split("/")[-1] LOGGER.debug(get_log_monitor_msg(job_id, status.map_status(execution.getStatus()), @@ -155,7 +156,7 @@ def execute(self, workflow_inputs, out_dir, expected_outputs): self.update_status("Fetching job outputs from remote WPS1 provider.", REMOTE_JOB_PROGRESS_FETCH_OUT, status.STATUS_RUNNING) - results = [jsonify_output(output, process) for output in execution.processOutputs] + results = [ows2json_output(output, process) for output in execution.processOutputs] for result in results: result_id = get_any_id(result) result_val = get_any_value(result) diff --git a/weaver/processes/wps3_process.py b/weaver/processes/wps3_process.py index be2ac91c6..171713bde 100644 --- a/weaver/processes/wps3_process.py +++ b/weaver/processes/wps3_process.py @@ -43,9 +43,11 @@ ) if TYPE_CHECKING: - from weaver.typedefs import JSON, UpdateStatusPartialFunction # noqa: F401 - from typing import Union, AnyStr # noqa: F401 - from pywps.app import WPSRequest # noqa: F401 + from typing import Union + + from pywps.app import WPSRequest + + from weaver.typedefs import JSON, UpdateStatusPartialFunction LOGGER = logging.getLogger(__name__) @@ -63,7 +65,7 @@ class Wps3Process(WpsProcessInterface): def __init__(self, step_payload, # type: JSON joborder, # type: JSON - process, # type: AnyStr + process, # type: str request, # type: WPSRequest update_status, # type: UpdateStatusPartialFunction ): diff --git a/weaver/processes/wps_default.py b/weaver/processes/wps_default.py index 8c4205f0e..e858f147f 100644 --- a/weaver/processes/wps_default.py +++ b/weaver/processes/wps_default.py @@ -13,12 +13,9 @@ class HelloWPS(Process): title = "Say Hello" type = PROCESS_WPS - def __init__(self, *args, **kwargs): # noqa: E811 - inputs = [ - LiteralInput("name", "Your name", data_type="string")] - outputs = [ - LiteralOutput("output", "Output response", - data_type="string")] + def __init__(self, *_, **__): + inputs = [LiteralInput("name", "Your name", data_type="string")] + outputs = [LiteralOutput("output", "Output response", data_type="string")] super(HelloWPS, self).__init__( self._handler, diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index 2537aa105..7268555e5 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -13,51 +13,39 @@ import logging import os import shutil +import sys import tempfile import time import uuid -from collections import Hashable, OrderedDict # pylint: disable=E0611,no-name-in-module # moved to .abc in Python 3 +from collections import OrderedDict # pylint: disable=E0611,no-name-in-module # moved to .abc in Python 3 from copy import deepcopy from typing import TYPE_CHECKING +from urllib.parse import urlparse import cwltool import cwltool.docker -import cwltool.factory -import lxml.etree -import six import yaml from cwltool.context import LoadingContext, RuntimeContext -from owslib.wps import ComplexData, Metadata as OwsMetadata, WebProcessingService +from cwltool.factory import Factory as CWLFactory, WorkflowStatus as CWLException from pyramid.httpexceptions import HTTPOk, HTTPServiceUnavailable from pyramid_celery import celery_app as app from pywps import Process -from pywps.app.Common import Metadata -from pywps.inout import BoundingBoxInput, 
BoundingBoxOutput, ComplexInput, ComplexOutput, LiteralInput, LiteralOutput -from pywps.inout.basic import SOURCE_TYPE, BasicIO -from pywps.inout.formats import Format -from pywps.inout.literaltypes import ALLOWEDVALUETYPE, AllowedValue, AnyValue +from pywps.inout import BoundingBoxInput, ComplexInput, LiteralInput +from pywps.inout.basic import SOURCE_TYPE +from pywps.inout.literaltypes import AnyValue from pywps.inout.storage.s3 import S3StorageBuilder -from pywps.validator.mode import MODE -from six.moves.urllib.parse import urlparse from yaml.scanner import ScannerError from weaver.config import WEAVER_CONFIGURATION_EMS, get_weaver_configuration from weaver.exceptions import ( + PackageException, PackageExecutionError, PackageNotFound, PackageRegistrationError, PackageTypeError, PayloadNotFound ) -from weaver.formats import ( - CONTENT_TYPE_ANY, - CONTENT_TYPE_ANY_XML, - CONTENT_TYPE_APP_JSON, - CONTENT_TYPE_TEXT_PLAIN, - get_cwl_file_format, - get_extension, - get_format -) +from weaver.formats import CONTENT_TYPE_ANY_XML, CONTENT_TYPE_APP_JSON, CONTENT_TYPE_TEXT_PLAIN, get_cwl_file_format from weaver.processes import opensearch from weaver.processes.constants import ( CWL_REQUIREMENT_APP_BUILTIN, @@ -65,13 +53,18 @@ CWL_REQUIREMENT_APP_ESGF_CWT, CWL_REQUIREMENT_APP_TYPES, CWL_REQUIREMENT_APP_WPS1, - WPS_BOUNDINGBOX, - WPS_COMPLEX, - WPS_COMPLEX_DATA, WPS_INPUT, - WPS_LITERAL, - WPS_OUTPUT, - WPS_REFERENCE + WPS_OUTPUT +) +from weaver.processes.convert import ( + cwl2wps_io, + get_field, + is_cwl_array_type, + json2wps_field, + json2wps_io, + merge_package_io, + wps2json_io, + xml_wps2cwl ) from weaver.processes.sources import retrieve_data_source_url from weaver.processes.types import PROCESS_APPLICATION, PROCESS_WORKFLOW @@ -87,7 +80,6 @@ ) from weaver.utils import ( SUPPORTED_FILE_SCHEMES, - bytes2str, fetch_file, get_any_id, get_header, @@ -96,80 +88,50 @@ get_log_fmt, get_sane_name, get_settings, - get_url_without_query, - null, request_extra, - str2bytes + setup_loggers ) -from weaver.wps import get_wps_output_dir +from weaver.wps.utils import get_wps_output_dir from weaver.wps_restapi.swagger_definitions import process_uri if TYPE_CHECKING: - # pylint: disable=W0611,unused-import - from weaver.datatype import Job # noqa: F401 - from weaver.status import AnyStatusType # noqa: F401 - from weaver.typedefs import ( # noqa: F401 - ToolPathObjectType, CWLFactoryCallable, CWL, AnyKey, AnyValue as AnyValueType, JSON, XML, Number + from typing import Any, Deque, Dict, List, Optional, Tuple, Type, Union + + from cwltool.factory import Callable as CWLFactoryCallable + from cwltool.process import Process as ProcessCWL + from owslib.wps import WPSExecution + from pywps.app import WPSRequest + from pywps.response.execute import ExecuteResponse + + from weaver.datatype import Job + from weaver.processes.convert import ( + ANY_IO_Type, + CWL_Input_Type, + JSON_IO_Type, + PKG_IO_Type, + WPS_Input_Type, + WPS_Output_Type ) - from typing import Any, AnyStr, Callable, Dict, List, Optional, Tuple, Type, Union # noqa: F401 - from cwltool.process import Process as ProcessCWL # noqa: F401 - from pywps.app import WPSRequest # noqa: F401 - from pywps.response.execute import ExecuteResponse # noqa: F401 - from owslib.wps import Input, Output, WPSExecution # noqa: F401 - from requests.models import Response # noqa: F401 - - # typing shortcuts - # pylint: disable=C0103,invalid-name - WPS_Input_Type = Union[LiteralInput, ComplexInput, BoundingBoxInput] - WPS_Output_Type = Union[LiteralOutput, ComplexOutput, 
BoundingBoxOutput] - WPS_IO_Type = Union[WPS_Input_Type, WPS_Output_Type] - OWS_Input_Type = Input - OWS_Output_Type = Output - OWS_IO_Type = Union[OWS_Input_Type, OWS_Output_Type] - JSON_IO_Type = JSON - CWL_Input_Type = Dict[{"id": AnyStr, "type": AnyStr}] - CWL_Output_Type = Dict[{"id": AnyStr, "type": AnyStr}] - CWL_IO_Type = Union[CWL_Input_Type, CWL_Output_Type] - PKG_IO_Type = Union[JSON_IO_Type, WPS_IO_Type] - ANY_IO_Type = Union[CWL_IO_Type, JSON_IO_Type, WPS_IO_Type, OWS_IO_Type] - ANY_Format_Type = Union[Dict[AnyStr, Optional[AnyStr]], Format] - ANY_Metadata_Type = Union[OwsMetadata, Metadata, Dict[AnyStr, AnyStr]] + from weaver.status import AnyStatusType + from weaver.typedefs import AnyValueType, CWL, JSON, Number, ToolPathObjectType, TypedDict, ValueType + # note: below requirements also include 'hints' - DictCWLRequirements = Dict[AnyStr, Dict[AnyStr, Any]] # {'': {: }} - ListCWLRequirements = List[Dict[AnyStr, Any]] # [{'class': , : }] + CWLRequirement = TypedDict("CWLRequirement", {"class": str}, total=False) + DictCWLRequirements = Dict[str, Dict[str, str]] # {'': {: }} + ListCWLRequirements = List[CWLRequirement] # [{'class': , : }] AnyCWLRequirements = Union[DictCWLRequirements, ListCWLRequirements] - CWLResultEntry = Dict[AnyStr, Union[AnyValueType, List[AnyValueType]]] - CWLResults = Dict[AnyStr, CWLResultEntry] - + # results from CWL execution + CWLResultEntry = Dict[str, Union[AnyValueType, List[AnyValueType]]] + CWLResults = Dict[str, CWLResultEntry] # NOTE: # Only use this logger for 'utility' methods (not residing under WpsPackage). # In that case, employ 'self.logger' instead so that the executed process has its self-contained job log entries. LOGGER = logging.getLogger(__name__) -__all__ = [ - "PACKAGE_EXTENSIONS", - "DEFAULT_FORMAT", - "WpsPackage", - "get_process_definition", - "get_process_location", - "get_package_workflow_steps", - "retrieve_package_job_log", - "complex2json", - "metadata2json", -] - -# package types and extensions -PACKAGE_EXTENSIONS = frozenset(["yaml", "yml", "json", "cwl", "job"]) -PACKAGE_BASE_TYPES = frozenset(["string", "boolean", "float", "int", "integer", "long", "double"]) -PACKAGE_LITERAL_TYPES = frozenset(list(PACKAGE_BASE_TYPES) + ["null", "Any"]) -PACKAGE_COMPLEX_TYPES = frozenset(["File", "Directory"]) -PACKAGE_ARRAY_BASE = "array" -PACKAGE_ARRAY_MAX_SIZE = six.MAXSIZE # pywps doesn't allow None, so use max size -PACKAGE_CUSTOM_TYPES = frozenset(["enum"]) # can be anything, but support "enum" which is more common -PACKAGE_ARRAY_ITEMS = frozenset(list(PACKAGE_BASE_TYPES) + list(PACKAGE_COMPLEX_TYPES) + list(PACKAGE_CUSTOM_TYPES)) -PACKAGE_ARRAY_TYPES = frozenset(["{}[]".format(item) for item in PACKAGE_ARRAY_ITEMS]) +# CWL package references PACKAGE_DEFAULT_FILE_NAME = "package" +PACKAGE_EXTENSIONS = frozenset(["yaml", "yml", "json", "cwl", "job"]) PACKAGE_OUTPUT_HOOK_LOG_UUID = "PACKAGE_OUTPUT_HOOK_LOG_{}" # process execution progress @@ -184,48 +146,9 @@ PACKAGE_PROGRESS_PREP_OUT = 98 PACKAGE_PROGRESS_DONE = 100 -# WPS object attribute -> all possible *other* naming variations -WPS_FIELD_MAPPING = { - "identifier": ["Identifier", "ID", "id", "Id"], - "title": ["Title", "Label", "label"], - "abstract": ["Abstract"], - "metadata": ["Metadata"], - "keywords": ["Keywords"], - "allowed_values": ["AllowedValues", "allowedValues", "allowedvalues", "Allowed_Values", "Allowedvalues"], - "allowed_collections": ["AllowedCollections", "allowedCollections", "allowedcollections", "Allowed_Collections", - "Allowedcollections"], - 
"default": ["default_value", "defaultValue", "DefaultValue", "Default", "data_format"], - "supported_values": ["SupportedValues", "supportedValues", "supportedvalues", "Supported_Values"], - "supported_formats": ["SupportedFormats", "supportedFormats", "supportedformats", "Supported_Formats", "formats"], - "additional_parameters": ["AdditionalParameters", "additionalParameters", "additionalparameters", - "Additional_Parameters"], - "type": ["Type", "data_type", "dataType", "DataType", "Data_Type"], - "min_occurs": ["minOccurs", "MinOccurs", "Min_Occurs", "minoccurs"], - "max_occurs": ["maxOccurs", "MaxOccurs", "Max_Occurs", "maxoccurs"], - "mime_type": ["mimeType", "MimeType", "mime-type", "Mime-Type", "MIME-Type", "mimetype"], - "encoding": ["Encoding"], - "href": ["url", "link", "reference"], -} -# WPS fields that contain a structure corresponding to `Format` object -# - keys must match `WPS_FIELD_MAPPING` keys -# - fields are placed in order of relevance (prefer explicit format, then supported, and defaults as last resort) -WPS_FIELD_FORMAT = ["formats", "supported_formats", "supported_values", "default"] - -# WPS 'type' string variations employed to indicate a Complex (file) I/O by different libraries -# for literal types, see '_any2cwl_literal_datatype' and '_any2wps_literal_datatype' functions -WPS_COMPLEX_TYPES = [WPS_COMPLEX, WPS_COMPLEX_DATA, WPS_REFERENCE] - -# WPS 'type' string of all combinations (type of data / library implementation) -WPS_ALL_TYPES = [WPS_LITERAL, WPS_BOUNDINGBOX] + WPS_COMPLEX_TYPES - -# default format if missing (minimal requirement of one) -DEFAULT_FORMAT = Format(mime_type=CONTENT_TYPE_TEXT_PLAIN) -DEFAULT_FORMAT_MISSING = "__DEFAULT_FORMAT_MISSING__" -setattr(DEFAULT_FORMAT, DEFAULT_FORMAT_MISSING, True) - def get_status_location_log_path(status_location, out_dir=None): - # type: (AnyStr, Optional[AnyStr]) -> AnyStr + # type: (str, Optional[str]) -> str log_path = os.path.splitext(status_location)[0] + ".log" return os.path.join(out_dir, os.path.split(log_path)[-1]) if out_dir else log_path @@ -248,7 +171,7 @@ def retrieve_package_job_log(execution, job): def get_process_location(process_id_or_url, data_source=None): - # type: (Union[Dict[AnyStr, Any], AnyStr], Optional[AnyStr]) -> AnyStr + # type: (Union[Dict[str, Any], str], Optional[str]) -> str """ Obtains the URL of a WPS REST DescribeProcess given the specified information. @@ -266,13 +189,13 @@ def get_process_location(process_id_or_url, data_source=None): def get_package_workflow_steps(package_dict_or_url): - # type: (Union[Dict[AnyStr, Any], AnyStr]) -> List[Dict[AnyStr, AnyStr]] + # type: (Union[Dict[str, Any], str]) -> List[Dict[str, str]] """ :param package_dict_or_url: process package definition or literal URL to DescribeProcess WPS-REST location. :return: list of workflow steps as {"name": , "reference": } where `name` is the generic package step name, and `reference` is the id/url of a registered WPS package. """ - if isinstance(package_dict_or_url, six.string_types): + if isinstance(package_dict_or_url, str): package_dict_or_url = _get_process_package(package_dict_or_url) workflow_steps_ids = list() package_type = _get_package_type(package_dict_or_url) @@ -289,7 +212,7 @@ def get_package_workflow_steps(package_dict_or_url): def _fetch_process_info(process_info_url, fetch_error): - # type: (AnyStr, Type[Exception]) -> JSON + # type: (str, Type[Exception]) -> JSON """ Fetches the JSON process information from the specified URL and validates that it contains something. 
@@ -298,7 +221,7 @@ def _fetch_process_info(process_info_url, fetch_error): def _info_not_found_error(): return fetch_error("Could not find reference: '{!s}'".format(process_info_url)) - if not isinstance(process_info_url, six.string_types): + if not isinstance(process_info_url, str): raise _info_not_found_error() resp = request_extra("get", process_info_url, headers={"Accept": CONTENT_TYPE_APP_JSON}, settings=get_settings(app)) if resp.status_code != HTTPOk.code: @@ -310,7 +233,7 @@ def _info_not_found_error(): def _get_process_package(process_url): - # type: (AnyStr) -> Tuple[CWL, AnyStr] + # type: (str) -> Tuple[CWL, str] """ Retrieves the WPS process package content from given process ID or literal URL. @@ -324,7 +247,7 @@ def _get_process_package(process_url): def _get_process_payload(process_url): - # type: (AnyStr) -> JSON + # type: (str) -> JSON """ Retrieves the WPS process payload content from given process ID or literal URL. @@ -358,7 +281,7 @@ def _get_package_requirements_as_class_list(requirements): def _get_package_ordered_io(io_section, order_hints=None): - # type: (Union[List[JSON], OrderedDict[AnyStr, JSON]], Optional[List[JSON]]) -> List[JSON] + # type: (Union[List[JSON], OrderedDict[str, JSON]], Optional[List[JSON]]) -> List[JSON] """ Converts `CWL` package I/O definitions defined as dictionary to an equivalent :class:`list` representation. The list representation ensures that I/O order is preserved when written to file and reloaded afterwards @@ -389,7 +312,7 @@ def _get_package_ordered_io(io_section, order_hints=None): # pre-order I/O that can be resolved with hint when the specified I/O section is not ordered io_section = deepcopy(io_section) for hint in order_hints: - hint_id = _get_field(hint, "identifier", search_variations=True) + hint_id = get_field(hint, "identifier", search_variations=True) if hint_id in io_section: io_dict[hint_id] = io_section.pop(hint_id) for hint in io_section: @@ -400,7 +323,7 @@ def _get_package_ordered_io(io_section, order_hints=None): # I/O value can be a literal type string or dictionary with more details at this point # make it always detailed dictionary to avoid problems for later parsing # this is also required to make the list, since all list items must have a matching type - if isinstance(io_value, six.string_types): + if isinstance(io_value, str): io_list.append({"type": io_value}) else: io_list.append(io_value) @@ -409,7 +332,7 @@ def _get_package_ordered_io(io_section, order_hints=None): def _check_package_file(cwl_file_path_or_url): - # type: (AnyStr) -> Tuple[AnyStr, bool] + # type: (str) -> Tuple[str, bool] """ Validates that the specified CWL file path or URL points to an existing and allowed file format. 
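# Usage sketch (hypothetical references) for '_check_package_file' above: the package reference is
# returned along with a flag telling whether it is a URL, per the 'Tuple[str, bool]' annotation,
# and only the extensions listed in PACKAGE_EXTENSIONS ("yaml", "yml", "json", "cwl", "job") are
# expected; local paths must additionally point to an existing file.
file_path, is_url = _check_package_file("https://example.com/packages/echo.cwl")  # is_url -> True
file_path, is_url = _check_package_file("/opt/weaver/packages/echo.cwl")          # is_url -> False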
@@ -436,7 +359,7 @@ def _check_package_file(cwl_file_path_or_url): def _load_package_file(file_path): - # type: (AnyStr) -> CWL + # type: (str) -> CWL """Loads the package in YAML/JSON format specified by the file path.""" file_path, is_url = _check_package_file(file_path) @@ -454,14 +377,14 @@ def _load_package_file(file_path): def _load_package_content(package_dict, # type: Dict - package_name=PACKAGE_DEFAULT_FILE_NAME, # type: AnyStr - data_source=None, # type: Optional[AnyStr] + package_name=PACKAGE_DEFAULT_FILE_NAME, # type: str + data_source=None, # type: Optional[str] only_dump_file=False, # type: bool - tmp_dir=None, # type: Optional[AnyStr] + tmp_dir=None, # type: Optional[str] loading_context=None, # type: Optional[LoadingContext] runtime_context=None, # type: Optional[RuntimeContext] process_offering=None, # type: Optional[JSON] - ): # type: (...) -> Optional[Tuple[CWLFactoryCallable, AnyStr, Dict]] + ): # type: (...) -> Optional[Tuple[CWLFactoryCallable, str, Dict]] """ Loads the package content to file in a temporary directory. Recursively processes sub-packages steps if the parent is a `Workflow` (CWL class). @@ -470,7 +393,7 @@ def _load_package_content(package_dict, # type: Dict :param package_name: name to use to create the package file. :param data_source: identifier of the data source to map to specific ADES, or map to localhost if ``None``. :param only_dump_file: specify if the ``CWLFactoryCallable`` should be validated and returned. - :param tmp_dir: location of the temporary directory to dump files (warning: will be deleted on exit). + :param tmp_dir: location of the temporary directory to dump files (deleted on exit). :param loading_context: cwltool context used to create the cwl package (required if ``only_dump_file=False``) :param runtime_context: cwltool context used to execute the cwl package (required if ``only_dump_file=False``) :param process_offering: JSON body of the process description payload (used as I/O hint ordering) @@ -480,6 +403,9 @@ def _load_package_content(package_dict, # type: Dict - instance of ``CWLFactoryCallable`` - package type (``PROCESS_WORKFLOW`` or ``PROCESS_APPLICATION``) - dict of each step with their package name that must be run + + .. warning:: + Specified :paramref:`tmp_dir` will be deleted on exit. """ tmp_dir = tmp_dir or tempfile.mkdtemp() @@ -510,788 +436,12 @@ def _load_package_content(package_dict, # type: Dict if only_dump_file: return - cwl_factory = cwltool.factory.Factory(loading_context=loading_context, runtime_context=runtime_context) - package = cwl_factory.make(tmp_json_cwl) # type: CWLFactoryCallable + factory = CWLFactory(loading_context=loading_context, runtime_context=runtime_context) + package = factory.make(tmp_json_cwl) # type: CWLFactoryCallable shutil.rmtree(tmp_dir) return package, package_type, step_packages -def _is_cwl_array_type(io_info): - # type: (CWL_IO_Type) -> Tuple[bool, AnyStr, MODE, Union[AnyValue, List[Any]]] - """Verifies if the specified I/O corresponds to one of various CWL array type definitions. - - returns ``tuple(is_array, io_type, io_mode, io_allow)`` where: - - ``is_array``: specifies if the I/O is of array type. - - ``io_type``: array element type if ``is_array`` is True, type of ``io_info`` otherwise. - - ``io_mode``: validation mode to be applied if sub-element requires it, defaults to ``MODE.NONE``. - - ``io_allow``: validation values to be applied if sub-element requires it, defaults to ``AnyValue``. 
- :raises PackageTypeError: if the array element doesn't have the required values and valid format. - """ - # use mapping to allow sub-function updates - io_return = { - "array": False, - "allow": AnyValue, - "type": io_info["type"], - "mode": MODE.NONE, - } - - def _update_if_sub_enum(_io_item): - # type: (CWL_IO_Type) -> bool - """ - Updates the ``io_return`` parameters if ``io_item`` evaluates to a valid ``enum`` type. - Parameter ``io_item`` should correspond to the ``items`` field of an array I/O definition. - Simple pass-through if the array item is not an ``enum``. - """ - _is_enum, _enum_type, _enum_mode, _enum_allow = _is_cwl_enum_type({"type": _io_item}) - if _is_enum: - LOGGER.debug("I/O [%s] parsed as 'array' with sub-item as 'enum'", io_info["name"]) - io_return["type"] = _enum_type - io_return["mode"] = _enum_mode - io_return["allow"] = _enum_allow - return _is_enum - - # optional I/O could be an array of '["null", ""]' with "" being any of the formats parsed after - # is it the literal representation instead of the shorthand with '?' - if isinstance(io_info["type"], list) and any(sub_type == "null" for sub_type in io_info["type"]): - # we can ignore the optional indication in this case because it doesn't impact following parsing - io_return["type"] = list(filter(lambda sub_type: sub_type != "null", io_info["type"]))[0] - - # array type conversion when defined as '{"type": "array", "items": ""}' - # validate against 'Hashable' instead of 'dict' since 'OrderedDict'/'CommentedMap' can fail 'isinstance()' - if not isinstance(io_return["type"], six.string_types) and not isinstance(io_return["type"], Hashable) \ - and "items" in io_return["type"] and "type" in io_return["type"]: - io_type = dict(io_return["type"]) # make hashable to allow comparison - if io_type["type"] != PACKAGE_ARRAY_BASE: - raise PackageTypeError("Unsupported I/O 'array' definition: '{}'.".format(repr(io_info))) - # parse enum in case we got an array of allowed symbols - is_enum = _update_if_sub_enum(io_type["items"]) - if not is_enum: - io_return["type"] = io_type["items"] - if io_return["type"] not in PACKAGE_ARRAY_ITEMS: - raise PackageTypeError("Unsupported I/O 'array' definition: '{}'.".format(repr(io_info))) - LOGGER.debug("I/O [%s] parsed as 'array' with nested dict notation", io_info["name"]) - io_return["array"] = True - # array type conversion when defined as string '[]' - elif isinstance(io_return["type"], six.string_types) and io_return["type"] in PACKAGE_ARRAY_TYPES: - io_return["type"] = io_return["type"][:-2] # remove '[]' - if io_return["type"] in PACKAGE_CUSTOM_TYPES: - # parse 'enum[]' for array of allowed symbols, provide expected structure for sub-item parsing - io_item = deepcopy(io_info) - io_item["type"] = io_return["type"] # override corrected type without '[]' - _update_if_sub_enum(io_item) - if io_return["type"] not in PACKAGE_ARRAY_ITEMS: - raise PackageTypeError("Unsupported I/O 'array' definition: '{}'.".format(repr(io_info))) - LOGGER.debug("I/O [%s] parsed as 'array' with shorthand '[]' notation", io_info["name"]) - io_return["array"] = True - return io_return["array"], io_return["type"], io_return["mode"], io_return["allow"] - - -def _is_cwl_enum_type(io_info): - # type: (CWL_IO_Type) -> Tuple[bool, AnyStr, int, Union[List[AnyStr], None]] - """Verifies if the specified I/O corresponds to a CWL enum definition. - - returns ``tuple(is_enum, io_type, io_allow)`` where: - - ``is_enum``: specifies if the I/O is of enum type. 
- - ``io_type``: enum base type if ``is_enum=True``, type of ``io_info`` otherwise. - - ``io_mode``: validation mode to be applied if input requires it, defaults to ``MODE.NONE``. - - ``io_allow``: validation values of the enum. - :raises PackageTypeError: if the enum doesn't have the required parameters and valid format. - """ - io_type = io_info["type"] - if not isinstance(io_type, dict) or "type" not in io_type or io_type["type"] not in PACKAGE_CUSTOM_TYPES: - return False, io_type, MODE.NONE, None - - if "symbols" not in io_type: - raise PackageTypeError("Unsupported I/O 'enum' definition: '{!r}'.".format(io_info)) - io_allow = io_type["symbols"] - if not isinstance(io_allow, list) or len(io_allow) < 1: - raise PackageTypeError("Invalid I/O 'enum.symbols' definition: '{!r}'.".format(io_info)) - - # validate matching types in allowed symbols and convert to supported CWL type - first_allow = io_allow[0] - for io_i in io_allow: - if type(io_i) is not type(first_allow): - raise PackageTypeError("Ambiguous types in I/O 'enum.symbols' definition: '{!r}'.".format(io_info)) - if isinstance(first_allow, six.string_types): - io_type = "string" - elif isinstance(first_allow, float): - io_type = "float" - elif isinstance(first_allow, int): - io_type = "int" - else: - raise PackageTypeError("Unsupported I/O 'enum' base type: `{!s}`, from definition: `{!r}`." - .format(type(first_allow), io_info)) - - # allowed value validator mode must be set for input - return True, io_type, MODE.SIMPLE, io_allow - - -def _cwl2wps_io(io_info, io_select): - # type:(CWL_IO_Type, AnyStr) -> WPS_IO_Type - """Converts input/output parameters from CWL types to WPS types. - - :param io_info: parsed IO of a CWL file - :param io_select: ``WPS_INPUT`` or ``WPS_OUTPUT`` to specify desired WPS type conversion. - :returns: corresponding IO in WPS format - """ - is_input = False - is_output = False - # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51) - if io_select == WPS_INPUT: - is_input = True - io_literal = LiteralInput # type: Union[Type[LiteralInput], Type[LiteralOutput]] - io_complex = ComplexInput # type: Union[Type[ComplexInput], Type[ComplexOutput]] - # io_bbox = BoundingBoxInput # type: Union[Type[BoundingBoxInput], Type[BoundingBoxOutput]] - elif io_select == WPS_OUTPUT: - is_output = True - io_literal = LiteralOutput # type: Union[Type[LiteralInput], Type[LiteralOutput]] - io_complex = ComplexOutput # type: Union[Type[ComplexInput], Type[ComplexOutput]] - # io_bbox = BoundingBoxOutput # type: Union[Type[BoundingBoxInput], Type[BoundingBoxOutput]] - else: - raise PackageTypeError("Unsupported I/O info definition: '{!r}' with '{}'.".format(io_info, io_select)) - - io_name = io_info["name"] - io_type = io_info["type"] - io_min_occurs = 1 - io_max_occurs = 1 - - # obtain real type if "default" or shorthand "?" was in CWL, which defines "type" as `["null", ]` - if isinstance(io_type, list) and "null" in io_type: - if not len(io_type) == 2: - raise PackageTypeError("Unsupported I/O type parsing for info: '{!r}' with '{}'." 
- .format(io_info, io_select)) - LOGGER.debug("I/O parsed for 'default'") - io_type = io_type[1] if io_type[0] == "null" else io_type[0] - io_info["type"] = io_type - io_min_occurs = 0 # I/O can be omitted since default value exists - - # convert array types - is_array, array_elem, io_mode, io_allow = _is_cwl_array_type(io_info) - if is_array: - LOGGER.debug("I/O parsed for 'array'") - io_type = array_elem - io_max_occurs = PACKAGE_ARRAY_MAX_SIZE - - # convert enum types - is_enum, enum_type, enum_mode, enum_allow = _is_cwl_enum_type(io_info) - if is_enum: - LOGGER.debug("I/O parsed for 'enum'") - io_type = enum_type - io_allow = enum_allow - io_mode = enum_mode - - # debug info for unhandled types conversion - if not isinstance(io_type, six.string_types): - LOGGER.debug("is_array: [%s]", repr(is_array)) - LOGGER.debug("array_elem: [%s]", repr(array_elem)) - LOGGER.debug("is_enum: [%s]", repr(is_enum)) - LOGGER.debug("enum_type: [%s]", repr(enum_type)) - LOGGER.debug("enum_allow: [%s]", repr(enum_allow)) - LOGGER.debug("io_info: [%s]", repr(io_info)) - LOGGER.debug("io_type: [%s]", repr(io_type)) - LOGGER.debug("type(io_type): [%s]", type(io_type)) - raise TypeError("I/O type has not been properly decoded. Should be a string, got: '{!r}'".format(io_type)) - - # literal types - if is_enum or io_type in PACKAGE_LITERAL_TYPES: - if io_type == "Any": - io_type = "anyvalue" - if io_type == "null": - io_type = "novalue" - if io_type in ["int", "integer", "long"]: - io_type = "integer" - if io_type in ["float", "double"]: - io_type = "float" - # keywords commonly used by I/O - kw = { - "identifier": io_name, - "title": io_info.get("label", ""), - "abstract": io_info.get("doc", ""), - "data_type": io_type, - "mode": io_mode, - } - if is_input: - # avoid storing 'AnyValue' which become more problematic than - # anything later on when CWL/WPS merging is attempted - if io_allow is not AnyValue: - kw["allowed_values"] = io_allow - kw["default"] = io_info.get("default", None) - kw["min_occurs"] = io_min_occurs - kw["max_occurs"] = io_max_occurs - return io_literal(**kw) - # complex types - else: - # keywords commonly used by I/O - kw = { - "identifier": io_name, - "title": io_info.get("label", io_name), - "abstract": io_info.get("doc", ""), - } - if "format" in io_info: - io_formats = [io_info["format"]] if isinstance(io_info["format"], six.string_types) else io_info["format"] - kw["supported_formats"] = [get_format(fmt) for fmt in io_formats] - kw["mode"] = MODE.SIMPLE # only validate the extension (not file contents) - else: - # we need to minimally add 1 format, otherwise empty list is evaluated as None by pywps - # when "supported_formats" is None, the process's json property raises because of it cannot iterate formats - kw["supported_formats"] = [DEFAULT_FORMAT] - kw["mode"] = MODE.NONE # don't validate anything as default is only raw text - if is_output: - if io_type == "Directory": - kw["as_reference"] = True - if io_type == "File": - has_contents = io_info.get("contents") is not None - kw["as_reference"] = not has_contents - else: - # note: - # value of 'data_format' is identified as 'default' input format if specified with `Format` - # otherwise, `None` makes it automatically use the first one available in 'supported_formats' - kw["data_format"] = _get_field(io_info, "data_format") - kw["data_format"] = _json2wps_field(kw["data_format"], "supported_formats") if kw["data_format"] else None - kw.update({ - "min_occurs": io_min_occurs, - "max_occurs": io_max_occurs, - }) - return io_complex(**kw) 
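# Illustration (hypothetical I/O definitions) of the CWL-to-WPS conversion removed here and
# relocated to 'weaver.processes.convert' (imported as 'cwl2wps_io' earlier in this diff):
from weaver.processes.constants import WPS_INPUT
from weaver.processes.convert import cwl2wps_io

cwl2wps_io({"name": "level", "type": ["null", "int"], "default": 5}, WPS_INPUT)
# -> LiteralInput(identifier="level", data_type="integer", min_occurs=0, max_occurs=1, default=5)
cwl2wps_io({"name": "images", "type": "File[]"}, WPS_INPUT)
# -> ComplexInput(identifier="images", max_occurs=PACKAGE_ARRAY_MAX_SIZE,
#                 supported_formats=[DEFAULT_FORMAT])  # "text/plain" fallback when no format given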
- - -def _any2cwl_literal_datatype(io_type): - # type: (AnyStr) -> Union[AnyStr, Type[null]] - """ - Solves common literal data-type names to supported ones for `CWL`. - """ - if io_type in ["string", "date", "time", "dateTime", "anyURI"]: - return "string" - if io_type in ["scale", "angle", "float", "double"]: - return "float" - if io_type in ["integer", "long", "positiveInteger", "nonNegativeInteger"]: - return "int" - if io_type in ["bool", "boolean"]: - return "boolean" - LOGGER.warning("Could not identify a CWL literal data type with [%s].", io_type) - return null - - -def _any2wps_literal_datatype(io_type, is_value): - # type: (AnyValueType, bool) -> Union[AnyStr, Type[null]] - """ - Solves common literal data-type names to supported ones for `WPS`. - Verification is accomplished by name when ``is_value=False``, otherwise with python ``type`` when ``is_value=True``. - """ - if isinstance(io_type, six.string_types): - if not is_value: - if io_type in ["date", "time", "dateTime", "anyURI"]: - return "string" - if io_type in ["scale", "angle", "float", "double"]: - return "float" - if io_type in ["int", "integer", "long", "positiveInteger", "nonNegativeInteger"]: - return "integer" - if io_type in ["bool", "boolean"]: - return "boolean" - return "string" - if is_value and isinstance(io_type, bool): - return "boolean" - if is_value and isinstance(io_type, int): - return "integer" - if is_value and isinstance(io_type, float): - return "float" - return null - - -def _json2wps_datatype(io_info): - # type: (JSON_IO_Type) -> AnyStr - """ - Guesses the literal data-type from I/O JSON information in order to allow creation of the corresponding I/O WPS. - Defaults to ``string`` if no suitable guess can be accomplished. - """ - io_type = _get_field(io_info, "type", search_variations=False, pop_found=True) - if str(io_type).lower() == WPS_LITERAL: - io_type = null - io_guesses = [ - (io_type, False), - (_get_field(io_info, "type", search_variations=True), False), - (_get_field(io_info, "default", search_variations=True), True), - (_get_field(io_info, "allowed_values", search_variations=True), True), - (_get_field(io_info, "supported_values", search_variations=True), True) - ] - for io_guess, is_value in io_guesses: - if io_type: - break - if isinstance(io_guess, list) and len(io_guess): - io_guess = io_guess[0] - io_type = _any2wps_literal_datatype(io_guess, is_value) - if not isinstance(io_type, six.string_types): - LOGGER.warning("Failed literal data-type guess, using default 'string' for I/O [%s].", - _get_field(io_info, "identifier", search_variations=True)) - return "string" - return io_type - - -def _json2wps_field(field_info, field_category): - # type: (JSON_IO_Type, AnyStr) -> Any - """ - Converts an I/O field from a JSON literal data, list, or dictionary to corresponding WPS types. - - :param field_info: literal data or information container describing the type to be generated. - :param field_category: one of ``WPS_FIELD_MAPPING`` keys to indicate how to parse ``field_info``. 
- """ - if field_category == "allowed_values": - if isinstance(field_info, AllowedValue): - return field_info - if isinstance(field_info, dict): - field_info.pop("type", None) - return AllowedValue(**field_info) - if isinstance(field_info, six.string_types): - return AllowedValue(value=field_info, allowed_type=ALLOWEDVALUETYPE.VALUE) - if isinstance(field_info, list): - return AllowedValue(minval=min(field_info), maxval=max(field_info), allowed_type=ALLOWEDVALUETYPE.RANGE) - elif field_category == "supported_formats": - if isinstance(field_info, dict): - return Format(**field_info) - if isinstance(field_info, six.string_types): - return Format(field_info) - elif field_category == "metadata": - if isinstance(field_info, Metadata): - return field_info - if isinstance(field_info, dict): - return Metadata(**metadata2json(field_info, force=True)) - if isinstance(field_info, six.string_types): - return Metadata(field_info) - elif field_category == "keywords" and isinstance(field_info, list): - return field_info - elif field_category in ["identifier", "title", "abstract"] and isinstance(field_info, six.string_types): - return field_info - LOGGER.warning("Field of type '%s' not handled as known WPS field.", field_category) - return None - - -def _json2wps_io(io_info, io_select): - # type: (JSON_IO_Type, Union[WPS_INPUT, WPS_OUTPUT]) -> WPS_IO_Type - """Converts an I/O from a JSON dict to WPS types. - - :param io_info: I/O in JSON dict format. - :param io_select: ``WPS_INPUT`` or ``WPS_OUTPUT`` to specify desired WPS type conversion. - :return: corresponding I/O in WPS format. - """ - - io_info["identifier"] = _get_field(io_info, "identifier", search_variations=True, pop_found=True) - - rename = { - "formats": "supported_formats", - "minOccurs": "min_occurs", - "maxOccurs": "max_occurs", - } - remove = [ - "id", - "workdir", - "any_value", - "data_format", - "data", - "file", - "mimetype", - "encoding", - "schema", - "asreference", - "additionalParameters", - ] - replace_values = {"unbounded": PACKAGE_ARRAY_MAX_SIZE} - - transform_json(io_info, rename=rename, remove=remove, replace_values=replace_values) - - # convert allowed value objects - values = _get_field(io_info, "allowed_values", search_variations=True, pop_found=True) - if values is not null: - if isinstance(values, list) and len(values) > 0: - io_info["allowed_values"] = list() - for allow_value in values: - io_info["allowed_values"].append(_json2wps_field(allow_value, "allowed_values")) - else: - io_info["allowed_values"] = AnyValue # noqa - - # convert supported format objects - formats = _get_field(io_info, "supported_formats", search_variations=True, pop_found=True) - if formats is not null: - for fmt in formats: - fmt["mime_type"] = _get_field(fmt, "mime_type", search_variations=True, pop_found=True) - fmt.pop("maximumMegabytes", None) - # define the 'default' with 'data_format' to be used if explicitly specified from the payload - if fmt.pop("default", None) is True: - if _get_field(io_info, "data_format") != null: # if set by previous 'fmt' - raise PackageTypeError("Cannot have multiple 'default' formats simultaneously.") - # use 'data_format' instead of 'default' to avoid overwriting a potential 'default' value - # field 'data_format' is mapped as 'default' format - io_info["data_format"] = _json2wps_field(fmt, "supported_formats") - io_info["supported_formats"] = [_json2wps_field(fmt, "supported_formats") for fmt in formats] - - # convert metadata objects - metadata = _get_field(io_info, "metadata", search_variations=True, 
pop_found=True) - if metadata is not null: - io_info["metadata"] = [_json2wps_field(meta, "metadata") for meta in metadata] - - # convert literal fields specified as is - for field in ["identifier", "title", "abstract", "keywords"]: - value = _get_field(io_info, field, search_variations=True, pop_found=True) - if value is not null: - io_info[field] = _json2wps_field(value, field) - - # convert by type, add missing required arguments and - # remove additional arguments according to each case - io_type = io_info.pop("type", WPS_COMPLEX) # only ComplexData doesn't have "type" - # attempt to identify defined data-type directly in 'type' field instead of 'data_type' - if io_type not in WPS_ALL_TYPES: - io_type_guess = _any2wps_literal_datatype(io_type, is_value=False) - if io_type_guess is not null: - io_type = WPS_LITERAL - io_info["data_type"] = io_type_guess - if io_select == WPS_INPUT: - if io_type in WPS_COMPLEX_TYPES: - io_info.pop("data_type", None) - if "supported_formats" not in io_info: - io_info["supported_formats"] = [DEFAULT_FORMAT] - if ("max_occurs", "unbounded") in io_info.items(): - io_info["max_occurs"] = PACKAGE_ARRAY_MAX_SIZE - io_info.pop("supported_values", None) - return ComplexInput(**io_info) - if io_type == WPS_BOUNDINGBOX: - io_info.pop("supported_formats", None) - io_info.pop("supportedCRS", None) - return BoundingBoxInput(**io_info) - if io_type == WPS_LITERAL: - io_info.pop("data_format", None) - io_info.pop("supported_formats", None) - io_info.pop("literalDataDomains", None) - io_info["data_type"] = _json2wps_datatype(io_info) - return LiteralInput(**io_info) - elif io_select == WPS_OUTPUT: - io_info.pop("min_occurs", None) - io_info.pop("max_occurs", None) - io_info.pop("allowed_values", None) - io_info.pop("data_format", None) - io_info.pop("default", None) - if io_type in WPS_COMPLEX_TYPES: - io_info.pop("supported_values", None) - return ComplexOutput(**io_info) - if io_type == WPS_BOUNDINGBOX: - io_info.pop("supported_formats", None) - return BoundingBoxOutput(**io_info) - if io_type == WPS_LITERAL: - io_info.pop("supported_formats", None) - io_info["data_type"] = _json2wps_datatype(io_info) - return LiteralOutput(**io_info) - raise PackageTypeError("Unknown conversion from dict to WPS type (type={0}, mode={1}).".format(io_type, io_select)) - - -def _wps2json_io(io_wps): - # type: (WPS_IO_Type) -> JSON_IO_Type - """Converts a PyWPS I/O into a dictionary based version with keys corresponding to standard names (WPS 2.0).""" - - if not isinstance(io_wps, BasicIO): - raise PackageTypeError("Invalid type, expected 'BasicIO', got: [{0!r}] '{1!r}'".format(type(io_wps), io_wps)) - if not hasattr(io_wps, "json"): - raise PackageTypeError("Invalid type definition expected to have a 'json' property.") - - io_wps_json = io_wps.json # noqa - - rename = { - u"identifier": u"id", - u"supported_formats": u"formats", - u"mime_type": u"mimeType", - u"min_occurs": u"minOccurs", - u"max_occurs": u"maxOccurs", - } - replace_values = { - PACKAGE_ARRAY_MAX_SIZE: "unbounded", - } - replace_func = { - "maxOccurs": str, - "minOccurs": str, - } - - transform_json(io_wps_json, rename=rename, replace_values=replace_values, replace_func=replace_func) - - # in some cases (Complex I/O), 'as_reference=True' causes "type" to be overwritten, revert it back - if "type" in io_wps_json and io_wps_json["type"] == WPS_REFERENCE: - io_wps_json["type"] = WPS_COMPLEX - - # minimum requirement of 1 format object which defines mime-type - if io_wps_json["type"] == WPS_COMPLEX: - # FIXME: should we store 
'None' in db instead of empty string when missing "encoding", "schema", etc. ? - if "formats" not in io_wps_json or not len(io_wps_json["formats"]): - io_wps_json["formats"] = [DEFAULT_FORMAT.json] - for io_format in io_wps_json["formats"]: - transform_json(io_format, rename=rename, replace_values=replace_values, replace_func=replace_func) - - # set 'default' format if it matches perfectly, or if only mime-type matches and it is the only available one - # (this avoid 'encoding' possibly not matching due to CWL not providing this information) - io_default = _get_field(io_wps_json, "default", search_variations=True) - for io_format in io_wps_json["formats"]: - io_format["default"] = (io_default != null and _matching_formats(io_format, io_default)) - if io_default and len(io_wps_json["formats"]) == 1 and not io_wps_json["formats"][0]["default"]: - io_default_mime_type = _get_field(io_default, "mime_type", search_variations=True) - io_single_fmt_mime_type = _get_field(io_wps_json["formats"][0], "mime_type", search_variations=True) - io_wps_json["formats"][0]["default"] = (io_default_mime_type == io_single_fmt_mime_type) - - return io_wps_json - - -def _get_field(io_object, field, search_variations=False, pop_found=False, default=null): - # type: (Union[ANY_IO_Type, ANY_Format_Type], AnyStr, bool, bool, Any) -> Any - """ - Gets a field by name from various I/O object types. - - Default value is :py:data:`null` used for most situations to differentiate from - literal ``None`` which is often used as default for parameters. The :class:`NullType` - allows to explicitly tell that there was 'no field' and not 'no value' in existing - field. If you provided another value, it will be returned if not found within - the input object. - - :returns: matched value (including search variations if enabled), or ``default``. - """ - if isinstance(io_object, dict): - value = io_object.get(field, null) - if value is not null: - if pop_found: - io_object.pop(field) - return value - else: - value = getattr(io_object, field, null) - if value is not null: - return value - if search_variations and field in WPS_FIELD_MAPPING: - for var in WPS_FIELD_MAPPING[field]: - value = _get_field(io_object, var, pop_found=pop_found) - if value is not null: - return value - return default - - -def _set_field(io_object, field, value, force=False): - # type: (Union[ANY_IO_Type, ANY_Format_Type], AnyStr, Any, bool) -> None - """ - Sets a field by name into various I/O object types. - Field value is set only if not ``null`` to avoid inserting data considered `invalid`. - If ``force=True``, verification of ``null`` value is ignored. 
- """ - if value is not null or force: - if isinstance(io_object, dict): - io_object[field] = value - return - setattr(io_object, field, value) - - -def _matching_formats(format1, format2): - # type: (Union[Format, JSON], Union[Format, JSON]) -> bool - """Verifies for matching formats.""" - mime_type1 = _get_field(format1, "mime_type", search_variations=True) - mime_type2 = _get_field(format2, "mime_type", search_variations=True) - encoding1 = _get_field(format1, "encoding", search_variations=True) - encoding2 = _get_field(format2, "encoding", search_variations=True) - if mime_type1 == mime_type2 and encoding1 == encoding2 and \ - all(f != null for f in [mime_type1, mime_type2, encoding1, encoding2]): - return True - return False - - -def _are_different_and_set(item1, item2): - # type: (Any, Any) -> bool - """ - Compares two value representations and returns ``True`` only if both are not ``null``, are of same ``type`` and - of different representative value. By "representative", we consider here the visual representation of byte/unicode - strings to support XML/JSON and Python 2/3 implementations. Other non string-like types are verified with - literal (usual) equality method. - """ - if item1 is null or item2 is null: - return False - try: - # Note: - # Calling ``==`` will result in one defined item's type ``__eq__`` method calling a property to validate - # equality with the second. When compared to a ``null``, ``None`` or differently type'd second item, the - # missing property on the second item could raise and ``AssertionError`` depending on the ``__eq__`` - # implementation (eg: ``Format`` checking for ``item.mime_type``, etc.). - equal = item1 == item2 - except AttributeError: - return False - if equal: - return False - # Note: don't only use six.string_types here to check for any python implementation that modifies its value - type1 = str if isinstance(item1, (six.string_types, six.binary_type)) else type(item1) - type2 = str if isinstance(item2, (six.string_types, six.binary_type)) else type(item2) - if type1 is str and type2 is str: - return bytes2str(item1) != bytes2str(item2) - return True - - -def _merge_io_formats(wps_formats, cwl_formats): - # type: (List[ANY_Format_Type], List[ANY_Format_Type]) -> List[ANY_Format_Type] - """ - Merges I/O format definitions by matching ``mime-type`` field. - In case of conflict, preserve the WPS version which can be more detailed (for example, by specifying ``encoding``). - - Verifies if ``DEFAULT_FORMAT_MISSING`` was written to a single `CWL` format caused by a lack of any value - provided as input. In this case, *only* `WPS` formats are kept. - - In the event that ``DEFAULT_FORMAT_MISSING`` was written to the `CWL` formats and that no `WPS` format was - specified, the :py:data:`DEFAULT_FORMAT` is returned. 
- - :raises PackageTypeError: if inputs are invalid format lists - """ - if not (isinstance(wps_formats, (list, tuple, set)) and isinstance(cwl_formats, (list, tuple, set))): - raise PackageTypeError("Cannot merge formats definitions with invalid lists.") - if not len(wps_formats): - wps_formats = [DEFAULT_FORMAT] - if len(cwl_formats) == 1 and _get_field(cwl_formats[0], DEFAULT_FORMAT_MISSING) is True: - return wps_formats - - formats = [] - cwl_fmt_dict = OrderedDict((_get_field(fmt, "mime_type", search_variations=True), fmt) for fmt in cwl_formats) - wps_fmt_dict = OrderedDict((_get_field(fmt, "mime_type", search_variations=True), fmt) for fmt in wps_formats) - for cwl_fmt in cwl_fmt_dict: - if cwl_fmt in wps_fmt_dict: - formats.append(wps_fmt_dict[cwl_fmt]) - else: - formats.append(cwl_fmt_dict[cwl_fmt]) - wps_fmt_only = set(wps_fmt_dict) - set(cwl_fmt_dict) - for wps_fmt in wps_fmt_only: - formats.append(wps_fmt_dict[wps_fmt]) - return formats - - -def _merge_package_io(wps_io_list, cwl_io_list, io_select): - # type: (List[ANY_IO_Type], List[WPS_IO_Type], Union[WPS_INPUT, WPS_OUTPUT]) -> List[WPS_IO_Type] - """ - Update I/O definitions to use for process creation and returned by GetCapabilities, DescribeProcess. - If WPS I/O definitions where provided during deployment, update `CWL-to-WPS` converted I/O with the WPS I/O - complementary details. Otherwise, provide minimum field requirements that can be retrieved from CWL definitions. - - Removes any deployment WPS I/O definitions that don't match any CWL I/O by ID. - Adds missing deployment WPS I/O definitions using expected CWL I/O IDs. - - :param wps_io_list: list of WPS I/O (as json) passed during process deployment. - :param cwl_io_list: list of CWL I/O converted to WPS-like I/O for counter-validation. - :param io_select: ``WPS_INPUT`` or ``WPS_OUTPUT`` to specify desired WPS type conversion. - :returns: list of validated/updated WPS I/O for the process matching CWL I/O requirements. 
- """ - if not isinstance(cwl_io_list, list): - raise PackageTypeError("CWL I/O definitions must be provided, empty list if none required.") - if not wps_io_list: - wps_io_list = list() - wps_io_dict = OrderedDict((_get_field(wps_io, "identifier", search_variations=True), deepcopy(wps_io)) - for wps_io in wps_io_list) - cwl_io_dict = OrderedDict((_get_field(cwl_io, "identifier", search_variations=True), deepcopy(cwl_io)) - for cwl_io in cwl_io_list) - missing_io_list = [cwl_io for cwl_io in cwl_io_dict if cwl_io not in wps_io_dict] # preserve ordering - updated_io_list = list() - - # WPS I/O by id not matching any converted CWL->WPS I/O are discarded - # otherwise, evaluate provided WPS I/O definitions and find potential new information to be merged - for cwl_id in cwl_io_dict: - cwl_io = cwl_io_dict[cwl_id] - updated_io_list.append(cwl_io) - if cwl_id in missing_io_list: - continue # missing WPS I/O are inferred only using CWL->WPS definitions - - # enforce expected CWL->WPS I/O required parameters - cwl_io_json = cwl_io.json - wps_io_json = wps_io_dict[cwl_id] - cwl_identifier = _get_field(cwl_io_json, "identifier", search_variations=True) - cwl_title = _get_field(wps_io_json, "title", search_variations=True) - wps_io_json.update({ - "identifier": cwl_identifier, - "title": cwl_title if cwl_title is not null else cwl_identifier - }) - # apply type if WPS deploy definition was partial but can be retrieved from CWL - wps_io_json.setdefault("type", _get_field(cwl_io_json, "type", search_variations=True)) - - # fill missing WPS min/max occurs in 'provided' json to avoid overwriting resolved CWL values by WPS default '1' - # with 'default' field, this default '1' causes erroneous result when 'min_occurs' should be "0" - # with 'array' type, this default '1' causes erroneous result when 'max_occurs' should be "unbounded" - cwl_min_occurs = _get_field(cwl_io_json, "min_occurs", search_variations=True) - cwl_max_occurs = _get_field(cwl_io_json, "max_occurs", search_variations=True) - wps_min_occurs = _get_field(wps_io_json, "min_occurs", search_variations=True) - wps_max_occurs = _get_field(wps_io_json, "max_occurs", search_variations=True) - if wps_min_occurs == null and cwl_min_occurs != null: - wps_io_json["min_occurs"] = cwl_min_occurs - if wps_max_occurs == null and cwl_max_occurs != null: - wps_io_json["max_occurs"] = cwl_max_occurs - wps_io = _json2wps_io(wps_io_json, io_select) - - # retrieve any complementing fields (metadata, keywords, etc.) 
passed as WPS input - # additionally enforce 'default' format defined by 'data_format' to keep value specified by WPS if applicable - # (see function '_json2wps_io' for detail) - for field_type in list(WPS_FIELD_MAPPING) + ["data_format"]: - cwl_field = _get_field(cwl_io, field_type) - wps_field = _get_field(wps_io, field_type) - # override provided formats if different (keep WPS), or if CWL->WPS was missing but is provided by WPS - if _are_different_and_set(wps_field, cwl_field) or (wps_field is not null and cwl_field is null): - # list of formats are updated by comparing format items since information can be partially complementary - if field_type in ["supported_formats"]: - wps_field = _merge_io_formats(wps_field, cwl_field) - # default 'data_format' must be one of the 'supported_formats' - # avoid setting something invalid in this case, or it will cause problem after - # note: 'supported_formats' must have been processed before - if field_type == "data_format": - if wps_field not in _get_field(updated_io_list[-1], "supported_formats", - search_variations=False, default=[]): - continue - _set_field(updated_io_list[-1], field_type, wps_field) - return updated_io_list - - -def transform_json(json_data, # type: ANY_IO_Type - rename=None, # type: Optional[Dict[AnyKey, Any]] - remove=None, # type: Optional[List[AnyKey]] - add=None, # type: Optional[Dict[AnyKey, Any]] - replace_values=None, # type: Optional[Dict[AnyKey, Any]] - replace_func=None, # type: Optional[Dict[AnyKey, Callable[[Any], Any]]] - ): # type: (...) -> ANY_IO_Type - """ - Transforms the input json_data with different methods. - The transformations are applied in the same order as the arguments. - """ - rename = rename or {} - remove = remove or [] - add = add or {} - replace_values = replace_values or {} - replace_func = replace_func or {} - - # rename - for k, v in rename.items(): - if k in json_data: - json_data[v] = json_data.pop(k) - - # remove - for r_k in remove: - json_data.pop(r_k, None) - - # add - for k, v in add.items(): - json_data[k] = v - - # replace values - for key, value in json_data.items(): - for old_value, new_value in replace_values.items(): - if value == old_value: - json_data[key] = new_value - - # replace with function call - for k, func in replace_func.items(): - if k in json_data: - json_data[k] = func(json_data[k]) - - # also rename if the type of the value is a list of dicts - for key, value in json_data.items(): - if isinstance(value, list): - for nested_item in value: - if isinstance(nested_item, dict): - for k, v in rename.items(): - if k in nested_item: - nested_item[v] = nested_item.pop(k) - for k, func in replace_func.items(): - if k in nested_item: - nested_item[k] = func(nested_item[k]) - return json_data - - def _merge_package_inputs_outputs(wps_inputs_list, # type: List[ANY_IO_Type] cwl_inputs_list, # type: List[WPS_Input_Type] wps_outputs_list, # type: List[ANY_IO_Type] @@ -1305,13 +455,13 @@ def _merge_package_inputs_outputs(wps_inputs_list, # type: List[ANY_IO_Type parameters ``cwl_inputs_list`` and ``cwl_outputs_list`` are expected to be in `WPS`-like format (ie: `CWL` I/O converted to corresponding `WPS` I/O) """ - wps_inputs_merged = _merge_package_io(wps_inputs_list, cwl_inputs_list, WPS_INPUT) - wps_outputs_merged = _merge_package_io(wps_outputs_list, cwl_outputs_list, WPS_OUTPUT) - return [_wps2json_io(i) for i in wps_inputs_merged], [_wps2json_io(o) for o in wps_outputs_merged] + wps_inputs_merged = merge_package_io(wps_inputs_list, cwl_inputs_list, WPS_INPUT) + 
wps_outputs_merged = merge_package_io(wps_outputs_list, cwl_outputs_list, WPS_OUTPUT) + return [wps2json_io(i) for i in wps_inputs_merged], [wps2json_io(o) for o in wps_outputs_merged] def _get_package_io(package_factory, io_select, as_json): - # type: (CWLFactoryCallable, AnyStr, bool) -> List[PKG_IO_Type] + # type: (CWLFactoryCallable, str, bool) -> List[PKG_IO_Type] """ Retrieves I/O definitions from a validated ``CWLFactoryCallable``. Returned I/O format depends on value ``as_json``. """ @@ -1322,9 +472,9 @@ def _get_package_io(package_factory, io_select, as_json): else: raise PackageTypeError("Unknown I/O selection: '{}'.".format(io_select)) cwl_package_io = getattr(package_factory.t, io_attrib) - wps_package_io = [_cwl2wps_io(io, io_select) for io in cwl_package_io["fields"]] + wps_package_io = [cwl2wps_io(io, io_select) for io in cwl_package_io["fields"]] if as_json: - return [_wps2json_io(io) for io in wps_package_io] + return [wps2json_io(io) for io in wps_package_io] return wps_package_io @@ -1342,8 +492,12 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package): wps_package_metadata["title"] = wps_package_metadata.get("title", cwl_package_package.get("label", "")) wps_package_metadata["abstract"] = wps_package_metadata.get("abstract", cwl_package_package.get("doc", "")) - if "$schemas" in cwl_package_package and isinstance(cwl_package_package["$schemas"], list) \ - and "$namespaces" in cwl_package_package and isinstance(cwl_package_package["$namespaces"], dict): + if ( + "$schemas" in cwl_package_package + and isinstance(cwl_package_package["$schemas"], list) + and "$namespaces" in cwl_package_package + and isinstance(cwl_package_package["$namespaces"], dict) + ): metadata = wps_package_metadata.get("metadata", list()) namespaces_inv = {v: k for k, v in cwl_package_package["$namespaces"]} for schema in cwl_package_package["$schemas"]: @@ -1353,275 +507,13 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package): wps_package_metadata["metadata"] = metadata if "s:keywords" in cwl_package_package and isinstance(cwl_package_package["s:keywords"], list): - wps_package_metadata["keywords"] = list(set(wps_package_metadata.get("keywords", list)) | - set(cwl_package_package.get("s:keywords"))) - - -def complex2json(data): - # type: (Union[ComplexData, Any]) -> Union[JSON, Any] - """ - Obtains the JSON representation of a :class:`ComplexData` or simply return the unmatched type. - """ - if not isinstance(data, ComplexData): - return data - return { - "mimeType": data.mimeType, - "encoding": data.encoding, - "schema": data.schema, - } - - -def metadata2json(meta, force=False): - # type: (Union[ANY_Metadata_Type, Any], bool) -> Union[JSON, Any] - """ - Obtains the JSON representation of a :class:`OwsMetadata` or :class:`pywps.app.Common.Metadata`. - Otherwise, simply return the unmatched type. - If requested, can enforce parsing a dictionary for the corresponding keys. 
- """ - if not force and not isinstance(meta, (OwsMetadata, Metadata)): - return meta - return { - "href": _get_field(meta, "href", search_variations=True, default=None), - "title": _get_field(meta, "title", search_variations=True, default=None), - "role": _get_field(meta, "role", search_variations=True, default=None), - } - - -def _ows2json_io(ows_io): - # type: (OWS_IO_Type) -> JSON_IO_Type - """Converts I/O from :module:`owslib.wps` to JSON.""" - - json_io = dict() - for field in WPS_FIELD_MAPPING: - value = _get_field(ows_io, field, search_variations=True) - # preserve numeric values (ex: "minOccurs"=0) as actual parameters - # ignore undefined values represented by `null`, empty list, or empty string - if value or value in [0, 0.0]: - if isinstance(value, list): - # complex data is converted as is - # metadata converted and preserved if it results into a minimally valid definition (otherwise dropped) - json_io[field] = [ - complex2json(v) if isinstance(v, ComplexData) else - metadata2json(v) if isinstance(v, OwsMetadata) else v - for v in value if not isinstance(v, OwsMetadata) or v.url is not None - ] - elif isinstance(value, ComplexData): - json_io[field] = complex2json(value) - elif isinstance(value, OwsMetadata): - json_io[field] = metadata2json(value) - else: - json_io[field] = value - - # add 'format' if missing, derived from other variants - if "formats" not in json_io: - fmt_val = _get_field(json_io, "supported_values") - if fmt_val and json_io.get("type") == WPS_COMPLEX_DATA: - json_io["formats"] = json_io.pop("supported_values") - else: - # search for format fields directly specified in I/O body - for field in WPS_FIELD_FORMAT: - fmt = _get_field(json_io, field, search_variations=True) - if not fmt: - continue - if isinstance(fmt, dict): - fmt = [fmt] - fmt = filter(lambda f: isinstance(f, dict), fmt) - if not isinstance(json_io.get("formats"), list): - json_io["formats"] = list() - for var_fmt in fmt: - # add it only if not exclusively provided by a previous variant - json_fmt_items = [j_fmt.items() for j_fmt in json_io["formats"]] - if any(all(var_item in items for var_item in var_fmt.items()) for items in json_fmt_items): - continue - json_io["formats"].append(var_fmt) - - return json_io - - -def _any2cwl_io(wps_io, io_select): - # type: (Union[JSON_IO_Type, WPS_IO_Type, OWS_IO_Type], AnyStr) -> Tuple[CWL_IO_Type, Dict[AnyStr, AnyStr]] - """ - Converts a `WPS`-like I/O to `CWL` corresponding I/O. - Because of `CWL` I/O of type `File` with `format` field, the applicable namespace is also returned. 
- - :returns: converted I/O and namespace dictionary with corresponding format references as required - """ - def _get_cwl_fmt_details(wps_fmt): - # type: (ANY_Format_Type) -> Union[Tuple[Tuple[AnyStr, AnyStr], AnyStr, AnyStr], Tuple[None, None, None]] - _wps_io_fmt = _get_field(wps_fmt, "mime_type", search_variations=True) - if not _wps_io_fmt: - return None, None, None - _cwl_io_ext = get_extension(_wps_io_fmt) - _cwl_io_ref, _cwl_io_fmt = get_cwl_file_format(_wps_io_fmt, must_exist=True, allow_synonym=False) - return _cwl_io_ref, _cwl_io_fmt, _cwl_io_ext - - wps_io_type = _get_field(wps_io, "type", search_variations=True) - wps_io_id = _get_field(wps_io, "identifier", search_variations=True) - cwl_ns = dict() - cwl_io = {"id": wps_io_id} # type: CWL_IO_Type - if wps_io_type not in WPS_COMPLEX_TYPES: - cwl_io_type = _any2cwl_literal_datatype(wps_io_type) - wps_allow = _get_field(wps_io, "allowed_values", search_variations=True) - if isinstance(wps_allow, list) and len(wps_allow) > 0: - cwl_io["type"] = {"type": "enum", "symbols": wps_allow} - else: - cwl_io["type"] = cwl_io_type - # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51) - else: - cwl_io_fmt = None - cwl_io_ext = CONTENT_TYPE_ANY - cwl_io["type"] = "File" - - # inputs are allowed to define multiple 'supported' formats - # outputs are allowed to define only one 'applied' format - for field in WPS_FIELD_FORMAT: - fmt = _get_field(wps_io, field, search_variations=True) - if isinstance(fmt, dict): - cwl_io_ref, cwl_io_fmt, cwl_io_ext = _get_cwl_fmt_details(fmt) - if cwl_io_ref and cwl_io_fmt: - cwl_ns.update(cwl_io_ref) - break - if isinstance(fmt, list): - if len(fmt) == 1: - cwl_io_ref, cwl_io_fmt, cwl_io_ext = _get_cwl_fmt_details(fmt[0]) - if cwl_io_ref and cwl_io_fmt: - cwl_ns.update(cwl_io_ref) - break - if io_select == WPS_OUTPUT and len(fmt) > 1: - break # don't use any format because we cannot enforce one - cwl_ns_multi = {} - cwl_fmt_multi = [] - for fmt_i in fmt: - # FIXME: (?) - # when multiple formats are specified, but at least one schema/namespace reference can't be found, - # we must drop all since that unknown format is still allowed but cannot be validated - # avoid potential validation error if that format was the one provided during execute... - # (see: https://github.com/crim-ca/weaver/issues/50) - cwl_io_ref_i, cwl_io_fmt_i, _ = _get_cwl_fmt_details(fmt_i) - if cwl_io_ref_i and cwl_io_fmt_i: - cwl_ns_multi.update(cwl_io_ref_i) - cwl_fmt_multi.append(cwl_io_fmt_i) - else: - # reset all since at least one format could not be mapped to an official schema - cwl_ns_multi = {} - cwl_fmt_multi = None - break - cwl_io_fmt = cwl_fmt_multi # all formats or none of them - cwl_ns.update(cwl_ns_multi) - break - if cwl_io_fmt: - cwl_io["format"] = cwl_io_fmt - # for backward compatibility with deployed processes, consider text/plan as 'any' for glob pattern - cwl_io_txt = get_extension(CONTENT_TYPE_TEXT_PLAIN) - if cwl_io_ext == cwl_io_txt: - cwl_io_any = get_extension(CONTENT_TYPE_ANY) - LOGGER.warning("Replacing '%s' [%s] to generic '%s' [%s] glob pattern. " - "More explicit format could be considered for %s '%s'.", - CONTENT_TYPE_TEXT_PLAIN, cwl_io_txt, CONTENT_TYPE_ANY, cwl_io_any, io_select, wps_io_id) - cwl_io_ext = cwl_io_any - if io_select == WPS_OUTPUT: - # FIXME: (?) 
how to specify the 'name' part of the glob (using the "id" value for now) - cwl_io["outputBinding"] = { - "glob": "{}{}".format(wps_io_id, cwl_io_ext) - } - - if io_select == WPS_INPUT: - wps_default = _get_field(wps_io, "default", search_variations=True) - wps_min_occ = _get_field(wps_io, "min_occurs", search_variations=True) - # field 'default' must correspond to a fallback "value", not a default "format" - if (wps_default != null and not isinstance(wps_default, dict)) or wps_min_occ in [0, "0"]: - cwl_io["default"] = wps_default or "null" - - wps_max_occ = _get_field(wps_io, "max_occurs", search_variations=True) - if wps_max_occ != null and wps_max_occ > 1: - cwl_io["type"] = { - "type": "array", - "items": cwl_io["type"] - } - - return cwl_io, cwl_ns - - -def _xml_wps2cwl(wps_process_response): - # type: (Response) -> Tuple[CWL, JSON] - """ - Converts a `WPS-1 ProcessDescription XML` tree structure to an equivalent `WPS-3 Process JSON` and builds the - associated `CWL` package in conformance to :ref:`weaver.processes.wps_package.CWL_REQUIREMENT_APP_WPS1`. - - :param wps_process_response: valid response (XML, 200) from a `WPS-1 ProcessDescription`. - """ - def _tag_name(_xml): - # type: (Union[XML, AnyStr]) -> AnyStr - """Obtains ``tag`` from a ``{namespace}Tag`` `XML` element.""" - if hasattr(_xml, "tag"): - _xml = _xml.tag - return _xml.split("}")[-1].lower() - - # look for `XML` structure starting at `ProcessDescription` (WPS-1) - xml_resp = lxml.etree.fromstring(str2bytes(wps_process_response.content)) - xml_wps_process = xml_resp.xpath("//ProcessDescription") # type: List[XML] - if not len(xml_wps_process) == 1: - raise ValueError("Could not retrieve a valid 'ProcessDescription' from WPS-1 response.") - process_id = None - for sub_xml in xml_wps_process[0]: - tag = _tag_name(sub_xml) - if tag == "identifier": - process_id = sub_xml.text - break - if not process_id: - raise ValueError("Could not find a match for 'ProcessDescription.identifier' from WPS-1 response.") - - # transform WPS-1 -> WPS-3 - wps = WebProcessingService(wps_process_response.url) - wps_service_url = urlparse(wps_process_response.url) - if wps.provider: - wps_service_name = wps.provider.name - else: - wps_service_name = wps_service_url.hostname - process_info = OrderedDict([ - ("identifier", "{}_{}".format(wps_service_name, process_id)), - ("keywords", [wps_service_name]), - ]) - wps_process = wps.describeprocess(process_id, xml=wps_process_response.content) - for field in ["title", "abstract"]: - process_info[field] = _get_field(wps_process, field, search_variations=True) - if wps_process.metadata: - process_info["metadata"] = [] - for meta in wps_process.metadata: - process_info["metadata"].append({"href": meta.url, "title": meta.title, "role": meta.role}) - process_info["inputs"] = [] # type: List[JSON] - process_info["outputs"] = [] # type: List[JSON] - for wps_in in wps_process.dataInputs: # type: OWS_Input_Type - process_info["inputs"].append(_ows2json_io(wps_in)) - for wps_out in wps_process.processOutputs: # type: OWS_Output_Type - process_info["outputs"].append(_ows2json_io(wps_out)) - - # generate CWL for WPS-1 using parsed WPS-3 - cwl_package = OrderedDict([ - ("cwlVersion", "v1.0"), - ("class", "CommandLineTool"), - ("hints", { - CWL_REQUIREMENT_APP_WPS1: { - "provider": get_url_without_query(wps_service_url), - "process": process_id, - }}), - ]) - for io_select in [WPS_INPUT, WPS_OUTPUT]: - io_section = "{}s".format(io_select) - cwl_package[io_section] = list() - for wps_io in 
process_info[io_section]: - cwl_io, cwl_ns = _any2cwl_io(wps_io, io_select) - cwl_package[io_section].append(cwl_io) - if cwl_ns: - if "$namespaces" not in cwl_package: - cwl_package["$namespaces"] = dict() - cwl_package["$namespaces"].update(cwl_ns) - - return cwl_package, process_info + wps_package_metadata["keywords"] = list( + set(wps_package_metadata.get("keywords", list)) | set(cwl_package_package.get("s:keywords")) + ) def _generate_process_with_cwl_from_reference(reference): - # type: (AnyStr) -> Tuple[CWL, JSON] + # type: (str) -> Tuple[CWL, JSON] """ Resolves the ``reference`` type (`CWL`, `WPS-1`, `WPS-2`, `WPS-3`) and generates a `CWL` ``package`` from it. Additionally provides minimal process details retrieved from the ``reference``. @@ -1645,7 +537,7 @@ def _generate_process_with_cwl_from_reference(reference): content_type = get_header("Content-Type", response.headers) if any(ct in content_type for ct in CONTENT_TYPE_ANY_XML): # attempt to retrieve a WPS-1 ProcessDescription definition - cwl_package, process_info = _xml_wps2cwl(response) + cwl_package, process_info = xml_wps2cwl(response) elif any(ct in content_type for ct in [CONTENT_TYPE_APP_JSON]): payload = response.json() @@ -1664,7 +556,7 @@ def _generate_process_with_cwl_from_reference(reference): def get_process_definition(process_offering, reference=None, package=None, data_source=None): - # type: (JSON, Optional[AnyStr], Optional[CWL], Optional[AnyStr]) -> JSON + # type: (JSON, Optional[str], Optional[CWL], Optional[str]) -> JSON """ Returns an updated process definition dictionary ready for storage using provided `WPS` ``process_offering`` and a package definition passed by ``reference`` or ``package`` `CWL` content. @@ -1690,12 +582,10 @@ def try_or_raise_package_error(call, reason): LOGGER.exception(exc_msg) raise exc_type("Invalid package/reference definition. 
{0} generated error: [{1!r}].".format(reason, exc)) - if not (isinstance(package, dict) or isinstance(reference, six.string_types)): - raise PackageRegistrationError( - "Invalid parameters amongst one of [package, reference].") + if not (isinstance(package, dict) or isinstance(reference, str)): + raise PackageRegistrationError("Invalid parameters amongst one of [package, reference].") if package and reference: - raise PackageRegistrationError( - "Simultaneous parameters [package, reference] not allowed.") + raise PackageRegistrationError("Simultaneous parameters [package, reference] not allowed.") process_info = process_offering if reference: @@ -1744,17 +634,17 @@ class WpsPackage(Process): # defined on __init__ call package = None # type: Optional[CWL] # defined only after/while _handler is called (or sub-methods) - package_id = None # type: Optional[AnyStr] - package_type = None # type: Optional[AnyStr] - package_log_hook_stderr = None # type: Optional[AnyStr] - package_log_hook_stdout = None # type: Optional[AnyStr] + package_id = None # type: Optional[str] + package_type = None # type: Optional[str] + package_log_hook_stderr = None # type: Optional[str] + package_log_hook_stdout = None # type: Optional[str] percent = None # type: Optional[Number] is_ems = None # type: Optional[bool] - log_file = None # type: Optional[AnyStr] + log_file = None # type: Optional[str] log_level = None # type: Optional[int] logger = None # type: Optional[logging.Logger] step_packages = None # type: Optional[List[CWL]] - step_launched = None # type: Optional[List[AnyStr]] + step_launched = None # type: Optional[List[str]] request = None # type: Optional[WPSRequest] response = None # type: Optional[ExecuteResponse] @@ -1780,9 +670,9 @@ def __init__(self, **kw): # handle EOImage inputs inputs = opensearch.replace_inputs_describe_process(inputs=inputs, payload=self.payload) - inputs = [_json2wps_io(i, WPS_INPUT) for i in inputs] - outputs = [_json2wps_io(o, WPS_OUTPUT) for o in kw.pop("outputs", list())] - metadata = [_json2wps_field(meta_kw, "metadata") for meta_kw in kw.pop("metadata", list())] + inputs = [json2wps_io(i, WPS_INPUT) for i in inputs] + outputs = [json2wps_io(o, WPS_OUTPUT) for o in kw.pop("outputs", list())] + metadata = [json2wps_field(meta_kw, "metadata") for meta_kw in kw.pop("metadata", list())] super(WpsPackage, self).__init__( self._handler, @@ -1794,7 +684,7 @@ def __init__(self, **kw): **kw ) - def setup_logger(self, log_stdout_stderr=True): + def setup_loggers(self, log_stdout_stderr=True): # type: (bool) -> None """ Configures useful loggers to catch most of the common output and/or error messages during package execution. @@ -1803,7 +693,8 @@ def setup_logger(self, log_stdout_stderr=True): :meth:`insert_package_log` :func:`retrieve_package_job_log` """ - self.log_level = self.log_level or logging.getLogger("weaver").level + setup_loggers(self.settings) + self.log_level = self.log_level or logging.getLogger("weaver").getEffectiveLevel() # file logger for output self.log_file = get_status_location_log_path(self.status_location) @@ -1845,32 +736,42 @@ def setup_logger(self, log_stdout_stderr=True): weaver_tweens_logger.setLevel(self.log_level) def insert_package_log(self, result): + # type: (Union[CWLResults, CWLException]) -> List[str] """Retrieves additional `CWL` sub-process logs captures to retrieve internal application output and/or errors. 
After execution of this method, the `WPS` output log (which can be obtained by :func:`retrieve_package_job_log`) will have additional ``stderr/stdout`` entries extracted from the underlying application package tool execution. - The outputs and errors are inserted as best as possible in the logical order to make reading of the merged + The outputs and errors are inserted *as best as possible* in the logical order to make reading of the merged logs appear as a natural and chronological order. In the event that both output and errors are available, they are appended one after another as merging in an orderly fashion cannot be guaranteed by outside `CWL` runner. - :param result: output results returned from the `CWL` package instance execution. + .. note:: + In case of any exception, log reporting is aborted and ignored. - .. todo:: improve for realtime updates when using async routine (https://github.com/crim-ca/weaver/issues/131) + .. todo:: + Improve for realtime updates when using async routine (https://github.com/crim-ca/weaver/issues/131) .. seealso:: - :meth:`setup_logger` + :meth:`setup_loggers` :func:`retrieve_package_job_log` + + :param result: output results returned by successful `CWL` package instance execution or raised CWL exception. + :returns: captured execution log lines retrieved from files """ + captured_log = [] try: + status = STATUS_RUNNING + if isinstance(result, CWLException): + result = getattr(result, "out") + status = STATUS_FAILED stderr_file = result.get(self.package_log_hook_stderr, {}).get("location", "").replace("file://", "") stdout_file = result.get(self.package_log_hook_stdout, {}).get("location", "").replace("file://", "") with_stderr_file = os.path.isfile(stderr_file) with_stdout_file = os.path.isfile(stdout_file) if not with_stdout_file and not with_stderr_file: - self.log_message(STATUS_RUNNING, "Could not retrieve any internal application log.", - level=logging.WARNING) - return + self.log_message(status, "Could not retrieve any internal application log.", level=logging.WARNING) + return captured_log out_log = [] if with_stdout_file: with open(stdout_file) as app_log_fd: @@ -1884,8 +785,8 @@ def insert_package_log(self, result): if err_log: err_log = ["----- Captured Log (stderr) -----\n"] + err_log if not out_log and not err_log: - self.log_message(STATUS_RUNNING, "Nothing captured from internal application logs.", level=logging.INFO) - return + self.log_message(status, "Nothing captured from internal application logs.", level=logging.INFO) + return captured_log with open(self.log_file, "r") as pkg_log_fd: pkg_log = pkg_log_fd.readlines() cwl_end_index = -1 @@ -1894,18 +795,21 @@ def insert_package_log(self, result): if cwl_end_search in pkg_log[i]: cwl_end_index = i break - merged_log = pkg_log[:cwl_end_index] + out_log + err_log + pkg_log[cwl_end_index:] + captured_log = out_log + err_log + merged_log = pkg_log[:cwl_end_index] + captured_log + pkg_log[cwl_end_index:] with open(self.log_file, "w") as pkg_log_fd: pkg_log_fd.writelines(merged_log) except Exception as exc: # log exception, but non-failing - self.exception_message(PackageExecutionError, exception=exc, level=logging.WARNING, status=STATUS_RUNNING, + self.exception_message(PackageExecutionError, exception=exc, level=logging.WARNING, status=status, message="Error occurred when retrieving internal application log.") + return captured_log def update_requirements(self): """ - Inplace modification of :attr:`package` to remove invalid items that would break behaviour we must enforce. 
+ Inplace modification of :attr:`package` to adjust invalid items that would break behaviour we must enforce. """ + is_builtin = False for req_type in ["hints", "requirements"]: req_items = self.package.get(req_type, {}) for req_cls in req_items: @@ -1914,6 +818,8 @@ def update_requirements(self): else: req_def = req_cls req_cls = req_cls["class"] + if req_cls == CWL_REQUIREMENT_APP_BUILTIN: + is_builtin = True if req_cls != CWL_REQUIREMENT_APP_DOCKER: continue # remove build-related parameters because we forbid this in our case @@ -1924,6 +830,33 @@ def update_requirements(self): self.logger.warning("Removed CWL [%s.%s] %s parameter from [%s] package definition (forced).", req_cls, req_rm, req_type[:-1], self.package_id) + # update python reference if builtin script + # since subprocess is created by CWL, the default python detected is from the OS + # when running from within Weaver Docker, this doesn't matter much as OS Python == Weaver Env Python + # but running in any other situation (e.g.: local, tests) will not necessarily point to same instance + if is_builtin: + python_path = os.getenv("PYTHONPATH") + if not python_path: + return + req_items = self.package.get("requirements", {}) + if not isinstance(req_items, dict): + # definition as list + req_env = {"class": "EnvVarRequirement", "envDef": {}} + for req in req_items: + if req["class"] == "EnvVarRequirement": + req_env = req + break + req_items.append(req_env) + else: + # definition as mapping + req_items.setdefault("EnvVarRequirement", {"envDef": {}}) + req_env = req_items.get("EnvVarRequirement") + active_python_path = os.path.join(sys.exec_prefix, "bin") + env_path = "{}:{}".format(active_python_path, os.getenv("PATH")) + req_env["envDef"].update({"PATH": env_path}) + if self.package.get("baseCommand") == "python": + self.package["baseCommand"] = os.path.join(active_python_path, "python") + def update_effective_user(self): """ Update effective user/group for the `Application Package` to be executed. @@ -1945,7 +878,7 @@ def update_effective_user(self): "Visible application CWL euid:egid [%s:%s]", app_euid, app_egid) def update_status(self, message, progress, status): - # type: (AnyStr, Number, AnyStatusType) -> None + # type: (str, Number, AnyStatusType) -> None """Updates the `PyWPS` real job status from a specified parameters.""" self.percent = progress or self.percent or 0 @@ -1953,6 +886,12 @@ def update_status(self, message, progress, status): pywps_status = map_status(status, STATUS_COMPLIANT_PYWPS) pywps_status_id = STATUS_PYWPS_IDS[pywps_status] + # NOTE: + # When running process in sync (because executed within celery worker already async), + # pywps reverts status file output flag. Re-enforce it for our needs. 
+ # (see: 'weaver.wps.WorkerService.execute_job') + self.response.store_status_file = True + # pywps overrides 'status' by 'accepted' in 'update_status', so use the '_update_status' to enforce the status # using protected method also avoids weird overrides of progress percent on failure and final 'success' status self.response._update_status(pywps_status_id, message, self.percent) # noqa: W0212 @@ -1960,7 +899,7 @@ def step_update_status(self, message, progress, start_step_progress, end_step_progress, step_name, target_host, status): - # type: (AnyStr, Number, Number, Number, AnyStr, AnyValue, AnyStr) -> None + # type: (str, Number, Number, Number, str, AnyValue, str) -> None self.update_status( message="{0} [{1}] - {2}".format(target_host, step_name, str(message).strip()), progress=map_progress(progress, start_step_progress, end_step_progress), @@ -1968,14 +907,19 @@ def step_update_status(self, message, progress, start_step_progress, end_step_pr ) def log_message(self, status, message, progress=None, level=logging.INFO): - # type: (AnyStatusType, AnyStr, Optional[Number], int) -> None + # type: (AnyStatusType, str, Optional[Number], int) -> None progress = progress if progress is not None else self.percent message = get_job_log_msg(status=map_status(status), message=message, progress=progress) self.logger.log(level, message, exc_info=level > logging.INFO) def exception_message(self, exception_type, exception=None, message="no message", status=STATUS_EXCEPTION, level=logging.ERROR): - # type: (Type[Exception], Optional[Exception], AnyStr, AnyStatusType, int) -> Exception + # type: (Type[Exception], Optional[Exception], str, AnyStatusType, int) -> Exception + """ + Logs to the job the specified error message with the provided exception type. + + :returns: formatted exception with message to be raised by calling function. + """ exception_msg = " [{}]".format(repr(exception)) if isinstance(exception, Exception) else "" self.log_message(status=status, level=level, message="{0}: {1}{2}".format(exception_type.__name__, message, exception_msg)) @@ -2000,7 +944,7 @@ def _handler(self, request, response): Method called when process receives the WPS execution request.
""" - # note: only 'LOGGER' call allowed here, since 'setup_logger' not called yet + # note: only 'LOGGER' call allowed here, since 'setup_loggers' not called yet LOGGER.debug("HOME=%s, Current Dir=%s", os.environ.get("HOME"), os.path.abspath(os.curdir)) self.request = request self.response = response @@ -2011,7 +955,7 @@ def _handler(self, request, response): # workflows do not support stdout/stderr self.package_type = _get_package_type(self.package) log_stdout_stderr = self.package_type != PROCESS_WORKFLOW - self.setup_logger(log_stdout_stderr) + self.setup_loggers(log_stdout_stderr) self.update_status("Preparing package logs done.", PACKAGE_PROGRESS_PREP_LOG, STATUS_RUNNING) except Exception as exc: raise self.exception_message(PackageExecutionError, exc, "Failed preparing package logging.") @@ -2069,7 +1013,7 @@ def _handler(self, request, response): self.update_status("Loading package content done.", PACKAGE_PROGRESS_LOADING, STATUS_RUNNING) try: - cwl_input_info = {i["name"]: i for i in package_inst.t.inputs_record_schema["fields"]} + cwl_inputs_info = {i["name"]: i for i in package_inst.t.inputs_record_schema["fields"]} self.update_status("Retrieve package inputs done.", PACKAGE_PROGRESS_GET_INPUT, STATUS_RUNNING) except Exception as exc: raise self.exception_message(PackageExecutionError, exc, "Failed retrieving package input types.") @@ -2087,38 +1031,10 @@ def _handler(self, request, response): eoimage_data_sources, accept_mime_types, settings=self.settings) - - cwl_inputs = dict() - for input_id in request.inputs: - # skip empty inputs (if that is even possible...) - input_occurs = request.inputs[input_id] - if len(input_occurs) <= 0: - continue - # process single occurrences - input_i = input_occurs[0] - # handle as reference/data - is_array, elem_type, _, _ = _is_cwl_array_type(cwl_input_info[input_id]) - if isinstance(input_i, ComplexInput) or elem_type == "File": - # extend array data that allow max_occur > 1 - if is_array: - input_type = elem_type - cwl_inputs[input_id] = [ - self.make_location_input(input_type, input_def) for input_def in input_occurs - ] - else: - input_type = cwl_input_info[input_id]["type"] - cwl_inputs[input_id] = self.make_location_input(input_type, input_i) - elif isinstance(input_i, (LiteralInput, BoundingBoxInput)): - # extend array data that allow max_occur > 1 - if is_array: - input_data = [i.url if i.as_reference else i.data for i in input_occurs] - else: - input_data = input_i.url if input_i.as_reference else input_i.data - cwl_inputs[input_id] = input_data - else: - raise self.exception_message( - PackageTypeError, None, "Undefined package input for execution: {}.".format(type(input_i))) + cwl_inputs = self.make_inputs(request.inputs, cwl_inputs_info) self.update_status("Convert package inputs done.", PACKAGE_PROGRESS_CONVERT_INPUT, STATUS_RUNNING) + except PackageException as exc: + raise self.exception_message(type(exc), None, str(exc)) # re-raise as is, but with extra log entry except Exception as exc: raise self.exception_message(PackageExecutionError, exc, "Failed to load package inputs.") @@ -2128,6 +1044,9 @@ def _handler(self, request, response): result = package_inst(**cwl_inputs) # type: CWLResults self.update_status("Package execution done.", PACKAGE_PROGRESS_CWL_DONE, STATUS_RUNNING) except Exception as exc: + if isinstance(exc, CWLException): + lines = self.insert_package_log(exc) + LOGGER.debug("Captured logs:\n%s", "\n".join(lines)) raise self.exception_message(PackageExecutionError, exc, "Failed package execution.") # FIXME: 
this won't be necessary using async routine (https://github.com/crim-ca/weaver/issues/131) self.insert_package_log(result) @@ -2146,7 +1065,7 @@ def _handler(self, request, response): return self.response def must_fetch(self, input_ref): - # type: (AnyStr) -> bool + # type: (str) -> bool """ Figures out if file reference should be fetched immediately for local execution. If anything else than local script/docker, remote ADES/WPS process will fetch it. @@ -2164,8 +1083,54 @@ def must_fetch(self, input_ref): return False return True + def make_inputs(self, + wps_inputs, # type: Dict[str, Deque[WPS_Input_Type]] + cwl_inputs_info, # type: Dict[str, CWL_Input_Type] + ): # type: (...) -> Dict[str, ValueType] + """ + Converts WPS input values to corresponding CWL input values for processing by CWL package instance. + + The WPS inputs must correspond to :mod:`pywps` definitions. + Multiple values are adapted to arrays as needed. + WPS ``Complex`` types (files) are converted to appropriate locations based on data or reference specification. + + :param wps_inputs: actual WPS inputs parsed from execution request + :param cwl_inputs_info: expected CWL input definitions for mapping + :return: CWL input values + """ + cwl_inputs = dict() + for input_id in wps_inputs: + # skip empty inputs (if that is even possible...) + input_occurs = wps_inputs[input_id] + if len(input_occurs) <= 0: + continue + # process single occurrences + input_i = input_occurs[0] + # handle as reference/data + is_array, elem_type, _, _ = is_cwl_array_type(cwl_inputs_info[input_id]) + if isinstance(input_i, ComplexInput) or elem_type == "File": + # extend array data that allow max_occur > 1 + if is_array: + input_type = elem_type + cwl_inputs[input_id] = [ + self.make_location_input(input_type, input_def) for input_def in input_occurs + ] + else: + input_type = cwl_inputs_info[input_id]["type"] + cwl_inputs[input_id] = self.make_location_input(input_type, input_i) + elif isinstance(input_i, (LiteralInput, BoundingBoxInput)): + # extend array data that allow max_occur > 1 + if is_array: + input_data = [i.url if i.as_reference else i.data for i in input_occurs] + else: + input_data = input_i.url if input_i.as_reference else input_i.data + cwl_inputs[input_id] = input_data + else: + raise PackageTypeError("Undefined package input for execution: {}.".format(type(input_i))) + return cwl_inputs + def make_location_input(self, input_type, input_definition): - # type: (AnyStr, ComplexInput) -> JSON + # type: (str, ComplexInput) -> JSON """ Generates the JSON content required to specify a `CWL` ``File`` input definition from a location. @@ -2197,8 +1162,7 @@ def make_location_input(self, input_type, input_definition): input_location = input_definition.data if not input_location: url = getattr(input_definition, "url") - if isinstance(url, six.string_types) and \ - any([url.startswith("{}://".format(p)) for p in SUPPORTED_FILE_SCHEMES]): + if isinstance(url, str) and any([url.startswith("{}://".format(p)) for p in SUPPORTED_FILE_SCHEMES]): input_location = url else: # last option, could not resolve 'lazily' so will fetch data if needed @@ -2243,7 +1207,7 @@ def make_outputs(self, cwl_result): self.logger.info("Resolved WPS output [%s] as literal data", output_id) def make_location_output(self, cwl_result, output_id): - # type: (CWLResults, AnyStr) -> None + # type: (CWLResults, str) -> None """ Rewrite the `WPS` output with required location using result path from `CWL` execution. 
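# Rough sketch under assumed names (not the actual WpsPackage implementation): the
# 'make_inputs' method added above maps each WPS input occurrence to a CWL job-order
# value, using {"class": "File", "location": ...} entries for complex inputs and plain
# values for literals, wrapped in a list when the CWL type is an array (max_occurs > 1).
def simplified_make_inputs(wps_inputs, cwl_types):
    cwl_inputs = {}
    for input_id, occurrences in wps_inputs.items():
        if not occurrences:
            continue  # skip inputs submitted without any value
        cwl_type = cwl_types[input_id]
        is_array = isinstance(cwl_type, dict) and cwl_type.get("type") == "array"
        item_type = cwl_type["items"] if is_array else cwl_type
        if item_type == "File":
            values = [{"class": "File", "location": ref} for ref in occurrences]
        else:
            values = list(occurrences)
        cwl_inputs[input_id] = values if is_array else values[0]
    return cwl_inputs

job_order = simplified_make_inputs(
    {"images": ["https://example.com/a.tif", "https://example.com/b.tif"], "threshold": [0.5]},
    {"images": {"type": "array", "items": "File"}, "threshold": "float"},
)
# -> {'images': [{'class': 'File', 'location': '...a.tif'}, ...], 'threshold': 0.5}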
@@ -2285,7 +1249,7 @@ def make_tool(self, toolpath_object, loading_context): return default_make_tool(toolpath_object, loading_context, self.get_job_process_definition) def get_application_requirement(self): - # type: () -> Dict[AnyStr, Any] + # type: () -> Dict[str, Any] """ Obtains the first item in `CWL` package ``requirements`` or ``hints`` that corresponds to a `Weaver`-specific application type as defined in :py:data:`CWL_REQUIREMENT_APP_TYPES`. @@ -2306,7 +1270,7 @@ def get_application_requirement(self): return requirement def get_job_process_definition(self, jobname, joborder, tool): # noqa: E811 - # type: (AnyStr, JSON, CWL) -> WpsPackage + # type: (str, JSON, CWL) -> WpsPackage """ This function is called before running an ADES job (either from a workflow step or a simple EMS dispatch). It must return a WpsProcess instance configured with the proper package, ADES target and cookies. diff --git a/weaver/processes/wps_process_base.py b/weaver/processes/wps_process_base.py index a19cc34a5..1e66a497d 100644 --- a/weaver/processes/wps_process_base.py +++ b/weaver/processes/wps_process_base.py @@ -8,12 +8,12 @@ from weaver.formats import CONTENT_TYPE_APP_JSON from weaver.utils import get_cookie_headers, get_settings, request_extra -from weaver.wps import get_wps_output_dir, get_wps_output_url +from weaver.wps.utils import get_wps_output_dir, get_wps_output_url if TYPE_CHECKING: - from weaver.typedefs import CWL # noqa: F401 - from typing import AnyStr, Dict # noqa: F401 - from pywps.app import WPSRequest # noqa: F401 + from weaver.typedefs import CWL + from typing import Dict + from pywps.app import WPSRequest class WpsProcessInterface(object): @@ -24,8 +24,8 @@ class WpsProcessInterface(object): @abstractmethod def execute(self, workflow_inputs, # type: CWL - out_dir, # type: AnyStr - expected_outputs, # type: Dict[AnyStr, AnyStr] + out_dir, # type: str + expected_outputs, # type: Dict[str, str] ): """ Execute a remote process using the given inputs. 
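# Hedged sketch of how a concrete remote process might satisfy the abstract
# 'execute(workflow_inputs, out_dir, expected_outputs)' contract shown above, by staging
# every expected output under 'out_dir'. 'RemoteProcessInterfaceSketch' and
# 'RemoteProcessStub' are illustrative stand-ins only; the real interface is
# weaver.processes.wps_process_base.WpsProcessInterface.
import os
import tempfile
from abc import ABC, abstractmethod

class RemoteProcessInterfaceSketch(ABC):
    @abstractmethod
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        """Run the remote process and write each expected output under 'out_dir'."""

class RemoteProcessStub(RemoteProcessInterfaceSketch):
    def execute(self, workflow_inputs, out_dir, expected_outputs):
        os.makedirs(out_dir, exist_ok=True)
        for output_id, file_name in expected_outputs.items():
            # a real implementation would dispatch the remote execute request,
            # monitor the job status, then download the matching result here
            with open(os.path.join(out_dir, file_name), "w") as out_file:
                out_file.write("placeholder result for {}\n".format(output_id))

out_dir = os.path.join(tempfile.gettempdir(), "wps-out")
RemoteProcessStub().execute({"message": "hello"}, out_dir, {"output": "output.txt"})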
diff --git a/weaver/processes/wps_testing.py b/weaver/processes/wps_testing.py index 90329c05f..40a885b46 100644 --- a/weaver/processes/wps_testing.py +++ b/weaver/processes/wps_testing.py @@ -8,17 +8,17 @@ class WpsTestProcess(Process): def __init__(self, **kw): # remove duplicates/unsupported keywords - kw.pop("title", None) + title = kw.pop("title", kw.get("identifier")) + version = kw.pop("version", "0.0") kw.pop("inputs", None) kw.pop("outputs", None) - kw.pop("version", None) kw.pop("payload", None) kw.pop("package", None) super(WpsTestProcess, self).__init__( self._handler, - title="WpsTestProcess", - version="0.0", + title=title, + version=version, inputs=[LiteralInput("test_input", "Input Request", data_type="string")], outputs=[LiteralOutput("test_output", "Output response", data_type="string")], store_supported=True, diff --git a/weaver/processes/wps_workflow.py b/weaver/processes/wps_workflow.py index eaf9eeb3c..bd252ab6e 100644 --- a/weaver/processes/wps_workflow.py +++ b/weaver/processes/wps_workflow.py @@ -30,7 +30,6 @@ from pyramid_celery import celery_app as app from schema_salad import validate from schema_salad.sourceline import SourceLine -from six import string_types from weaver.processes.builtin import BuiltinProcess from weaver.processes.constants import ( @@ -40,17 +39,15 @@ CWL_REQUIREMENT_APP_WPS1 ) from weaver.utils import get_settings, make_dirs, now -from weaver.wps import get_wps_output_dir +from weaver.wps.utils import get_wps_output_dir if TYPE_CHECKING: - from weaver.typedefs import ( # noqa: F401 - ExpectedOutputType, GetJobProcessDefinitionFunction, ToolPathObjectType, AnyValue - ) - from weaver.processes.wps_process_base import WpsProcessInterface # noqa: F401 - from typing import Any, Dict, Generator, List, Optional, Set, Union # noqa: F401 - from cwltool.command_line_tool import OutputPorts # noqa: F401 + from typing import Any, Dict, Generator, List, Optional, Set, Union + from cwltool.command_line_tool import OutputPorts from cwltool.provenance import ProvenanceProfile - import threading # noqa: F401 + from threading import Lock as ThreadLock + from weaver.typedefs import AnyValue, ExpectedOutputType, GetJobProcessDefinitionFunction, ToolPathObjectType + from weaver.processes.wps_process_base import WpsProcessInterface LOGGER = logging.getLogger(__name__) DEFAULT_TMP_PREFIX = "tmp" @@ -357,7 +354,7 @@ def collect_output(self, sfpath = file subst = True for sfitem in aslist(sfpath): - if isinstance(sfitem, string_types): + if isinstance(sfitem, str): if subst: sfitem = {"path": substitute(primary["path"], sfitem)} else: @@ -419,7 +416,7 @@ def __init__(self, def run(self, runtimeContext, # type: RuntimeContext - tmpdir_lock=None, # type: Optional[threading.Lock] + tmpdir_lock=None, # type: Optional[ThreadLock] ): # type: (...) 
-> None make_dirs(self.tmpdir, exist_ok=True) diff --git a/weaver/status.py b/weaver/status.py index 2fd35f2d1..e0ec19d7b 100644 --- a/weaver/status.py +++ b/weaver/status.py @@ -3,8 +3,9 @@ from pywps.response.status import _WPS_STATUS, WPS_STATUS # noqa: W0212 if TYPE_CHECKING: - from typing import AnyStr, Union # noqa: F401 - AnyStatusType = Union[AnyStr, int] # noqa: F401 + from typing import Union + + AnyStatusType = Union[str, int] STATUS_COMPLIANT_OGC = "STATUS_COMPLIANT_OGC" STATUS_COMPLIANT_PYWPS = "STATUS_COMPLIANT_PYWPS" @@ -58,7 +59,7 @@ def map_status(wps_status, compliant=STATUS_COMPLIANT_OGC): - # type: (AnyStatusType, AnyStr) -> AnyStr + # type: (AnyStatusType, str) -> str """ Maps WPS statuses (weaver.status, OWSLib or PyWPS) to OWSLib/PyWPS compatible values. For each compliant combination, unsupported statuses are changed to corresponding ones (with closest logical match). diff --git a/weaver/store/base.py b/weaver/store/base.py index b1c264f6f..d590c4a90 100644 --- a/weaver/store/base.py +++ b/weaver/store/base.py @@ -1,4 +1,19 @@ -class StoreInterface(object): +import abc +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Dict, List, Optional, Tuple, Union + from pyramid.request import Request + from pywps import Process as ProcessWPS + from weaver.datatype import Bill, Job, Process, Quote, Service + from weaver.typedefs import AnyValue + + JobListAndCount = Tuple[List[Job], int] + JobCategory = Dict[str, Union[AnyValue, Job]] + JobCategoriesAndCount = Tuple[List[JobCategory], int] + + +class StoreInterface(object, metaclass=abc.ABCMeta): type = None def __init__(self): @@ -9,18 +24,181 @@ def __init__(self): class StoreServices(StoreInterface): type = "services" + @abc.abstractmethod + def save_service(self, service, overwrite=True): + # type: (Service, bool) -> Service + raise NotImplementedError + + @abc.abstractmethod + def delete_service(self, name): + # type: (str) -> bool + raise NotImplementedError + + @abc.abstractmethod + def list_services(self): + # type: () -> List[Service] + raise NotImplementedError + + @abc.abstractmethod + def fetch_by_name(self, name, visibility=None): + # type: (str, Optional[str]) -> Service + raise NotImplementedError + + @abc.abstractmethod + def fetch_by_url(self, url): + # type: (str) -> Service + raise NotImplementedError + + @abc.abstractmethod + def clear_services(self): + # type: () -> bool + raise NotImplementedError + class StoreProcesses(StoreInterface): type = "processes" + @abc.abstractmethod + def save_process(self, process, overwrite=True): + # type: (Union[Process, ProcessWPS], bool) -> Process + raise NotImplementedError + + @abc.abstractmethod + def delete_process(self, process_id, visibility=None): + # type: (str, Optional[str]) -> bool + raise NotImplementedError + + @abc.abstractmethod + def list_processes(self, visibility=None): + # type: (Optional[str]) -> List[Process] + raise NotImplementedError + + @abc.abstractmethod + def fetch_by_id(self, process_id, visibility=None): + # type: (str, Optional[str]) -> Process + raise NotImplementedError + + @abc.abstractmethod + def get_visibility(self, process_id): + # type: (str) -> str + raise NotImplementedError + + @abc.abstractmethod + def set_visibility(self, process_id, visibility): + # type: (str, str) -> None + raise NotImplementedError + + @abc.abstractmethod + def clear_processes(self): + # type: () -> bool + raise NotImplementedError + class StoreJobs(StoreInterface): type = "jobs" + @abc.abstractmethod + def save_job(self, + 
task_id, # type: str + process, # type: str + service=None, # type: Optional[str] + inputs=None, # type: Optional[List[Any]] + is_workflow=False, # type: bool + is_local=False, # type: bool + user_id=None, # type: Optional[int] + execute_async=True, # type: bool + custom_tags=None, # type: Optional[List[str]] + access=None, # type: Optional[str] + notification_email=None, # type: Optional[str] + accept_language=None, # type: Optional[str] + ): # type: (...) -> Job + raise NotImplementedError + + @abc.abstractmethod + def update_job(self, job): + # type: (Job) -> Job + raise NotImplementedError + + @abc.abstractmethod + def delete_job(self, job_id): + # type: (str) -> bool + raise NotImplementedError + + @abc.abstractmethod + def fetch_by_id(self, job_id): + # type: (str) -> Job + raise NotImplementedError + + @abc.abstractmethod + def list_jobs(self): + # type: () -> List[Job] + raise NotImplementedError + + @abc.abstractmethod + def find_jobs(self, + process=None, # type: Optional[str] + service=None, # type: Optional[str] + tags=None, # type: Optional[List[str]] + access=None, # type: Optional[str] + notification_email=None, # type: Optional[str] + status=None, # type: Optional[str] + sort=None, # type: Optional[str] + page=0, # type: int + limit=10, # type: int + group_by=None, # type: Optional[Union[str, List[str]]] + request=None, # type: Optional[Request] + ): # type: (...) -> Union[JobListAndCount, JobCategoriesAndCount] + raise NotImplementedError + + @abc.abstractmethod + def clear_jobs(self): + # type: () -> bool + raise NotImplementedError + class StoreQuotes(StoreInterface): type = "quotes" + @abc.abstractmethod + def save_quote(self, quote): + # type: (Quote) -> Quote + raise NotImplementedError + + @abc.abstractmethod + def fetch_by_id(self, quote_id): + # type: (str) -> Quote + raise NotImplementedError + + @abc.abstractmethod + def list_quotes(self): + # type: (...) -> List[Quote] + raise NotImplementedError + + @abc.abstractmethod + def find_quotes(self, process_id=None, page=0, limit=10, sort=None): + # type: (Optional[str], int, int, Optional[str]) -> Tuple[List[Quote], int] + raise NotImplementedError + class StoreBills(StoreInterface): type = "bills" + + @abc.abstractmethod + def save_bill(self, bill): + # type: (Bill) -> Bill + raise NotImplementedError + + @abc.abstractmethod + def fetch_by_id(self, bill_id): + # type: (str) -> Bill + raise NotImplementedError + + @abc.abstractmethod + def list_bills(self): + # type: (...) 
-> List[Bill] + raise NotImplementedError + + @abc.abstractmethod + def find_bills(self, quote_id=None, page=0, limit=10, sort=None): + # type: (Optional[str], int, int, Optional[str]) -> Tuple[List[Bill], int] + raise NotImplementedError diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 9b25388a6..a6671bea0 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -6,7 +6,6 @@ from typing import TYPE_CHECKING import pymongo -import six from pymongo import ASCENDING, DESCENDING from pyramid.request import Request from pywps import Process as ProcessWPS @@ -45,14 +44,14 @@ from weaver.store.base import StoreBills, StoreJobs, StoreProcesses, StoreQuotes, StoreServices from weaver.utils import get_base_url, get_sane_name, get_weaver_url, islambda, now from weaver.visibility import VISIBILITY_PRIVATE, VISIBILITY_PUBLIC, VISIBILITY_VALUES +from weaver.wps.utils import get_wps_url if TYPE_CHECKING: - from weaver.typedefs import AnyValue, AnyProcess, AnyProcessType # noqa: F401 - from pymongo.collection import Collection # noqa: F401 - from typing import Any, AnyStr, Callable, Dict, List, Optional, Tuple, Union # noqa: F401 - JobListAndCount = Tuple[List[Job], int] # noqa: F401 - JobCategory = Dict[AnyStr, Union[AnyValue, Job]] # noqa: F401 - JobCategoriesAndCount = Tuple[List[JobCategory], int] # noqa: F401 + from typing import Any, Callable, Dict, List, Optional, Tuple, Union + from pymongo.collection import Collection + + from weaver.store.base import JobCategoriesAndCount, JobListAndCount + from weaver.typedefs import AnyProcess, AnyProcessType LOGGER = logging.getLogger(__name__) @@ -63,7 +62,7 @@ class MongodbStore(object): """ def __init__(self, collection, sane_name_config=None): - # type: (Collection, Optional[Dict[AnyStr, Any]]) -> None + # type: (Collection, Optional[Dict[str, Any]]) -> None if not isinstance(collection, pymongo.collection.Collection): raise TypeError("Collection not of expected type.") self.collection = collection # type: Collection @@ -94,8 +93,8 @@ def __init__(self, *args, **kwargs): StoreServices.__init__(self) MongodbStore.__init__(self, *db_args, **db_kwargs) - def save_service(self, service, overwrite=True, request=None): - # type: (Service, bool, Optional[Request]) -> Service + def save_service(self, service, overwrite=True): + # type: (Service, bool) -> Service """ Stores an OWS service in mongodb. """ @@ -118,18 +117,18 @@ def save_service(self, service, overwrite=True, request=None): type=service.type, public=service.public, auth=service.auth).params()) - return self.fetch_by_url(url=service_url, request=request) + return self.fetch_by_url(url=service_url) - def delete_service(self, name, request=None): # noqa: E811 - # type: (AnyStr, Optional[Request]) -> bool + def delete_service(self, name): + # type: (str) -> bool """ Removes service from `MongoDB` storage. """ self.collection.delete_one({"name": name}) return True - def list_services(self, request=None): # noqa: E811 - # type: (Optional[Request]) -> List[Service] + def list_services(self): + # type: () -> List[Service] """ Lists all services in `MongoDB` storage. 
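Since the ``request`` argument is dropped from the store methods above, callers now reach the stores strictly through the database handle; a rough usage sketch, assuming ``settings`` is the application settings container (same pattern as ``get_db(app).get_store(...)`` used further below in ``get_pywps_service``):

    from weaver.database import get_db
    from weaver.store.base import StoreServices

    store = get_db(settings).get_store(StoreServices)
    for service in store.list_services():   # no 'request' argument anymore
        print(service.name, service.url)    # assuming the usual Service datatype fields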
""" @@ -138,8 +137,8 @@ def list_services(self, request=None): # noqa: E811 my_services.append(Service(service)) return my_services - def fetch_by_name(self, name, visibility=None, request=None): # noqa: E811 - # type: (AnyStr, Optional[AnyStr], Optional[Request]) -> Service + def fetch_by_name(self, name, visibility=None): + # type: (str, Optional[str]) -> Service """ Gets service for given ``name`` from `MongoDB` storage. """ @@ -153,8 +152,8 @@ def fetch_by_name(self, name, visibility=None, request=None): # noqa: E811 raise ServiceNotAccessible("Service '{}' cannot be accessed.".format(name)) return service - def fetch_by_url(self, url, request=None): # noqa: E811 - # type: (AnyStr, Optional[Request]) -> Service + def fetch_by_url(self, url): + # type: (str) -> Service """ Gets service for given ``url`` from `MongoDB` storage. """ @@ -163,8 +162,8 @@ def fetch_by_url(self, url, request=None): # noqa: E811 raise ServiceNotFound return Service(service) - def clear_services(self, request=None): # noqa: E811 - # type: (Optional[Request]) -> bool + def clear_services(self): + # type: () -> bool """ Removes all OWS services from `MongoDB` storage. """ @@ -184,8 +183,8 @@ def __init__(self, *args, **kwargs): settings = kwargs.get("settings", {}) if not registry else registry.settings default_processes = kwargs.get("default_processes") self.default_host = get_weaver_url(settings) - self.default_wps_endpoint = "{host}{wps}".format(host=self.default_host, - wps=settings.get("weaver.wps_path", "")) + self.default_wps_endpoint = get_wps_url(settings) + # enforce default process re-registration to receive any applicable update if default_processes: registered_processes = [process.identifier for process in self.list_processes()] @@ -235,30 +234,29 @@ def _get_process_field(process, function_dict): raise ProcessInstanceError("Unsupported process type '{}'".format(type(process))) def _get_process_id(self, process): - # type: (AnyProcess) -> AnyStr + # type: (AnyProcess) -> str return self._get_process_field(process, lambda: process.identifier) def _get_process_type(self, process): - # type: (AnyProcess) -> AnyStr + # type: (AnyProcess) -> str return self._get_process_field(process, {Process: lambda: process.type, ProcessWPS: lambda: getattr(process, "type", PROCESS_WPS)}).lower() def _get_process_endpoint_wps1(self, process): - # type: (AnyProcess) -> AnyStr + # type: (AnyProcess) -> str url = self._get_process_field(process, {Process: lambda: process.processEndpointWPS1, ProcessWPS: lambda: None}) if not url: url = self.default_wps_endpoint return url - def save_process(self, process, overwrite=True, request=None): # noqa: E811 - # type: (Union[Process, ProcessWPS], bool, Optional[Request]) -> Process + def save_process(self, process, overwrite=True): + # type: (Union[Process, ProcessWPS], bool) -> Process """ Stores a process in storage. :param process: An instance of :class:`weaver.datatype.Process`. :param overwrite: Overwrite the matching process instance by name if conflicting. 
- :param request: """ process_id = self._get_process_id(process) sane_name = get_sane_name(process_id, **self.sane_name_config) @@ -271,31 +269,30 @@ def save_process(self, process, overwrite=True, request=None): # noqa: E811 self._add_process(process) return self.fetch_by_id(sane_name) - def delete_process(self, process_id, visibility=None, request=None): - # type: (AnyStr, Optional[AnyStr], Optional[Request]) -> bool + def delete_process(self, process_id, visibility=None): + # type: (str, Optional[str]) -> bool """ Removes process from database, optionally filtered by visibility. If ``visibility=None``, the process is deleted (if existing) regardless of its visibility value. """ sane_name = get_sane_name(process_id, **self.sane_name_config) - process = self.fetch_by_id(sane_name, visibility=visibility, request=request) + process = self.fetch_by_id(sane_name, visibility=visibility) if not process: raise ProcessNotFound("Process '{}' could not be found.".format(sane_name)) return bool(self.collection.delete_one({"identifier": sane_name}).deleted_count) - def list_processes(self, visibility=None, request=None): # noqa: E811 - # type: (Optional[AnyStr], Optional[Request]) -> List[Process] + def list_processes(self, visibility=None): + # type: (Optional[str]) -> List[Process] """ Lists all processes in database, optionally filtered by `visibility`. :param visibility: One value amongst `weaver.visibility`. - :param request: """ db_processes = [] search_filters = {} if visibility is None: visibility = VISIBILITY_VALUES - if isinstance(visibility, six.string_types): + if isinstance(visibility, str): visibility = [visibility] for v in visibility: if v not in VISIBILITY_VALUES: @@ -306,15 +303,14 @@ def list_processes(self, visibility=None, request=None): # noqa: E811 db_processes.append(Process(process)) return db_processes - def fetch_by_id(self, process_id, visibility=None, request=None): # noqa: E811 - # type: (AnyStr, Optional[AnyStr], Optional[Request]) -> Process + def fetch_by_id(self, process_id, visibility=None): + # type: (str, Optional[str]) -> Process """ Get process for given `process_id` from storage, optionally filtered by `visibility`. If ``visibility=None``, the process is retrieved (if existing) regardless of its visibility value. :param process_id: process identifier :param visibility: one value amongst `weaver.visibility`. - :param request: :return: An instance of :class:`weaver.datatype.Process`. """ sane_name = get_sane_name(process_id, **self.sane_name_config) @@ -326,8 +322,8 @@ def fetch_by_id(self, process_id, visibility=None, request=None): # noqa: E811 raise ProcessNotAccessible("Process '{}' cannot be accessed.".format(sane_name)) return process - def get_visibility(self, process_id, request=None): # noqa: E811 - # type: (AnyStr, Optional[Request]) -> AnyStr + def get_visibility(self, process_id): + # type: (str) -> str """ Get `visibility` of a process. @@ -336,14 +332,13 @@ def get_visibility(self, process_id, request=None): # noqa: E811 process = self.fetch_by_id(process_id) return process.visibility - def set_visibility(self, process_id, visibility, request=None): # noqa: E811 - # type: (AnyStr, AnyStr, Optional[Request]) -> None + def set_visibility(self, process_id, visibility): + # type: (str, str) -> None """ Set `visibility` of a process. :param visibility: One value amongst `weaver.visibility`. :param process_id: - :param request: :raises TypeError: when :paramref:`visibility` is not :class:`str`. 
:raises ValueError: when :paramref:`visibility` is not one of :py:data:`weaver.visibility.VISIBILITY_VALUES`. """ @@ -351,12 +346,10 @@ def set_visibility(self, process_id, visibility, request=None): # noqa: E811 process.visibility = visibility self.save_process(process, overwrite=True) - def clear_processes(self, request=None): # noqa: E811 - # type: (Optional[Request]) -> bool + def clear_processes(self): + # type: () -> bool """ Clears all processes from the store. - - :param request: """ self.collection.drop() return True @@ -372,24 +365,25 @@ def __init__(self, *args, **kwargs): MongodbStore.__init__(self, *db_args, **db_kwargs) def save_job(self, - task_id, # type: AnyStr - process, # type: AnyStr - service=None, # type: Optional[AnyStr] + task_id, # type: str + process, # type: str + service=None, # type: Optional[str] inputs=None, # type: Optional[List[Any]] is_workflow=False, # type: bool + is_local=False, # type: bool user_id=None, # type: Optional[int] execute_async=True, # type: bool - custom_tags=None, # type: Optional[List[AnyStr]] - access=None, # type: Optional[AnyStr] - notification_email=None, # type: Optional[AnyStr] - accept_language=None, # type: Optional[AnyStr] + custom_tags=None, # type: Optional[List[str]] + access=None, # type: Optional[str] + notification_email=None, # type: Optional[str] + accept_language=None, # type: Optional[str] ): # type: (...) -> Job """ Stores a job in mongodb. """ try: tags = ["dev"] - tags.extend(list(filter(lambda t: t, custom_tags or []))) + tags.extend(list(filter(lambda t: bool(t), custom_tags or []))) # remove empty tags if is_workflow: tags.append(PROCESS_WORKFLOW) else: @@ -409,8 +403,9 @@ def save_job(self, "status": map_status(STATUS_ACCEPTED), "execute_async": execute_async, "is_workflow": is_workflow, + "is_local": is_local, "created": now(), - "tags": list(set(tags)), + "tags": list(set(tags)), # remove duplicates "access": access, "notification_email": notification_email, "accept_language": accept_language, @@ -437,16 +432,16 @@ def update_job(self, job): raise JobUpdateError("Error occurred during job update: [{}]".format(repr(ex))) raise JobUpdateError("Failed to update specified job: '{}'".format(str(job))) - def delete_job(self, job_id, request=None): # noqa: E811 - # type: (AnyStr, Optional[Request]) -> bool + def delete_job(self, job_id): + # type: (str) -> bool """ Removes job from `MongoDB` storage. """ self.collection.delete_one({"id": job_id}) return True - def fetch_by_id(self, job_id, request=None): # noqa: E811 - # type: (AnyStr, Optional[Request]) -> Job + def fetch_by_id(self, job_id): + # type: (str) -> Job """ Gets job for given ``job_id`` from `MongoDB` storage. """ @@ -455,8 +450,8 @@ def fetch_by_id(self, job_id, request=None): # noqa: E811 raise JobNotFound("Could not find job matching: '{}'".format(job_id)) return Job(job) - def list_jobs(self, request=None): # noqa: E811 - # type: (Optional[Request]) -> List[Job] + def list_jobs(self): + # type: () -> List[Job] """ Lists all jobs in `MongoDB` storage. For user-specific access to available jobs, use :meth:`MongodbJobStore.find_jobs` instead. 
@@ -467,17 +462,17 @@ def list_jobs(self, request=None): # noqa: E811 return jobs def find_jobs(self, - request, # type: Request - process=None, # type: Optional[AnyStr] - service=None, # type: Optional[AnyStr] - tags=None, # type: Optional[List[AnyStr]] - access=None, # type: Optional[AnyStr] - notification_email=None, # type: Optional[AnyStr] - status=None, # type: Optional[AnyStr] - sort=None, # type: Optional[AnyStr] + process=None, # type: Optional[str] + service=None, # type: Optional[str] + tags=None, # type: Optional[List[str]] + access=None, # type: Optional[str] + notification_email=None, # type: Optional[str] + status=None, # type: Optional[str] + sort=None, # type: Optional[str] page=0, # type: int limit=10, # type: int - group_by=None, # type: Optional[Union[AnyStr, List[AnyStr]]] + group_by=None, # type: Optional[Union[str, List[str]]] + request=None, # type: Optional[Request] ): # type: (...) -> Union[JobListAndCount, JobCategoriesAndCount] """ Finds all jobs in `MongoDB` storage matching search filters to obtain results with requested paging or grouping. @@ -486,7 +481,7 @@ def find_jobs(self, :param process: process name to filter matching jobs. :param service: service name to filter matching jobs. :param tags: list of tags to filter matching jobs. - :param access: access visibility to filter matching jobs (default: PUBLIC). + :param access: access visibility to filter matching jobs (default: :py:data:`VISIBILITY_PUBLIC`). :param notification_email: notification email to filter matching jobs. :param status: status to filter matching jobs. :param sort: field which is used for sorting results (default: creation date, descending). @@ -528,21 +523,24 @@ def find_jobs(self, search_filters = {} - if request.has_permission("admin") and access in VISIBILITY_VALUES: - search_filters["access"] = access + if not request: + search_filters.setdefault("access", VISIBILITY_PUBLIC) else: - user_id = request.authenticated_userid - if user_id is not None: - search_filters["user_id"] = user_id - if access in VISIBILITY_VALUES: - search_filters["access"] = access + if request.has_permission("admin") and access in VISIBILITY_VALUES: + search_filters["access"] = access else: - search_filters["access"] = VISIBILITY_PUBLIC + user_id = request.authenticated_userid + if user_id is not None: + search_filters["user_id"] = user_id + if access in VISIBILITY_VALUES: + search_filters["access"] = access + else: + search_filters["access"] = VISIBILITY_PUBLIC if tags: search_filters["tags"] = {"$all": tags} - if status in JOB_STATUS_CATEGORIES.keys(): + if status in JOB_STATUS_CATEGORIES: search_filters["status"] = {"$in": JOB_STATUS_CATEGORIES[status]} elif status: search_filters["status"] = status @@ -570,7 +568,7 @@ def find_jobs(self, # results by group categories if group_by: - group_by = [group_by] if isinstance(group_by, six.string_types) else group_by # type: List[AnyStr] + group_by = [group_by] if isinstance(group_by, str) else group_by # type: List[str] group_categories = {field: "$" + field for field in group_by} # fields that can generate groups pipeline.extend([{ "$group": { @@ -598,8 +596,8 @@ def find_jobs(self, total = self.collection.count_documents(search_filters) return items, total - def clear_jobs(self, request=None): # noqa: E811 - # type: (Optional[Request]) -> bool + def clear_jobs(self): + # type: () -> bool """ Removes all jobs from `MongoDB` storage. 
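With the reordered ``find_jobs`` signature above (``request`` now optional and last), a query issued without any request object is restricted to public jobs by the added default branch; a hedged sketch (the process name is a placeholder):

    from weaver.database import get_db
    from weaver.store.base import StoreJobs

    store = get_db(settings).get_store(StoreJobs)            # 'settings' assumed from the application
    jobs, count = store.find_jobs(process="demo-process")    # no request: 'access' defaults to public
    groups, count = store.find_jobs(process="demo-process", group_by="status")  # grouped variant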
""" @@ -633,7 +631,7 @@ def save_quote(self, quote): return quote def fetch_by_id(self, quote_id): - # type: (AnyStr) -> Quote + # type: (str) -> Quote """ Gets quote for given ``quote_id`` from `MongoDB` storage. """ @@ -653,7 +651,7 @@ def list_quotes(self): return quotes def find_quotes(self, process_id=None, page=0, limit=10, sort=None): - # type: (Optional[AnyStr], int, int, Optional[AnyStr]) -> Tuple[List[Quote], int] + # type: (Optional[str], int, int, Optional[str]) -> Tuple[List[Quote], int] """ Finds all quotes in `MongoDB` storage matching search filters. @@ -662,7 +660,7 @@ def find_quotes(self, process_id=None, page=0, limit=10, sort=None): """ search_filters = {} - if isinstance(process_id, six.string_types): + if isinstance(process_id, str): search_filters["process"] = process_id if sort is None: @@ -704,7 +702,7 @@ def save_bill(self, bill): return Bill(bill) def fetch_by_id(self, bill_id): - # type: (AnyStr) -> Bill + # type: (str) -> Bill """ Gets bill for given ``bill_id`` from `MongoDB` storage. """ @@ -724,7 +722,7 @@ def list_bills(self): return bills def find_bills(self, quote_id=None, page=0, limit=10, sort=None): - # type: (Optional[AnyStr], int, int, Optional[AnyStr]) -> Tuple[List[Bill], int] + # type: (Optional[str], int, int, Optional[str]) -> Tuple[List[Bill], int] """ Finds all bills in `MongoDB` storage matching search filters. @@ -733,7 +731,7 @@ def find_bills(self, quote_id=None, page=0, limit=10, sort=None): """ search_filters = {} - if isinstance(quote_id, six.string_types): + if isinstance(quote_id, str): search_filters["quote"] = quote_id if sort is None: diff --git a/weaver/tweens.py b/weaver/tweens.py index 4f043accd..c73976ec3 100644 --- a/weaver/tweens.py +++ b/weaver/tweens.py @@ -12,6 +12,9 @@ OWS_TWEEN_HANDLED = "OWS_TWEEN_HANDLED" +# FIXME: +# https://github.com/crim-ca/weaver/issues/215 +# define common Exception classes that won't require this type of conversion def ows_response_tween(request, handler): """Tween that wraps any API request with appropriate dispatch of error conversion to handle formatting.""" try: @@ -20,16 +23,19 @@ def ows_response_tween(request, handler): if isinstance(result, Exception) and not isinstance(result, (HTTPSuccessful, HTTPRedirection)): raise result # let the previous tween handler handle this case return result + # NOTE: + # Handle exceptions from most explicit definitions to least explicit. + # Exceptions in 'weaver.exceptions' sometimes derive from 'OWSException' to provide additional details. + # Furthermore, 'OWSException' have extensive details with references to 'HTTPException' and 'pywps.exceptions'. 
except HTTPException as err: LOGGER.debug("http exception -> ows exception response.") # Use the same json formatter than OWSException raised_error = err raised_error._json_formatter = OWSException.json_formatter return_error = raised_error - exc_info_err = False exc_log_lvl = logging.WARNING if err.status_code < 500 else logging.ERROR - except OWSException as err: + except OWSException as err: # could be 'WeaverException' with 'OWSException' base LOGGER.debug("direct ows exception response") raised_error = err return_error = err @@ -47,11 +53,17 @@ def ows_response_tween(request, handler): return_error = OWSException(detail=str(err), status=HTTPInternalServerError) exc_info_err = sys.exc_info() exc_log_lvl = logging.ERROR + # FIXME: + # https://github.com/crim-ca/weaver/issues/215 + # convivial generation of this repr format should be directly in common exception class + raised_err_code = getattr(raised_error, "code", getattr(raised_error, "status_code", 500)) + raised_err_repr = "({}) <{}> {!s}".format(type(raised_error).__name__, raised_err_code, raised_error) if raised_error != return_error: - err_msg = "\n Raised: [{!r}]\n Return: [{!r}]".format(raised_error, return_error) + err_msg = "\n Raised: [{}]\n Return: [{!r}]".format(raised_err_repr, return_error) else: - err_msg = " [{!r}]".format(raised_error) + err_msg = " [{}]".format(raised_err_repr) LOGGER.log(exc_log_lvl, "Handled request exception:%s", err_msg, exc_info=exc_info_err) + LOGGER.debug("Handled request details:\n%s\n%s", raised_err_repr, getattr(raised_error, "text", "")) return return_error diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 846bcd2db..17557e941 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -1,70 +1,86 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - # pylint: disable=W0611,unused-import,C0103,invalid-name - from weaver.processes.wps_process_base import WpsProcessInterface - from weaver.datatype import Process - from weaver.status import AnyStatusType - from webob.headers import ResponseHeaders, EnvironHeaders - from webob.response import Response as WebobResponse - from pyramid.response import Response as PyramidResponse - from pyramid.registry import Registry - from pyramid.request import Request as PyramidRequest - from pyramid.config import Configurator - from celery.app import Celery - from requests.structures import CaseInsensitiveDict - from cwltool.factory import Callable as CWLFactoryCallable # noqa: F401 # provide alias name, not used here - from webtest.response import TestResponse - from pywps.app import WPSRequest - from pywps import Process as ProcessWPS - from typing import Any, AnyStr, Callable, Dict, List, Optional, Tuple, Type, Union + import os import typing + from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union if hasattr(typing, "TypedDict"): from typing import TypedDict # pylint: disable=E0611,no-name-in-module else: from typing_extensions import TypedDict - import lxml.etree - import os if hasattr(os, "PathLike"): - FileSystemPathType = Union[os.PathLike, AnyStr] + FileSystemPathType = Union[os.PathLike, str] else: - FileSystemPathType = AnyStr + FileSystemPathType = str + + import lxml.etree + from celery.app import Celery + from pyramid.httpexceptions import HTTPSuccessful, HTTPRedirection + from pyramid.registry import Registry + from pyramid.request import Request as PyramidRequest + from pyramid.response import Response as PyramidResponse + from pyramid.testing import DummyRequest + from pyramid.config import 
Configurator + from pywps.app import WPSRequest + from pywps import Process as ProcessWPS + from requests import Request as RequestsRequest + from requests.structures import CaseInsensitiveDict + from webob.headers import ResponseHeaders, EnvironHeaders + from webob.response import Response as WebobResponse + from webtest.response import TestResponse + from werkzeug.wrappers import Request as WerkzeugRequest + + from weaver.processes.wps_process_base import WpsProcessInterface + from weaver.datatype import Process + from weaver.status import AnyStatusType + # pylint: disable=C0103,invalid-name Number = Union[int, float] - ValueType = Union[AnyStr, Number, bool] + ValueType = Union[str, Number, bool] AnyValue = Optional[ValueType] - AnyKey = Union[AnyStr, int] - JsonList = List["JSON"] - JsonObject = Dict[AnyStr, "JSON"] - JSON = Union[AnyValue, JsonObject, JsonList] - CWL = TypedDict("CWL", {"cwlVersion": AnyStr, "class": AnyStr, "inputs": JSON, "outputs": JSON}) - XML = lxml.etree._Element # noqa: W0212 + AnyValueType = AnyValue # alias + AnyKey = Union[str, int] + # add more levels of explicit definitions than necessary to simulate JSON recursive structure better than 'Any' + # amount of repeated equivalent definition makes typing analysis 'work well enough' for most use cases + _JsonObjectItem = Dict[str, Union["JSON", "_JsonListItem"]] + _JsonListItem = List[Union[AnyValue, _JsonObjectItem, "_JsonListItem", "JSON"]] + _JsonItem = Union[AnyValue, _JsonObjectItem, _JsonListItem] + JSON = Union[Dict[str, _JsonItem], List[_JsonItem]] + CWL = TypedDict("CWL", {"cwlVersion": str, "class": str, "inputs": JSON, "outputs": JSON, + "requirements": JSON, "hints": JSON, "label": str, "doc": str, "s:keywords": str, + "$namespaces": Dict[str, str], "$schemas": Dict[str, str]}, total=False) + KVPType = Union[ValueType, Sequence[ValueType]] + KVP = Union[Sequence[Tuple[str, KVPType]], Dict[str, KVPType]] + XML = lxml.etree._Element # noqa AnyContainer = Union[Configurator, Registry, PyramidRequest, Celery] - SettingValue = Optional[JSON] - SettingsType = Dict[AnyStr, SettingValue] + SettingValue = Optional[Union[JSON, AnyValue]] + SettingsType = Dict[str, SettingValue] AnySettingsContainer = Union[AnyContainer, SettingsType] AnyRegistryContainer = AnyContainer AnyDatabaseContainer = AnyContainer - CookiesType = Dict[AnyStr, AnyStr] - HeadersType = Dict[AnyStr, AnyStr] - CookiesTupleType = List[Tuple[AnyStr, AnyStr]] - HeadersTupleType = List[Tuple[AnyStr, AnyStr]] + CookiesType = Dict[str, str] + HeadersType = Dict[str, str] + CookiesTupleType = List[Tuple[str, str]] + HeadersTupleType = List[Tuple[str, str]] CookiesBaseType = Union[CookiesType, CookiesTupleType] HeadersBaseType = Union[HeadersType, HeadersTupleType] - OptionalHeaderCookiesType = Union[Tuple[None, None], Tuple[HeadersBaseType, CookiesBaseType]] + HeaderCookiesType = Union[HeadersBaseType, CookiesBaseType] + HeaderCookiesTuple = Union[Tuple[None, None], Tuple[HeadersBaseType, CookiesBaseType]] AnyHeadersContainer = Union[HeadersBaseType, ResponseHeaders, EnvironHeaders, CaseInsensitiveDict] AnyCookiesContainer = Union[CookiesBaseType, WPSRequest, PyramidRequest, AnyHeadersContainer] - AnyResponseType = Union[WebobResponse, PyramidResponse, TestResponse] + AnyResponseType = Union[PyramidResponse, WebobResponse, TestResponse] + AnyRequestType = Union[PyramidRequest, WerkzeugRequest, RequestsRequest, DummyRequest] + HTTPValid = Union[HTTPSuccessful, HTTPRedirection] AnyProcess = Union[Process, ProcessWPS] AnyProcessType = 
Union[Type[Process], Type[ProcessWPS]] - GlobType = TypedDict("GlobType", {"glob": AnyStr}) - ExpectedOutputType = TypedDict("ExpectedOutputType", {"type": AnyStr, "id": AnyStr, "outputBinding": GlobType}) - GetJobProcessDefinitionFunction = Callable[[AnyStr, Dict[AnyStr, AnyStr], Dict[AnyStr, Any]], WpsProcessInterface] - ToolPathObjectType = Dict[AnyStr, Any] + GlobType = TypedDict("GlobType", {"glob": str}) + ExpectedOutputType = TypedDict("ExpectedOutputType", {"type": str, "id": str, "outputBinding": GlobType}) + GetJobProcessDefinitionFunction = Callable[[str, Dict[str, str], Dict[str, Any]], WpsProcessInterface] + ToolPathObjectType = Dict[str, Any] # update_status(provider, message, progress, status) - UpdateStatusPartialFunction = Callable[[AnyStr, AnyStr, int, AnyStatusType], None] + UpdateStatusPartialFunction = Callable[[str, str, int, AnyStatusType], None] diff --git a/weaver/utils.py b/weaver/utils.py index b906cc196..524672488 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -11,11 +11,12 @@ from copy import deepcopy from datetime import datetime from typing import TYPE_CHECKING +from urllib.parse import ParseResult, parse_qs, urlparse, urlunsplit import boto3 +import colander import pytz import requests -import six from celery.app import Celery from lxml import etree from pyramid.config import Configurator @@ -26,20 +27,29 @@ from requests import HTTPError as RequestsHTTPError, Response from requests.structures import CaseInsensitiveDict from requests_file import FileAdapter -from six.moves.urllib.parse import ParseResult, parse_qs, urlparse, urlunsplit from urlmatch import urlmatch from webob.headers import EnvironHeaders, ResponseHeaders -from weaver.exceptions import InvalidIdentifierValue from weaver.status import map_status from weaver.warning import TimeZoneInfoAlreadySetWarning if TYPE_CHECKING: - from weaver.typedefs import ( # noqa: F401 - AnyValue, AnyKey, AnySettingsContainer, AnyRegistryContainer, AnyHeadersContainer, - AnyResponseType, HeadersType, SettingsType, JSON, XML, Number + from typing import Any, Dict, List, Iterable, Optional, Type, Union + + from weaver.typedefs import ( + AnyKey, + AnyHeadersContainer, + AnySettingsContainer, + AnyRegistryContainer, + AnyResponseType, + AnyValue, + HeadersType, + JSON, + KVP, + Number, + SettingsType, + XML ) - from typing import Union, Any, Dict, List, AnyStr, Iterable, Optional, Type # noqa: F401 LOGGER = logging.getLogger(__name__) @@ -60,7 +70,7 @@ def __call__(cls): return cls.__instance__ -class _NullType(six.with_metaclass(_Singleton)): +class _NullType(metaclass=_Singleton): """Represents a ``null`` value to differentiate from ``None``.""" # pylint: disable=E1101,no-member @@ -86,14 +96,14 @@ def __nonzero__(): def get_weaver_url(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str """Retrieves the home URL of the `weaver` application.""" value = get_settings(container).get("weaver.url", "") or "" # handle explicit None return value.rstrip("/").strip() def get_any_id(info): - # type: (JSON) -> Union[AnyStr, None] + # type: (JSON) -> Union[str, None] """Retrieves a dictionary `id-like` key using multiple common variations ``[id, identifier, _id]``. :param info: dictionary that potentially contains an `id-like` key. 
:returns: value of the matched `id-like` key or ``None`` if not found.""" @@ -109,15 +119,15 @@ def get_any_value(info): def get_any_message(info): - # type: (JSON) -> AnyStr + # type: (JSON) -> str """Retrieves a dictionary 'value'-like key using multiple common variations [message]. :param info: dictionary that potentially contains a 'message'-like key. :returns: value of the matched 'message'-like key or an empty string if not found. """ return info.get("message", "").strip() -def get_registry(container): - # type: (AnyRegistryContainer) -> Registry +def get_registry(container, nothrow=False): + # type: (AnyRegistryContainer, bool) -> Optional[Registry] """Retrieves the application ``registry`` from various containers referencing to it.""" if isinstance(container, Celery): return container.conf.get("PYRAMID_REGISTRY", {}) @@ -125,6 +135,8 @@ def get_registry(container): return container.registry if isinstance(container, Registry): return container + if nothrow: + return None raise TypeError("Could not retrieve registry from container object of type [{}].".format(type(container))) @@ -141,7 +153,7 @@ def get_settings(container): def get_header(header_name, header_container): - # type: (AnyStr, AnyHeadersContainer) -> Union[AnyStr, None] + # type: (str, AnyHeadersContainer) -> Union[str, None] """ Searches for the specified header by case/dash/underscore-insensitive ``header_name`` inside ``header_container``. """ @@ -160,7 +172,7 @@ def get_header(header_name, header_container): def get_cookie_headers(header_container, cookie_header_name="Cookie"): - # type: (AnyHeadersContainer, Optional[AnyStr]) -> HeadersType + # type: (AnyHeadersContainer, Optional[str]) -> HeadersType """ Looks for ``cookie_header_name`` header within ``header_container``. :returns: new header container in the form ``{'Cookie': }`` if it was matched, or empty otherwise. @@ -175,9 +187,9 @@ def get_cookie_headers(header_container, cookie_header_name="Cookie"): def get_url_without_query(url): - # type: (Union[AnyStr, ParseResult]) -> AnyStr + # type: (Union[str, ParseResult]) -> str """Removes the query string part of an URL.""" - if isinstance(url, six.string_types): + if isinstance(url, str): url = urlparse(url) if not isinstance(url, ParseResult): raise TypeError("Expected a parsed URL.") @@ -185,13 +197,26 @@ def get_url_without_query(url): def is_valid_url(url): - # type: (Union[AnyStr, None]) -> bool + # type: (Optional[str]) -> bool try: return bool(urlparse(url).scheme) except Exception: # noqa: W0703 # nosec: B110 return False +UUID_PATTERN = re.compile(colander.UUID_REGEX, re.IGNORECASE) + + +def is_uuid(maybe_uuid): + # type: (Any) -> bool + """ + Evaluates if the provided input is a UUID-like string. + """ + if not isinstance(maybe_uuid, str): + return False + return re.match(UUID_PATTERN, str(maybe_uuid)) is not None + + def parse_extra_options(option_str): """ Parses the extra options parameter. @@ -251,7 +276,7 @@ def expires_at(hours=1): def localize_datetime(dt, tz_name="UTC"): - # type: (datetime, Optional[AnyStr]) -> datetime + # type: (datetime, Optional[str]) -> datetime """ Provide a timezone-aware object for a given datetime and timezone name """ @@ -267,7 +292,7 @@ def localize_datetime(dt, tz_name="UTC"): def get_base_url(url): - # type: (AnyStr) -> AnyStr + # type: (str) -> str """ Obtains the base URL from the given ``url``. 
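As a quick illustration of the new ``is_uuid`` helper added earlier in this hunk (sample values are arbitrary):

    from weaver.utils import is_uuid

    is_uuid("12345678-1234-5678-1234-567812345678")   # True, matches the case-insensitive UUID pattern
    is_uuid("not-a-uuid")                             # False
    is_uuid(None)                                     # False, non-string inputs are rejected outright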
""" @@ -279,7 +304,7 @@ def get_base_url(url): def xml_path_elements(path): - # type: (AnyStr) -> List[AnyStr] + # type: (str) -> List[str] elements = [el.strip() for el in path.split("/")] elements = [el for el in elements if len(el) > 0] return elements @@ -297,7 +322,7 @@ def xml_strip_ns(tree): def ows_context_href(href, partial=False): - # type: (AnyStr, Optional[bool]) -> JSON + # type: (str, Optional[bool]) -> JSON """Returns the complete or partial dictionary defining an ``OWSContext`` from a reference.""" context = {"offering": {"content": {"href": href}}} if partial: @@ -345,9 +370,9 @@ def raise_on_xml_exception(xml_node): def str2bytes(string): - # type: (Union[AnyStr, bytes]) -> bytes + # type: (Union[str, bytes]) -> bytes """Obtains the bytes representation of the string.""" - if not isinstance(string, (six.string_types, bytes)): + if not isinstance(string, (str, bytes)): raise TypeError("Cannot convert item to bytes: {!r}".format(type(string))) if isinstance(string, bytes): return string @@ -355,9 +380,9 @@ def str2bytes(string): def bytes2str(string): - # type: (Union[AnyStr, bytes]) -> str + # type: (Union[str, bytes]) -> str """Obtains the unicode representation of the string.""" - if not isinstance(string, (six.string_types, bytes)): + if not isinstance(string, (str, bytes)): raise TypeError("Cannot convert item to unicode: {!r}".format(type(string))) if not isinstance(string, bytes): return string @@ -374,13 +399,13 @@ def islambda(func): def convert_snake_case(name): - # type: (AnyStr) -> AnyStr + # type: (str) -> str s1 = first_cap_re.sub(r"\1_\2", name) return all_cap_re.sub(r"\1_\2", s1).lower() def parse_request_query(request): - # type: (Request) -> Dict[AnyStr, Dict[AnyKey, AnyStr]] + # type: (Request) -> Dict[str, Dict[AnyKey, str]] """ :param request: :return: dict of dict where k=v are accessible by d[k][0] == v and q=k=v are accessible by d[q][k] == v, lowercase @@ -390,36 +415,91 @@ def parse_request_query(request): for q in queries: queries_dict[q] = dict() for i, kv in enumerate(queries[q]): - kvs = kv.split("=") - if len(kvs) > 1: - queries_dict[q][kvs[0]] = kvs[1] + kvp = kv.split("=") + if len(kvp) > 1: + queries_dict[q][kvp[0]] = kvp[1] else: - queries_dict[q][i] = kvs[0] + queries_dict[q][i] = kvp[0] return queries_dict +def get_path_kvp(path, sep=",", **params): + # type: (str, str, KVP) -> str + """ + Generates the WPS URL with Key-Value-Pairs (KVP) query parameters. + + :param path: WPS URL or Path + :param sep: separator to employ when multiple values are provided. + :param params: keyword parameters and their corresponding single or multi values to generate KVP. + :return: combined path and query parameters as KVP. + """ + + def _value(_v): + if isinstance(_v, (list, set, tuple)): + return sep.join([str(_) for _ in _v]) + return str(_v) + + kvp = ["{}={}".format(k, _value(v)) for k, v in params.items()] + return path + "?" + "&".join(kvp) + + def get_log_fmt(): - # type: (...) -> AnyStr + # type: (...) -> str + """ + Logging format employed for job output reporting. + """ return "[%(asctime)s] %(levelname)-8s [%(name)s] %(message)s" def get_log_date_fmt(): - # type: (...) -> AnyStr + # type: (...) -> str + """ + Logging date format employed for job output reporting. 
+ """ return "%Y-%m-%d %H:%M:%S" def get_log_monitor_msg(job_id, status, percent, message, location): - # type: (AnyStr, AnyStr, Number, AnyStr, AnyStr) -> AnyStr + # type: (str, str, Number, str, str) -> str return "Monitoring job {jobID} : [{status}] {percent} - {message} [{location}]".format( jobID=job_id, status=status, percent=percent, message=message, location=location ) def get_job_log_msg(status, message, progress=0, duration=None): - # type: (AnyStr, AnyStr, Optional[Number], Optional[AnyStr]) -> AnyStr + # type: (str, str, Optional[Number], Optional[str]) -> str return "{d} {p:3d}% {s:10} {m}".format(d=duration or "", p=int(progress or 0), s=map_status(status), m=message) +def setup_loggers(settings, level=None): + # type: (AnySettingsContainer, Optional[Union[int, str]]) -> None + """ + Update logging configuration known loggers based on application settings. + + When ``weaver.log_level`` exists in settings, it **overrides** any other INI configuration logging levels. + Otherwise, undefined logger levels will be set according to whichever is found first between ``weaver.log_level``, + the :paramref:`level` parameter or default :py:data:`logging.INFO`. + """ + log_level = settings.get("weaver.log_level") + override = False + if log_level: + override = True + else: + log_level = level or logging.INFO + if not isinstance(log_level, int): + log_level = logging.getLevelName(log_level.upper()) + for logger_name in ["weaver", "cwltool"]: + logger = logging.getLogger(logger_name) + if override or logger.level == logging.NOTSET: + logger.setLevel(log_level) + # define basic formatter/handler if config INI did not provide it + if not logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter(get_log_fmt()) + handler.setFormatter(formatter) + logger.addHandler(handler) + + def make_dirs(path, mode=0o755, exist_ok=False): """ Alternative to ``os.makedirs`` with ``exists_ok`` parameter only available for ``python>3.5``. @@ -486,7 +566,7 @@ def stack_(frame): def get_ssl_verify_option(method, url, settings, request_options=None): - # type: (AnyStr, AnyStr, AnySettingsContainer, Optional[SettingsType]) -> bool + # type: (str, str, AnySettingsContainer, Optional[SettingsType]) -> bool """ Obtains the SSL verification option from combined settings from ``weaver.ssl_verify`` and parsed ``weaver.request_options`` file for the corresponding request. @@ -509,7 +589,7 @@ def get_ssl_verify_option(method, url, settings, request_options=None): def get_request_options(method, url, settings): - # type: (AnyStr, AnyStr, AnySettingsContainer) -> SettingsType + # type: (str, str, AnySettingsContainer) -> SettingsType """ Obtains the *request options* corresponding to the request according to configuration file specified by pre-loaded setting ``weaver.request_options``. @@ -518,7 +598,7 @@ def get_request_options(method, url, settings): .. seealso:: - :func:`get_ssl_verify_option` - - `config/request_options.yml.example <../../config/config/request_options.yml.example>`_ + - `config/request_options.yml.example <../../../config/request_options.yml.example>`_ :param method: request method (GET, POST, etc.). :param url: request URL. 
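The new ``get_path_kvp`` and ``setup_loggers`` helpers added above can be exercised as follows; ``get_path_kvp`` builds Key-Value-Pair query strings and ``setup_loggers`` is intended to run once at application startup (sample values are illustrative):

    import logging

    from weaver.utils import get_path_kvp, setup_loggers

    get_path_kvp("https://weaver.example.com/ows/wps", service="WPS", request="GetCapabilities")
    # "https://weaver.example.com/ows/wps?service=WPS&request=GetCapabilities"
    get_path_kvp("/processes", tags=["demo", "test"])
    # "/processes?tags=demo,test"   (multiple values joined with 'sep', "," by default)

    settings = {"weaver.log_level": "DEBUG"}   # overrides any INI-configured logger levels
    setup_loggers(settings)
    logging.getLogger("weaver").debug("logging configured")   # emitted with the format from get_log_fmt()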
@@ -563,8 +643,8 @@ def get_request_options(method, url, settings): return request_options -def request_extra(method, # type: AnyStr - url, # type: AnyStr +def request_extra(method, # type: str + url, # type: str retries=None, # type: Optional[int] backoff=None, # type: Optional[Number] intervals=None, # type: Optional[List[Number]] @@ -723,7 +803,7 @@ def request_extra(method, # type: AnyStr def fetch_file(file_reference, file_outdir, settings=None, **request_kwargs): - # type: (AnyStr, AnyStr, Optional[AnySettingsContainer], **Any) -> AnyStr + # type: (str, str, Optional[AnySettingsContainer], **Any) -> str """ Fetches a file from a local path, an AWS-S3 bucket or remote URL, and dumps it's content to the specified output directory. @@ -748,9 +828,7 @@ def fetch_file(file_reference, file_outdir, settings=None, **request_kwargs): file_path = os.path.join(file_outdir, file_name) if file_reference.startswith("file://"): file_reference = file_reference[7:] - LOGGER.debug("Fetch file resolved:\n" - " Reference: [%s]\n" - " File Path: [%s]", file_href, file_path) + LOGGER.debug("Fetching file reference: [%s]", file_href) if os.path.isfile(file_reference): LOGGER.debug("Fetch file resolved as local reference.") # NOTE: @@ -760,7 +838,8 @@ def fetch_file(file_reference, file_outdir, settings=None, **request_kwargs): # Do symlink operation by hand instead of with argument to have Python-2 compatibility. if os.path.islink(file_reference): os.symlink(os.readlink(file_reference), file_path) - else: + # otherwise copy the file if not already available + elif not os.path.isfile(file_path) or os.path.realpath(file_path) != os.path.realpath(file_reference): shutil.copyfile(file_reference, file_path) elif file_reference.startswith("s3://"): LOGGER.debug("Fetch file resolved as S3 bucket reference.") @@ -798,7 +877,9 @@ def fetch_file(file_reference, file_outdir, settings=None, **request_kwargs): scheme = "" if len(scheme) < 2 else scheme[0] raise ValueError("Unresolved fetch file scheme: '{!s}', supported: {}" .format(scheme, list(SUPPORTED_FILE_SCHEMES))) - LOGGER.debug("Fetch file written") + LOGGER.debug("Fetch file resolved:\n" + " Reference: [%s]\n" + " File Path: [%s]", file_href, file_path) return file_path @@ -807,7 +888,7 @@ def fetch_file(file_reference, file_outdir, settings=None, **request_kwargs): def get_sane_name(name, min_len=3, max_len=None, assert_invalid=True, replace_character="_"): - # type: (AnyStr, Optional[int], Optional[Union[int, None]], Optional[bool], Optional[AnyStr]) -> Union[AnyStr, None] + # type: (str, Optional[int], Optional[Union[int, None]], Optional[bool], Optional[str]) -> Union[str, None] """ Returns a cleaned-up version of the input name, replacing invalid characters matched with ``REGEX_SEARCH_INVALID_CHARACTERS`` by ``replace_character``. @@ -821,7 +902,7 @@ def get_sane_name(name, min_len=3, max_len=None, assert_invalid=True, replace_ch :param assert_invalid: If ``True``, fail conditions or invalid characters will raise an error instead of replacing. :param replace_character: Single character to use for replacement of invalid ones if ``assert_invalid=False``. """ - if not isinstance(replace_character, six.string_types) and not len(replace_character) == 1: + if not isinstance(replace_character, str) and not len(replace_character) == 1: raise ValueError("Single replace character is expected, got invalid [{!s}]".format(replace_character)) max_len = max_len or len(name) if assert_invalid: @@ -841,8 +922,10 @@ def assert_sane_name(name, min_len=3, max_len=None): .. 
seealso:: - argument details in :func:`get_sane_name` """ - if name is None: - raise InvalidIdentifierValue("Invalid name : {0}".format(name)) + from weaver.exceptions import InvalidIdentifierValue, MissingIdentifierValue + + if name is None or len(name) == 0: + raise MissingIdentifierValue("Invalid name : {0}".format(name)) name = name.strip() if "--" in name \ or name.startswith("-") \ @@ -854,7 +937,7 @@ def clean_json_text_body(body): - # type: (AnyStr) -> AnyStr + # type: (str) -> str """ Cleans a textual body field of superfluous characters to provide a better human-readable text in a JSON response. """ diff --git a/weaver/wps/__init__.py b/weaver/wps/__init__.py new file mode 100644 index 000000000..d426a71ac --- /dev/null +++ b/weaver/wps/__init__.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Views for WPS-XML endpoint implemented with :mod:`pywps`. +""" +import logging + +from cornice.service import Service +from pyramid.settings import asbool + +from weaver.formats import OUTPUT_FORMAT_XML +from weaver.utils import get_settings +from weaver.wps.utils import get_wps_path +from weaver.wps_restapi import swagger_definitions as sd + + +def includeme(config): + from weaver.wps.app import pywps_view + + settings = get_settings(config) + logger = logging.getLogger(__name__) + if not asbool(settings.get("weaver.wps", True)): + logger.debug("Weaver WPS disabled. WPS KVP/XML endpoint will not be available.") + else: + logger.debug("Weaver WPS enabled.") + wps_path = get_wps_path(settings) + wps_service = Service(name="wps", path=wps_path) + logger.debug("Adding WPS KVP/XML schemas.") + wps_tags = [sd.TAG_GETCAPABILITIES, sd.TAG_DESCRIBEPROCESS, sd.TAG_EXECUTE, sd.TAG_WPS] + wps_service.add_view("GET", pywps_view, tags=wps_tags, renderer=OUTPUT_FORMAT_XML, + schema=sd.WPSEndpoint(), response_schemas=sd.wps_responses) + wps_service.add_view("POST", pywps_view, tags=wps_tags, renderer=OUTPUT_FORMAT_XML, + schema=sd.WPSEndpoint(), response_schemas=sd.wps_responses) + logger.debug("Adding WPS KVP/XML view.") + config.add_route(**sd.service_api_route_info(wps_service, settings)) + config.add_view(pywps_view, route_name=wps_service.name) diff --git a/weaver/wps/app.py b/weaver/wps/app.py new file mode 100644 index 000000000..d02520f76 --- /dev/null +++ b/weaver/wps/app.py @@ -0,0 +1,20 @@ +""" +pywps 4.x wrapper +""" +import logging + +from pyramid.wsgi import wsgiapp2 + +from weaver.wps.service import get_pywps_service + +LOGGER = logging.getLogger(__name__) + + +@wsgiapp2 +def pywps_view(environ, start_response): + """ + Served location for PyWPS Service that provides *older* WPS-1/2 XML endpoint. 
+ """ + LOGGER.debug("pywps env: %s", environ) + service = get_pywps_service(environ) + return service(environ, start_response) diff --git a/weaver/wps/service.py b/weaver/wps/service.py new file mode 100644 index 000000000..e0a8b082a --- /dev/null +++ b/weaver/wps/service.py @@ -0,0 +1,259 @@ +import logging +import os +from configparser import ConfigParser +from typing import TYPE_CHECKING + +from owslib.wps import WPSExecution +from pyramid.httpexceptions import HTTPBadRequest, HTTPSeeOther +from pyramid_celery import celery_app as app +from pywps.app import Process as ProcessWPS, WPSRequest +from pywps.app.Service import Service as ServiceWPS +from pywps.inout.storage import StorageAbstract +from pywps.response import WPSResponse +from pywps.response.execute import ExecuteResponse + +from weaver.database import get_db +from weaver.exceptions import handle_known_exceptions +from weaver.formats import CONTENT_TYPE_APP_JSON +from weaver.owsexceptions import OWSNoApplicableCode +from weaver.processes.convert import wps2json_job_payload +from weaver.processes.execution import submit_job_handler +from weaver.processes.types import PROCESS_WORKFLOW +from weaver.processes.utils import get_job_submission_response, get_process +from weaver.store.base import StoreProcesses +from weaver.utils import get_header, get_settings, get_weaver_url +from weaver.visibility import VISIBILITY_PUBLIC +from weaver.wps.utils import check_wps_status, get_wps_local_status_location, load_pywps_config +from weaver.wps_restapi import swagger_definitions as sd + +LOGGER = logging.getLogger(__name__) +if TYPE_CHECKING: + from typing import Any, Optional, Union + from weaver.typedefs import HTTPValid, JSON, SettingsType + + +class ReferenceStatusLocationStorage(StorageAbstract): + """ + Simple storage that simply redirects to a pre-existing status location. + """ + # pylint: disable=W0222 # ignore mismatch signature of method params not employed + + def __init__(self, url_location, settings): + # type: (str, SettingsType) -> None + self._url = url_location + # location might not exist yet based on worker execution timing + self._file = get_wps_local_status_location(url_location, settings, must_exist=False) + + def url(self, *_, **__): + return self._url + + def location(self, *_, **__): + return self._file + + def store(self, *_, **__): + pass + + def write(self, *_, **__): + pass + + +class WorkerExecuteResponse(ExecuteResponse): + """ + XML response generator from predefined job status URL and executed process definition. 
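In short, the ``ReferenceStatusLocationStorage`` class defined just above only echoes back a job status location that already exists elsewhere; roughly (the URL and ``settings`` are placeholders):

    storage = ReferenceStatusLocationStorage("https://weaver.example.com/jobs/1234", settings)
    storage.url()         # returns the job status URL untouched
    storage.location()    # local status file path resolved from that URL (the file may not exist yet)
    storage.store(None)   # no-op, nothing is ever written by this storage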
+ """ + # pylint: disable=W0231,W0233 # FIXME: tmp until patched + + def __init__(self, wps_request, uuid, process, job_url, settings, *_, **__): + # type: (WPSRequest, str, ProcessWPS, str, SettingsType, Any, Any) -> None + + # FIXME: https://github.com/geopython/pywps/pull/578 + # temp patch, do what 'ExecuteResponse.__init__' does bypassing the problem super() call + WPSResponse.__init__(self, wps_request, uuid) # pylint: disable=W0231,W0233 # tmp until patched + self.process = process + self.outputs = {o.identifier: o for o in self.process.outputs} + # should be following call, but causes infinite recursion until above fix is applied + # super(WorkerExecuteResponse, self).__init__(wps_request, job_id, process=wps_process) + # --- end of patch --- + + # extra setup + self.process._status_store = ReferenceStatusLocationStorage(job_url, settings) + self.store_status_file = True # enforce storage to provide the status location URL + self.wps_request.raw = None # make sure doc gets generated by disabling alternate raw data mode + self._update_status_doc() # generate 'doc' property with XML content for response + + +class WorkerService(ServiceWPS): + """ + Dispatches PyWPS requests from *older* WPS-1/2 XML endpoint to WPS-REST as appropriate. + + .. note:: + For every WPS-Request type, the parsing of XML content is already handled by the PyWPS service for GET/POST. + All data must be retrieved from parsed :class:`WPSRequest` to avoid managing argument location and WPS versions. + + When ``GetCapabilities`` or ``DescribeProcess`` requests are received, directly return to result as XML based + on content (no need to subprocess as Celery task that gets resolved quickly with only the process(es) details). + When JSON content is requested, instead return the redirect link to corresponding WPS-REST API endpoint. + + When receiving ``Execute`` request, convert the XML payload to corresponding JSON and + dispatch it to the Celery Worker to actually process it after job setup for monitoring. + """ + def __init__(self, *_, is_worker=False, settings=None, **__): + super(WorkerService, self).__init__(*_, **__) + self.is_worker = is_worker + self.settings = settings or get_settings(app) + + @handle_known_exceptions + def _get_capabilities_redirect(self, wps_request, *_, **__): + # type: (WPSRequest, Any, Any) -> Optional[Union[WPSResponse, HTTPValid]] + """ + Redirects to WPS-REST endpoint if requested ``Content-Type`` is JSON. + """ + req = wps_request.http_request + accept_type = get_header("Accept", req.headers) + if accept_type == CONTENT_TYPE_APP_JSON: + url = get_weaver_url(self.settings) + resp = HTTPSeeOther(location="{}{}".format(url, sd.processes_uri)) # redirect + setattr(resp, "_update_status", lambda *_, **__: None) # patch to avoid pywps server raising + return resp + return None + + def get_capabilities(self, wps_request, *_, **__): + # type: (WPSRequest, Any, Any) -> Union[WPSResponse, HTTPValid] + """ + Redirect to WPS-REST endpoint if requested ``Content-Type`` is JSON or handle ``GetCapabilities`` normally. + """ + resp = self._get_capabilities_redirect(wps_request, *_, **__) + return resp or super(WorkerService, self).get_capabilities(wps_request, *_, **__) + + @handle_known_exceptions + def _describe_process_redirect(self, wps_request, *_, **__): + # type: (WPSRequest, Any, Any) -> Optional[Union[WPSResponse, HTTPValid]] + """ + Redirects to WPS-REST endpoint if requested ``Content-Type`` is JSON. 
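From a client's perspective, the JSON redirect behaviour above looks roughly like the following (the endpoint URL is a placeholder and the ``requests`` usage is only a sketch, not how Weaver itself tests this):

    import requests

    resp = requests.get(
        "https://weaver.example.com/ows/wps",   # placeholder WPS endpoint
        params={"service": "WPS", "request": "GetCapabilities"},
        headers={"Accept": "application/json"},
        allow_redirects=False,
    )
    resp.status_code           # 303 (HTTPSeeOther)
    resp.headers["Location"]   # points to the WPS-REST '/processes' endpoint instead of XML content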
+ """ + req = wps_request.http_request + accept_type = get_header("Accept", req.headers) + if accept_type == CONTENT_TYPE_APP_JSON: + url = get_weaver_url(self.settings) + proc = wps_request.identifiers + if not proc: + raise HTTPBadRequest(sd.BadRequestGetProcessInfoResponse.description) + if len(proc) > 1: + raise HTTPBadRequest("Unsupported multi-process ID for description. Only provide one.") + path = sd.process_uri.format(process_id=proc[0]) + resp = HTTPSeeOther(location="{}{}".format(url, path)) # redirect + setattr(resp, "_update_status", lambda *_, **__: None) # patch to avoid pywps server raising + return resp + return None + + def describe(self, wps_request, *_, **__): + # type: (WPSRequest, Any, Any) -> Union[WPSResponse, HTTPValid] + """ + Redirect to WPS-REST endpoint if requested ``Content-Type`` is JSON or handle ``DescribeProcess`` normally. + """ + resp = self._describe_process_redirect(wps_request, *_, **__) + return resp or super(WorkerService, self).describe(wps_request, *_, **__) + + @handle_known_exceptions + def _submit_job(self, wps_request): + # type: (WPSRequest) -> Union[WPSResponse, HTTPValid, JSON] + """ + Dispatch operation to WPS-REST endpoint, which in turn should call back the real Celery Worker for execution. + """ + req = wps_request.http_request + pid = wps_request.identifier + proc = get_process(process_id=pid, settings=self.settings) # raises if invalid or missing + wps_process = self.processes.get(pid) + + # create the JSON payload from the XML content and submit job + is_workflow = proc.type == PROCESS_WORKFLOW + tags = req.args.get("tags", "").split(",") + ["xml", "wps-{}".format(wps_request.version)] + data = wps2json_job_payload(wps_request, wps_process) + body = submit_job_handler(data, self.settings, proc.processEndpointWPS1, + process_id=pid, is_local=True, is_workflow=is_workflow, visibility=VISIBILITY_PUBLIC, + language=wps_request.language, tags=tags, auth=dict(req.headers)) + + # if Accept was JSON, provide response content as is + accept_type = get_header("Accept", req.headers) + if accept_type == CONTENT_TYPE_APP_JSON: + resp = get_job_submission_response(body) + setattr(resp, "_update_status", lambda *_, **__: None) # patch to avoid pywps server raising + return resp + + return body + + def execute(self, identifier, wps_request, uuid): + # type: (str, WPSRequest, str) -> Union[WPSResponse, HTTPValid] + """ + Submit WPS request to corresponding WPS-REST endpoint and convert back for requested ``Accept`` content-type. + + Overrides the original execute operation, that instead will get handled by :meth:`execute_job` following + callback from Celery Worker that handles process job creation and monitoring. + + If ``Accept`` is JSON, the result is directly returned from :meth:`_submit_job`. + If ``Accept`` is XML or undefined, :class:`WorkerExecuteResponse` converts the received JSON with XML template. 
+ """ + result = self._submit_job(wps_request) + if not isinstance(result, dict): # JSON + return result # direct WPS response + + # otherwise, recreate the equivalent content with expected XML template format + job_id = result["jobID"] + job_url = result["location"] + wps_process = self.processes.get(wps_request.identifier) + + # when called by the WSGI app, 'WorkerExecuteResponse.__call__' on will generate the XML from 'doc' property, + # which itself is generated by template substitution of data from above 'json' property + try: + return WorkerExecuteResponse(wps_request, job_id, wps_process, job_url, settings=self.settings) + except Exception as ex: # noqa + LOGGER.exception("Error building XML response by PyWPS Service during WPS Execute result from worker.") + message = "Failed building XML response from WPS Execute result. Error [{!r}]".format(ex) + raise OWSNoApplicableCode(message, locator=job_id) + + def execute_job(self, process_id, wps_inputs, wps_outputs, mode, job_uuid): + """ + Real execution of the process by active Celery Worker. + """ + execution = WPSExecution(version="2.0", url="localhost") + xml_request = execution.buildRequest(process_id, wps_inputs, wps_outputs, mode=mode, lineage=True) + wps_request = WPSRequest() + wps_request.identifier = process_id + wps_request.set_version("2.0.0") + request_parser = wps_request._post_request_parser(wps_request.WPS.Execute().tag) # noqa: W0212 + request_parser(xml_request) + # NOTE: + # Setting 'status = false' will disable async execution of 'pywps.app.Process.Process' + # but this is needed since this job is running within Celery worker already async + # (daemon process can't have children processes) + # Because if how the code in PyWPS is made, we have to re-enable creation of status file + wps_request.status = "false" + wps_response = super(WorkerService, self).execute(process_id, wps_request, job_uuid) + wps_response.store_status_file = True + # update execution status with actual status file and apply required references + execution = check_wps_status(location=wps_response.process.status_location, settings=self.settings) + execution.request = xml_request + return execution + + +def get_pywps_service(environ=None, is_worker=False): + """ + Generates the PyWPS Service that provides *older* WPS-1/2 XML endpoint. + """ + environ = environ or {} + try: + # get config file + settings = get_settings(app) + pywps_cfg = environ.get("PYWPS_CFG") or settings.get("PYWPS_CFG") or os.getenv("PYWPS_CFG") + if not isinstance(pywps_cfg, ConfigParser) or not settings.get("weaver.wps_configured"): + load_pywps_config(app, config=pywps_cfg) + + # call pywps application with processes filtered according to the adapter's definition + process_store = get_db(app).get_store(StoreProcesses) + processes_wps = [process.wps() for process in + process_store.list_processes(visibility=VISIBILITY_PUBLIC)] + service = WorkerService(processes_wps, is_worker=is_worker, settings=settings) + except Exception as ex: + LOGGER.exception("Error occurred during PyWPS Service and/or Processes setup.") + raise OWSNoApplicableCode("Failed setup of PyWPS Service and/or Processes. 
Error [{!r}]".format(ex)) + return service diff --git a/weaver/wps.py b/weaver/wps/utils.py similarity index 76% rename from weaver/wps.py rename to weaver/wps/utils.py index c72e87a74..07c056c51 100644 --- a/weaver/wps.py +++ b/weaver/wps/utils.py @@ -1,44 +1,35 @@ -""" -pywps 4.x wrapper -""" import logging import os import tempfile +from configparser import ConfigParser from typing import TYPE_CHECKING +from urllib.parse import urlparse -import six -from lxml import etree +import lxml.etree from owslib.wps import WPSExecution from pyramid.httpexceptions import HTTPNotFound -from pyramid.settings import asbool -from pyramid.threadlocal import get_current_request -from pyramid.wsgi import wsgiapp2 -from pyramid_celery import celery_app as app from pywps import configuration as pywps_config -from pywps.app.Service import Service -from six.moves.configparser import ConfigParser -from six.moves.urllib.parse import urlparse from weaver.config import get_weaver_configuration -from weaver.database import get_db -from weaver.owsexceptions import OWSNoApplicableCode -from weaver.store.base import StoreProcesses -from weaver.utils import get_settings, get_url_without_query, get_weaver_url, make_dirs, request_extra -from weaver.visibility import VISIBILITY_PUBLIC +from weaver.utils import get_settings, get_url_without_query, get_weaver_url, is_uuid, make_dirs, request_extra LOGGER = logging.getLogger(__name__) if TYPE_CHECKING: - from weaver.typedefs import AnySettingsContainer # noqa: F401 - from typing import AnyStr, Dict, Union, Optional # noqa: F401 + from typing import Dict, Union, Optional + + from pyramid.request import Request + from owslib.wps import WebProcessingService + + from weaver.typedefs import AnySettingsContainer, XML def _get_settings_or_wps_config(container, # type: AnySettingsContainer - weaver_setting_name, # type: AnyStr - config_setting_section, # type: AnyStr - config_setting_name, # type: AnyStr - default_not_found, # type: AnyStr - message_not_found, # type: AnyStr - ): # type: (...) -> AnyStr + weaver_setting_name, # type: str + config_setting_section, # type: str + config_setting_name, # type: str + default_not_found, # type: str + message_not_found, # type: str + ): # type: (...) -> str settings = get_settings(container) found = settings.get(weaver_setting_name) @@ -46,32 +37,34 @@ def _get_settings_or_wps_config(container, # type: AnySettingsC if not settings.get("weaver.wps_configured"): load_pywps_config(container) found = pywps_config.CONFIG.get(config_setting_section, config_setting_name) - if not isinstance(found, six.string_types): + if not isinstance(found, str): LOGGER.warning("%s not set in settings or WPS configuration, using default value.", message_not_found) found = default_not_found return found.strip() def get_wps_path(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str """ Retrieves the WPS path (without hostname). + Searches directly in settings, then `weaver.wps_cfg` file, or finally, uses the default values if not found. """ return _get_settings_or_wps_config(container, "weaver.wps_path", "server", "url", "/ows/wps", "WPS path") def get_wps_url(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str """ - Retrieves the full WPS URL (hostname + WPS path). + Retrieves the full WPS URL (hostname + WPS path) + Searches directly in settings, then `weaver.wps_cfg` file, or finally, uses the default values if not found. 
""" return get_settings(container).get("weaver.wps_url") or get_weaver_url(container) + get_wps_path(container) def get_wps_output_dir(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str """ Retrieves the WPS output directory path where to write XML and result files. Searches directly in settings, then `weaver.wps_cfg` file, or finally, uses the default values if not found. @@ -82,7 +75,7 @@ def get_wps_output_dir(container): def get_wps_output_path(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str """ Retrieves the WPS output path (without hostname) for staging XML status, logs and process outputs. Searches directly in settings, then `weaver.wps_cfg` file, or finally, uses the default values if not found. @@ -91,7 +84,7 @@ def get_wps_output_path(container): def get_wps_output_url(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str """ Retrieves the WPS output URL that maps to WPS output directory path. Searches directly in settings, then `weaver.wps_cfg` file, or finally, uses the default values if not found. @@ -103,8 +96,8 @@ def get_wps_output_url(container): def get_wps_local_status_location(url_status_location, container, must_exist=True): - # type: (AnyStr, AnySettingsContainer, bool) -> Optional[AnyStr] - """Attempts to retrieve the local file path corresponding to the WPS status location as URL. + # type: (str, AnySettingsContainer, bool) -> Optional[str] + """Attempts to retrieve the local XML file path corresponding to the WPS status location as URL. :param url_status_location: URL reference pointing to some WPS status location XML. :param container: any settings container to map configured local paths. @@ -118,7 +111,15 @@ def get_wps_local_status_location(url_status_location, container, must_exist=Tru out_path = os.path.join(dir_path, req_out_url.replace(wps_out_url, "").lstrip("/")) else: out_path = url_status_location.replace("file://", "") - if must_exist and not os.path.isfile(out_path): + found = os.path.isfile(out_path) + if not found and "/jobs/" in url_status_location: + job_uuid = url_status_location.rsplit("/jobs/", 1)[-1].split("/", 1)[0] + if is_uuid(job_uuid): + out_path_join = os.path.join(dir_path, "{}.xml".format(job_uuid)) + found = os.path.isfile(out_path_join) + if found or not must_exist: + out_path = out_path_join + if not found and must_exist: out_path_join = os.path.join(dir_path, out_path[1:] if out_path.startswith("/") else out_path) if not os.path.isfile(out_path_join): LOGGER.debug("Could not map WPS status reference [%s] to input local file path [%s].", @@ -129,12 +130,16 @@ def get_wps_local_status_location(url_status_location, container, must_exist=Tru return out_path -def check_wps_status(url=None, response=None, sleep_secs=2, verify=True, settings=None): - # type: (Optional[AnyStr], Optional[etree.ElementBase], int, bool, Optional[AnySettingsContainer]) -> WPSExecution +def check_wps_status(location=None, # type: Optional[str] + response=None, # type: Optional[XML] + sleep_secs=2, # type: int + verify=True, # type: bool + settings=None, # type: Optional[AnySettingsContainer] + ): # type: (...) -> WPSExecution """ Run :func:`owslib.wps.WPSExecution.checkStatus` with additional exception handling. - :param url: job URL where to look for job status. + :param location: job URL or file path where to look for job status. :param response: WPS response document of job status. 
:param sleep_secs: number of seconds to sleep before returning control to the caller. :param verify: Flag to enable SSL verification. @@ -143,9 +148,9 @@ def check_wps_status(url=None, response=None, sleep_secs=2, verify=True, setting """ def _retry_file(): LOGGER.warning("Failed retrieving WPS status-location, attempting with local file.") - out_path = get_wps_local_status_location(url, settings) + out_path = get_wps_local_status_location(location, settings) if not out_path: - raise HTTPNotFound("Could not find file resource from [{}].".format(url)) + raise HTTPNotFound("Could not find file resource from [{}].".format(location)) LOGGER.info("Resolved WPS status-location using local file reference.") return open(out_path, "r").read() @@ -153,11 +158,11 @@ def _retry_file(): if response: LOGGER.debug("Retrieving WPS status from XML response document...") xml = response - elif url: + elif location: xml_resp = HTTPNotFound() try: LOGGER.debug("Attempt to retrieve WPS status-location from URL...") - xml_resp = request_extra("get", url, verify=verify, settings=settings) + xml_resp = request_extra("get", location, verify=verify, settings=settings) xml = xml_resp.content except Exception as ex: LOGGER.debug("Got exception during get status: [%r]", ex) @@ -167,18 +172,18 @@ def _retry_file(): xml = _retry_file() else: raise Exception("Missing status-location URL/file reference or response with XML object.") - if isinstance(xml, six.string_types): + if isinstance(xml, str): xml = xml.encode("utf8", errors="ignore") execution.checkStatus(response=xml, sleepSecs=sleep_secs) if execution.response is None: raise Exception("Missing response, cannot check status.") - if not isinstance(execution.response, etree._Element): # noqa: W0212 - execution.response = etree.fromstring(execution.response) + if not isinstance(execution.response, lxml.etree._Element): # noqa + execution.response = lxml.etree.fromstring(execution.response) return execution def load_pywps_config(container, config=None): - # type: (AnySettingsContainer, Optional[Union[AnyStr, Dict[AnyStr, AnyStr]]]) -> ConfigParser + # type: (AnySettingsContainer, Optional[Union[str, Dict[str, str]]]) -> ConfigParser """ Loads and updates the PyWPS configuration using Weaver settings. """ @@ -226,7 +231,7 @@ def load_pywps_config(container, config=None): # note: needs to be configured even when using S3 bucket since XML status is provided locally if "weaver.wps_output_url" not in settings: output_path = settings.get("weaver.wps_output_path", "") - if isinstance(output_path, six.string_types): + if isinstance(output_path, str): output_url = os.path.join(get_weaver_url(settings), output_path.strip("/")) else: output_url = pywps_config.get_config_value("server", "outputurl") @@ -276,38 +281,38 @@ def load_pywps_config(container, config=None): return pywps_config.CONFIG -# @app.task(bind=True) -@wsgiapp2 -def pywps_view(environ, start_response): - """ - * TODO: add xml response renderer +def set_wps_language(wps, accept_language=None, request=None): + # type: (WebProcessingService, Optional[str], Optional[Request]) -> None + """Set the :attr:`language` property on the :class:`WebProcessingService` object. + + Given the `Accept-Language` header value, match the best language + to the supported languages. + + By default, and if no match is found, the :attr:`WebProcessingService.language` + property is set to None. 
+ + https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language + (q-factor weighting is ignored, only order is considered) + + :param wps: process for which to set the language header if it is accepted + :param str accept_language: the value of the Accept-Language header + :param request: request from which to extract Accept-Language header if not provided directly """ - LOGGER.debug("pywps env: %s", environ.keys()) - - try: - # get config file - settings = get_settings(app) - pywps_cfg = environ.get("PYWPS_CFG") or settings.get("PYWPS_CFG") or os.getenv("PYWPS_CFG") - if not isinstance(pywps_cfg, ConfigParser) or not settings.get("weaver.wps_configured"): - load_pywps_config(app, config=pywps_cfg) - - # call pywps application with processes filtered according to the adapter"s definition - process_store = get_db(app).get_store(StoreProcesses) - processes_wps = [process.wps() for process in - process_store.list_processes(visibility=VISIBILITY_PUBLIC, request=get_current_request())] - service = Service(processes_wps) - except Exception as ex: - LOGGER.exception("Error occurred during PyWPS Service and/or Processes setup.") - raise OWSNoApplicableCode("Failed setup of PyWPS Service and/or Processes. Error [{!r}]".format(ex)) - - return service(environ, start_response) - - -def includeme(config): - settings = get_settings(config) - if asbool(settings.get("weaver.wps", True)): - LOGGER.debug("Weaver WPS enabled.") - config.include("weaver.config") - wps_path = get_wps_path(settings) - config.add_route("wps", wps_path) - config.add_view(pywps_view, route_name="wps") + if not accept_language and request: + accept_language = request.accept_language.header_value + + if not accept_language: + return + + if not hasattr(wps, "languages"): + # owslib version doesn't support setting a language + return + + accepted_languages = [lang.strip().split(";")[0] for lang in accept_language.lower().split(",")] + + for accept in accepted_languages: + for language in wps.languages.supported: # noqa + # Accept-Language header could be only 'fr' instead of 'fr-CA' + if language.lower().startswith(accept): + wps.language = language + return diff --git a/weaver/wps_restapi/__init__.py b/weaver/wps_restapi/__init__.py index 251c9e4ac..af8af265d 100644 --- a/weaver/wps_restapi/__init__.py +++ b/weaver/wps_restapi/__init__.py @@ -2,25 +2,22 @@ from pyramid.settings import asbool -from weaver.wps_restapi import api -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON +from weaver.formats import OUTPUT_FORMAT_JSON LOGGER = logging.getLogger(__name__) def includeme(config): - from weaver.wps_restapi import swagger_definitions as sd + from weaver.wps_restapi import api, swagger_definitions as sd + settings = config.registry.settings if asbool(settings.get("weaver.wps_restapi", True)): LOGGER.info("Adding WPS REST API...") config.registry.settings["handle_exceptions"] = False # avoid cornice conflicting views - config.include("cornice") - config.include("cornice_swagger") config.include("weaver.wps_restapi.jobs") config.include("weaver.wps_restapi.providers") config.include("weaver.wps_restapi.processes") config.include("weaver.wps_restapi.quotation") - config.include("pyramid_mako") config.add_route(**sd.service_api_route_info(sd.api_frontpage_service, settings)) config.add_route(**sd.service_api_route_info(sd.api_swagger_json_service, settings)) config.add_route(**sd.service_api_route_info(sd.api_swagger_ui_service, settings)) diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index 
c448af137..02a75c2a6 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -1,7 +1,7 @@ import logging -from typing import TYPE_CHECKING, AnyStr, Optional +from typing import TYPE_CHECKING +from urllib.parse import urlparse -import six from cornice.service import get_services from cornice_swagger import CorniceSwagger from pyramid.authentication import Authenticated, IAuthenticationPolicy @@ -20,18 +20,19 @@ from pyramid.response import Response from pyramid.settings import asbool from simplejson import JSONDecodeError -from six.moves.urllib.parse import urlparse from weaver.__meta__ import __version__ as weaver_version -from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_TEXT_PLAIN +from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_TEXT_PLAIN, OUTPUT_FORMAT_JSON from weaver.owsexceptions import OWSException from weaver.utils import get_header, get_settings, get_weaver_url +from weaver.wps.utils import get_wps_url from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.colander_extras import CustomTypeConversionDispatcher -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON, get_wps_restapi_base_url, wps_restapi_base_path +from weaver.wps_restapi.utils import get_wps_restapi_base_url, wps_restapi_base_path if TYPE_CHECKING: - from weaver.typedefs import JSON # noqa: F401 + from typing import Optional + from weaver.typedefs import JSON LOGGER = logging.getLogger(__name__) @@ -43,7 +44,6 @@ def api_frontpage(request): # import here to avoid circular import errors from weaver.config import get_weaver_configuration - from weaver.wps import get_wps_url settings = get_settings(request) weaver_url = get_weaver_url(settings) @@ -66,7 +66,7 @@ def api_frontpage(request): if weaver_api_def: weaver_links.append({"href": weaver_api_def, "rel": "service", "type": CONTENT_TYPE_APP_JSON, "title": "API definition of this service."}) - if isinstance(weaver_api_doc, six.string_types): + if isinstance(weaver_api_doc, str): if "." in weaver_api_doc: # pylint: disable=E1135,unsupported-membership-test ext_type = weaver_api_doc.split(".")[-1] doc_type = "application/{}".format(ext_type) @@ -124,7 +124,7 @@ def api_conformance(request): # noqa: F811 def get_swagger_json(http_scheme="http", http_host="localhost", base_url=None, use_docstring_summary=True): - # type: (AnyStr, AnyStr, Optional[AnyStr], bool) -> dict + # type: (str, str, Optional[str], bool) -> JSON """Obtains the JSON schema of weaver API from request and response views schemas. :param http_scheme: Protocol scheme to use for building the API base if not provided by base URL parameter. 
@@ -178,10 +178,10 @@ def api_swagger_ui(request): def get_request_info(request, detail=None): - # type: (Request, Optional[AnyStr]) -> JSON + # type: (Request, Optional[str]) -> JSON """Provided additional response details based on the request and execution stack on failure.""" content = {u"route": str(request.upath_info), u"url": str(request.url), u"method": request.method} - if isinstance(detail, six.string_types): + if isinstance(detail, str): content.update({"detail": detail}) if hasattr(request, "exception"): # handle error raised simply by checking for 'json' property in python 3 when body is invalid diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py index f26e8cc00..5578d1a38 100644 --- a/weaver/wps_restapi/colander_extras.py +++ b/weaver/wps_restapi/colander_extras.py @@ -80,6 +80,15 @@ def deserialize(self, cstruct): return result +class OneOfCaseInsensitive(colander.OneOf): + """ + Validator that ensures the given value matches one of the available choices, but allowing case insensitve values. + """ + def __call__(self, node, value): + if str(value).lower() not in (choice.lower() for choice in self.choices): + return super(OneOfCaseInsensitive, self).__call__(node, value) + + class OneOfMappingSchema(colander.MappingSchema): """ Allows specifying multiple supported mapping schemas variants for an underlying schema definition. diff --git a/weaver/wps_restapi/jobs/__init__.py b/weaver/wps_restapi/jobs/__init__.py index 906c2691d..14dc53cd1 100644 --- a/weaver/wps_restapi/jobs/__init__.py +++ b/weaver/wps_restapi/jobs/__init__.py @@ -1,8 +1,8 @@ import logging +from weaver.formats import OUTPUT_FORMAT_JSON from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.jobs import jobs as j -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON LOGGER = logging.getLogger(__name__) diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index dd8b6d901..bb65c54b1 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -6,6 +6,7 @@ from pyramid.settings import asbool from pyramid_celery import celery_app as app +from notify import encrypt_email from weaver import sort, status from weaver.database import get_db from weaver.datatype import Job @@ -18,17 +19,16 @@ ServiceNotFound, log_unhandled_exceptions ) +from weaver.formats import OUTPUT_FORMAT_JSON from weaver.store.base import StoreJobs, StoreProcesses, StoreServices from weaver.utils import get_any_id, get_any_value, get_settings from weaver.visibility import VISIBILITY_PUBLIC -from weaver.wps import get_wps_output_url +from weaver.wps.utils import get_wps_output_url from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.jobs.notify import encrypt_email -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON if TYPE_CHECKING: - from weaver.typedefs import AnySettingsContainer, JSON # noqa: F401 - from typing import AnyStr, Optional, Tuple, Union # noqa: F401 + from typing import Optional, Tuple + from weaver.typedefs import AnySettingsContainer, JSON LOGGER = get_task_logger(__name__) @@ -44,7 +44,7 @@ def get_job(request): job_id = request.matchdict.get("job_id") store = get_db(request).get_store(StoreJobs) try: - job = store.fetch_by_id(job_id, request=request) + job = store.fetch_by_id(job_id) except JobNotFound: raise HTTPNotFound("Could not find job with specified 'job_id'.") @@ -59,7 +59,7 @@ def get_job(request): def validate_service_process(request): - # type: (Request) -> 
Tuple[Union[None, AnyStr], Union[None, AnyStr]] + # type: (Request) -> Tuple[Optional[str], Optional[str]] """ Verifies that service or process specified by path or query will raise the appropriate error if applicable. """ @@ -74,18 +74,18 @@ def validate_service_process(request): item_type = "Service" item_test = service_name store = get_db(request).get_store(StoreServices) - service = store.fetch_by_name(service_name, visibility=VISIBILITY_PUBLIC, request=request) + service = store.fetch_by_name(service_name, visibility=VISIBILITY_PUBLIC) if process_name: item_type = "Process" item_test = process_name # local process if not service: store = get_db(request).get_store(StoreProcesses) - store.fetch_by_id(process_name, visibility=VISIBILITY_PUBLIC, request=request) + store.fetch_by_id(process_name, visibility=VISIBILITY_PUBLIC) # remote process else: from weaver.wps_restapi.processes.processes import list_remote_processes - processes = list_remote_processes(service, request=request) + processes = list_remote_processes(service, request) if process_name not in [p.id for p in processes]: raise ProcessNotFound except (ServiceNotFound, ProcessNotFound): @@ -131,7 +131,7 @@ def get_queried_jobs(request): groups = request.params.get("groups", "") groups = groups.split(",") if groups else None store = get_db(request).get_store(StoreJobs) - items, total = store.find_jobs(request, group_by=groups, **filters) + items, total = store.find_jobs(request=request, group_by=groups, **filters) body = {"total": total} def _job_list(jobs): diff --git a/weaver/wps_restapi/processes/__init__.py b/weaver/wps_restapi/processes/__init__.py index 6103055a6..2e6d70b6f 100644 --- a/weaver/wps_restapi/processes/__init__.py +++ b/weaver/wps_restapi/processes/__init__.py @@ -4,9 +4,9 @@ def includeme(config): + from weaver.formats import OUTPUT_FORMAT_JSON from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.processes import processes as p - from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON LOGGER.info("Adding WPS REST API processes...") settings = config.registry.settings diff --git a/weaver/wps_restapi/processes/processes.py b/weaver/wps_restapi/processes/processes.py index 750fd86cf..dfda5d103 100644 --- a/weaver/wps_restapi/processes/processes.py +++ b/weaver/wps_restapi/processes/processes.py @@ -1,431 +1,40 @@ import logging -import os -from time import sleep from typing import TYPE_CHECKING import colander -import six -from celery.utils.log import get_task_logger -from lxml import etree -from owslib.util import clean_ows_url -from owslib.wps import ComplexDataInput, WebProcessingService +from owslib.wps import WebProcessingService from pyramid.httpexceptions import ( HTTPBadRequest, - HTTPCreated, HTTPForbidden, HTTPNotFound, - HTTPNotImplemented, HTTPOk, HTTPServiceUnavailable, - HTTPSuccessful, - HTTPUnauthorized, HTTPUnprocessableEntity ) from pyramid.request import Request from pyramid.settings import asbool -from pyramid_celery import celery_app as app from weaver.config import WEAVER_CONFIGURATION_EMS, get_weaver_configuration from weaver.database import get_db -from weaver.datatype import Service -from weaver.exceptions import InvalidIdentifierValue, ProcessNotAccessible, ProcessNotFound, log_unhandled_exceptions -from weaver.execute import ( - EXECUTE_MODE_ASYNC, - EXECUTE_MODE_AUTO, - EXECUTE_MODE_SYNC, - EXECUTE_RESPONSE_DOCUMENT, - EXECUTE_TRANSMISSION_MODE_REFERENCE -) -from weaver.formats import CONTENT_TYPE_APP_JSON -from weaver.owsexceptions import OWSNoApplicableCode 
-from weaver.processes import opensearch, wps_package -from weaver.processes.constants import WPS_COMPLEX_DATA -from weaver.processes.types import PROCESS_BUILTIN, PROCESS_WORKFLOW -from weaver.processes.utils import convert_process_wps_to_db, deploy_process_from_payload, jsonify_output -from weaver.status import STATUS_ACCEPTED, STATUS_FAILED, STATUS_STARTED, STATUS_SUCCEEDED, map_status -from weaver.store.base import StoreJobs, StoreProcesses, StoreServices -from weaver.utils import ( - get_any_id, - get_any_value, - get_cookie_headers, - get_settings, - get_ssl_verify_option, - raise_on_xml_exception, - request_extra, - wait_secs -) +from weaver.datatype import Process, Service +from weaver.exceptions import ProcessNotFound, log_unhandled_exceptions +from weaver.formats import OUTPUT_FORMAT_JSON +from weaver.processes import opensearch +from weaver.processes.execution import submit_job +from weaver.processes.types import PROCESS_BUILTIN +from weaver.processes.utils import deploy_process_from_payload, get_job_submission_response, get_process +from weaver.store.base import StoreProcesses, StoreServices +from weaver.utils import get_any_id, get_cookie_headers, get_settings, parse_request_query, request_extra from weaver.visibility import VISIBILITY_PUBLIC, VISIBILITY_VALUES -from weaver.wps import ( - check_wps_status, - get_wps_local_status_location, - get_wps_output_path, - get_wps_output_url, - load_pywps_config -) +from weaver.wps.utils import set_wps_language from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.jobs.notify import encrypt_email, notify_job_complete -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON, get_wps_restapi_base_url, parse_request_query if TYPE_CHECKING: - # pylint: disable=W0611,unused-import - from weaver.datatype import Job, Process as ProcessDB # noqa: F401 - from weaver.typedefs import JSON, SettingsType # noqa: F401 - from typing import AnyStr, List, Tuple, Optional # noqa: F401 + from weaver.typedefs import JSON + from typing import List, Tuple LOGGER = logging.getLogger(__name__) -# job process execution progress -JOB_PROGRESS_SETUP = 1 -JOB_PROGRESS_DESCRIBE = 2 -JOB_PROGRESS_GET_INPUTS = 4 -JOB_PROGRESS_GET_OUTPUTS = 6 -JOB_PROGRESS_EXECUTE_REQUEST = 8 -JOB_PROGRESS_EXECUTE_STATUS_LOCATION = 10 -JOB_PROGRESS_EXECUTE_MONITOR_START = 15 -JOB_PROGRESS_EXECUTE_MONITOR_LOOP = 20 -JOB_PROGRESS_EXECUTE_MONITOR_ERROR = 85 -JOB_PROGRESS_EXECUTE_MONITOR_END = 90 -JOB_PROGRESS_NOTIFY = 95 -JOB_PROGRESS_DONE = 100 - - -@app.task(bind=True) -def execute_process(self, job_id, url, headers=None, notification_email=None): - LOGGER.debug("Job execute process called.") - settings = get_settings(app) - task_logger = get_task_logger(__name__) - load_pywps_config(settings) - - task_logger.debug("Job task setup.") - - # reset the connection because we are in a forked celery process - db = get_db(app, reset_connection=True) - store = db.get_store(StoreJobs) - - job = store.fetch_by_id(job_id) - job.task_id = self.request.id - job.progress = JOB_PROGRESS_SETUP - job.save_log(logger=task_logger, message="Job task setup completed.") - job = store.update_job(job) - - try: - try: - job.progress = JOB_PROGRESS_DESCRIBE - job.save_log(logger=task_logger, message="Execute WPS request for process [{!s}]".format(job.process)) - ssl_verify = get_ssl_verify_option("get", url, settings=settings) - wps = WebProcessingService(url=url, headers=get_cookie_headers(headers), verify=ssl_verify) - set_wps_language(wps, accept_language=job.accept_language) - 
raise_on_xml_exception(wps._capabilities) # noqa - except Exception as ex: - raise OWSNoApplicableCode("Failed to retrieve WPS capabilities. Error: [{}].".format(str(ex))) - try: - process = wps.describeprocess(job.process) - except Exception as ex: - raise OWSNoApplicableCode("Failed to retrieve WPS process description. Error: [{}].".format(str(ex))) - - # prepare inputs - job.progress = JOB_PROGRESS_GET_INPUTS - job.save_log(logger=task_logger, message="Fetching job input definitions.") - complex_inputs = [] - for process_input in process.dataInputs: - if WPS_COMPLEX_DATA in process_input.dataType: - complex_inputs.append(process_input.identifier) - - try: - wps_inputs = list() - for process_input in job.inputs: - input_id = get_any_id(process_input) - process_value = get_any_value(process_input) - # in case of array inputs, must repeat (id,value) - input_values = process_value if isinstance(process_value, list) else [process_value] - - # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file:// - input_values = [val[7:] if str(val).startswith("file://") else val for val in input_values] - - # need to use ComplexDataInput structure for complex input - # need to use literal String for anything else than complex - # TODO: BoundingBox not supported - wps_inputs.extend([ - (input_id, ComplexDataInput(input_value) if input_id in complex_inputs else str(input_value)) - for input_value in input_values]) - except KeyError: - wps_inputs = [] - - # prepare outputs - job.progress = JOB_PROGRESS_GET_OUTPUTS - job.save_log(logger=task_logger, message="Fetching job output definitions.") - outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs] - - mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC - job.progress = JOB_PROGRESS_EXECUTE_REQUEST - job.save_log(logger=task_logger, message="Starting job process execution.") - job.save_log(logger=task_logger, - message="Following updates could take a while until the Application Package answers...") - execution = wps.execute(job.process, inputs=wps_inputs, output=outputs, mode=mode, lineage=True) - if not execution.process and execution.errors: - raise execution.errors[0] - - # adjust status location - wps_status_path = get_wps_local_status_location(execution.statusLocation, settings) - job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION - LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path) - if not wps_status_path.startswith("http") and not os.path.isfile(wps_status_path): - LOGGER.warning("WPS status location not resolved to local path: [%s]", wps_status_path) - job.save_log(logger=task_logger, level=logging.DEBUG, - message="Updated job status location: [{}].".format(wps_status_path)) - - job.status = map_status(STATUS_STARTED) - job.status_message = execution.statusMessage or "{} initiation done.".format(str(job)) - job.status_location = wps_status_path - job.request = execution.request - job.response = etree.tostring(execution.response) - job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START - job.save_log(logger=task_logger, message="Starting monitoring of job execution.") - job = store.update_job(job) - - max_retries = 5 - num_retries = 0 - run_step = 0 - while execution.isNotComplete() or run_step == 0: - if num_retries >= max_retries: - raise Exception("Could not read status document after {} retries. 
Giving up.".format(max_retries)) - try: - # NOTE: - # Don't actually log anything here until process is completed (success or fail) so that underlying - # WPS execution logs can be inserted within the current job log and appear continuously. - # Only update internal job fields in case they get referenced elsewhere. - job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP - execution = check_wps_status(url=wps_status_path, settings=settings, sleep_secs=wait_secs(run_step)) - job_msg = (execution.statusMessage or "").strip() - job.response = etree.tostring(execution.response) - job.status = map_status(execution.getStatus()) - job.status_message = "Job execution monitoring (progress: {}%, status: {})."\ - .format(execution.percentCompleted, job_msg or "n/a") - # job.save_log(logger=task_logger) - # job = store.update_job(job) - - if execution.isComplete(): - job.mark_finished() - job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END - msg_progress = " (status: {})".format(job_msg) if job_msg else "" - if execution.isSucceded(): - job.status = map_status(STATUS_SUCCEEDED) - job.status_message = "Job succeeded{}.".format(msg_progress) - wps_package.retrieve_package_job_log(execution, job) - job.save_log(logger=task_logger) - job_results = [jsonify_output(output, process, settings) - for output in execution.processOutputs] - job.results = make_results_relative(job_results, settings) - else: - task_logger.debug("Job failed.") - job.status_message = "Job failed{}.".format(msg_progress) - wps_package.retrieve_package_job_log(execution, job) - job.save_log(errors=execution.errors, logger=task_logger) - task_logger.debug("Mapping Job references with generated WPS locations.") - map_locations(job, settings) - - except Exception as exc: - num_retries += 1 - task_logger.debug("Exception raised: %s", repr(exc)) - job.status_message = "Could not read status XML document for {!s}. Trying again...".format(job) - job.save_log(errors=execution.errors, logger=task_logger) - sleep(1) - else: - # job.status_message = "Update {}...".format(str(job)) - # job.save_log(logger=task_logger) - num_retries = 0 - run_step += 1 - finally: - job = store.update_job(job) - - except Exception as exc: - LOGGER.exception("Failed running [%s]", job) - job.status = map_status(STATUS_FAILED) - job.status_message = "Failed to run {!s}.".format(job) - job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR - exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__) - errors = "{0}: {1!s}".format(exception_class, exc) - job.save_log(errors=errors, logger=task_logger) - finally: - job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END - job.status_message = "Job {}.".format(job.status) - job.save_log(logger=task_logger) - - # Send email if requested - if notification_email is not None: - job.progress = JOB_PROGRESS_NOTIFY - try: - notify_job_complete(job, notification_email, settings) - message = "Notification email sent successfully." 
- job.save_log(logger=task_logger, message=message) - except Exception as exc: - exception_class = "{}.{}".format(type(exc).__module__, type(exc).__name__) - exception = "{0}: {1!s}".format(exception_class, exc) - message = "Couldn't send notification email ({})".format(exception) - job.save_log(errors=message, logger=task_logger, message=message) - - job.progress = JOB_PROGRESS_DONE - job.save_log(logger=task_logger, message="Job task complete.") - job = store.update_job(job) - - return job.status - - -def make_results_relative(results, settings): - # type: (List[JSON], SettingsType) -> List[JSON] - """ - Redefines job results to be saved in database as relative paths to output directory configured in PyWPS - (i.e.: relative to ``weaver.wps_output_dir``). - - This allows us to easily adjust the exposed result HTTP path according to server configuration - (i.e.: relative to ``weaver.wps_output_path`` and/or ``weaver.wps_output_url``) and it also avoid rewriting - the whole database job results if the setting is changed later on. - """ - wps_url = get_wps_output_url(settings) - wps_path = get_wps_output_path(settings) - for res in results: - ref = res.get("reference") - if isinstance(ref, six.string_types) and ref: - if ref.startswith(wps_url): - ref = ref.replace(wps_url, "", 1) - if ref.startswith(wps_path): - ref = ref.replace(wps_path, "", 1) - res["reference"] = ref - return results - - -def map_locations(job, settings): - # type: (Job, SettingsType) -> None - """ - Generates symlink references from the Job UUID to PyWPS UUID results (outputs directory, status and log locations). - Update the Job's WPS ID if applicable (job executed locally). - Assumes that all results are located under the same reference UUID. - """ - local_path = get_wps_local_status_location(job.status_location, settings) - if not local_path: - LOGGER.debug("Not possible to map Job to WPS locations.") - return - base_dir, status_xml = os.path.split(local_path) - job.wps_id = os.path.splitext(status_xml)[0] - wps_loc = os.path.join(base_dir, job.wps_id) - job_loc = os.path.join(base_dir, job.id) - if wps_loc == job_loc: - LOGGER.debug("Job already refers to WPS locations.") - return - for loc_ext in ["", ".log", ".xml"]: - wps_ref = wps_loc + loc_ext - job_ref = job_loc + loc_ext - if os.path.exists(wps_ref): # possible that there are no results (e.g.: failed job) - os.symlink(wps_ref, job_ref) - - -def set_wps_language(wps, accept_language=None, request=None): - # type: (WebProcessingService, Optional[str], Optional[Request]) -> None - """Set the :attr:`language` property on the :class:`WebProcessingService` object. - - Given the `Accept-Language` header value, match the best language - to the supported languages. - - By default, and if no match is found, the :attr:`WebProcessingService.language` - property is set to None. 
- - https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language - (q-factor weighting is ignored, only order is considered) - - :param wps: process for which to set the language header if it is accepted - :param str accept_language: the value of the Accept-Language header - :param request: request from which to extract Accept-Language header if not provided directly - """ - if not accept_language and request: - accept_language = request.accept_language.header_value - - if not accept_language: - return - - if not hasattr(wps, "languages"): - # owslib version doesn't support setting a language - return - - accepted_languages = [lang.strip().split(";")[0] for lang in accept_language.lower().split(",")] - - for accept in accepted_languages: - for language in wps.languages.supported: # noqa - # Accept-Language header could be only 'fr' instead of 'fr-CA' - if language.lower().startswith(accept): - wps.language = language - return - - -def validate_supported_submit_job_handler_parameters(json_body): - """ - Tests supported parameters not automatically validated by colander deserialize. - """ - if json_body["mode"] not in [EXECUTE_MODE_ASYNC, EXECUTE_MODE_AUTO]: - raise HTTPNotImplemented(detail="Execution mode '{}' not supported.".format(json_body["mode"])) - - if json_body["response"] != EXECUTE_RESPONSE_DOCUMENT: - raise HTTPNotImplemented(detail="Execution response type '{}' not supported.".format(json_body["response"])) - - for job_output in json_body["outputs"]: - if job_output["transmissionMode"] != EXECUTE_TRANSMISSION_MODE_REFERENCE: - raise HTTPNotImplemented(detail="Execute transmissionMode '{}' not supported." - .format(job_output["transmissionMode"])) - - -def submit_job_handler(request, service_url, is_workflow=False, visibility=None): - # type: (Request, AnyStr, bool, Optional[AnyStr]) -> HTTPSuccessful - - # validate body with expected JSON content and schema - if CONTENT_TYPE_APP_JSON not in request.content_type: - raise HTTPBadRequest("Request 'Content-Type' header other than '{}' not supported." - .format(CONTENT_TYPE_APP_JSON)) - try: - json_body = request.json_body - except Exception as ex: - raise HTTPBadRequest("Invalid JSON body cannot be decoded for job submission. 
[{}]".format(ex)) - try: - json_body = sd.Execute().deserialize(json_body) - except colander.Invalid as ex: - raise HTTPBadRequest("Invalid schema: [{}]".format(str(ex))) - - # TODO: remove when all parameter variations are supported - validate_supported_submit_job_handler_parameters(json_body) - - settings = get_settings(request) - provider_id = request.matchdict.get("provider_id") # None OK if local - process_id = request.matchdict.get("process_id") - tags = request.params.get("tags", "").split(",") - is_execute_async = json_body["mode"] != EXECUTE_MODE_SYNC # convert auto to async - notification_email = json_body.get("notification_email") - encrypted_email = encrypt_email(notification_email, settings) if notification_email else None - - store = get_db(request).get_store(StoreJobs) - job = store.save_job(task_id=STATUS_ACCEPTED, process=process_id, service=provider_id, - inputs=json_body.get("inputs"), is_workflow=is_workflow, access=visibility, - user_id=request.authenticated_userid, execute_async=is_execute_async, custom_tags=tags, - notification_email=encrypted_email, accept_language=request.accept_language.header_value) - result = execute_process.delay( - job_id=job.id, - url=clean_ows_url(service_url), - # Convert EnvironHeaders to a simple dict (should cherry-pick the required headers) - headers={k: v for k, v in request.headers.items()}, - notification_email=notification_email) - LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) - - # local/provider process location - location_base = "/providers/{provider_id}".format(provider_id=provider_id) if provider_id else "" - location = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format( - base_url=get_wps_restapi_base_url(settings), - location_base=location_base, - process_id=process_id, - job_id=job.id) - body_data = { - "jobID": job.id, - "status": map_status(STATUS_ACCEPTED), - "location": location - } - return HTTPCreated(location=location, json=body_data) - @sd.jobs_full_service.post(tags=[sd.TAG_PROVIDERS, sd.TAG_PROCESSES, sd.TAG_EXECUTE, sd.TAG_JOBS], renderer=OUTPUT_FORMAT_JSON, schema=sd.PostProviderProcessJobRequest(), @@ -437,12 +46,13 @@ def submit_provider_job(request): """ store = get_db(request).get_store(StoreServices) provider_id = request.matchdict.get("provider_id") - service = store.fetch_by_name(provider_id, request=request) - return submit_job_handler(request, service.url) + service = store.fetch_by_name(provider_id) + body = submit_job(request, service, tags=["wps-rest"]) + return get_job_submission_response(body) def list_remote_processes(service, request): - # type: (Service, Request) -> List[ProcessDB] + # type: (Service, Request) -> List[Process] """ Obtains a list of remote service processes in a compatible :class:`weaver.datatype.Process` format. 
@@ -451,7 +61,7 @@ def list_remote_processes(service, request): wps = WebProcessingService(url=service.url, headers=get_cookie_headers(request.headers)) set_wps_language(wps, request=request) settings = get_settings(request) - return [convert_process_wps_to_db(service, process, settings) for process in wps.processes] + return [Process.from_ows(service, process, settings) for process in wps.processes] @sd.provider_processes_service.get(tags=[sd.TAG_PROVIDERS, sd.TAG_PROCESSES, sd.TAG_PROVIDERS, sd.TAG_GETCAPABILITIES], @@ -464,13 +74,13 @@ def get_provider_processes(request): """ provider_id = request.matchdict.get("provider_id") store = get_db(request).get_store(StoreServices) - service = store.fetch_by_name(provider_id, request=request) - processes = list_remote_processes(service, request=request) + service = store.fetch_by_name(provider_id) + processes = list_remote_processes(service, request) return HTTPOk(json={"processes": [p.process_summary() for p in processes]}) def describe_provider_process(request): - # type: (Request) -> ProcessDB + # type: (Request) -> Process """ Obtains a remote service process description in a compatible local process format. @@ -479,11 +89,11 @@ def describe_provider_process(request): provider_id = request.matchdict.get("provider_id") process_id = request.matchdict.get("process_id") store = get_db(request).get_store(StoreServices) - service = store.fetch_by_name(provider_id, request=request) + service = store.fetch_by_name(provider_id) wps = WebProcessingService(url=service.url, headers=get_cookie_headers(request.headers)) set_wps_language(wps, request=request) process = wps.describeprocess(process_id) - return convert_process_wps_to_db(service, process, get_settings(request)) + return Process.from_ows(service, process, get_settings(request)) @sd.provider_process_service.get(tags=[sd.TAG_PROVIDERS, sd.TAG_PROCESSES, sd.TAG_PROVIDERS, sd.TAG_DESCRIBEPROCESS], @@ -503,13 +113,13 @@ def get_provider_process(request): def get_processes_filtered_by_valid_schemas(request): - # type: (Request) -> Tuple[List[JSON], List[AnyStr]] + # type: (Request) -> Tuple[List[JSON], List[str]] """ Validates the processes summary schemas and returns them into valid/invalid lists. :returns: list of valid process summaries and invalid processes IDs for manual cleanup. 
""" store = get_db(request).get_store(StoreProcesses) - processes = store.list_processes(visibility=VISIBILITY_PUBLIC, request=request) + processes = store.list_processes(visibility=VISIBILITY_PUBLIC) valid_processes = list() invalid_processes_ids = list() for process in processes: @@ -573,25 +183,6 @@ def add_local_process(request): return deploy_process_from_payload(request.json, request) -def get_process(request): - # type: (Request) -> ProcessDB - process_id = request.matchdict.get("process_id") - if not isinstance(process_id, six.string_types): - raise HTTPUnprocessableEntity("Invalid parameter 'process_id'.") - try: - store = get_db(request).get_store(StoreProcesses) - process = store.fetch_by_id(process_id, visibility=VISIBILITY_PUBLIC, request=request) - return process - except InvalidIdentifierValue as ex: - raise HTTPBadRequest(str(ex)) - except ProcessNotAccessible: - raise HTTPUnauthorized("Process with id '{!s}' is not accessible.".format(process_id)) - except ProcessNotFound: - raise HTTPNotFound("Process with id '{!s}' does not exist.".format(process_id)) - except colander.Invalid as ex: - raise HTTPBadRequest("Invalid schema:\n[{0!r}].".format(ex)) - - @sd.process_service.get(tags=[sd.TAG_PROCESSES, sd.TAG_DESCRIBEPROCESS], renderer=OUTPUT_FORMAT_JSON, schema=sd.ProcessEndpoint(), response_schemas=sd.get_process_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorGetProcessResponse.description) @@ -600,7 +191,7 @@ def get_local_process(request): Get a registered local process information (DescribeProcess). """ try: - process = get_process(request) + process = get_process(request=request) process["inputs"] = opensearch.replace_inputs_describe_process(process.inputs, process.payload) process_offering = process.process_offering() return HTTPOk(json=process_offering) @@ -615,7 +206,7 @@ def get_local_process_package(request): """ Get a registered local process package definition. """ - process = get_process(request) + process = get_process(request=request) return HTTPOk(json=process.package or {}) @@ -626,7 +217,7 @@ def get_local_process_payload(request): """ Get a registered local process payload definition. """ - process = get_process(request) + process = get_process(request=request) return HTTPOk(json=process.payload or {}) @@ -638,15 +229,8 @@ def get_process_visibility(request): """ Get the visibility of a registered local process. """ - process_id = request.matchdict.get("process_id") - if not isinstance(process_id, six.string_types): - raise HTTPUnprocessableEntity("Invalid parameter 'process_id'.") - try: - store = get_db(request).get_store(StoreProcesses) - visibility_value = store.get_visibility(process_id, request=request) - return HTTPOk(json={u"value": visibility_value}) - except ProcessNotFound as ex: - raise HTTPNotFound(str(ex)) + process = get_process(request=request) + return HTTPOk(json={u"value": process.visibility}) @sd.process_visibility_service.put(tags=[sd.TAG_PROCESSES, sd.TAG_VISIBILITY], renderer=OUTPUT_FORMAT_JSON, @@ -659,9 +243,9 @@ def set_process_visibility(request): """ visibility_value = request.json.get("value") process_id = request.matchdict.get("process_id") - if not isinstance(process_id, six.string_types): + if not isinstance(process_id, str): raise HTTPUnprocessableEntity("Invalid process identifier.") - if not isinstance(visibility_value, six.string_types): + if not isinstance(visibility_value, str): raise HTTPUnprocessableEntity("Invalid visibility value specified. 
String expected.") if visibility_value not in VISIBILITY_VALUES: raise HTTPBadRequest("Invalid visibility value specified: {!s}".format(visibility_value)) @@ -671,7 +255,7 @@ def set_process_visibility(request): process = store.fetch_by_id(process_id) if process.type == PROCESS_BUILTIN: raise HTTPForbidden("Cannot change the visibility of builtin process.") - store.set_visibility(process_id, visibility_value, request=request) + store.set_visibility(process_id, visibility_value) return HTTPOk(json={u"value": visibility_value}) except TypeError: raise HTTPBadRequest("Value of visibility must be a string.") @@ -688,25 +272,15 @@ def delete_local_process(request): """ Unregister a local process. """ - process_id = request.matchdict.get("process_id") - if not isinstance(process_id, six.string_types): - raise HTTPUnprocessableEntity("Invalid parameter 'process_id'.") - try: - store = get_db(request).get_store(StoreProcesses) - process = store.fetch_by_id(process_id) - if process.type == PROCESS_BUILTIN: - raise HTTPForbidden("Cannot delete a builtin process.") - if store.delete_process(process_id, visibility=VISIBILITY_PUBLIC, request=request): - return HTTPOk(json={"undeploymentDone": True, "identifier": process_id}) - LOGGER.error("Existing process [%s] should have been deleted with success status.", process_id) - raise HTTPForbidden("Deletion of process has been refused by the database or could not have been validated.") - except InvalidIdentifierValue as ex: - raise HTTPBadRequest(str(ex)) - except ProcessNotAccessible: - raise HTTPUnauthorized("Process with id '{!s}' is not accessible.".format(process_id)) - except ProcessNotFound: - description = "Process with id '{!s}' does not exist.".format(process_id) - raise HTTPNotFound(description) + store = get_db(request).get_store(StoreProcesses) + process = get_process(request=request, store=store) + process_id = process.id + if process.type == PROCESS_BUILTIN: + raise HTTPForbidden("Cannot delete a builtin process.") + if store.delete_process(process_id, visibility=VISIBILITY_PUBLIC): + return HTTPOk(json={"undeploymentDone": True, "identifier": process_id}) + LOGGER.error("Existing process [%s] should have been deleted with success status.", process_id) + raise HTTPForbidden("Deletion of process has been refused by the database or could not have been validated.") @sd.process_jobs_service.post(tags=[sd.TAG_PROCESSES, sd.TAG_EXECUTE, sd.TAG_JOBS], renderer=OUTPUT_FORMAT_JSON, @@ -716,19 +290,6 @@ def submit_local_job(request): """ Execute a local process. 
""" - process_id = request.matchdict.get("process_id") - if not isinstance(process_id, six.string_types): - raise HTTPUnprocessableEntity("Invalid parameter 'process_id'.") - try: - store = get_db(request).get_store(StoreProcesses) - process = store.fetch_by_id(process_id, visibility=VISIBILITY_PUBLIC, request=request) - resp = submit_job_handler(request, process.processEndpointWPS1, - is_workflow=process.type == PROCESS_WORKFLOW, - visibility=process.visibility) - return resp - except InvalidIdentifierValue as ex: - raise HTTPBadRequest(str(ex)) - except ProcessNotAccessible: - raise HTTPUnauthorized("Process with id '{!s}' is not accessible.".format(process_id)) - except ProcessNotFound: - raise HTTPNotFound("The process with id '{!s}' does not exist.".format(process_id)) + process = get_process(request=request) + body = submit_job(request, process, tags=["wps-rest"]) + return get_job_submission_response(body) diff --git a/weaver/wps_restapi/providers/__init__.py b/weaver/wps_restapi/providers/__init__.py index 146722a4c..7a3952672 100644 --- a/weaver/wps_restapi/providers/__init__.py +++ b/weaver/wps_restapi/providers/__init__.py @@ -1,8 +1,8 @@ import logging +from weaver.formats import OUTPUT_FORMAT_JSON from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.providers import providers as p -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON LOGGER = logging.getLogger(__name__) diff --git a/weaver/wps_restapi/providers/providers.py b/weaver/wps_restapi/providers/providers.py index 99b7517ad..02a6cb032 100644 --- a/weaver/wps_restapi/providers/providers.py +++ b/weaver/wps_restapi/providers/providers.py @@ -8,13 +8,14 @@ from weaver.database import get_db from weaver.datatype import Service from weaver.exceptions import ServiceNotFound, log_unhandled_exceptions +from weaver.formats import OUTPUT_FORMAT_JSON from weaver.owsexceptions import OWSMissingParameterValue, OWSNotImplemented from weaver.processes.types import PROCESS_WPS from weaver.store.base import StoreServices from weaver.utils import get_any_id, get_settings from weaver.warning import NonBreakingExceptionWarning from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON, get_wps_restapi_base_url +from weaver.wps_restapi.utils import get_wps_restapi_base_url LOGGER = logging.getLogger(__name__) @@ -29,7 +30,7 @@ def get_providers(request): store = get_db(request).get_store(StoreServices) providers = [] - for service in store.list_services(request=request): + for service in store.list_services(): try: if service.type.lower() != "wps": continue @@ -77,7 +78,7 @@ def get_service(request): store = get_db(request).get_store(StoreServices) provider_id = request.matchdict.get("provider_id") try: - service = store.fetch_by_name(provider_id, request=request) + service = store.fetch_by_name(provider_id) except ServiceNotFound: raise HTTPNotFound("Provider {0} cannot be found.".format(provider_id)) return service, store @@ -103,7 +104,7 @@ def add_provider(request): new_service["auth"] = request.json["auth"] try: - store.save_service(new_service, request=request) + store.save_service(new_service) except NotImplementedError: raise OWSNotImplemented(sd.NotImplementedPostProviderResponse.description, value=new_service) @@ -120,7 +121,7 @@ def remove_provider(request): service, store = get_service(request) try: - store.delete_service(service.name, request=request) + store.delete_service(service.name) except NotImplementedError: raise 
OWSNotImplemented(sd.NotImplementedDeleteProviderResponse.description) diff --git a/weaver/wps_restapi/quotation/__init__.py b/weaver/wps_restapi/quotation/__init__.py index 76d5435d8..fc41788a5 100644 --- a/weaver/wps_restapi/quotation/__init__.py +++ b/weaver/wps_restapi/quotation/__init__.py @@ -1,8 +1,8 @@ import logging +from weaver.formats import OUTPUT_FORMAT_JSON from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.quotation import bills as b, quotes as q -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON LOGGER = logging.getLogger(__name__) diff --git a/weaver/wps_restapi/quotation/bills.py b/weaver/wps_restapi/quotation/bills.py index 3449169ee..40ff4ff61 100644 --- a/weaver/wps_restapi/quotation/bills.py +++ b/weaver/wps_restapi/quotation/bills.py @@ -4,9 +4,9 @@ from weaver.database import get_db from weaver.exceptions import BillNotFound, log_unhandled_exceptions +from weaver.formats import OUTPUT_FORMAT_JSON from weaver.store.base import StoreBills from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON LOGGER = logging.getLogger(__name__) diff --git a/weaver/wps_restapi/quotation/quotes.py b/weaver/wps_restapi/quotation/quotes.py index c291feace..b4981b06b 100644 --- a/weaver/wps_restapi/quotation/quotes.py +++ b/weaver/wps_restapi/quotation/quotes.py @@ -10,13 +10,13 @@ from weaver.database import get_db from weaver.datatype import Bill, Quote from weaver.exceptions import ProcessNotFound, QuoteNotFound, log_unhandled_exceptions +from weaver.formats import OUTPUT_FORMAT_JSON from weaver.processes.types import PROCESS_APPLICATION, PROCESS_WORKFLOW from weaver.processes.wps_package import get_package_workflow_steps, get_process_location from weaver.store.base import StoreBills, StoreQuotes from weaver.utils import get_settings, get_weaver_url from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.processes.processes import submit_local_job -from weaver.wps_restapi.utils import OUTPUT_FORMAT_JSON LOGGER = logging.getLogger(__name__) @@ -49,7 +49,7 @@ def request_quote(request): process_id = request.matchdict.get("process_id") process_store = get_db(request).get_store("processes") try: - process = process_store.fetch_by_id(process_id, request=request) + process = process_store.fetch_by_id(process_id) except ProcessNotFound: raise HTTPNotFound("Could not find process with specified 'process_id'.") diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 0655e4736..5e3d5d347 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -4,6 +4,8 @@ """ # pylint: disable=C0103,invalid-name +from typing import TYPE_CHECKING + from colander import ( Boolean, DateTime, @@ -38,12 +40,18 @@ from weaver.visibility import VISIBILITY_PUBLIC, VISIBILITY_VALUES from weaver.wps_restapi.colander_extras import ( DropableNoneSchema, + OneOfCaseInsensitive, OneOfMappingSchema, SchemaNodeDefault, VariableMappingSchema ) from weaver.wps_restapi.utils import wps_restapi_base_path +if TYPE_CHECKING: + from weaver.typedefs import SettingsType, TypedDict + + ViewInfo = TypedDict("ViewInfo", {"name": str, "pattern": str}) + class SchemaNode(SchemaNodeDefault): """ @@ -158,6 +166,7 @@ class ExplicitMappingSchema(MapSchema): TAG_RESULTS = "Results" TAG_EXCEPTIONS = "Exceptions" TAG_LOGS = "Logs" +TAG_WPS = "WPS" ############################################################################### # These 
"services" are wrappers that allow Cornice to generate the JSON API @@ -589,7 +598,7 @@ def __init__(self, *args, **kwargs): # noqa: E811 class QuoteSortEnum(SchemaNode): schema_type = String - def __init__(self, *args, **kwargs): # noqa: E811 + def __init__(self, *args, **kwargs): # noqa: E811 kwargs.pop("validator", None) # ignore passed argument and enforce the validator super(QuoteSortEnum, self).__init__( self.schema_type(), @@ -600,9 +609,8 @@ def __init__(self, *args, **kwargs): # noqa: E811 class LaunchJobQuerystring(MappingSchema): - field_string = SchemaNode(String(), default=None, missing=drop, - description="Comma separated tags that can be used to filter jobs later") - field_string.name = "tags" + tags = SchemaNode(String(), default=None, missing=drop, + description="Comma separated tags that can be used to filter jobs later") class VisibilityValue(SchemaNode): @@ -640,6 +648,47 @@ class SwaggerUIEndpoint(MappingSchema): pass +class WPSParameters(MappingSchema): + service = SchemaNode(String(), example="WPS", description="Service selection.", + validator=OneOfCaseInsensitive(["WPS"])) + request = SchemaNode(String(), example="GetCapabilities", description="WPS operation to accomplish", + validator=OneOfCaseInsensitive(["GetCapabilities", "DescribeProcess", "Execute"])) + version = SchemaNode(String(), exaple="1.0.0", default="1.0.0", validator=OneOf(["1.0.0", "2.0.0"])) + identifier = SchemaNode(String(), exaple="hello", description="Process identifier.", missing=drop) + data_inputs = SchemaNode(String(), name="DataInputs", missing=drop, example="message=hi", + description="Process execution inputs provided as Key-Value Pairs (KVP).") + + +class WPSBody(MappingSchema): + content = SchemaNode(String(), description="XML data inputs provided for WPS POST request.") + + +class WPSEndpoint(MappingSchema): + header = AcceptHeader() + querystring = WPSParameters() + body = WPSBody() + + +class WPSXMLSuccessBodySchema(MappingSchema): + pass + + +class OkWPSResponse(MappingSchema): + description = "WPS operation successful" + header = XmlHeader() + body = WPSXMLSuccessBodySchema() + + +class WPSXMLErrorBodySchema(MappingSchema): + pass + + +class ErrorWPSResponse(MappingSchema): + description = "Unhandled error occurred on WPS endpoint." 
+ header = XmlHeader() + body = WPSXMLErrorBodySchema() + + class ProviderEndpoint(ProviderPath): header = AcceptHeader() @@ -1175,8 +1224,8 @@ class ExecutionUnitList(SequenceSchema): class ProcessOffering(MappingSchema): - process = Process() processVersion = SchemaNode(String(), missing=drop) + process = Process() processEndpointWPS1 = SchemaNode(String(), missing=drop, format=URL) jobControlOptions = JobControlOptionsList(missing=drop) outputTransmission = TransmissionModeList(missing=drop) @@ -1326,9 +1375,18 @@ class PostProviderProcessJobRequest(MappingSchema): ################################# +class OWSExceptionResponse(MappingSchema): + code = SchemaNode(String(), description="OWS error code.", example="InvalidParameterValue") + locator = SchemaNode(String(), description="Indication of the element that caused the error.", example="identifier") + message = SchemaNode(String(), description="Specific description of the error.", example="Invalid process ID.") + + class ErrorJsonResponseBodySchema(MappingSchema): - code = SchemaNode(String(), example="NoApplicableCode") - description = SchemaNode(String(), example="Not authorized to access this resource.") + code = SchemaNode(Integer(), description="HTTP status code.", example=400) + status = SchemaNode(String(), description="HTTP status detail.", example="400 Bad Request") + title = SchemaNode(String(), description="HTTP status message.", example="Bad Request") + description = SchemaNode(String(), description="", example="Process identifier is invalid.") + exception = OWSExceptionResponse(missing=drop) class UnauthorizedJsonResponseSchema(MappingSchema): @@ -1336,6 +1394,11 @@ class UnauthorizedJsonResponseSchema(MappingSchema): body = ErrorJsonResponseBodySchema() +class ForbiddenJsonResponseSchema(MappingSchema): + header = JsonHeader() + body = ErrorJsonResponseBodySchema() + + class OkGetFrontpageResponse(MappingSchema): header = JsonHeader() body = FrontpageSchema() @@ -1446,6 +1509,11 @@ class OkGetProcessInfoResponse(MappingSchema): body = ProcessOffering() +class BadRequestGetProcessInfoResponse(MappingSchema): + description = "Missing process identifier." + body = MappingSchema(default={}) + + class InternalServerErrorGetProcessResponse(MappingSchema): description = "Unhandled error occurred during process description." 
@@ -1704,114 +1772,137 @@ class InternalServerErrorGetJobLogsResponse(MappingSchema): # https://github.com/crim-ca/weaver/issues/14 "200": OkPostProcessesResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorPostProcessesResponse(), } get_process_responses = { "200": OkGetProcessInfoResponse(description="success"), + "400": BadRequestGetProcessInfoResponse(), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetProcessResponse(), } get_process_package_responses = { "200": OkGetProcessPackageSchema(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetProcessPackageResponse(), } get_process_payload_responses = { "200": OkGetProcessPayloadSchema(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetProcessPayloadResponse(), } get_process_visibility_responses = { "200": OkGetProcessVisibilitySchema(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetProcessVisibilityResponse(), } put_process_visibility_responses = { "200": OkPutProcessVisibilitySchema(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorPutProcessVisibilityResponse(), } delete_process_responses = { "200": OkDeleteProcessResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorDeleteProcessResponse(), } get_providers_list_responses = { "200": OkGetProvidersListResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetProvidersListResponse(), } get_provider_responses = { "200": OkGetProviderCapabilitiesSchema(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetProviderCapabilitiesResponse(), } delete_provider_responses = { "204": NoContentDeleteProviderSchema(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorDeleteProviderResponse(), "501": NotImplementedDeleteProviderResponse(), } get_provider_processes_responses = { "200": OkGetProviderProcessesSchema(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetProviderProcessesListResponse(), } get_provider_process_responses = { "200": OkGetProviderProcessDescriptionResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": 
InternalServerErrorGetProviderProcessResponse(), } post_provider_responses = { "201": CreatedPostProvider(description="success"), "400": MappingSchema(description=OWSMissingParameterValue.explanation), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorPostProviderResponse(), "501": NotImplementedPostProviderResponse(), } post_provider_process_job_responses = { "201": CreatedLaunchJobResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorPostProviderProcessJobResponse(), } post_process_jobs_responses = { "201": CreatedLaunchJobResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorPostProcessJobResponse(), } get_all_jobs_responses = { "200": OkGetQueriedJobsResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetJobsResponse(), } get_single_job_status_responses = { "200": OkGetJobStatusResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetJobStatusResponse(), } delete_job_responses = { "200": OkDismissJobResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorDeleteJobResponse(), } get_job_results_responses = { "200": OkGetJobResultsResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetJobResultsResponse(), } get_job_output_responses = { "200": OkGetOutputResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetJobOutputResponse(), } get_exceptions_responses = { "200": OkGetJobExceptionsResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetJobExceptionsResponse(), } get_logs_responses = { "200": OkGetJobLogsResponse(description="success"), "401": UnauthorizedJsonResponseSchema(description="unauthorized"), + "403": UnauthorizedJsonResponseSchema(description="forbidden"), "500": InternalServerErrorGetJobLogsResponse(), } get_quote_list_responses = { @@ -1844,6 +1935,10 @@ class InternalServerErrorGetJobLogsResponse(MappingSchema): "401": UnauthorizedJsonResponseSchema(description="unauthorized"), "500": InternalServerErrorGetBillInfoResponse(), } +wps_responses = { + "200": OkWPSResponse(), + "500": ErrorWPSResponse(), +} ################################################################# @@ -1852,5 +1947,6 @@ class InternalServerErrorGetJobLogsResponse(MappingSchema): def service_api_route_info(service_api, settings): + # type: (Service, SettingsType) -> ViewInfo api_base = wps_restapi_base_path(settings) return {"name": service_api.name, "pattern": "{base}{path}".format(base=api_base, 
path=service_api.path)} diff --git a/weaver/wps_restapi/utils.py b/weaver/wps_restapi/utils.py index f96df5bdd..a1956ff74 100644 --- a/weaver/wps_restapi/utils.py +++ b/weaver/wps_restapi/utils.py @@ -1,41 +1,23 @@ import logging -from distutils.version import LooseVersion from typing import TYPE_CHECKING -from lxml import etree -from pyramid.httpexceptions import HTTPSuccessful - -from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_XML -from weaver.utils import get_settings, get_weaver_url, parse_request_query, request_extra +from weaver.utils import get_settings, get_weaver_url if TYPE_CHECKING: - from pyramid.request import Request # noqa: F401 - from typing import AnyStr, Optional # noqa: F401 - from weaver.typedefs import AnySettingsContainer # noqa: F401 + from weaver.typedefs import AnySettingsContainer LOGGER = logging.getLogger(__name__) -WPS_VERSION_100 = "1.0.0" -WPS_VERSION_200 = "2.0.0" -OUTPUT_FORMAT_JSON = "json" -OUTPUT_FORMAT_XML = "xml" -OUTPUT_FORMATS = { - WPS_VERSION_100: OUTPUT_FORMAT_XML, - WPS_VERSION_200: OUTPUT_FORMAT_JSON, - CONTENT_TYPE_APP_XML: OUTPUT_FORMAT_XML, - CONTENT_TYPE_APP_JSON: OUTPUT_FORMAT_JSON, -} - def wps_restapi_base_path(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str settings = get_settings(container) restapi_path = settings.get("weaver.wps_restapi_path", "").rstrip("/").strip() return restapi_path def get_wps_restapi_base_url(container): - # type: (AnySettingsContainer) -> AnyStr + # type: (AnySettingsContainer) -> str settings = get_settings(container) weaver_rest_url = settings.get("weaver.wps_restapi_url") if not weaver_rest_url: @@ -43,60 +25,3 @@ def get_wps_restapi_base_url(container): restapi_path = wps_restapi_base_path(settings) weaver_rest_url = weaver_url + restapi_path return weaver_rest_url - - -# FIXME: deprecated? unused? -def get_wps_output_format(request, service_url=None): - # type: (Request, AnyStr) -> AnyStr - """ - Get the preferred output format from WPS after checking various hints: - - 'version' in query string - - Content-Type in accept headers - - GetCapabilities of the service - - :param request: request for which a response of WPS version-specific format must be generated. - :param service_url: endpoint URL of the service to request 'GetCapabilities' if version not found by previous hints. 
- :return: one of ``OUTPUT_FORMAT`` (default: 1.0.0 => 'xml' if no direct hint matched) - """ - # return specific type if requested by 'version' query - queries = parse_request_query(request) - if "version" in queries and len(queries["version"]) > 0: - out_version = min([LooseVersion(v) for v in queries["version"]]) - out_format = OUTPUT_FORMATS.pop(out_version.version, None) - return out_format or OUTPUT_FORMATS[WPS_VERSION_100] - - # version not specified as query, check accept headers for specific and unique case - accepts = [accept[0] for accept in request.accept.parsed] - matched_accepts = list(set(OUTPUT_FORMATS) & set(accepts)) - if len(matched_accepts) == 1: - return OUTPUT_FORMATS[matched_accepts[0]] - - # version still ambiguous, verify service's GetCapabilities - if service_url: - getcap_url_100 = "{}?service=WPS&request=GetCapabilities" - getcap_url_200 = "{}/processes".format(service_url) - getcap_resp_100 = request_extra("get", getcap_url_100, settings=request) - getcap_resp_200 = request_extra("get", getcap_url_200, settings=request) - - # analyse JSON response - if isinstance(getcap_resp_200, HTTPSuccessful): - try: - # TODO: update get version if it is ever added to 'GetCapabilities' from WPS REST response - # for now, suppose that a valid list in json body means that the service is WPS 2.0.0 - if isinstance(getcap_resp_200.json()["processes"], list): - return OUTPUT_FORMATS[WPS_VERSION_200] - except Exception as ex: - LOGGER.exception("Got exception in 'get_wps_output_format' JSON parsing: %r", ex) - - # analyse XML response - if isinstance(getcap_resp_100, HTTPSuccessful): - try: - # TODO XML implementation - etree.fromstring(getcap_resp_100.content) - return OUTPUT_FORMATS[WPS_VERSION_100] - except Exception as ex: - LOGGER.exception("Got exception in 'get_wps_output_format' XML parsing: %r", ex) - - # still not found, default to older version - # for most probable format supported by services - return OUTPUT_FORMATS[WPS_VERSION_100]
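A minimal usage sketch for the new WPS KVP endpoint whose query parameters are modelled by the ``WPSParameters`` schema added above. This is illustrative only: the base URL, port, and the ``/ows/wps`` mount path are assumptions (they depend on the ``weaver.ini`` settings of the deployed instance), and ``requests`` is simply a convenient third-party HTTP client::

    import requests  # third-party HTTP client used only for this illustration

    WEAVER_URL = "http://localhost:4001"  # hypothetical local Weaver instance
    WPS_PATH = "/ows/wps"                 # assumed WPS endpoint path (configurable)

    # KVP 'GetCapabilities' request; 'service' and 'request' are validated
    # case-insensitively via 'OneOfCaseInsensitive' in 'WPSParameters'.
    resp = requests.get(
        WEAVER_URL + WPS_PATH,
        params={"service": "wps", "request": "getcapabilities", "version": "1.0.0"},
    )
    print(resp.status_code, resp.headers.get("Content-Type"))  # expect 200 with an XML body

    # KVP 'Execute' request with inputs provided as 'DataInputs' key-value pairs,
    # matching the schema's 'message=hi' example for a hypothetical 'hello' process.
    resp = requests.get(
        WEAVER_URL + WPS_PATH,
        params={
            "service": "WPS",
            "request": "Execute",
            "version": "1.0.0",
            "identifier": "hello",
            "DataInputs": "message=hi",
        },
    )
    print(resp.status_code)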
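In the same spirit, the renamed ``tags`` querystring of ``LaunchJobQuerystring`` can be exercised when submitting a job through the WPS-REST view refactored in ``submit_local_job``. This is a sketch under stated assumptions: the process identifier, base URL, and the abbreviated execute body are hypothetical and must match an actually deployed process::

    import requests

    # Hypothetical deployed process 'hello'; tags are comma-separated and can be
    # reused later to filter the corresponding jobs.
    resp = requests.post(
        "http://localhost:4001/processes/hello/jobs",
        params={"tags": "wps-rest,demo"},
        json={
            "mode": "async",
            "response": "document",
            "inputs": [{"id": "message", "data": "hi"}],
            "outputs": [{"id": "output", "transmissionMode": "reference"}],
        },
        headers={"Accept": "application/json"},
    )
    print(resp.status_code)              # expect 201 (job created)
    print(resp.headers.get("Location"))  # URL to monitor the job status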