diff --git a/pywps/inout/formats/__init__.py b/pywps/inout/formats/__init__.py index c01c078f4..b0f6b1585 100644 --- a/pywps/inout/formats/__init__.py +++ b/pywps/inout/formats/__init__.py @@ -161,11 +161,11 @@ def json(self, jsonin): FORMATS = _FORMATS( Format('application/vnd.geo+json', extension='.geojson'), Format('application/json', extension='.json'), - Format('application/x-zipped-shp', extension='.zip'), + Format('application/x-zipped-shp', extension='.zip', encoding='base64'), Format('application/gml+xml', extension='.gml'), Format('application/vnd.google-earth.kml+xml', extension='.kml'), - Format('application/vnd.google-earth.kmz', extension='.kmz'), - Format('image/tiff; subtype=geotiff', extension='.tiff'), + Format('application/vnd.google-earth.kmz', extension='.kmz', encoding='base64'), + Format('image/tiff; subtype=geotiff', extension='.tiff', encoding='base64'), Format('application/x-ogc-wcs', extension='.xml'), Format('application/x-ogc-wcs; version=1.0.0', extension='.xml'), Format('application/x-ogc-wcs; version=1.1.0', extension='.xml'), diff --git a/pywps/inout/inputs.py b/pywps/inout/inputs.py index dc999655f..2e45060a2 100644 --- a/pywps/inout/inputs.py +++ b/pywps/inout/inputs.py @@ -134,20 +134,25 @@ def _json_data(self, data): data["type"] = "complex" - try: - data_doc = etree.parse(self.file) - data["data"] = etree.tostring(data_doc, pretty_print=True).decode("utf-8") - except Exception: + if self.data: - if self.data: - if isinstance(self.data, six.string_types): + if self.data_format.mime_type in ["application/xml", "application/gml+xml", "text/xml"]: + # Note that in a client-server round trip, the original and returned file will not be identical. + data_doc = etree.parse(self.file) + data["data"] = etree.tostring(data_doc, pretty_print=True).decode('utf-8') + + else: + if self.data_format.encoding == 'base64': + data["data"] = self.base64.decode('utf-8') + + else: + # Otherwise we assume all other formats are unsafe and need to be enclosed in a CDATA tag. if isinstance(self.data, bytes): - data["data"] = self.data.decode("utf-8") + out = self.data.encode(self.data_format.encoding or 'utf-8') else: - data["data"] = self.data + out = self.data - else: - data["data"] = etree.tostring(etree.CDATA(self.base64)) + data["data"] = u''.format(out) return data diff --git a/pywps/inout/outputs.py b/pywps/inout/outputs.py index fa7f8b34c..9d2bd51bb 100644 --- a/pywps/inout/outputs.py +++ b/pywps/inout/outputs.py @@ -123,29 +123,25 @@ def _json_data(self, data): data["type"] = "complex" - try: - data_doc = etree.parse(self.file) - data["data"] = etree.tostring(data_doc, pretty_print=True).decode("utf-8") - except Exception: - - if self.data: - # XML compatible formats don't have to be wrapped in a CDATA tag. - if self.data_format.mime_type in ["application/xml", "application/gml+xml", "text/xml"]: - fmt = "{}" - else: - fmt = "" + if self.data: + + if self.data_format.mime_type in ["application/xml", "application/gml+xml", "text/xml"]: + # Note that in a client-server round trip, the original and returned file will not be identical. + data_doc = etree.parse(self.file) + data["data"] = etree.tostring(data_doc, pretty_print=True).decode('utf-8') + else: if self.data_format.encoding == 'base64': - data["data"] = fmt.format(etree.CDATA(self.base64)) + data["data"] = self.base64.decode('utf-8') - elif isinstance(self.data, six.string_types): + else: + # Otherwise we assume all other formats are unsafe and need to be enclosed in a CDATA tag. if isinstance(self.data, bytes): - data["data"] = fmt.format(self.data.decode("utf-8")) + out = self.data.encode(self.data_format.encoding or 'utf-8') else: - data["data"] = fmt.format(self.data) + out = self.data - else: - raise NotImplementedError + data["data"] = u''.format(out) return data diff --git a/pywps/templates/1.0.0/execute/main.xml b/pywps/templates/1.0.0/execute/main.xml index f41c5e49a..39c7e6a8f 100644 --- a/pywps/templates/1.0.0/execute/main.xml +++ b/pywps/templates/1.0.0/execute/main.xml @@ -39,7 +39,7 @@ {{ input.title }} {% if input.type == "complex" %} - {{ input.data }} + {{ input.data | safe }} {% elif input.type == "literal" %} diff --git a/tests/data/text/unsafe.txt b/tests/data/text/unsafe.txt new file mode 100644 index 000000000..e0179af99 --- /dev/null +++ b/tests/data/text/unsafe.txt @@ -0,0 +1 @@ +< Bunch of characters that would break XML <> & "" ' \ No newline at end of file diff --git a/tests/test_complexdata_io.py b/tests/test_complexdata_io.py new file mode 100644 index 000000000..9398bc633 --- /dev/null +++ b/tests/test_complexdata_io.py @@ -0,0 +1,139 @@ +"""Test embedding different file formats and different encodings within the tag.""" + +import unittest +import os +from pywps import get_ElementMakerForVersion, E +from pywps.app.basic import get_xpath_ns +from pywps import Service, Process, ComplexInput, ComplexOutput, FORMATS +from pywps.tests import client_for, assert_response_success +from owslib.wps import WPSExecution, ComplexDataInput +from lxml import etree + +VERSION = "1.0.0" +WPS, OWS = get_ElementMakerForVersion(VERSION) +xpath_ns = get_xpath_ns(VERSION) + + +def get_resource(path): + return os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data', path) + + +test_fmts = {'json': (get_resource('json/point.geojson'), FORMATS.JSON), + 'geojson': (get_resource('json/point.geojson'), FORMATS.GEOJSON), + 'netcdf': (get_resource('netcdf/time.nc'), FORMATS.NETCDF), + 'geotiff': (get_resource('geotiff/dem.tiff'), FORMATS.GEOTIFF), + 'gml': (get_resource('gml/point.gml'), FORMATS.GML), + 'shp': (get_resource('shp/point.shp.zip'), FORMATS.SHP), + 'txt': (get_resource('text/unsafe.txt'), FORMATS.TEXT), + } + + +def create_fmt_process(name, fn, fmt): + """Create a dummy process comparing the input file on disk and the data that was passed in the request.""" + def handler(request, response): + # Load output from file and convert to data + response.outputs['complex'].file = fn + o = response.outputs['complex'].data + + # Get input data from the request + i = request.inputs['complex'][0].data + + assert i == o + return response + + return Process(handler=handler, + identifier='test-fmt', + title='Complex fmt test process', + inputs=[ComplexInput('complex', 'Complex input', + supported_formats=(fmt, ))], + outputs=[ComplexOutput('complex', 'Complex output', + supported_formats=(fmt, ))]) + + +def get_data(fn, encoding=None): + """Read the data from file and encode.""" + import base64 + mode = 'rb' if encoding == 'base64' else 'r' + with open(fn, mode) as fp: + data = fp.read() + + if encoding == 'base64': + data = base64.b64encode(data) + + if isinstance(data, bytes): + return data.decode('utf-8') + else: + return data + + +class RawInput(unittest.TestCase): + + def make_request(self, name, fn, fmt): + """Create XML request embedding encoded data.""" + data = get_data(fn, fmt.encoding) + + doc = WPS.Execute( + OWS.Identifier('test-fmt'), + WPS.DataInputs( + WPS.Input( + OWS.Identifier('complex'), + WPS.Data( + WPS.ComplexData(data, mimeType=fmt.mime_type, encoding=fmt.encoding)))), + version='1.0.0') + + return doc + + def compare_io(self, name, fn, fmt): + """Start the dummy process, post the request and check the response matches the input data.""" + + # Note that `WPSRequest` calls `get_inputs_from_xml` which converts base64 input to bytes + # See `_get_rawvalue_value` + client = client_for(Service(processes=[create_fmt_process(name, fn, fmt)])) + data = get_data(fn, fmt.encoding) + + wps = WPSExecution() + doc = wps.buildRequest('test-fmt', + inputs=[('complex', ComplexDataInput(data, mimeType=fmt.mime_type, + encoding=fmt.encoding))], + mode='sync') + resp = client.post_xml(doc=doc) + assert_response_success(resp) + wps.parseResponse(resp.xml) + out = wps.processOutputs[0].data[0] + + if 'gml' in fmt.mime_type: + xml_orig = etree.tostring(etree.fromstring(data.encode('utf-8'))).decode('utf-8') + xml_out = etree.tostring(etree.fromstring(out.decode('utf-8'))).decode('utf-8') + # Not equal because the output includes additional namespaces compared to the origin. + # self.assertEqual(xml_out, xml_orig) + + else: + self.assertEqual(out.strip(), data.strip()) + + def test_json(self): + key = 'json' + self.compare_io(key, *test_fmts[key]) + + def test_geojson(self): + key = 'geojson' + self.compare_io(key, *test_fmts[key]) + + def test_geotiff(self): + key = 'geotiff' + self.compare_io(key, *test_fmts[key]) + + def test_netcdf(self): + key = 'netcdf' + self.compare_io(key, *test_fmts[key]) + + def test_gml(self): + key = 'gml' + self.compare_io(key, *test_fmts[key]) + + def test_shp(self): + key = 'shp' + self.compare_io(key, *test_fmts[key]) + + def test_txt(self): + key = 'txt' + self.compare_io(key, *test_fmts[key]) diff --git a/tests/test_execute.py b/tests/test_execute.py index f8bd7a46f..4f8cdbf0c 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -165,12 +165,29 @@ def _handler(request, response): def get_output(doc): + """Return the content of LiteralData, Reference or ComplexData.""" + output = {} for output_el in xpath_ns(doc, '/wps:ExecuteResponse' '/wps:ProcessOutputs/wps:Output'): [identifier_el] = xpath_ns(output_el, './ows:Identifier') - [value_el] = xpath_ns(output_el, './wps:Data/wps:LiteralData') - output[identifier_el.text] = value_el.text + + lit_el = xpath_ns(output_el, './wps:Data/wps:LiteralData') + if lit_el != []: + output[identifier_el.text] = lit_el[0].text + + ref_el = xpath_ns(output_el, './wps:Reference') + if ref_el != []: + output[identifier_el.text] = ref_el[0].attrib['href'] + + data_el = xpath_ns(output_el, './wps:Data/wps:ComplexData') + if data_el != []: + if data_el[0].text: + output[identifier_el.text] = data_el[0].text + else: # XML children + ch = list(data_el[0])[0] + output[identifier_el.text] = lxml.etree.tostring(ch) + return output