From 01fef37e08a33b8affec1dbbd363967642aa5000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 20 Dec 2018 13:18:51 -0500 Subject: [PATCH 1/5] Fixed bad calculation of provided response for reports with disaggregated stats --- src/formpack/schema/fields.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 6dae7790..43cdf055 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -224,8 +224,11 @@ def get_disaggregated_stats(self, metrics, top_splitters, not_provided = 0 provided = 0 for val, counter in metrics.items(): - not_provided += counter.pop(None, 0) - provided += counter.pop('__submissions__', 0) + # `counter['__submissions__'] contains the count of all submissions, + # including those where no response was provided. + none_submissions = counter.pop(None, 0) + not_provided += none_submissions + provided += counter.pop('__submissions__', 0) - none_submissions return { 'total_count': not_provided + provided, From 860e4bef4a72bbe52de2da0fb0b0be5fffbe74b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Thu, 20 Dec 2018 16:45:38 -0500 Subject: [PATCH 2/5] Fixed calculation of disaggregate stats when some submissions do not have any responses --- src/formpack/schema/fields.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 43cdf055..75776d75 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -224,11 +224,18 @@ def get_disaggregated_stats(self, metrics, top_splitters, not_provided = 0 provided = 0 for val, counter in metrics.items(): - # `counter['__submissions__'] contains the count of all submissions, - # including those where no response was provided. none_submissions = counter.pop(None, 0) not_provided += none_submissions - provided += counter.pop('__submissions__', 0) - none_submissions + # `counter[None]` corresponds to submissions with no values for current field (`self`) + # If `val` is None, some submissions don't have any values for the `splitted_by_field`. + # We should consider all these submissions as `not_provided` + if val is None: + not_provided += sum(counter.values()) + else: + # `counter['__submissions__'] contains the count of all submissions, + # including those where no response was provided. + # We need to substract all submissions with no response + provided += counter.pop('__submissions__', 0) - none_submissions return { 'total_count': not_provided + provided, From 0b007c3921472b36ada291a628092789f8a52711 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20L=C3=A9ger?= Date: Mon, 11 Feb 2019 16:02:57 -0500 Subject: [PATCH 3/5] Added a unittest for disaggregated stats with empty answers --- tests/fixtures/auto_report/v1.json | 2 +- .../auto_report_without_answers/__init__.py | 15 ++++ .../auto_report_without_answers/v1.json | 68 +++++++++++++++++++ tests/test_autoreport.py | 38 +++++++++++ 4 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/auto_report_without_answers/__init__.py create mode 100644 tests/fixtures/auto_report_without_answers/v1.json diff --git a/tests/fixtures/auto_report/v1.json b/tests/fixtures/auto_report/v1.json index 4c81d608..d5be1dbd 100644 --- a/tests/fixtures/auto_report/v1.json +++ b/tests/fixtures/auto_report/v1.json @@ -68,4 +68,4 @@ "howmany": 1 } ] -} \ No newline at end of file +} diff --git a/tests/fixtures/auto_report_without_answers/__init__.py b/tests/fixtures/auto_report_without_answers/__init__.py new file mode 100644 index 00000000..0cee660e --- /dev/null +++ b/tests/fixtures/auto_report_without_answers/__init__.py @@ -0,0 +1,15 @@ +# coding: utf-8 + +from __future__ import (unicode_literals, print_function, + absolute_import, division) + + +from ..load_fixture_json import load_fixture_json + +DATA = { + u'title': 'Auto report without answers', + u'id_string': 'auto_report_without_answers', + u'versions': [ + load_fixture_json('auto_report_without_answers/v1') + ], +} diff --git a/tests/fixtures/auto_report_without_answers/v1.json b/tests/fixtures/auto_report_without_answers/v1.json new file mode 100644 index 00000000..3cf23634 --- /dev/null +++ b/tests/fixtures/auto_report_without_answers/v1.json @@ -0,0 +1,68 @@ +{ + "id_string": "auto_report", + "version": "atwav1", + "content": { + "survey": [ + { + "type": "select_one favorite_coffee", + "name": "favorite_coffee", + "label": "Favorite coffee" + }, + { + "type": "select one type_of_coffee", + "name": "type_of_coffee", + "label": "Type of coffee" + } + ], + "choices": [ + { + "list_name": "favorite_coffee", + "name": "nespresso", + "label": "Nespresso" + }, + { + "list_name": "favorite_coffee", + "name": "keurig", + "label": "Keurig" + }, + { + "list_name": "favorite_coffee", + "name": "tim_hortons", + "label": "Tim Hortons" + }, + { + "list_name": "type_of_coffee", + "name": "latte", + "label": "Latte" + }, + { + "list_name": "type_of_coffee", + "name": "regular", + "label": "Regular" + }, + { + "list_name": "type_of_coffee", + "name": "espresso", + "label": "Espresso" + } + ] + }, + "submissions": [ + { + "favorite_coffee": "tim_hortons", + "type_of_coffee": "latte" + }, + { + "favorite_coffee": "nespresso", + "type_of_coffee": "espresso" + }, + { + "favorite_coffee": "keurig", + "type_of_coffee": null + }, + { + "favorite_coffee": null, + "type_of_coffee": "latte" + } + ] +} diff --git a/tests/test_autoreport.py b/tests/test_autoreport.py index b3381ffa..4d632a97 100644 --- a/tests/test_autoreport.py +++ b/tests/test_autoreport.py @@ -381,3 +381,41 @@ def test_disaggregate_extended_fields(self): frequency_responses = [x[0] for x in value_list.get("frequency")] assert percentage_responses == frequency_responses assert percentage_responses[-1] == "..." + + def test_disaggregate_without_answers(self): + + title, schemas, submissions = build_fixture('auto_report_without_answers') + fp = FormPack(schemas, title) + + report = fp.autoreport() + stats = report.get_stats(submissions, split_by="favorite_coffee") + + stats = [(unicode(repr(field)), field_name, stats_dict) for field, field_name, stats_dict in stats] + + # Do not display None value. + # https://github.com/kobotoolbox/formpack/blob/master/src/formpack/schema/fields.py#L308 + + expected = { + "latte": { + "percentage": [("keurig", 0.0), ("nespresso", 0.0), ("tim_hortons", 25.0)], + "frequency": [("keurig", 0), ("nespresso", 0), ("tim_hortons", 1)] + }, + "espresso": { + "percentage": [("keurig", 0.0), ("nespresso", 25.0), ("tim_hortons", 0.0)], + "frequency": [("keurig", 0), ("nespresso", 1), ("tim_hortons", 0)] + }, + "": { + "percentage": [("keurig", 25.0), ("nespresso", 0.0), ("tim_hortons", 0.0)], + "frequency": [("keurig", 1), ("nespresso", 0), ("tim_hortons", 0)] + } + } + + for stat in stats: + stats_dict = dict(stat[2]) + for value in stats_dict.get("values"): + value_list = value[1] + value_group = value[0] + if value_group in expected: + assert value_list == expected.get(value_group) + + From 553ecd81f576e2b51cbeac03091bbe6f6751954b Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 19 Feb 2021 10:28:09 -0500 Subject: [PATCH 4/5] Add more submissions with null values and test the total count --- .../auto_report_without_answers/v1.json | 12 +++++++ tests/test_autoreport.py | 32 ++++--------------- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/tests/fixtures/auto_report_without_answers/v1.json b/tests/fixtures/auto_report_without_answers/v1.json index 3cf23634..63a9f163 100644 --- a/tests/fixtures/auto_report_without_answers/v1.json +++ b/tests/fixtures/auto_report_without_answers/v1.json @@ -60,6 +60,18 @@ "favorite_coffee": "keurig", "type_of_coffee": null }, + { + "favorite_coffee": null, + "type_of_coffee": "latte" + }, + { + "favorite_coffee": null, + "type_of_coffee": "latte" + }, + { + "favorite_coffee": null, + "type_of_coffee": "latte" + }, { "favorite_coffee": null, "type_of_coffee": "latte" diff --git a/tests/test_autoreport.py b/tests/test_autoreport.py index 12af8215..999f81b1 100644 --- a/tests/test_autoreport.py +++ b/tests/test_autoreport.py @@ -485,39 +485,19 @@ def test_disaggregate_extended_fields(self): def test_disaggregate_without_answers(self): - title, schemas, submissions = build_fixture('auto_report_without_answers') + title, schemas, submissions = build_fixture('auto_report_without_answers') # noqa fp = FormPack(schemas, title) report = fp.autoreport() stats = report.get_stats(submissions, split_by="favorite_coffee") - stats = [(unicode(repr(field)), field_name, stats_dict) for field, field_name, stats_dict in stats] - - # Do not display None value. - # https://github.com/kobotoolbox/formpack/blob/master/src/formpack/schema/fields.py#L308 - - expected = { - "latte": { - "percentage": [("keurig", 0.0), ("nespresso", 0.0), ("tim_hortons", 25.0)], - "frequency": [("keurig", 0), ("nespresso", 0), ("tim_hortons", 1)] - }, - "espresso": { - "percentage": [("keurig", 0.0), ("nespresso", 25.0), ("tim_hortons", 0.0)], - "frequency": [("keurig", 0), ("nespresso", 1), ("tim_hortons", 0)] - }, - "": { - "percentage": [("keurig", 25.0), ("nespresso", 0.0), ("tim_hortons", 0.0)], - "frequency": [("keurig", 1), ("nespresso", 0), ("tim_hortons", 0)] - } - } + stats = [ + (unicode(repr(field)), field_name, stats_dict) + for field, field_name, stats_dict in stats + ] for stat in stats: - stats_dict = dict(stat[2]) - for value in stats_dict.get("values"): - value_list = value[1] - value_group = value[0] - if value_group in expected: - assert value_list == expected.get(value_group) + assert len(submissions) == stat[2]['total_count'] def test_stats_with_non_numeric_value_for_numeric_field(self): """ From 43e3aa04065550d489916393fd0ee36359a6dbb5 Mon Sep 17 00:00:00 2001 From: Olivier Leger Date: Fri, 19 Feb 2021 10:29:31 -0500 Subject: [PATCH 5/5] Fixed typo --- src/formpack/schema/fields.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/formpack/schema/fields.py b/src/formpack/schema/fields.py index 817b318f..f473760f 100644 --- a/src/formpack/schema/fields.py +++ b/src/formpack/schema/fields.py @@ -216,15 +216,17 @@ def get_disaggregated_stats(self, metrics, top_splitters, for val, counter in metrics.items(): none_submissions = counter.pop(None, 0) not_provided += none_submissions - # `counter[None]` corresponds to submissions with no values for current field (`self`) - # If `val` is None, some submissions don't have any values for the `splitted_by_field`. + # `counter[None]` corresponds to submissions with no values for + # current field (`self`), + # If `val` is None, some submissions do not have any values for + # the `splitted_by_field`. # We should consider all these submissions as `not_provided` if val is None: not_provided += sum(counter.values()) else: - # `counter['__submissions__'] contains the count of all submissions, - # including those where no response was provided. - # We need to substract all submissions with no response + # `counter['__submissions__'] contains the count of all + # submissions, including those where no response was provided. + # We need to subtract all submissions with no response provided += counter.pop('__submissions__', 0) - none_submissions return {