Merge pull request #162 from kobotoolbox/issue-159

Columns in exports are out of sequence - fix
kobotoolbox · Mar 1, 2018 · b4637a9 · b4637a9
2 parents 05b1cea + 3980c7d
commit b4637a9
Show file tree

Hide file tree

Showing 9 changed files with 291 additions and 78 deletions.
diff --git a/.gitignore b/.gitignore
@@ -80,4 +80,5 @@ env.source
 
 # config file
 *sublime-*
-.vscode/
+.vscode/
+.idea/
diff --git a/src/formpack/pack.py b/src/formpack/pack.py
@@ -24,12 +24,21 @@
 
 class FormPack(object):
 
-    # TODO: make a clear signature for __init__
+
     def __init__(self, versions=None, title='Submissions', id_string=None,
                  default_version_id_key='__version__',
                  strict_schema=False,
                  root_node_name='data',
                  asset_type=None, submissions_xml=None):
+        """
+
+
+        :param versions: list. Versions of the asset. It must be sorted in ascending order. From oldest to newest.
+        :param title: string. The human readable name of the form.
+        :param id_string: The human readable id of the form.
+        :param default_version_id_key: string. The name of the field in submissions which stores the version ID
+        """
+        # @TODO: Complete the signature for __init__
 
         if not versions:
             versions = []
@@ -199,9 +208,10 @@ def _combine_field_choices(old_field, new_field):
         combined_options.update(new_choice.options)
         new_choice.options = combined_options
 
-
     def get_fields_for_versions(self, versions=-1, data_types=None):
-        """ Return a mapping containing fields
+
+        """
+            Return a mapping containing fields
 
             This is needed because when making an report for several
             versions of the same form, fields get added, removed, and
@@ -211,79 +221,74 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
 
             Labels are used as column headers.
 
+        :param versions: list
+        :param data_types: list
+        :return: list
         """
-
+        # Cast data_types if it's not already a list
         if data_types is not None:
             if isinstance(data_types, str_types):
                 data_types = [data_types]
 
-        versions = list(self._get_versions(versions).values())
 
-        # FIXME: you say that this is a list of tuples, but really, it's just a
-        # list of field objects, right?
-        all_fields = []  # [(name, field), (name...))]
-        processed_field_names = set()  # avoid expensive look ups
+        # tmp2 is a 2 dimensions list of `field`.
+        # First dimension is the position of fields where they should be in the latest version
+        # Second dimension is their position in the stack at the same position.
+        # For example:
+        #      ```
+        #      latest_version = [field1, field3]
+        #      version_just_before = [field2, field3]
+        #      ```
+        #
+        # Index 0 of tmp2d will be `[field1, field2]`
+        tmp2d = []
+        # This dict is used to remember final position of each field.
+        # Its keys are field_names and values are tuples of coordinates in tmp2d
+        # Keeping example above:
+        #       `positions[field1.name]` would be `(0, 0)`
+        #       `positions[field2.name]` would be `(0, 1)`
+        positions = {}
 
         # Create the initial field mappings from the first form version
-        for section in versions[0].sections.values():
-            all_fields.extend(section.fields.values())
-            processed_field_names.update(section.fields.keys())
+        versions_desc = list(reversed(self._get_versions(versions).values()))
 
-        # Process any new field added in the next versions
-        # The hard part is to insert it at a position that makes sense
-        for version in versions[1:]:
-            for section_name, section in version.sections.items():
+        index = 0
+        for section in versions_desc[0].sections.values():
+            for field_name, field_object in section.fields.items():
+                positions[field_name] = (index, 0)
+                tmp2d.append([field_object])
+                index += 1
 
-                # Potential new fields we want to add
-                new_fields = section.fields.items()
-
-                for i, (new_field_name, new_field_obj) in enumerate(new_fields):
-
-                    # The field already exists, let's replace it with the
-                    # last version
-                    if new_field_name in processed_field_names:
-                        final_list_copy = enumerate(list(all_fields))
-                        for y, _field in final_list_copy:
-                            if _field.name == new_field_name:
-                                self._combine_field_choices(
-                                    _field, new_field_obj)
-                                all_fields[y] = new_field_obj
-                                break
-                        continue
-
-                    # The field needs to be inserted at the proper place.
-                    # We take this new field, and look for all new fields after
-                    # it to find the first one that is already in the base
-                    # fields. Then we get its index, so we can insert our fresh
-                    # new field right before it. This gives us a coherent
-                    # order of fields so that they are always, at worst,
-                    # adjacent to the last field they used to be to.
-                    # FIXME: this loop never runs, because
-                    # `following_new_field` is a tuple of (name, field_obj) and
-                    # `processed_field_names` is just a set of strings
-                    for following_new_field in new_fields[i+1:]:
-                        if following_new_field in processed_field_names:
-                            final_list_copy = enumerate(list(all_fields))
-                            # FIXME: cannot work since final_list_copy contains
-                            # field objects, not tuples
-                            for y, (name, field) in final_list_copy:
-                                if name == following_new_field:
-                                    # FIXME: set the field to itself? What's
-                                    # the point? Again, this code never runs
-                                    # and has no tests
-                                    all_fields[y] = field
-                                    break
-                            break
+        for version in versions_desc[1:]:
+            index = 0
+            for section_name, section in version.sections.items():
+                for field_name, field_object in section.fields.items():
+                    if field_name in positions:
+                        position = positions[field_name]
+                        latest_field_object = tmp2d[position[0]][position[1]]
+                        # Because versions_desc are ordered from latest to oldest,
+                        # we use current field object as the old one and the one already
+                        # in position as the latest one.
+                        self._combine_field_choices(field_object, latest_field_object)
                     else:
-                        # We could not find a following_new_field,
-                        # so ad it at the end
-                        all_fields.append(new_field_obj)
-
-                    processed_field_names.add(new_field_name)
-
-        if data_types:
-            for dt in data_types:
-                all_fields = [f for f in all_fields if f.data_type == dt]
+                        if len(tmp2d) <= index:
+                            tmp2d.append([field_object])
+                        else:
+                            tmp2d[index].append(field_object)
+
+                        positions[field_name] = (index, len(tmp2d[index]) - 1)
+
+                    index += 1
+
+        all_fields = []
+        # We need to flatten the 2d list before returning it.
+        for first_dimension in tmp2d:
+            for field in first_dimension:
+                if data_types:
+                    if field.data_type in data_types:
+                        all_fields.append(field)
+                else:
+                    all_fields.append(field)
 
         return all_fields
 

diff --git a/tests/fixtures/field_position_with_multiple_versions/__init__.py b/tests/fixtures/field_position_with_multiple_versions/__init__.py
@@ -0,0 +1,25 @@
+# coding: utf-8
+
+from __future__ import (unicode_literals, print_function,
+                        absolute_import, division)
+
+'''
+field_position_with_multiple_versions:
+
+ * v1: `Fullname` and `Age` in repeating group
+ * v2: Add `City` after repeating group
+ * v3: Replace `Fullname` with `Firstname`, `Lastname`, and `Gender`
+
+'''
+
+from ..load_fixture_json import load_fixture_json
+
+DATA = {
+    u'title': 'Field Position with Multiple Versions',
+    u'id_string': 'field_position_with_multiple_versions',
+    u'versions': [
+        load_fixture_json('field_position_with_multiple_versions/v1'),
+        load_fixture_json('field_position_with_multiple_versions/v2'),
+        load_fixture_json('field_position_with_multiple_versions/v3'),
+    ],
+}
diff --git a/tests/fixtures/field_position_with_multiple_versions/v1.json b/tests/fixtures/field_position_with_multiple_versions/v1.json
@@ -0,0 +1,37 @@
+{
+  "version": "fpmv_1",
+  "content": {
+    "translated": [
+      "label"
+    ],
+    "survey": [
+      {
+        "type": "begin_repeat",
+        "name": "group_wy15q80",
+        "label": [
+          "Person"
+        ]
+      },
+      {
+        "name": "Fullname",
+        "required": false,
+        "type": "text",
+        "label": [
+          "Fullname"
+        ]
+      },
+      {
+        "name": "Age",
+        "required": false,
+        "type": "date",
+        "label": [
+          "Age"
+        ]
+      },
+      {
+        "type": "end_repeat"
+      }
+    ]
+  },
+  "submissions": []
+}
diff --git a/tests/fixtures/field_position_with_multiple_versions/v2.json b/tests/fixtures/field_position_with_multiple_versions/v2.json
@@ -0,0 +1,45 @@
+{
+  "version": "fpmv_2",
+  "content": {
+    "translated": [
+      "label"
+    ],
+    "survey": [
+      {
+        "type": "begin_repeat",
+        "name": "group_wy15q80",
+        "label": [
+          "Person"
+        ]
+      },
+      {
+        "name": "Fullname",
+        "required": false,
+        "type": "text",
+        "label": [
+          "Fullname"
+        ]
+      },
+      {
+        "name": "Age",
+        "required": false,
+        "type": "date",
+        "label": [
+          "Age"
+        ]
+      },
+      {
+        "type": "end_repeat"
+      },
+      {
+        "name": "City",
+        "required": false,
+        "type": "text",
+        "label": [
+          "City"
+        ]
+      }
+    ]
+  },
+  "submissions": []
+}
diff --git a/tests/fixtures/field_position_with_multiple_versions/v3.json b/tests/fixtures/field_position_with_multiple_versions/v3.json
@@ -0,0 +1,82 @@
+{
+  "version": "fpmv_3",
+  "content": {
+    "choices": [
+      {
+        "list_name": "kd0ep51",
+        "name": "male",
+        "label": [
+          "Male"
+        ]
+      },
+      {
+        "list_name": "kd0ep51",
+        "name": "female",
+        "label": [
+          "Female"
+        ]
+      },
+      {
+        "list_name": "kd0ep51",
+        "name": "other",
+        "label": [
+          "Other"
+        ]
+      }
+    ],
+    "survey": [
+      {
+        "type": "begin_repeat",
+        "name": "group_wy15q80",
+        "label": [
+          "Person"
+        ]
+      },
+      {
+        "name": "Firstname",
+        "required": false,
+        "type": "text",
+        "label": [
+          "Firstname"
+        ]
+      },
+      {
+        "name": "Lastname",
+        "required": false,
+        "type": "text",
+        "label": [
+          "Lastname"
+        ]
+      },
+      {
+        "select_from_list_name": "kd0ep51",
+        "name": "Gender",
+        "required": false,
+        "label": [
+          "Gender"
+        ],
+        "type": "select_one"
+      },
+      {
+        "name": "Age",
+        "required": false,
+        "type": "date",
+        "label": [
+          "Age"
+        ]
+      },
+      {
+        "type": "end_repeat"
+      },
+      {
+        "name": "City",
+        "required": false,
+        "type": "text",
+        "label": [
+          "City"
+        ]
+      }
+    ]
+  },
+  "submissions": []
+}
diff --git a/tests/test_autoreport.py b/tests/test_autoreport.py
@@ -381,4 +381,3 @@ def test_disaggregate_extended_fields(self):
                 frequency_responses = [x[0] for x in value_list.get("frequency")]
                 assert percentage_responses == frequency_responses
                 assert percentage_responses[-1] == "..."
-
Original file line number	Diff line number	Diff line change
Expand Up		@@ -381,4 +381,3 @@ def test_disaggregate_extended_fields(self):
		frequency_responses = [x[0] for x in value_list.get("frequency")]
		assert percentage_responses == frequency_responses
		assert percentage_responses[-1] == "..."