Skip to content

Commit

Permalink
Merge pull request #162 from kobotoolbox/issue-159
Browse files Browse the repository at this point in the history
Columns in exports are out of sequence - fix
  • Loading branch information
jnm authored Mar 1, 2018
2 parents 05b1cea + 3980c7d commit b4637a9
Show file tree
Hide file tree
Showing 9 changed files with 291 additions and 78 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,5 @@ env.source

# config file
*sublime-*
.vscode/
.vscode/
.idea/
135 changes: 70 additions & 65 deletions src/formpack/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,21 @@

class FormPack(object):

# TODO: make a clear signature for __init__

def __init__(self, versions=None, title='Submissions', id_string=None,
default_version_id_key='__version__',
strict_schema=False,
root_node_name='data',
asset_type=None, submissions_xml=None):
"""
:param versions: list. Versions of the asset. It must be sorted in ascending order. From oldest to newest.
:param title: string. The human readable name of the form.
:param id_string: The human readable id of the form.
:param default_version_id_key: string. The name of the field in submissions which stores the version ID
"""
# @TODO: Complete the signature for __init__

if not versions:
versions = []
Expand Down Expand Up @@ -199,9 +208,10 @@ def _combine_field_choices(old_field, new_field):
combined_options.update(new_choice.options)
new_choice.options = combined_options


def get_fields_for_versions(self, versions=-1, data_types=None):
""" Return a mapping containing fields

"""
Return a mapping containing fields
This is needed because when making an report for several
versions of the same form, fields get added, removed, and
Expand All @@ -211,79 +221,74 @@ def get_fields_for_versions(self, versions=-1, data_types=None):
Labels are used as column headers.
:param versions: list
:param data_types: list
:return: list
"""

# Cast data_types if it's not already a list
if data_types is not None:
if isinstance(data_types, str_types):
data_types = [data_types]

versions = list(self._get_versions(versions).values())

# FIXME: you say that this is a list of tuples, but really, it's just a
# list of field objects, right?
all_fields = [] # [(name, field), (name...))]
processed_field_names = set() # avoid expensive look ups
# tmp2 is a 2 dimensions list of `field`.
# First dimension is the position of fields where they should be in the latest version
# Second dimension is their position in the stack at the same position.
# For example:
# ```
# latest_version = [field1, field3]
# version_just_before = [field2, field3]
# ```
#
# Index 0 of tmp2d will be `[field1, field2]`
tmp2d = []
# This dict is used to remember final position of each field.
# Its keys are field_names and values are tuples of coordinates in tmp2d
# Keeping example above:
# `positions[field1.name]` would be `(0, 0)`
# `positions[field2.name]` would be `(0, 1)`
positions = {}

# Create the initial field mappings from the first form version
for section in versions[0].sections.values():
all_fields.extend(section.fields.values())
processed_field_names.update(section.fields.keys())
versions_desc = list(reversed(self._get_versions(versions).values()))

# Process any new field added in the next versions
# The hard part is to insert it at a position that makes sense
for version in versions[1:]:
for section_name, section in version.sections.items():
index = 0
for section in versions_desc[0].sections.values():
for field_name, field_object in section.fields.items():
positions[field_name] = (index, 0)
tmp2d.append([field_object])
index += 1

# Potential new fields we want to add
new_fields = section.fields.items()

for i, (new_field_name, new_field_obj) in enumerate(new_fields):

# The field already exists, let's replace it with the
# last version
if new_field_name in processed_field_names:
final_list_copy = enumerate(list(all_fields))
for y, _field in final_list_copy:
if _field.name == new_field_name:
self._combine_field_choices(
_field, new_field_obj)
all_fields[y] = new_field_obj
break
continue

# The field needs to be inserted at the proper place.
# We take this new field, and look for all new fields after
# it to find the first one that is already in the base
# fields. Then we get its index, so we can insert our fresh
# new field right before it. This gives us a coherent
# order of fields so that they are always, at worst,
# adjacent to the last field they used to be to.
# FIXME: this loop never runs, because
# `following_new_field` is a tuple of (name, field_obj) and
# `processed_field_names` is just a set of strings
for following_new_field in new_fields[i+1:]:
if following_new_field in processed_field_names:
final_list_copy = enumerate(list(all_fields))
# FIXME: cannot work since final_list_copy contains
# field objects, not tuples
for y, (name, field) in final_list_copy:
if name == following_new_field:
# FIXME: set the field to itself? What's
# the point? Again, this code never runs
# and has no tests
all_fields[y] = field
break
break
for version in versions_desc[1:]:
index = 0
for section_name, section in version.sections.items():
for field_name, field_object in section.fields.items():
if field_name in positions:
position = positions[field_name]
latest_field_object = tmp2d[position[0]][position[1]]
# Because versions_desc are ordered from latest to oldest,
# we use current field object as the old one and the one already
# in position as the latest one.
self._combine_field_choices(field_object, latest_field_object)
else:
# We could not find a following_new_field,
# so ad it at the end
all_fields.append(new_field_obj)

processed_field_names.add(new_field_name)

if data_types:
for dt in data_types:
all_fields = [f for f in all_fields if f.data_type == dt]
if len(tmp2d) <= index:
tmp2d.append([field_object])
else:
tmp2d[index].append(field_object)

positions[field_name] = (index, len(tmp2d[index]) - 1)

index += 1

all_fields = []
# We need to flatten the 2d list before returning it.
for first_dimension in tmp2d:
for field in first_dimension:
if data_types:
if field.data_type in data_types:
all_fields.append(field)
else:
all_fields.append(field)

return all_fields

Expand Down
25 changes: 25 additions & 0 deletions tests/fixtures/field_position_with_multiple_versions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# coding: utf-8

from __future__ import (unicode_literals, print_function,
absolute_import, division)

'''
field_position_with_multiple_versions:
* v1: `Fullname` and `Age` in repeating group
* v2: Add `City` after repeating group
* v3: Replace `Fullname` with `Firstname`, `Lastname`, and `Gender`
'''

from ..load_fixture_json import load_fixture_json

DATA = {
u'title': 'Field Position with Multiple Versions',
u'id_string': 'field_position_with_multiple_versions',
u'versions': [
load_fixture_json('field_position_with_multiple_versions/v1'),
load_fixture_json('field_position_with_multiple_versions/v2'),
load_fixture_json('field_position_with_multiple_versions/v3'),
],
}
37 changes: 37 additions & 0 deletions tests/fixtures/field_position_with_multiple_versions/v1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"version": "fpmv_1",
"content": {
"translated": [
"label"
],
"survey": [
{
"type": "begin_repeat",
"name": "group_wy15q80",
"label": [
"Person"
]
},
{
"name": "Fullname",
"required": false,
"type": "text",
"label": [
"Fullname"
]
},
{
"name": "Age",
"required": false,
"type": "date",
"label": [
"Age"
]
},
{
"type": "end_repeat"
}
]
},
"submissions": []
}
45 changes: 45 additions & 0 deletions tests/fixtures/field_position_with_multiple_versions/v2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"version": "fpmv_2",
"content": {
"translated": [
"label"
],
"survey": [
{
"type": "begin_repeat",
"name": "group_wy15q80",
"label": [
"Person"
]
},
{
"name": "Fullname",
"required": false,
"type": "text",
"label": [
"Fullname"
]
},
{
"name": "Age",
"required": false,
"type": "date",
"label": [
"Age"
]
},
{
"type": "end_repeat"
},
{
"name": "City",
"required": false,
"type": "text",
"label": [
"City"
]
}
]
},
"submissions": []
}
82 changes: 82 additions & 0 deletions tests/fixtures/field_position_with_multiple_versions/v3.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"version": "fpmv_3",
"content": {
"choices": [
{
"list_name": "kd0ep51",
"name": "male",
"label": [
"Male"
]
},
{
"list_name": "kd0ep51",
"name": "female",
"label": [
"Female"
]
},
{
"list_name": "kd0ep51",
"name": "other",
"label": [
"Other"
]
}
],
"survey": [
{
"type": "begin_repeat",
"name": "group_wy15q80",
"label": [
"Person"
]
},
{
"name": "Firstname",
"required": false,
"type": "text",
"label": [
"Firstname"
]
},
{
"name": "Lastname",
"required": false,
"type": "text",
"label": [
"Lastname"
]
},
{
"select_from_list_name": "kd0ep51",
"name": "Gender",
"required": false,
"label": [
"Gender"
],
"type": "select_one"
},
{
"name": "Age",
"required": false,
"type": "date",
"label": [
"Age"
]
},
{
"type": "end_repeat"
},
{
"name": "City",
"required": false,
"type": "text",
"label": [
"City"
]
}
]
},
"submissions": []
}
1 change: 0 additions & 1 deletion tests/test_autoreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,4 +381,3 @@ def test_disaggregate_extended_fields(self):
frequency_responses = [x[0] for x in value_list.get("frequency")]
assert percentage_responses == frequency_responses
assert percentage_responses[-1] == "..."

Loading

0 comments on commit b4637a9

Please sign in to comment.