diff --git a/src/formpack/utils/string.py b/src/formpack/utils/string.py index a864d2c..06f9fb0 100644 --- a/src/formpack/utils/string.py +++ b/src/formpack/utils/string.py @@ -75,10 +75,14 @@ def ellipsize(s, max_len, ellipsis='...'): def unique_name_for_xls(sheet_name, other_sheet_names, base_ellipsis='...'): r""" - Return a sheet name that does not collide with any string in the iterable - `other_sheet_names` and does not exceed the Excel sheet name length limit. - Characters that are not allowed in sheet names are replaced with - underscores. + Return a unique Excel-compatible worksheet name that does not collide + with a sheet name in the iterable `other_sheet_names`: + 1. Apply substitutions for worksheet names + a. Use '_' for disallowed characters ([]:*?/\) + b. Use '_' for leading or trailing apostrophes (') + 2. Limit worksheet name length to <= 31 characters, truncating with + base_ellipsis + 3. Ensure uniqueness with an incrementing parenthesized integer (n) :Example: >>> unique_name_for_xls( ... 
'This string has more than 31 characters!', @@ -90,6 +94,7 @@ def unique_name_for_xls(sheet_name, other_sheet_names, base_ellipsis='...'): sheet_name = sheet_name.translate( {ord(c): '_' for c in EXCEL_FORBIDDEN_WORKSHEET_NAME_CHARACTERS} ) + sheet_name = re.sub(r"(^'|'$)", '_', sheet_name) candidate = ellipsize( sheet_name, EXCEL_SHEET_NAME_SIZE_LIMIT, base_ellipsis diff --git a/tests/test_utils_string.py b/tests/test_utils_string.py index cbe1653..19def71 100644 --- a/tests/test_utils_string.py +++ b/tests/test_utils_string.py @@ -1,5 +1,5 @@ # coding: utf-8 -from formpack.utils.string import orderable_with_none +from formpack.utils.string import orderable_with_none, unique_name_for_xls def test_sort_list_with_none(): @@ -10,3 +10,33 @@ def test_sort_list_with_none(): assert orderable_with_none(None).__class__.__name__ == '__OrderableNone' assert isinstance(orderable_with_none('foo'), str) assert sorted_list == expected_list + +def test_excel_compatible_worksheet_names(): + + # Truncate (with '...' by default) + length_limit = [ # to <= 31 characters + '123456789_123456789_123456789_12', + '123456789_123456789_12345678...', + ] + assert unique_name_for_xls(length_limit[0], []) == length_limit[1] + + # Replace disallowed characters ([]:*?/\) with '_' + char_safety = [ + '[hi]: *nice*? ok "/_o,o_\\"', + '_hi__ _nice__ ok "__o,o__"', + ] + assert unique_name_for_xls(char_safety[0], []) == char_safety[1] + + # Replace leading or trailing apostrophes with '_' + leading_trailing_apostrophes = [ + [ "'both'", '_both_' ], + [ "'leading", '_leading'], + [ "trailing'", 'trailing_'], + [ + "'_'mixed'''", + "__'mixed''_", + ] + ] + for test in leading_trailing_apostrophes: + assert unique_name_for_xls(test[0], []) == test[1] +