Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 126 additions & 49 deletions energyplus_regressions/diffs/table_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
__license__ = "GNU General Public License Version 3"

from pathlib import Path
from collections import Counter, defaultdict, deque
import sys
import getopt
import os.path
Expand Down Expand Up @@ -83,6 +84,9 @@
td.table_size_error {
background-color: #FCFF97;
}
td.stringdiff {
background-color: #F6D8AE;
}
.big {
background-color: #FF969D;
}
Expand All @@ -91,6 +95,10 @@
background-color: #FFBE84;
}

.stringdiff {
background-color: #F6D8AE;
}

"""


Expand All @@ -111,8 +119,8 @@ def thresh_abs_rel_diff(abs_thresh: float, rel_thresh: float, x: str, y: str) ->
diff = 'small'
return abs_diff, rel_diff, diff
except ValueError:
# if we couldn't get a float out of it, we are doing string comparison, check case-insensitively before leaving
if x.lower().strip() == y.lower().strip():
# if we couldn't get a float out of it, do a string comparison after trimming edge whitespace
if x.strip() == y.strip():
return 0, 0, 'equal'
else:
return f'{x} vs {y}', f'{x} vs {y}', 'stringdiff'
Expand Down Expand Up @@ -144,6 +152,79 @@ def get_table_unique_heading(table):
return None


def normalize_row_match_value(value):
"""Normalize a cell value for row matching without changing actual diff output."""
text = str(value).replace('\xa0', ' ')
return ' '.join(text.split()).casefold()


def should_ignore_table_diff_field(column_heading, row_label=None):
if column_heading == 'Version ID':
return True
if row_label and str(row_label).strip() == 'Program Version and Build':
return True
return False


def row_cells_for_match(trow):
return [normalize_row_match_value(tcol.get_text(' ', strip=True)) for tcol in trow('td')]


def reorder_rows_to_match(base_keys, search_keys, search_rows):
rows_by_key = defaultdict(deque)
for key, row in zip(search_keys, search_rows):
rows_by_key[tuple(key)].append(row)

reordered_rows = []
for key in base_keys:
normalized_key = tuple(key)
if not rows_by_key[normalized_key]:
return search_rows
reordered_rows.append(rows_by_key[normalized_key].popleft())
return reordered_rows


def match_search_rows_to_base_rows(base_rows, search_rows):
"""
Reorder search_rows to match base_rows when row order is not semantically meaningful.

First, try a whole-row match using normalized values so case-only formatting changes do
not block reorder detection. If rows have real diffs, fall back to the shortest unique
leading-column key that exists in both tables. This lets us align rows like coil sizing
outputs where the stable row identifier is early in the row, but values later in the row
may legitimately differ.
"""
if not base_rows or not search_rows:
return search_rows

base_keys = [row_cells_for_match(trow) for trow in base_rows]
search_keys = [row_cells_for_match(trow) for trow in search_rows]

if base_keys == search_keys:
return search_rows

if Counter(map(tuple, base_keys)) == Counter(map(tuple, search_keys)):
return reorder_rows_to_match(base_keys, search_keys, search_rows)

max_prefix_len = min(
min((len(key) for key in base_keys), default=0),
min((len(key) for key in search_keys), default=0),
)

for prefix_len in range(1, max_prefix_len + 1):
base_prefixes = [tuple(key[:prefix_len]) for key in base_keys]
if len(set(base_prefixes)) != len(base_prefixes):
continue

search_prefixes = [tuple(key[:prefix_len]) for key in search_keys]
if Counter(base_prefixes) != Counter(search_prefixes):
continue

return reorder_rows_to_match(base_prefixes, search_prefixes, search_rows)

return search_rows


def hdict2soup(soup, heading, num, hdict, tdict, horder):
"""Create soup table (including anchor and heading) from header dictionary and error dictionary"""
# Append table anchor
Expand Down Expand Up @@ -201,7 +282,7 @@ def hdict2soup(soup, heading, num, hdict, tdict, horder):
if h not in hdict:
tdtag = Tag(soup, name='td', attrs=[("class", "big")])
tdtag.append('ColumnHeadingDifference')
elif h == 'DummyPlaceholder' or h == 'Subcategory':
elif h == 'Subcategory':
# Some tables such as the Source Energy End Use Components
# have a blank row full of `<td>&nbsp;</td>` which won't be
# decoded nicely
Expand All @@ -212,6 +293,23 @@ def hdict2soup(soup, heading, num, hdict, tdict, horder):
except Exception: # pragma: no cover
val = val.encode('ascii', 'ignore').decode('ascii')
tdtag.append(str(val))
elif h == 'DummyPlaceholder':
val = hdict[h][i]
if isinstance(val, tuple) and len(val) == 2:
diff, which = val
tdtag = Tag(soup, name='td', attrs=[('class', which)])
try:
tdtag.append(str(diff))
except Exception: # pragma: no cover
diff = diff.encode('ascii', 'ignore').decode('ascii')
tdtag.append(str(diff))
else:
tdtag = Tag(soup, name='td')
try:
tdtag.append(str(val))
except Exception: # pragma: no cover
val = val.encode('ascii', 'ignore').decode('ascii')
tdtag.append(str(val))
else:
(diff, which) = hdict[h][i]
tdtag = Tag(soup, name='td', attrs=[('class', which)])
Expand Down Expand Up @@ -243,50 +341,12 @@ def table2hdict_horder(table, table_a=None):
# Assume we are going to just loop over the rows and compare the data
search_rows = trows[1:]

# But we can handle it specially if we passed in table_a and it's just a valid reorder
# There are some weird things to consider here though. For example, some tables have multiple entirely blank
# rows, just there for visual spacing. Also there are tables where the far left entry is not unique.
# Consider the End Uses by Subcategory table. One row starts with "Heating" and then "General".
# The next row then has nothing in the first column, but the second column is "Boiler".
# This implies that "Heating" was a grouping, and "General" or "Boiler" is the actual subcategory.
# I think the only way to handle this robustly would be to use the entire
# row as the key, which is annoying, but should work well.
# But we can handle it specially if we passed in table_a and the rows are just reordered.
# Prefer whole-row matching first, but if a row has actual value diffs we can still align it
# by the shortest unique leading-column key that exists in both tables.
if table_a:
# process the rows of the "base" table_a that was provided into a list of search keys
trows_a = table_a('tr')
table_a_row_order = []
for trow in trows_a[1:]:
search_key = []
for tcol in trow('td'):
if tcol.contents:
search_key.append(tcol.contents[0])
else: # pragma: no cover
# I really don't think we can make it here while searching, but I don't want to accidentally crash
search_key.append("")
table_a_row_order.append(search_key)
# process the rows of the "mod" table that was provided into a list of search keys
found_table_b_row_order = []
for trow in trows[1:]:
search_key = []
for tcol in trow('td'):
if tcol.contents:
search_key.append(tcol.contents[0])
else: # pragma: no cover
# I really don't think we can make it here while searching, but I don't want to accidentally crash
search_key.append("")
found_table_b_row_order.append(search_key)
# it's the same order exactly, skip any searching and just run with search_rows as-is
if table_a_row_order == found_table_b_row_order:
pass
# if not exactly the same but overall the same stuff, it's reordered and we can match things up
elif sorted(table_a_row_order) == sorted(found_table_b_row_order):
# now just build the list of trows to search by index based on table a order
search_rows = []
for to_find_val in table_a_row_order:
for search_row_index, trow in enumerate(trows[1:]):
if found_table_b_row_order[search_row_index] == to_find_val:
search_rows.append(trow)
break
search_rows = match_search_rows_to_base_rows(trows_a[1:], search_rows)

# whether it was reordered or just using the literal order, build out the hdict instance to pass back
for trow in search_rows:
Expand Down Expand Up @@ -503,6 +563,7 @@ def table_diff(
# but for all other tables, we can use the first table as a baseline to carefully match up the rows
else:
hdict2, horder2 = table2hdict_horder(table2, table1)
compare_row_label_fields = not any(k in uheading1 for k in row_order_dependent_table_keys)

# honestly, if the column headings have changed, this should be an indicator to all reviewers that this needs
# up close investigation. As such, we are going to trigger the following things:
Expand All @@ -526,21 +587,37 @@ def table_diff(

for h in horder1:
if h == 'DummyPlaceholder':
diff_dict[h] = hdict1[h]
if h not in horder2 or not compare_row_label_fields:
diff_dict[h] = hdict1[h]
else:
diff_dict[h] = []
for x, y in zip(hdict1[h], hdict2[h]):
diff_result = thresh_abs_rel_diff(0, 0, x, y)
if diff_result[2] == 'stringdiff':
diff_dict[h].append((diff_result[0], diff_result[2]))
table_string_diff += 1
count_of_string_diff += 1
else:
diff_dict[h].append(x)
else:
if h not in horder2:
diff_dict[h] = [[0, 0, 'big']] * (len(table1('tr')) - 1)
else:
(abs_thresh, rel_thresh) = thresh_dict.lookup(h)
h_thresh_dict[h] = (abs_thresh, rel_thresh)
diff_dict[h] = []
for x, y in zip(hdict1[h], hdict2[h]):
diff_dict[h].append(thresh_abs_rel_diff(abs_thresh, rel_thresh, x, y))
row_labels = hdict1.get('DummyPlaceholder', [])
for row_index, (x, y) in enumerate(zip(hdict1[h], hdict2[h])):
row_label = row_labels[row_index] if row_index < len(row_labels) else None
if should_ignore_table_diff_field(h, row_label):
diff_dict[h].append((0, 0, 'equal'))
else:
diff_dict[h].append(thresh_abs_rel_diff(abs_thresh, rel_thresh, x, y))

# Statistics local to this table
for diff_result in diff_dict[h]:
diff_type = diff_result[2]
if h == 'Version ID':
if should_ignore_table_diff_field(h):
table_equal += 1
count_of_equal += 1
elif diff_type == 'small':
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<html>
<body>
<!-- FullName:Input Verification and Results Summary_Entire Facility_General-->
<table border="1" cellpadding="4" cellspacing="0">
<tr><td></td>
<td align="right">Value</td>
</tr>
<tr>
<td align="right">Program Version and Build</td>
<td align="right">EnergyPlus, Version 9.0.1-a7c9cc14ce, YMD=2018.11.20 17:26</td>
</tr>
<tr>
<td align="right">RunPeriod</td>
<td align="right">SUMMER RUN</td>
</tr>
</table>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<html>
<body>
<!-- FullName:Input Verification and Results Summary_Entire Facility_General-->
<table border="1" cellpadding="4" cellspacing="0">
<tr><td></td>
<td align="right">Value</td>
</tr>
<tr>
<td align="right">Program Version and Build</td>
<td align="right">EnergyPlus, Version 9.0.1-deadbeef42, YMD=2018.11.20 17:26</td>
</tr>
<tr>
<td align="right">RunPeriod</td>
<td align="right">SUMMER RUN</td>
</tr>
</table>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<html>
<body>
<!-- FullName:Test Report_Entire Facility_Row Labels-->
<table border="1" cellpadding="4" cellspacing="0">
<tr><td></td>
<td align="right">Value</td>
</tr>
<tr>
<td align="right">Alpha</td>
<td align="right">1</td>
</tr>
<tr>
<td align="right">Beta</td>
<td align="right">2</td>
</tr>
</table>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<html>
<body>
<!-- FullName:Test Report_Entire Facility_Row Labels-->
<table border="1" cellpadding="4" cellspacing="0">
<tr><td></td>
<td align="right">Value</td>
</tr>
<tr>
<td align="right">Alpha Renamed</td>
<td align="right">1</td>
</tr>
<tr>
<td align="right">Beta</td>
<td align="right">2</td>
</tr>
</table>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<html>
<body>
<!-- FullName:Coil Sizing Details_Entire Facility_Coils-->
<table border="1" cellpadding="4" cellspacing="0">
<tr><td></td>
<td align="right">Coil Type</td>
<td align="right">Coil Final Gross Total Capacity [W]</td>
</tr>
<tr>
<td align="right">TU1 VRF DX COOLING COIL</td>
<td align="right">COIL:COOLING:DX:VARIABLEREFRIGERANTFLOW</td>
<td align="right">100.0</td>
</tr>
<tr>
<td align="right">TU2 VRF DX COOLING COIL</td>
<td align="right">COIL:COOLING:DX:VARIABLEREFRIGERANTFLOW</td>
<td align="right">200.0</td>
</tr>
<tr>
<td align="right">TU1 VRF DX HEATING COIL</td>
<td align="right">COIL:HEATING:DX:VARIABLEREFRIGERANTFLOW</td>
<td align="right">300.0</td>
</tr>
<tr>
<td align="right">TU2 VRF DX HEATING COIL</td>
<td align="right">COIL:HEATING:DX:VARIABLEREFRIGERANTFLOW</td>
<td align="right">400.0</td>
</tr>
</table>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<html>
<body>
<!-- FullName:Coil Sizing Details_Entire Facility_Coils-->
<table border="1" cellpadding="4" cellspacing="0">
<tr><td></td>
<td align="right">Coil Type</td>
<td align="right">Coil Final Gross Total Capacity [W]</td>
</tr>
<tr>
<td align="right">TU1 VRF DX COOLING COIL</td>
<td align="right">Coil:Cooling:DX:VariableRefrigerantFlow</td>
<td align="right">100.0</td>
</tr>
<tr>
<td align="right">TU1 VRF DX HEATING COIL</td>
<td align="right">Coil:Heating:DX:VariableRefrigerantFlow</td>
<td align="right">300.0</td>
</tr>
<tr>
<td align="right">TU2 VRF DX COOLING COIL</td>
<td align="right">Coil:Cooling:DX:VariableRefrigerantFlow</td>
<td align="right">250.0</td>
</tr>
<tr>
<td align="right">TU2 VRF DX HEATING COIL</td>
<td align="right">Coil:Heating:DX:VariableRefrigerantFlow</td>
<td align="right">400.0</td>
</tr>
</table>
</body>
</html>
Loading
Loading