Skip to content

Commit

Permalink
Merge pull request #8 from quantifyearth/mwd-fix-chunk-impact-on-sum-…
Browse files Browse the repository at this point in the history
…float32

Fix issue with sum on float32 arrays being impacted by chunk size.
  • Loading branch information
mdales authored Nov 6, 2024
2 parents 2f6479c + a887e07 commit 75ff7be
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 7 deletions.
32 changes: 30 additions & 2 deletions tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,37 @@ def gdal_dataset_of_layer(layer: YirgacheffeLayer, filename: Optional[str]=None)
dataset.SetProjection(layer.projection)
return dataset

def numpy_to_gdal_type(val: np.array) -> int:
match val.dtype:
case np.float32:
return gdal.GDT_Float32
case np.float64:
return gdal.GDT_Float64
case np.int8:
return gdal.GDT_Byte
case np.int16:
return gdal.GDT_Int16
case np.int32:
return gdal.GDT_Int32
case np.int64:
return gdal.GDT_Int64
case np.uint8:
return gdal.GDT_Byte
case np.uint16:
return gdal.GDT_UInt16
case np.uint32:
return gdal.GDT_UInt32
case np.uint64:
return gdal.GDT_UInt64
case _:
raise ValueError


def gdal_dataset_with_data(origin: Tuple, pixel_pitch: float, data: np.array, filename: Optional[str]=None) -> gdal.Dataset:
assert data.ndim == 2
datatype = gdal.GDT_Byte

datatype = numpy_to_gdal_type(data)

if isinstance(data[0][0], float):
datatype = gdal.GDT_Float64
if filename:
Expand Down Expand Up @@ -220,4 +248,4 @@ def make_vectors_with_empty_feature(areas: Set[Tuple[Area,int]], filename: str)
feature = ogr.Feature(feature_definition)
layer.CreateFeature(feature)

package.Destroy()
package.Destroy()
26 changes: 26 additions & 0 deletions tests/test_operators.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import random
import tempfile

import numpy as np
import pytest
from osgeo import gdal

from helpers import gdal_dataset_with_data
import yirgacheffe
from yirgacheffe.layers import RasterLayer, ConstantLayer
from yirgacheffe.operators import LayerOperation

Expand Down Expand Up @@ -491,3 +494,26 @@ def test_write_mulitband_raster() -> None:
actual = layer.read_array(0, 0, 4, 2)

assert (expected == actual).all()

def test_sum_float32(monkeypatch) -> None:

random.seed(42)
data = []
for _ in range(10):
row = []
for _ in range(10):
row.append(random.random())
data.append(row)

data1 = np.array(data, dtype=np.float32)
layer1 = RasterLayer(gdal_dataset_with_data((0.0, 0.0), 0.02, data1))
assert layer1.datatype == gdal.GDT_Float32

# Sum forces things to float64
expected = np.sum(data1.astype(np.float64))

with monkeypatch.context() as m:
for blocksize in range(1,11):
m.setattr(yirgacheffe.constants, "YSTEP", blocksize)
actual = layer1.sum()
assert expected == actual
1 change: 1 addition & 0 deletions yirgacheffe/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
YSTEP = 512
12 changes: 7 additions & 5 deletions yirgacheffe/operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@
from osgeo import gdal
from dill import dumps, loads

from . import constants
from .window import Window



YSTEP = 512

class LayerConstant:
def __init__(self, val):
self.val = val
Expand Down Expand Up @@ -93,7 +91,7 @@ def max(self):
class LayerOperation(LayerMathMixin):

def __init__(self, lhs, operator=None, rhs=None):
self.ystep = YSTEP
self.ystep = constants.YSTEP

if lhs is None:
raise ValueError("LHS on operation should not be none")
Expand Down Expand Up @@ -162,14 +160,18 @@ def _eval(self, index, step): # pylint: disable=W0221
return self.operator(lhs_data)

def sum(self):
# The result accumulator is float64, and for precision reasons
# we force the sum to be done in float64 also. Otherwise we
# see variable results depending on chunk size, as different parts
# of the sum are done in different types.
res = 0.0
computation_window = self.window
for yoffset in range(0, computation_window.ysize, self.ystep):
step=self.ystep
if yoffset+step > computation_window.ysize:
step = computation_window.ysize - yoffset
chunk = self._eval(yoffset, step)
res += np.sum(chunk)
res += np.sum(chunk.astype(np.float64))
return res

def min(self):
Expand Down

0 comments on commit 75ff7be

Please sign in to comment.