Skip to content

Commit 911474c

Browse files
fix: add logging and minor bug fixes
1 parent c5c5807 commit 911474c

File tree

1 file changed

+87
-69
lines changed

1 file changed

+87
-69
lines changed

map2loop/thickness_calculator.py

Lines changed: 87 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
from statistics import mean
2222
import shapely
2323
import math
24+
import map2loop.logging as logging
25+
26+
logging.set_level(logging.logging.WARNING)
2427

2528

2629
class ThicknessCalculator(ABC):
@@ -236,17 +239,17 @@ def compute(
236239
# Set default value
237240
# thicknesses["ThicknessMedian"] is the median thickness of the unit
238241
thicknesses["ThicknessMedian"] = -1.0
239-
thicknesses['ThicknessMean'] = -1.0
242+
thicknesses["ThicknessMean"] = -1.0
240243
# thicknesses["ThicknessStdDev"] is the standard deviation of the thickness of the unit
241244
thicknesses["ThicknessStdDev"] = 0
242-
thicknesses['ThicknessStdDev'] = thicknesses['ThicknessStdDev'].astype('float64')
245+
thicknesses["ThicknessStdDev"] = thicknesses["ThicknessStdDev"].astype("float64")
243246
basal_unit_list = basal_contacts["basal_unit"].to_list()
244247
# increase buffer around basal contacts to ensure that the points are included as intersections
245248
basal_contacts["geometry"] = basal_contacts["geometry"].buffer(0.01)
246249
# get the sampled contacts
247250
contacts = geopandas.GeoDataFrame(map_data.sampled_contacts)
248251
# build points from x and y coordinates
249-
geometry2 = geopandas.points_from_xy(contacts['X'], contacts['Y'])
252+
geometry2 = geopandas.points_from_xy(contacts["X"], contacts["Y"])
250253
contacts.set_geometry(geometry2, inplace=True)
251254

252255
# set the crs of the contacts to the crs of the units
@@ -340,44 +343,57 @@ def compute(
340343
p2[2] = map_data.get_value_from_raster(Datatype.DTM, p2[0], p2[1])
341344
# calculate the length of the shortest line
342345
line_length = scipy.spatial.distance.euclidean(p1, p2)
343-
# find the indices of the points that are within 5% of the length of the shortest line
344-
indices = shapely.dwithin(short_line, interp_points, line_length * 0.25)
346+
# find the indices of the points that are within 10% of the length of the shortest line
347+
indices = shapely.dwithin(short_line, interp_points, line_length * 1)
345348
# get the dip of the points that are within
346349
# 10% of the length of the shortest line
347-
_dip = numpy.deg2rad(dip[indices])
348-
# get the end points of the shortest line
349-
# calculate the true thickness t = L . sin dip
350-
thickness = line_length * numpy.sin(_dip)
351-
# Average thickness along the shortest line
352-
if all(numpy.isnan(thickness)):
350+
if all(numpy.isnan(dip[indices])):
353351
pass
354352
else:
355-
_thickness.append(numpy.nanmean(thickness))
353+
_dip = numpy.nanmean(dip[indices])
354+
_dip = numpy.deg2rad(_dip)
355+
# get the end points of the shortest line
356+
# calculate the true thickness t = L . sin dip
357+
thickness = line_length * numpy.sin(_dip)
358+
# Average thickness along the shortest line
359+
_thickness.append(thickness)
356360

357361
# calculate the median thickness and standard deviation for the unit
358362
_thickness = numpy.asarray(_thickness, dtype=numpy.float64)
359-
360-
median = numpy.nanmedian(_thickness)
361-
mean = numpy.nanmean(_thickness)
362-
std_dev = numpy.nanstd(_thickness, dtype=numpy.float64)
363-
364-
idx = thicknesses.index[
363+
364+
if all(numpy.isnan(_thickness)):
365+
logging.logger.warning(
366+
f"Cannot calculate thickness of {stratigraphic_order[i + 1]}. Near dip data not found. Assign NaN to thickness."
367+
)
368+
idx = thicknesses.index[
365369
thicknesses["name"] == stratigraphic_order[i + 1]
366370
].tolist()[0]
367-
thicknesses.loc[idx, "ThicknessMean"] = mean
368-
thicknesses.loc[idx, "ThicknessMedian"] = median
369-
thicknesses.loc[idx, "ThicknessStdDev"] = std_dev
371+
thicknesses.loc[idx, "ThicknessMean"] = numpy.nan
372+
thicknesses.loc[idx, "ThicknessMedian"] = numpy.nan
373+
thicknesses.loc[idx, "ThicknessStdDev"] = numpy.nan
374+
375+
else:
376+
median = numpy.nanmedian(_thickness)
377+
mean = numpy.nanmean(_thickness)
378+
std_dev = numpy.nanstd(_thickness, dtype=numpy.float64)
379+
380+
idx = thicknesses.index[
381+
thicknesses["name"] == stratigraphic_order[i + 1]
382+
].tolist()[0]
383+
thicknesses.loc[idx, "ThicknessMean"] = mean
384+
thicknesses.loc[idx, "ThicknessMedian"] = median
385+
thicknesses.loc[idx, "ThicknessStdDev"] = std_dev
370386

371387
else:
372-
print(
388+
logging.logger.warning(
373389
f"Cannot calculate thickness between {stratigraphic_order[i]} and {stratigraphic_order[i + 1]}"
374390
)
375391

376392
return thicknesses
377393

378394

379395
class StructuralPoint(ThicknessCalculator):
380-
'''
396+
"""
381397
This class is a subclass of the ThicknessCalculator abstract base class. It implements the thickness calculation using a deterministic workflow based on stratigraphic measurements.
382398
383399
Attributes:
@@ -388,7 +404,7 @@ class StructuralPoint(ThicknessCalculator):
388404
compute(units: pandas.DataFrame, stratigraphic_order: list, basal_contacts: pandas.DataFrame, map_data: MapData)
389405
-> pandas.DataFrame: Calculates the thickness in meters for each unit in the stratigraphic column.
390406
391-
'''
407+
"""
392408

393409
def __init__(self):
394410
self.thickness_calculator_label = "StructuralPoint"
@@ -444,7 +460,7 @@ def compute(
444460

445461
# grab geology polygons and calculate bounding boxes for each lithology
446462
geology = map_data.get_map_data(datatype=Datatype.GEOLOGY)
447-
geology[['minx', 'miny', 'maxx', 'maxy']] = geology.bounds
463+
geology[["minx", "miny", "maxx", "maxy"]] = geology.bounds
448464

449465
# create a GeoDataFrame of the sampled structures
450466
sampled_structures = geopandas.GeoDataFrame(
@@ -453,14 +469,15 @@ def compute(
453469
crs=basal_contacts.crs,
454470
)
455471
# add unitname to the sampled structures
456-
sampled_structures['unit_name'] = geopandas.sjoin(sampled_structures, geology)['UNITNAME']
457-
458-
# remove nans from sampled structures
472+
sampled_structures["unit_name"] = geopandas.sjoin(sampled_structures, geology)["UNITNAME"]
473+
474+
# remove nans from sampled structures
459475
# this happens when there are strati measurements within intrusions. If intrusions are removed from the geology map, unit_name will then return a nan
460-
print(f"skipping row(s) {sampled_structures[sampled_structures['unit_name'].isnull()].index.to_numpy()} in sampled structures dataset, as they do not spatially coincide with a valid geology polygon \n")
461-
sampled_structures = sampled_structures.dropna(subset=['unit_name'])
462-
463-
476+
print(
477+
f"skipping row(s) {sampled_structures[sampled_structures['unit_name'].isnull()].index.to_numpy()} in sampled structures dataset, as they do not spatially coincide with a valid geology polygon \n"
478+
)
479+
sampled_structures = sampled_structures.dropna(subset=["unit_name"])
480+
464481
# rebuild basal contacts lines based on sampled dataset
465482
sampled_basal_contacts = rebuild_sampled_basal_contacts(
466483
basal_contacts, map_data.sampled_contacts
@@ -476,65 +493,66 @@ def compute(
476493

477494
# loop over each sampled structural measurement
478495
for s in range(0, len(sampled_structures)):
479-
480496
# make a shapely point from the measurement
481497
measurement = sampled_structures.iloc[s]
482498
measurement_pt = shapely.Point(measurement.X, measurement.Y)
483499

484500
# find unit and strike
485-
litho_in = measurement['unit_name']
486-
strike = (measurement['DIPDIR'] - 90) % 360
501+
litho_in = measurement["unit_name"]
502+
strike = (measurement["DIPDIR"] - 90) % 360
487503

488504
# find bounding box of the lithology
489-
bbox_poly = geology[geology['UNITNAME'] == litho_in][['minx', 'miny', 'maxx', 'maxy']]
505+
bbox_poly = geology[geology["UNITNAME"] == litho_in][["minx", "miny", "maxx", "maxy"]]
490506

491507
# check if litho_in is in geology
492508
# for a special case when the litho_in is not in the geology
493-
if len(geology[geology['UNITNAME'] == litho_in]) == 0:
494-
print(f"There are structural measurements in unit - {litho_in} - that are not in the geology shapefile. Skipping this structural measurement")
509+
if len(geology[geology["UNITNAME"] == litho_in]) == 0:
510+
print(
511+
f"There are structural measurements in unit - {litho_in} - that are not in the geology shapefile. Skipping this structural measurement"
512+
)
495513
continue
496-
else:
514+
else:
497515
# make a subset of the geology polygon & find neighbour units
498-
GEO_SUB = geology[geology['UNITNAME'] == litho_in]['geometry'].values[0]
516+
GEO_SUB = geology[geology["UNITNAME"] == litho_in]["geometry"].values[0]
499517

500518
neighbor_list = list(
501-
basal_contacts[GEO_SUB.intersects(basal_contacts.geometry)]['basal_unit']
519+
basal_contacts[GEO_SUB.intersects(basal_contacts.geometry)]["basal_unit"]
502520
)
503521
# draw orthogonal line to the strike (default value 10Km), and clip it by the bounding box of the lithology
504522
B = calculate_endpoints(measurement_pt, strike, self.line_length, bbox_poly)
505-
b = geopandas.GeoDataFrame({'geometry': [B]}).set_crs(basal_contacts.crs)
523+
b = geopandas.GeoDataFrame({"geometry": [B]}).set_crs(basal_contacts.crs)
506524

507525
# find all intersections
508526
all_intersections = sampled_basal_contacts.overlay(
509-
b, how='intersection', keep_geom_type=False
527+
b, how="intersection", keep_geom_type=False
510528
)
511529
all_intersections = all_intersections[
512-
all_intersections['geometry'].geom_type == 'Point'
530+
all_intersections["geometry"].geom_type == "Point"
513531
]
514532

515533
# clip intersections by the neighbouring geology polygons
516534
final_intersections = all_intersections[
517-
all_intersections['basal_unit'].isin(neighbor_list)
535+
all_intersections["basal_unit"].isin(neighbor_list)
518536
]
519537

520538
# sometimes the intersections will return as MultiPoint, so we need to convert them to nearest point
521-
if 'MultiPoint' in final_intersections['geometry'].geom_type.values:
539+
if "MultiPoint" in final_intersections["geometry"].geom_type.values:
522540
multi = final_intersections[
523-
final_intersections['geometry'].geom_type == 'MultiPoint'
541+
final_intersections["geometry"].geom_type == "MultiPoint"
524542
].index
525543
for m in multi:
526544
nearest_ = shapely.ops.nearest_points(
527545
final_intersections.loc[m, :].geometry, measurement_pt
528546
)[0]
529-
final_intersections.at[m, 'geometry'] = nearest_
530-
final_intersections.at[m, 'geometry'] = nearest_
547+
final_intersections.at[m, "geometry"] = nearest_
548+
final_intersections.at[m, "geometry"] = nearest_
531549

532550
# check to see if there's less than 2 intersections
533551
if len(final_intersections) < 2:
534552
continue
535553

536554
# check to see if the intersections cross two lithologies"
537-
if len(final_intersections['basal_unit'].unique()) == 1:
555+
if len(final_intersections["basal_unit"].unique()) == 1:
538556
continue
539557

540558
# declare the two intersection points
@@ -552,16 +570,16 @@ def compute(
552570

553571
# find the segments that the intersections belong to
554572
seg1 = sampled_basal_contacts[
555-
sampled_basal_contacts['basal_unit'] == final_intersections.iloc[0]['basal_unit']
573+
sampled_basal_contacts["basal_unit"] == final_intersections.iloc[0]["basal_unit"]
556574
].geometry.iloc[0]
557575
seg2 = sampled_basal_contacts[
558-
sampled_basal_contacts['basal_unit'] == final_intersections.iloc[1]['basal_unit']
576+
sampled_basal_contacts["basal_unit"] == final_intersections.iloc[1]["basal_unit"]
559577
].geometry.iloc[0]
560578

561579
# simplify the geometries to LineString
562-
if seg1.geom_type == 'MultiLineString':
580+
if seg1.geom_type == "MultiLineString":
563581
seg1 = multiline_to_line(seg1)
564-
if seg2.geom_type == 'MultiLineString':
582+
if seg2.geom_type == "MultiLineString":
565583
seg2 = multiline_to_line(seg2)
566584

567585
# find the strike of the segments
@@ -576,47 +594,47 @@ def compute(
576594
# find the lenght of the segment
577595
L = math.sqrt(((int_pt1.x - int_pt2.x) ** 2) + ((int_pt1.y - int_pt2.y) ** 2))
578596
# calculate thickness
579-
thickness = L * math.sin(math.radians(measurement['DIP']))
597+
thickness = L * math.sin(math.radians(measurement["DIP"]))
580598

581599
thicknesses.append(thickness)
582600
lis.append(litho_in)
583601

584602
# create a DataFrame of the thicknesses median and standard deviation by lithology
585-
result = pandas.DataFrame({'unit': lis, 'thickness': thicknesses})
586-
result = result.groupby('unit')['thickness'].agg(['median', 'mean', 'std']).reset_index()
587-
result.rename(columns={'thickness': 'ThicknessMedian'}, inplace=True)
603+
result = pandas.DataFrame({"unit": lis, "thickness": thicknesses})
604+
result = result.groupby("unit")["thickness"].agg(["median", "mean", "std"]).reset_index()
605+
result.rename(columns={"thickness": "ThicknessMedian"}, inplace=True)
588606

589607
output_units = units.copy()
590608
# remove the old thickness column
591-
output_units['ThicknessMedian'] = numpy.empty((len(output_units)))
592-
output_units['ThicknessMean'] = numpy.empty((len(output_units)))
593-
output_units['ThicknessStdDev'] = numpy.empty((len(output_units)))
609+
output_units["ThicknessMedian"] = numpy.empty((len(output_units)))
610+
output_units["ThicknessMean"] = numpy.empty((len(output_units)))
611+
output_units["ThicknessStdDev"] = numpy.empty((len(output_units)))
594612

595613
# find which units have no thickness calculated
596-
names_not_in_result = units[~units['name'].isin(result['unit'])]['name'].to_list()
614+
names_not_in_result = units[~units["name"].isin(result["unit"])]["name"].to_list()
597615
# assign the thicknesses to the each unit
598616
for _, unit in result.iterrows():
599-
idx = units.index[units['name'] == unit['unit']].tolist()[0]
600-
output_units.loc[idx, 'ThicknessMedian'] = unit['median']
601-
output_units.loc[idx, 'ThicknessMean'] = unit['mean']
602-
output_units.loc[idx, 'ThicknessStdDev'] = unit['std']
617+
idx = units.index[units["name"] == unit["unit"]].tolist()[0]
618+
output_units.loc[idx, "ThicknessMedian"] = unit["median"]
619+
output_units.loc[idx, "ThicknessMean"] = unit["mean"]
620+
output_units.loc[idx, "ThicknessStdDev"] = unit["std"]
603621
# handle the units that have no thickness
604622
for unit in names_not_in_result:
605623
# if no thickness has been calculated for the unit
606624
if (
607625
# not a top//bottom unit
608-
(output_units[output_units['name'] == unit]['ThicknessMedian'].all() == 0)
626+
(output_units[output_units["name"] == unit]["ThicknessMedian"].all() == 0)
609627
and (unit != stratigraphic_order[-1])
610628
and (unit != stratigraphic_order[0])
611629
):
612630
idx = stratigraphic_order.index(unit)
613631
# throw warning to user
614632
print(
615-
'It was not possible to calculate thickness between unit ',
633+
"It was not possible to calculate thickness between unit ",
616634
unit,
617635
"and ",
618636
stratigraphic_order[idx + 1],
619-
'Assigning thickness of -1',
637+
"Assigning thickness of -1",
620638
)
621639
# assign -1 as thickness
622640
output_units.loc[output_units["name"] == unit, "ThicknessMedian"] = -1

0 commit comments

Comments
 (0)