formatting with black
jsta committed Mar 13, 2024
1 parent b3f047d commit 3745c3a
Showing 1 changed file with 75 additions and 61 deletions.
136 changes: 75 additions & 61 deletions hydropop/dev/end_to_end_new.py
@@ -18,21 +18,23 @@
pop_breaks = [-11, -10, -4, 0, 100] # coarse = [-11, -10, -4, 0, 100], fine = [-11, -10, -4, -1, 1, 2, 100]
hthi_breaks = [-.01, .4, .7, 1.01] # coarse = [-.01, .4, .7, 1.01], fine = [-.01, 0.3, 0.55, 0.75, 0.9, 1.01]
# fmt: on
min_hpu_size = 20  # in pixels - each HPU will have at least this many pixels
target_hpu_size = (
    300  # in pixels - not guaranteed, but will try to make each HPU this size
)

## Path parameters
path_bounding_box = r"data/roi_small.gpkg"  # r"data/roi.gpkg"
path_results = r"results"  # folder to store results
run_name = "toronto_new_method"  # string to prepend to exports
gee_asset = "projects/cimmid/assets/toronto_coarse_hpus"  # the asset path to the hydropop shapefile--this might not be known beforehand but is created upon asset loading to GEE
gdrive_folder_name = "CIMMID_{}".format(run_name)

## Pseudo-fixed parameters/variables
# Paths to data
path_hthi = r"data/hydrotopo_hab_index.tif"
path_pop = r"data/pop_density_americas.tif"
path_gee_csvs = r"results/toronto_new_hpu_method/gee"

## Here we go
paths = hut.prepare_export_paths(path_results, run_name)
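# prepare_export_paths presumably builds a dict of output filepaths keyed by
# export name -- the keys used below ("adjacency", "hpu_raster", "hpu_gpkg",
# "hpu_shapefile", "areagrid", "gages", ...) -- rooted at path_results with
# run_name prepended (assumption, inferred from usage rather than hut's docs).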
@@ -46,7 +48,7 @@
hpugen = hpc.hpu(path_pop, path_hthi, bounding=path_bounding_box)

# Compute classes
breaks = {"hthi": hthi_breaks, "pop": pop_breaks}
hpugen.compute_hp_classes_ranges(breaks)
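# Each pixel's class is set by which break interval its pop and hthi values
# fall into. A minimal sketch of the same binning with numpy (assumption --
# the real logic lives inside compute_hp_classes_ranges):
#
#     import numpy as np
#     hthi_class = np.digitize(hthi_vals, hthi_breaks)  # bins 1..len(breaks)-1
#     pop_class = np.digitize(pop_vals, pop_breaks)
#     hp_class = hthi_class * (len(pop_breaks) - 1) + pop_class  # unique combo id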

# Simplify classes
@@ -57,101 +59,113 @@

# Export adjacency
adj_df = hpugen.compute_adjacency()
adj_df.to_csv(paths["adjacency"], index=False)

# Export HPU rasters
hpugen.export_raster("hpu_simplified", paths["hpu_raster"])
hpugen.export_raster("hpu_class_simplified", paths["hpu_class_raster"])

# Export classes as polygons for plotting
classes = hut.polygonize_hpu(hpugen.I["hpu_class_simplified"], hpugen.gt, hpugen.wkt)
classes.to_file(paths["hpu_class_gpkg"], driver="GPKG")

# Compute areagrid required for computing HP unit areas
agrid = hut.areagrid(paths["hpu_raster"])
gdobj = gdal.Open(paths["hpu_raster"])
wg.write_geotiff(
    agrid,
    gdobj.GetGeoTransform(),
    gdobj.GetProjection(),
    paths["areagrid"],
    dtype=gdal.GDT_Float32,
)
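# The area grid is needed because (assumption) the rasters are in geographic
# coordinates, where pixel area shrinks with latitude. As a back-of-envelope
# check, for a pixel of (dlat, dlon) degrees centered at latitude phi:
#
#     area_km2 ~= (dlat * 111.32) * (dlon * 111.32 * cos(radians(phi)))
#
# (not necessarily the exact formula areagrid uses).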

""" Compute statistics for HPUs """
# First, we do zonal stats on the locally-available rasters
# HPU stats and properties
do_stats = {
    "hthi": [path_hthi, ["mean"]],
    "pop": [path_pop, ["mean"]],
    "area": [paths["areagrid"], ["sum"]],
    "hpu_class": [paths["hpu_class_raster"], ["majority"]],
}
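# do_stats schema (inferred from usage): {column_name: [raster_path, [stats]]},
# where each listed stat (mean/sum/majority) is computed zonally per HPU and
# written into hpugen.hpus under a matching column.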
hpugen.compute_hpu_stats(do_stats)
# Export the geopackage that contains all the HPU attributes
hpugen.hpus.to_file(paths["hpu_gpkg"], driver="GPKG")
# For the shapefile export, we only need the HPU id and the polygon
hpus_shp = gpd.GeoDataFrame(hpugen.hpus[["hpu_id", "geometry"]])
hpus_shp.crs = hpugen.hpus.crs
hpus_shp.to_file(paths["hpu_shapefile"])  # shapefile needed to upload to GEE

""" STOP. Here you need to upload the hpu shapefile as a GEE asset. """
is_uploaded_to_gee = input("Next step, upload the following shapefile (and its components) to GEE (Y/n)")
is_uploaded_to_gee = input(
"Next step, upload the following shapefile (and its components) to GEE (Y/n)"
)
while is_uploaded_to_gee is "n":
is_uploaded_to_gee = input("Next step, upload the following shapefile (and its components) to GEE (Y/n)")
is_uploaded_to_gee = input(
"Next step, upload the following shapefile (and its components) to GEE (Y/n)"
)

""" Update the gee_asset variable. """
datasets, Datasets = gee.generate_datasets()

# check and do fmax
if "fmax" in datasets.keys():
    filename_out = "fmax"
    gee.export_fmax(gee_asset, filename_out, gdrive_folder_name)

# Spin up other datasets
urls, tasks = rabpro.basin_stats.compute(
    Datasets, gee_feature_path=gee_asset, folder=gdrive_folder_name
)
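# compute() presumably kicks off GEE export tasks rather than returning data
# directly; the returned tasks can be monitored (e.g., in the GEE Tasks tab)
# until the csvs land in gdrive_folder_name on Google Drive, and urls offers a
# direct download route (assumption -- inferred from rabpro's usage here).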

""" STOP. Download the GEE exports (csvs) to path_gee_csvs """
hpus = gpd.read_file(paths["hpu_gpkg"])
gee_csvs = os.listdir(path_gee_csvs)
for key in datasets.keys():

    # Find the csv associated with a dataset
    if key == "fmax":
        look_for = "fmax"
    else:
        look_for = datasets[key]["path"]
        if datasets[key]["band"] != "None":
            look_for = look_for + "__" + datasets[key]["band"]
        look_for = look_for.replace("/", "-")
    this_csv = [c for c in gee_csvs if look_for in c][0]
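    # Note: the [0] assumes exactly one downloaded csv matches each dataset;
    # an empty match (csv not yet in path_gee_csvs) raises an IndexError here.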

    # Ingest it
    csv = pd.read_csv(os.path.join(path_gee_csvs, this_csv))

    # Handle special cases first
    if key == "fmax":
        csv = csv[["fmax", "hpu_id"]]
    elif key == "land_use":
        csv = csv[["histogram", "hpu_id"]]
        csv = gee.format_lc_type1(csv, fractionalize=True, prepend="lc_")
    else:
        keepcols = ["hpu_id"]
        renamer = {}
        if "mean" in datasets[key]["stats"]:
            keepcols.append("mean")
            renamer.update({"mean": key + "_mean"})
        if "std" in datasets[key]["stats"] or "stdDev" in datasets[key]["stats"]:
            keepcols.append("stdDev")
            renamer.update({"stdDev": key + "_std"})
        csv = csv[keepcols]
        # apply the accumulated renames (mean -> <key>_mean, stdDev -> <key>_std)
        csv = csv.rename(renamer, axis=1)

    hpus = pd.merge(hpus, csv, left_on="hpu_id", right_on="hpu_id")
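    # pd.merge defaults to an inner join on hpu_id, so any HPU missing from a
    # GEE csv silently drops out of hpus at this step.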

hpus.to_file(paths["hpu_gpkg"], driver="GPKG")

# Export watershed/gage information - keep out of class since this is somewhat
# external...for now
path_watersheds = r"X:\Research\CIMMID\Data\Watersheds\Toronto\initial_basins.gpkg"
hpus = gpd.read_file(paths["hpu_gpkg"])
watersheds = gpd.read_file(path_watersheds)
df = hut.overlay_watersheds(hpus, watersheds)
df.to_csv(paths["gages"], index=False)

# from matplotlib import pyplot as plt
# """
