11import dataclasses
22import logging
33import os .path
4- import pathlib
54from argparse import Namespace
65from os import path
76
87import chemprop as cp
9- import pandas as pd
10- from keras .models import load_model
118
129from dfpl import autoencoder as ac
1310from dfpl import feedforwardNN as fNN
1714from dfpl import vae as vae
1815from dfpl .utils import createArgsFromJson , createDirectory , makePathAbsolute
1916
20- project_directory = pathlib .Path ("." ).parent .parent .absolute ()
21- test_train_opts = options .Options (
22- inputFile = f"{ project_directory } /input_datasets/S_dataset.pkl" ,
23- outputDir = f"{ project_directory } /output_data/console_test" ,
24- ecWeightsFile = f"{ project_directory } /output_data/case_00/AE_S/ae_S.encoder.hdf5" ,
25- ecModelDir = f"{ project_directory } /output_data/case_00/AE_S/saved_model" ,
26- type = "smiles" ,
27- fpType = "topological" ,
28- epochs = 100 ,
29- batchSize = 1024 ,
30- fpSize = 2048 ,
31- encFPSize = 256 ,
32- enableMultiLabel = False ,
33- testSize = 0.2 ,
34- kFolds = 2 ,
35- verbose = 2 ,
36- trainAC = False ,
37- trainFNN = True ,
38- compressFeatures = True ,
39- activationFunction = "selu" ,
40- lossFunction = "bce" ,
41- optimizer = "Adam" ,
42- fnnType = "FNN" ,
43- )
4417
45- test_pred_opts = options .Options (
46- inputFile = f"{ project_directory } /input_datasets/S_dataset.pkl" ,
47- outputDir = f"{ project_directory } /output_data/console_test" ,
48- outputFile = f"{ project_directory } /output_data/console_test/S_dataset.predictions_ER.csv" ,
49- ecModelDir = f"{ project_directory } /output_data/case_00/AE_S/saved_model" ,
50- fnnModelDir = f"{ project_directory } /output_data/console_test/ER_saved_model" ,
51- type = "smiles" ,
52- fpType = "topological" ,
53- )
54-
55-
def traindmpnn(opts: options.GnnOptions) -> None:
    """
    Train a D-MPNN model using the given options.

    The chemprop training arguments are read from the JSON file referenced
    by ``opts.configFile`` and handed to chemprop's cross-validation driver.

    Args:
        - opts: options.GnnOptions instance containing the details of the training
    Returns:
        - None
    """
    # Turn the JSON config file into an argument list for chemprop's parser.
    arguments = createArgsFromJson(jsonFile=opts.configFile)
    # Keep the parsed chemprop arguments under their own name instead of
    # rebinding `opts`, whose annotated type is options.GnnOptions.
    train_args = cp.args.TrainArgs().parse_args(arguments)
    logging.info("Training DMPNN...")
    # cross_validate returns the mean and standard deviation of the score
    # over the folds.
    mean_score, std_score = cp.train.cross_validate(
        args=train_args, train_func=cp.train.run_training
    )
    logging.info(f"Results: {mean_score:.5f} +/- {std_score:.5f}")
7734
7835
def predictdmpnn(opts: options.GnnOptions) -> None:
    """
    Predict the values using a trained D-MPNN model with the given options.

    The chemprop prediction arguments are read from the JSON file referenced
    by ``opts.configFile``; the parsed arguments are passed to chemprop
    unchanged.

    Args:
        - opts: options.GnnOptions instance containing the details of the prediction
    Returns:
        - None
    """
    # Turn the JSON config file into an argument list for chemprop's parser.
    arguments = createArgsFromJson(jsonFile=opts.configFile)
    # Keep the parsed chemprop arguments under their own name instead of
    # rebinding `opts`, whose annotated type is options.GnnOptions.
    predict_args = cp.args.PredictArgs().parse_args(arguments)

    cp.train.make_predictions(args=predict_args)
11249
11350
11451def train (opts : options .Options ):
11552 """
11653 Run the main training procedure
11754 :param opts: Options defining the details of the training
11855 """
119-
120- os .environ ["CUDA_VISIBLE_DEVICES" ] = f"{ opts .gpu } "
121-
12256 # import data from file and create DataFrame
12357 if "tsv" in opts .inputFile :
12458 df = fp .importDataFile (
@@ -128,7 +62,7 @@ def train(opts: options.Options):
12862 df = fp .importDataFile (
12963 opts .inputFile , import_function = fp .importSmilesCSV , fp_size = opts .fpSize
13064 )
131- # initialize encoders to None
65+ # initialize (auto) encoders to None
13266 encoder = None
13367 autoencoder = None
13468 if opts .trainAC :
@@ -142,26 +76,27 @@ def train(opts: options.Options):
14276 # if feature compression is enabled
14377 if opts .compressFeatures :
14478 if not opts .trainAC :
145- if opts .aeType == "deterministic" :
146- (autoencoder , encoder ) = ac .define_ac_model (opts = options .Options ())
147- elif opts .aeType == "variational" :
79+ if opts .aeType == "variational" :
14880 (autoencoder , encoder ) = vae .define_vae_model (opts = options .Options ())
149- elif opts .ecWeightsFile == "" :
150- encoder = load_model (opts . ecModelDir )
81+ elif opts .aeType == "deterministic " :
82+ ( autoencoder , encoder ) = ac . define_ac_model (opts = options . Options () )
15183 else :
152- autoencoder .load_weights (
153- os .path .join (opts .ecModelDir , opts .ecWeightsFile )
154- )
84+ raise ValueError (f"Unknown autoencoder type: { opts .aeType } " )
85+
86+ if opts .ecWeightsFile != "" :
87+ encoder .load_weights (os .path .join (opts .ecModelDir , opts .ecWeightsFile ))
15588 # compress the fingerprints using the autoencoder
15689 df = ac .compress_fingerprints (df , encoder )
157- # ac.visualize_fingerprints(
158- # df,
159- # before_col="fp",
160- # after_col="fpcompressed",
161- # train_indices=train_indices,
162- # test_indices=test_indices,
163- # save_as=f"UMAP_{opts.aeSplitType}.png",
164- # )
90+ if opts .visualizeLatent and opts .trainAC :
91+ logging .info ("Visualizing latent space" )
92+ ac .visualize_fingerprints (
93+ df ,
94+ before_col = "fp" ,
95+ after_col = "fpcompressed" ,
96+ train_indices = train_indices ,
97+ test_indices = test_indices ,
98+ save_as = f"UMAP_{ opts .aeSplitType } .png" ,
99+ )
165100 # train single label models if requested
166101 if opts .trainFNN and not opts .enableMultiLabel :
167102 sl .train_single_label_models (df = df , opts = opts )
@@ -193,10 +128,10 @@ def predict(opts: options.Options) -> None:
193128 if opts .aeType == "variational" :
194129 (autoencoder , encoder ) = vae .define_vae_model (opts = options .Options ())
195130 # Load trained model for autoencoder
196- if opts .ecWeightsFile == "" :
197- encoder = load_model (opts .ecModelDir )
198- else :
131+ if opts .ecWeightsFile != "" :
199132 encoder .load_weights (os .path .join (opts .ecModelDir , opts .ecWeightsFile ))
133+ else :
134+ raise ValueError ("No weights file specified for encoder" )
200135 df = ac .compress_fingerprints (df , encoder )
201136
202137 # Run predictions on the compressed fingerprints and store the results in a dataframe
@@ -257,36 +192,22 @@ def main():
257192 raise ValueError ("Input directory is not a directory" )
258193 elif prog_args .method == "traingnn" :
259194 traingnn_opts = options .GnnOptions .fromCmdArgs (prog_args )
260-
195+ createLogger ( "traingnn.log" )
261196 traindmpnn (traingnn_opts )
262197
263198 elif prog_args .method == "predictgnn" :
264199 predictgnn_opts = options .GnnOptions .fromCmdArgs (prog_args )
265- fixed_opts = dataclasses .replace (
266- predictgnn_opts ,
267- test_path = makePathAbsolute (predictgnn_opts .test_path ),
268- preds_path = makePathAbsolute (predictgnn_opts .preds_path ),
269- )
270-
271- logging .info (
272- f"The following arguments are received or filled with default values:\n { prog_args } "
273- )
274-
275- predictdmpnn (fixed_opts , prog_args .configFile )
200+ createLogger ("predictgnn.log" )
201+ predictdmpnn (predictgnn_opts )
276202
277203 elif prog_args .method == "train" :
278204 train_opts = options .Options .fromCmdArgs (prog_args )
279- fixed_opts = dataclasses .replace (
280- train_opts ,
281- inputFile = makePathAbsolute (train_opts .inputFile ),
282- outputDir = makePathAbsolute (train_opts .outputDir ),
283- )
284- createDirectory (fixed_opts .outputDir )
285- createLogger (path .join (fixed_opts .outputDir , "train.log" ))
205+ createDirectory (train_opts .outputDir )
206+ createLogger (path .join (train_opts .outputDir , "train.log" ))
286207 logging .info (
287- f"The following arguments are received or filled with default values:\n { fixed_opts } "
208+ f"The following arguments are received or filled with default values:\n { train_opts } "
288209 )
289- train (fixed_opts )
210+ train (train_opts )
290211 elif prog_args .method == "predict" :
291212 predict_opts = options .Options .fromCmdArgs (prog_args )
292213 fixed_opts = dataclasses .replace (
@@ -298,8 +219,6 @@ def main():
298219 ),
299220 ecModelDir = makePathAbsolute (predict_opts .ecModelDir ),
300221 fnnModelDir = makePathAbsolute (predict_opts .fnnModelDir ),
301- trainAC = False ,
302- trainFNN = False ,
303222 )
304223 createDirectory (fixed_opts .outputDir )
305224 createLogger (path .join (fixed_opts .outputDir , "predict.log" ))
0 commit comments