Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 28 additions & 101 deletions dfpl/__main__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import dataclasses
import logging
import os.path
import pathlib
from argparse import Namespace
from os import path

import chemprop as cp
import pandas as pd
from keras.models import load_model

from dfpl import autoencoder as ac
from dfpl import feedforwardNN as fNN
Expand All @@ -17,108 +14,45 @@
from dfpl import vae as vae
from dfpl.utils import createArgsFromJson, createDirectory, makePathAbsolute

project_directory = pathlib.Path(".").parent.parent.absolute()
test_train_opts = options.Options(
inputFile=f"{project_directory}/input_datasets/S_dataset.pkl",
outputDir=f"{project_directory}/output_data/console_test",
ecWeightsFile=f"{project_directory}/output_data/case_00/AE_S/ae_S.encoder.hdf5",
ecModelDir=f"{project_directory}/output_data/case_00/AE_S/saved_model",
type="smiles",
fpType="topological",
epochs=100,
batchSize=1024,
fpSize=2048,
encFPSize=256,
enableMultiLabel=False,
testSize=0.2,
kFolds=2,
verbose=2,
trainAC=False,
trainFNN=True,
compressFeatures=True,
activationFunction="selu",
lossFunction="bce",
optimizer="Adam",
fnnType="FNN",
)

test_pred_opts = options.Options(
inputFile=f"{project_directory}/input_datasets/S_dataset.pkl",
outputDir=f"{project_directory}/output_data/console_test",
outputFile=f"{project_directory}/output_data/console_test/S_dataset.predictions_ER.csv",
ecModelDir=f"{project_directory}/output_data/case_00/AE_S/saved_model",
fnnModelDir=f"{project_directory}/output_data/console_test/ER_saved_model",
type="smiles",
fpType="topological",
)


def traindmpnn(opts: options.GnnOptions):
def traindmpnn(opts: options.GnnOptions) -> None:
"""
Train a D-MPNN model using the given options.
Args:
- opts: options.GnnOptions instance containing the details of the training
Returns:
- None
"""
os.environ["CUDA_VISIBLE_DEVICES"] = f"{opts.gpu}"
ignore_elements = ["py/object"]
# Load options from a JSON file and replace the relevant attributes in `opts`
arguments = createArgsFromJson(
opts.configFile, ignore_elements, return_json_object=False
)
arguments = createArgsFromJson(jsonFile=opts.configFile)
opts = cp.args.TrainArgs().parse_args(arguments)
logging.info("Training DMPNN...")
# Train the model and get the mean and standard deviation of AUC score from cross-validation
mean_score, std_score = cp.train.cross_validate(
args=opts, train_func=cp.train.run_training
)
logging.info(f"Results: {mean_score:.5f} +/- {std_score:.5f}")


def predictdmpnn(opts: options.GnnOptions, json_arg_path: str) -> None:
def predictdmpnn(opts: options.GnnOptions) -> None:
"""
Predict the values using a trained D-MPNN model with the given options.
Args:
- opts: options.GnnOptions instance containing the details of the prediction
- JSON_ARG_PATH: path to a JSON file containing additional arguments for prediction
Returns:
- None
"""
ignore_elements = [
"py/object",
"checkpoint_paths",
"save_dir",
"saving_name",
]
# Load options and additional arguments from a JSON file
arguments, data = createArgsFromJson(
json_arg_path, ignore_elements, return_json_object=True
)
arguments.append("--preds_path")
arguments.append("")
save_dir = data.get("save_dir")
name = data.get("saving_name")
# Replace relevant attributes in `opts` with loaded options
arguments = createArgsFromJson(jsonFile=opts.configFile)
opts = cp.args.PredictArgs().parse_args(arguments)
opts.preds_path = save_dir + "/" + name
df = pd.read_csv(opts.test_path)
smiles = []
for index, rows in df.iterrows():
my_list = [rows.smiles]
smiles.append(my_list)
# Make predictions and return the result
cp.train.make_predictions(args=opts, smiles=smiles)

cp.train.make_predictions(args=opts)


def train(opts: options.Options):
"""
Run the main training procedure
:param opts: Options defining the details of the training
"""

os.environ["CUDA_VISIBLE_DEVICES"] = f"{opts.gpu}"

# import data from file and create DataFrame
if "tsv" in opts.inputFile:
df = fp.importDataFile(
Expand All @@ -128,9 +62,9 @@ def train(opts: options.Options):
df = fp.importDataFile(
opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize
)
# initialize encoders to None

# initialize (auto)encoders to None
encoder = None
autoencoder = None
if opts.trainAC:
if opts.aeType == "deterministic":
encoder, train_indices, test_indices = ac.train_full_ac(df, opts)
Expand All @@ -142,26 +76,26 @@ def train(opts: options.Options):
# if feature compression is enabled
if opts.compressFeatures:
if not opts.trainAC:
if opts.aeType == "deterministic":
(autoencoder, encoder) = ac.define_ac_model(opts=options.Options())
elif opts.aeType == "variational":
if opts.aeType == "variational":
(autoencoder, encoder) = vae.define_vae_model(opts=options.Options())
elif opts.ecWeightsFile == "":
encoder = load_model(opts.ecModelDir)
else:
(autoencoder, encoder) = ac.define_ac_model(opts=options.Options())

if opts.ecWeightsFile != "":
autoencoder.load_weights(
os.path.join(opts.ecModelDir, opts.ecWeightsFile)
os.path.join(opts.outputDir, opts.ecWeightsFile)
)
# compress the fingerprints using the autoencoder
df = ac.compress_fingerprints(df, encoder)
# ac.visualize_fingerprints(
# df,
# before_col="fp",
# after_col="fpcompressed",
# train_indices=train_indices,
# test_indices=test_indices,
# save_as=f"UMAP_{opts.aeSplitType}.png",
# )
if opts.visualizeLatent:
# visualize latent space only if you train the autoencoder
ac.visualize_fingerprints(
df,
comressed_col="fpcompressed",
train_indices=train_indices,
test_indices=test_indices,
save_as=f"UMAP_{opts.aeType}.png",
)
# train single label models if requested
if opts.trainFNN and not opts.enableMultiLabel:
sl.train_single_label_models(df=df, opts=opts)
Expand Down Expand Up @@ -190,12 +124,13 @@ def predict(opts: options.Options) -> None:
# load trained model for autoencoder
if opts.aeType == "deterministic":
(autoencoder, encoder) = ac.define_ac_model(opts=options.Options())
if opts.aeType == "variational":
elif opts.aeType == "variational":
(autoencoder, encoder) = vae.define_vae_model(opts=options.Options())
# Load trained model for autoencoder
if opts.ecWeightsFile == "":
encoder = load_model(opts.ecModelDir)
else:
raise ValueError(f"Unknown autoencoder type: {opts.aeType}")

# Load trained model for autoencoder
if opts.ecWeightsFile != "":
encoder.load_weights(os.path.join(opts.ecModelDir, opts.ecWeightsFile))
df = ac.compress_fingerprints(df, encoder)

Expand Down Expand Up @@ -257,7 +192,6 @@ def main():
raise ValueError("Input directory is not a directory")
elif prog_args.method == "traingnn":
traingnn_opts = options.GnnOptions.fromCmdArgs(prog_args)

traindmpnn(traingnn_opts)

elif prog_args.method == "predictgnn":
Expand All @@ -267,12 +201,7 @@ def main():
test_path=makePathAbsolute(predictgnn_opts.test_path),
preds_path=makePathAbsolute(predictgnn_opts.preds_path),
)

logging.info(
f"The following arguments are received or filled with default values:\n{prog_args}"
)

predictdmpnn(fixed_opts, prog_args.configFile)
predictdmpnn(fixed_opts)

elif prog_args.method == "train":
train_opts = options.Options.fromCmdArgs(prog_args)
Expand All @@ -298,8 +227,6 @@ def main():
),
ecModelDir=makePathAbsolute(predict_opts.ecModelDir),
fnnModelDir=makePathAbsolute(predict_opts.fnnModelDir),
trainAC=False,
trainFNN=False,
)
createDirectory(fixed_opts.outputDir)
createLogger(path.join(fixed_opts.outputDir, "predict.log"))
Expand Down
Loading
Loading