Skip to content

Commit eb02306

Browse files
committed
some files
1 parent 47c42f9 commit eb02306

File tree

80 files changed

+4918
-38926
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+4918
-38926
lines changed

build/lib/dfpl/__init__.py

Whitespace-only changes.

build/lib/dfpl/__main__.py

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
# Standard library
import dataclasses
import logging
import os
import pathlib
import pickle
import sys
from argparse import Namespace
from os import path
from pathlib import Path

# Third-party
import jsonpickle
import pandas as pd
import wandb
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras

# Project-local
from dfpl import autoencoder as ac
from dfpl import feedforwardNN as fNN
from dfpl import fingerprint as fp
from dfpl import normalization
from dfpl import options
from dfpl import predictions
from dfpl import single_label_model as sl
from dfpl.utils import makePathAbsolute, createDirectory
# BUG FIX: previously these two names were pulled in via a hard-coded
# sys.path.append("/home/shanavas/PycharmProjects/deepFPlearn/dfpl/") followed
# by a bare `from normalization import ...`, which only works on one machine
# and shadows the package import above. Import them from the package instead.
from dfpl.normalization import normalize_acc_values, inverse_transform_predictions

# NOTE(review): this starts a wandb run as a side effect of *importing* the
# module, and train() may call wandb.init() again when wabTracking is set —
# confirm whether this eager init is intentional (e.g. for sweep agents) or
# leftover debugging.
wandb.init()
31+
32+
# Repository root, resolved relative to the current working directory.
project_directory = pathlib.Path(".").parent.parent.absolute()

# Canned options for interactive/console training runs on the AR regression
# target (used when driving the pipeline from a console instead of the CLI).
test_train_opts = options.Options(
    inputFile=f"{project_directory}/input_datasets/toxcast_regression_AR.csv",
    outputDir=f"{project_directory}/output_data/console_test",
    ecWeightsFile=f"{project_directory}/output_data/case_regression_01/AR/ae.encoder.hdf5",
    ecModelDir=f"{project_directory}/output_data/case_regression_01/AR/saved_model",
    type="inchi",
    fpType="topological",
    epochs=100,
    batchSize=1024,
    fpSize=2048,
    encFPSize=256,
    enableMultiLabel=False,
    testSize=0.2,
    kFolds=1,
    verbose=2,
    trainAC=False,       # do not retrain the autoencoder
    trainFNN=True,       # do train the feed-forward regression model
    compressFeatures=False,
    activationFunction="selu",
    lossFunction="mae",
    optimizer="Adam",
    fnnType="REG",       # todo: replace useRegressionModel with fnnType variable
    wabTarget="AR",
    wabTracking=True,
    normalizeACC=False,  # dilshana: toggle target-value normalization
)

# Canned options for interactive/console prediction runs.
test_pred_opts = options.Options(
    inputFile=f"{project_directory}/input_datasets/S_dataset.pkl",
    outputDir=f"{project_directory}/output_data/console_test",
    outputFile=f"{project_directory}/output_data/console_test/S_dataset.predictions_ER.csv",
    ecModelDir=f"{project_directory}/output_data/case_00/AE_S/saved_model",
    fnnModelDir=f"{project_directory}/output_data/console_test/ER_saved_model",
    type="smiles",
    fpType="topological",
)
69+
70+
71+
def train(opts: options.Options):
    """
    Run the main training procedure.

    Loads the input data, optionally trains/loads an autoencoder to compress
    fingerprints, optionally normalizes the target column, and then trains the
    feed-forward model(s) selected by *opts*.

    :param opts: Options defining the details of the training
    """
    if opts.wabTracking:
        wandb.init(project=f"dfpl-reg-training-{opts.wabTarget}", entity="dfpl_regression", config=vars(opts))
        # opts = wandb.config

    # Import the data and compute fingerprints of size opts.fpSize.
    # df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)
    df = fp.importDataFile(opts.inputFile, import_function=fp.importCSV, fp_size=opts.fpSize)

    # Create output dir if it doesn't exist
    createDirectory(opts.outputDir)  # why? we just created that directory in the function before??

    encoder = None
    if opts.trainAC:
        # Train an autoencoder on the full feature matrix.
        encoder = ac.train_full_ac(df, opts)

    if opts.compressFeatures:
        if not opts.trainAC:
            # Reuse a previously trained autoencoder from disk.
            encoder = keras.models.load_model(opts.ecModelDir)
        # Replace the raw fingerprints with their compressed representation.
        df = ac.compress_fingerprints(df, encoder)

    if opts.normalizeACC:  # dilshana
        # NOTE(review): the target column name 'AR' is hard-coded here even
        # though opts.wabTarget exists — confirm whether it should be derived
        # from the options instead.
        df, _scaler_path = normalize_acc_values(df, column_name='AR', output_dir=opts.outputDir)

    if opts.trainFNN:
        # Train single-label models.
        # fNN.train_single_label_models(df=df, opts=opts)
        sl.train_single_label_models(df=df, opts=opts)

    # Train multi-label models.
    if opts.enableMultiLabel:
        fNN.train_nn_models_multi(df=df, opts=opts)
116+
117+
118+
#dilshana
119+
120+
def predict(opts: options.Options) -> None:
    """
    Run prediction given specific options.

    Loads the input molecules, optionally compresses their fingerprints with a
    trained autoencoder, predicts target values (optionally inverting a
    training-time normalization via the stored scaler), and writes the results
    minus the bulky fingerprint columns to ``opts.outputFile``.

    :param opts: Options defining the details of the prediction
    """
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Create output dir if it doesn't exist
    createDirectory(opts.outputDir)

    if opts.compressFeatures:
        # load trained model for autoencoder
        encoder = keras.models.load_model(opts.ecModelDir)
        # compress the fingerprints using the autoencoder
        df = ac.compress_fingerprints(df, encoder)

    # BUG FIX: this frame holds the (possibly compressed) *input* features at
    # this point, but was previously logged as "Raw predictions".
    logging.info(f"Input data: {df.head()}")

    # predict (leftover debug print of df2.head() removed)
    df2 = predictions.predict_values(df=df, opts=opts)
    logging.info(f"Raw predictions: {df2['predicted'].head()}")

    if opts.scalerFilePath:
        # Undo the target normalization that was applied during training.
        df2['predicted'] = inverse_transform_predictions(df2['predicted'].values, opts.scalerFilePath)
    else:
        # NOTE(review): this warning fires whenever no scaler path is given,
        # even for models trained without normalization — consider gating it
        # on opts.normalizeACC.
        logging.warning("Normalization is enabled but scalerFilePath is not provided in the options. Skipping normalization step.")

    # Drop the bulky fingerprint columns before writing the results.
    names_columns = [c for c in df2.columns if c not in ['fp', 'fpcompressed']]

    # Hoisted: the output path was previously computed twice.
    output_path = path.join(opts.outputDir, opts.outputFile)
    df2[names_columns].to_csv(path_or_buf=output_path)
    logging.info(f"Prediction successful. Results written to '{output_path}'")
161+
162+
163+
164+
165+
166+
167+
def createLogger(filename: str) -> None:
    """
    Set up the root logger to emit INFO-level messages both to the console and
    to a log file.

    :param filename: Path of the log file (opened in write mode, truncating
        any previous contents).
    """
    # Configure the root logger so every module-level logger inherits it.
    root = logging.getLogger()
    root.setLevel(logging.INFO)

    # File handler: verbose format including timestamp and logger name.
    file_handler = logging.FileHandler(filename, mode="w")
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )

    # Console handler: compact format for interactive use.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(logging.Formatter('%(levelname)-8s %(message)s'))

    for handler in (file_handler, console_handler):
        root.addHandler(handler)
188+
189+
190+
def main():
    """
    Main function that runs training/prediction defined by command line arguments
    """
    parser = options.createCommandlineParser()
    prog_args: Namespace = parser.parse_args()

    try:
        if prog_args.method == "convert":
            directory = makePathAbsolute(prog_args.f)
            # Guard clause: refuse anything that is not a directory.
            if not path.isdir(directory):
                raise ValueError("Input directory is not a directory")
            createLogger(path.join(directory, "convert.log"))
            logging.info(f"Convert all data files in {directory}")
            fp.convert_all(directory)
        if prog_args.method == "train":
            train_opts = options.Options.fromCmdArgs(prog_args)
            # Resolve relative paths before anything touches the filesystem.
            fixed_opts = dataclasses.replace(
                train_opts,
                inputFile=makePathAbsolute(train_opts.inputFile),
                outputDir=makePathAbsolute(train_opts.outputDir),
            )
            createDirectory(fixed_opts.outputDir)
            createLogger(path.join(fixed_opts.outputDir, "train.log"))
            logging.info(f"The following arguments are received or filled with default values:\n{fixed_opts}")
            train(fixed_opts)
            exit(0)
        elif prog_args.method == "predict":
            predict_opts = options.Options.fromCmdArgs(prog_args)
            # Resolve all paths and force pure-inference mode.
            fixed_opts = dataclasses.replace(
                predict_opts,
                inputFile=makePathAbsolute(predict_opts.inputFile),
                outputDir=makePathAbsolute(predict_opts.outputDir),
                outputFile=makePathAbsolute(path.join(predict_opts.outputDir, predict_opts.outputFile)),
                ecModelDir=makePathAbsolute(predict_opts.ecModelDir),
                fnnModelDir=makePathAbsolute(predict_opts.fnnModelDir),
                trainAC=False,
                trainFNN=False,
            )
            createDirectory(fixed_opts.outputDir)
            createLogger(path.join(fixed_opts.outputDir, "predict.log"))
            logging.info(f"The following arguments are received or filled with default values:\n{prog_args}")
            predict(fixed_opts)
            exit(0)
    except AttributeError as e:
        # A missing sub-command attribute means the CLI was invoked wrongly.
        print(e)
        parser.print_usage()


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)