yigbt
diff --git a/‎.RData‎
48 Bytes b/‎.RData‎
48 Bytes
diff --git a/‎.Rhistory‎
Lines changed: 5 additions & 0 deletions b/‎.Rhistory‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 5 additions & 5 deletions b/‎README.md‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎cases/regression_example/create_regression_data.py‎
Lines changed: 2 additions & 2 deletions b/‎cases/regression_example/create_regression_data.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dfpl/normalization.py‎
Lines changed: 1 addition & 1 deletion b/‎dfpl/normalization.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dfpl/single_label_model.py‎
Lines changed: 34 additions & 3 deletions b/‎dfpl/single_label_model.py‎
Lines changed: 34 additions & 3 deletions
diff --git a/‎example/AR/comp_normal_additional/AR_saved_model/variables/variables.data-00000-of-00001‎
-175 KB b/‎example/AR/comp_normal_additional/AR_saved_model/variables/variables.data-00000-of-00001‎
-175 KB
@@ -0,0 +1,5 @@
+install.packages("rJava")
+q()
+install.packages("rJava")
+install.packages("rJava", type = "source")
+q()
@@ -143,15 +143,15 @@ you can provide all necessary information as commandline-parameters. Check
 
 ```shell script
 python -m dfpl --help
-python -m dfpl train --help
-python -m dfpl predict --help
+python -m dfpl train-good1 --help
+python -m dfpl predict-good1 --help
 ```
 
 However, using JSON files that contain all train/predict options is an easy way to preserve what was run, and you can use
 them instead of providing multiple commandline arguments.
 
 ```shell script
-python -m dfpl train -f path/to/file.json
+python -m dfpl train-good1 -f path/to/file.json
 ```
 
 See the JSON files under `validation/case_XX` for examples. Also, you can use the following to create template
@@ -161,7 +161,7 @@ JSON files for training or prediction
 import dfpl.options as opts
 
 train_opts = opts.Options()
-train_opts.saveToFile("train.json")
+train_opts.saveToFile("train-good1.json")
 
 predict_opts = opts.Options()
 predict_opts.saveToFile("predict_bestER03.json")
@@ -176,7 +176,7 @@ of `dfpl.options.TrainingOptions` or
 import dfpl.__main__ as main
 import dfpl.options as opts
 
-o = opts.Options.fromJson("/path/to/train.json")
+o = opts.Options.fromJson("/path/to/train-good1.json")
 main.train(o)
 ```
 
 
@@ -8,15 +8,15 @@
 def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--filename",
-                        help="Path to CSV file containing classification train data which shall be randomly "
+                        help="Path to CSV file containing classification train-good1 data which shall be randomly "
                              "transformed to regression data")
     args = parser.parse_args()
     return args
 
 
 def main():
     """
-    Reads the 'example/train_data.csv' file containing train data. Generates random values in (0, 2] for active
+    Reads the 'example/train_data.csv' file containing train-good1 data. Generates random values in (0, 2] for active
     compounds (class: 1) and 0 for inactive compounds. Values are replaced. The original column names are kept. The
     result is written to a new CSV file 'example/train_data_reg.csv' where '_reg' stands for regression.
 
 
@@ -16,7 +16,7 @@ def normalize_acc_values(df, column_name='AR', output_dir='.'):
     """
     logging.info("Normalizing ACC values...")
     print("Normalizing ACC values...")
-    scaler = MinMaxScaler(feature_range=(0, 1))
+    scaler = MinMaxScaler(feature_range=(-1, 1))
     acc_values = df[column_name].values.reshape(-1, 1)
     scaled_acc_values = scaler.fit_transform(acc_values)
     df[column_name] = scaled_acc_values
 
@@ -2,6 +2,7 @@
 import math
 import shutil
 import sys
+import os
 from os import path
 from time import time
 
@@ -313,7 +314,7 @@ def acper(y_true, y_pred, t: float = 0.02):
             yield False
 
 
-def evaluate_regression_model(x_test: np.ndarray, y_test: np.ndarray, file_prefix: str, model: Model,
+def evaluate_regression_model(x_test: np.ndarray, y_test: np.ndarray,file_prefix: str, model: Model,
                               target: str, fold: int, threshold: float = 0.05) -> pd.DataFrame:
     """
     This function returns the values of performance metrics for the regression model.
@@ -338,7 +339,6 @@ def evaluate_regression_model(x_test: np.ndarray, y_test: np.ndarray, file_prefi
 
     y_predict = model.predict(x_test).flatten()
     pd.DataFrame(y_predict).to_csv(path_or_buf=f"{file_prefix}.y_test_predict.csv")
-
     error = np.array(y_predict) - np.array(y_test)
     abs_error = abs(error)
 
@@ -472,6 +472,7 @@ def fit_and_evaluate_model(x_train: np.ndarray, x_test: np.ndarray, y_train: np.
     # use callback model for evaluation
     callback_model = define_single_label_model(input_size=x_train.shape[1], opts=opts)
     callback_model.load_weights(filepath=checkpoint_model_weights_path)
+   # save_split_data(x_train, x_test, y_train, y_test, fold=fold, target=target,opts=opts)
 
     if opts.fnnType == 'REG':
         pl.plot_loss(hist=hist, file=f"{model_file_prefix}.history.jpg")
@@ -485,6 +486,20 @@ def fit_and_evaluate_model(x_train: np.ndarray, x_test: np.ndarray, y_train: np.
                                      target=target, fold=fold)
 
     return performance
+#def save_split_data(x_train, x_test, y_train, y_test, fold, target,opts: options.Options):
+#    """Helper function to save combined x and y data in the same CSV files for train/test splits."""
+#        # Combine x and y into a single DataFrame for train and test
+#    train_df = pd.DataFrame(x_train)
+#    train_df[target] = y_train  # Adding y values as a new column to the x data
+
+#    test_df = pd.DataFrame(x_test)
+#    test_df[target] = y_test  # Adding y values as a new column to the x data
+
+        # Generate file names based on fold_no (0 for single fold) and save CSVs
+#    train_df.to_csv(os.path.join(opts.outputDir, f"train_fold_{fold}_{target}.csv"), index=True)
+#    test_df.to_csv(os.path.join(opts.outputDir, f"test_fold_{fold}_{target}.csv"), index=True)
+
+
 
 
 def train_single_label_models(df: pd.DataFrame, opts: options.Options) -> None:
@@ -542,24 +557,36 @@ def train_single_label_models(df: pd.DataFrame, opts: options.Options) -> None:
             trained_model.load_weights(path.join(opts.outputDir, f"{target}_single-labeled_Fold-0.model.weights.hdf5"))
             trained_model.save(filepath=path.join(opts.outputDir, f"{target}_saved_model"))
 
+
         elif 1 < opts.kFolds < int(x.shape[0] / 100):
             # do a k-fold cross-validation
             if opts.fnnType != 'REG':
                 kfold_c_validator = StratifiedKFold(n_splits=opts.kFolds, shuffle=True, random_state=42)
             else:
                 kfold_c_validator = KFold(n_splits=opts.kFolds, shuffle=True, random_state=42)
+
+
+
             fold_no = 1
             # split the data
             for train, test in kfold_c_validator.split(x, y):
                 # for testing use one of the splits:
                 # kf = kfold_c_validator.split(x, y)
                 # train, test = next(kf)
+                train_indices_list = pd.DataFrame(train)
+                test_indices_list = pd.DataFrame(test)
+
                 performance = fit_and_evaluate_model(x_train=x[train], x_test=x[test],
                                                      y_train=y[train], y_test=y[test],
                                                      fold=fold_no, target=target, opts=opts)
                 performance_list.append(performance)
-                fold_no += 1
+
+               # fold_no += 1
                 # now next fold
+                train_indices_list.to_csv(os.path.join(opts.outputDir, f"train_fold_{fold_no}.csv"),index=False,header=["Train Index"])
+                test_indices_list.to_csv(os.path.join(opts.outputDir, f"test_fold_{fold_no}.csv"),index=False,header=["Test Index"])
+                fold_no += 1
+
 
         # select and copy best model - how to define the best model?
         if opts.fnnType == 'REG':
@@ -579,6 +606,10 @@ def train_single_label_models(df: pd.DataFrame, opts: options.Options) -> None:
                     ignore_index=True)['fold'][0]
             )
 
+
+
+
+
         # copy checkpoint model weights
         shutil.copy(
             src=path.join(opts.outputDir, f"{target}_single-labeled_Fold-{best_fold}.model.weights.hdf5"),