diff --git a/README.md b/README.md index ad98d4a..2142e4d 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,136 @@ Just a few guidelines to remember before you ask a question: - If your question has already been asked and answered adequately, please add a thumbs-up (or the emoji of your choice!) to the issue. This helps us in identifying common problems that people usually face. - Lastly, be civil and polite. :) + +- I am keep getting "Unicode Decode Error" when I run this code in jupyter lab. sometimes it works all the iterations sometimes not.... Please somebody help me....- + +!pip install -U pysr +import pandas as pd +import os + +def read_csv_to_numpy(file_path): + try: + # Specify the encoding explicitly to avoid UnicodeDecodeError + data = pd.read_csv(file_path, header=1, encoding="UTF-8").dropna() + return data + except Exception as e: + print(f"Failed to read {file_path} due to {e}") + return pd.DataFrame() # Return empty DataFrame on failure + +def load_data(file_names, data_path): + file_paths = [os.path.join(data_path, name) for name in file_names] + data_frames = [read_csv_to_numpy(path) for path in file_paths] + return pd.concat(data_frames, ignore_index=True) if data_frames else pd.DataFrame() + +# Example Usage: +# Update the path to your actual data directory +caljetData_path = "/Users/kevin/PSUresearch/2025PSUresearch/calijetDatacsv" +data = load_data([ + "0.1Ma_test1.csv", "0.1Ma_test2.csv", "0.1Ma_test3.csv", + "0.1Ma_test4.csv", "0.1Ma_test5.csv", "0.1Ma_test6.csv", + "0.1Ma_test7.csv", "0.1Ma_test8.csv", "0.1Ma_test9.csv", + "0.1Ma_test10.csv" +], caljetData_path) + +if data.empty: + print("No data loaded. Exiting.") +else: + # Extract necessary columns + X = data.iloc[:, :3].to_numpy() # Input Variables: Pitch, Yaw, Velocity + pit = X[:, 0] # Input Variable: Pitch + yaw = X[:, 1] # Input Variable: Yaw + in_u = X[:, 2] # Input Variable: Velocity + y = data.iloc[:, 9].to_numpy() # Assuming column 10 is Target Variable (Output): Pressure + + print("Data loaded successfully!") + print("X shape:", X.shape) + print("y shape:", y.shape) + +#Specify the data path +y_truepath = '/Users/kevin/PSUresearch/2025PSUresearch/calijetDatacsv' + +#Load y_true val +data = load_data([ + "averaged_0.1Ma.csv" +], y_truepath) + +if data.empty: + print("No data loaded. Exiting.") +else: + # Extract necessary columns + Press = data.iloc[:, :8].to_numpy() # Input Variable: Pitch, Yaw, Velocity + p1 = X[:, 3] + p2 = X[:, 4] + p3 = X[:, 5] + p4 = X[:, 6] + p5 = X[:, 7] + pavg = X[:, 8] + pstatic = X[:, 9] + pstag = X[:, 10] + + y = data.iloc[:, 11].to_numpy() # Assuming column 10 is Target Variable (Output): Pressure +# Example use case: Train a symbolic regression model +model = PySRRegressor( + model_selection='best', + unary_operators=["cos", "sin", "square", "inv(x) = 1/x", "exp"], + binary_operators=["+", "-", "/", "*"], + extra_sympy_mappings={"inv": lambda x: 1/x}, + niterations=400, + populations=30 +) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) +model.fit(X_train, y_train) +#model.get_best().equation +print(model.equations_) +def elementwise_loss(y_true, y_pred): + + # Convert inputs to numpy arrays + y_true = np.array(y_true) + y_pred = np.array(y_pred) + + # Element-wise squared error + squared_error = (y_true - y_pred) ** 2 + + # Element-wise absolute error + absolute_error = np.abs(y_true - y_pred) + + # MSE: mean of squared_error over axis=0 + mse = np.mean(squared_error, axis=0) + + # RMSE: square root of MSE + rmse = np.sqrt(mse) + + # MAE: mean of absolute_error + mae = np.mean(absolute_error, axis=0) + + # Scatter loss: Logarithmic scaled error + # (use small epsilon for numerical stability) + epsilon = 1e-20 + scatter_loss = np.abs(np.log((np.abs(y_pred) + epsilon) / (np.abs(y_true) + epsilon))) + + # Sign loss: Penalize differences in signs + sign_loss = 10 * (np.sign(y_pred) - np.sign(y_true)) ** 2 + + # Combined loss: sum of scatter_loss and sign_loss + combined_loss = scatter_loss + sign_loss + + # Return a dictionary of metrics + return { + "MSE": mse, + "RMSE": rmse, + "MAE": mae, + "Scatter Loss": np.mean(scatter_loss), + "Sign Loss": np.mean(sign_loss), + "Combined Loss": np.mean(combined_loss) + } + + + +y_pred = + +#Call the function +losses = elementwise_loss(y_true, y_pred) + +#Print out the results +print("Element-wise Losses:") +for key, value in losses.items():