Skip to content

Update README.md #547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 133 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,136 @@ Just a few guidelines to remember before you ask a question:
- If your question has already been asked and answered adequately, please add a thumbs-up (or the emoji of your choice!) to the issue. This helps us in identifying common problems that people usually face.
- Lastly, be civil and polite. :)


- I keep getting a "UnicodeDecodeError" when I run this code in JupyterLab. Sometimes it completes all the iterations and sometimes it does not. Could somebody please help me? -

!pip install -U pysr
import os

import numpy as np
import pandas as pd

def read_csv_to_numpy(file_path, encoding="UTF-8"):
    """Read one CSV file into a DataFrame, dropping incomplete rows.

    Despite the name, this returns a ``pandas.DataFrame``, not a NumPy
    array; callers convert with ``.to_numpy()`` themselves.

    Args:
        file_path: Path of the CSV file to read.
        encoding: Text encoding passed to ``pandas.read_csv``. Defaults to
            ``"UTF-8"``; pass e.g. ``"latin-1"`` for files that are not
            valid UTF-8.

    Returns:
        The file's contents with the second line used as the header row
        (``header=1``) and every row containing a missing value removed.
        If the file cannot be read for any reason, the failure is printed
        and an empty DataFrame is returned instead of raising.
    """
    try:
        frame = pd.read_csv(file_path, header=1, encoding=encoding)
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort a
        # multi-file load, so report it and hand back an empty frame.
        print(f"Failed to read {file_path} due to {e}")
        return pd.DataFrame()
    return frame.dropna()

def load_data(file_names, data_path):
    """Load several CSV files from one directory into a single DataFrame.

    Args:
        file_names: Iterable of file names located inside ``data_path``.
        data_path: Directory that contains the files.

    Returns:
        The row-wise concatenation (with a fresh 0..n-1 index) of every
        file that yielded data. Files for which ``read_csv_to_numpy``
        returned an empty frame are skipped. If nothing could be loaded,
        an empty DataFrame is returned.
    """
    frames = []
    for name in file_names:
        frame = read_csv_to_numpy(os.path.join(data_path, name))
        # Skip empty results (failed reads): concatenating empty frames is
        # deprecated in recent pandas and contributes no rows anyway.
        if not frame.empty:
            frames.append(frame)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Example usage: load the ten 0.1 Ma test runs.
# Update the path to your actual data directory.
caljetData_path = "/Users/kevin/PSUresearch/2025PSUresearch/calijetDatacsv"
data = load_data(
    [f"0.1Ma_test{i}.csv" for i in range(1, 11)],
    caljetData_path,
)

if data.empty:
    # NOTE(review): this only reports the problem; execution continues.
    print("No data loaded. Exiting.")
else:
    # Extract necessary columns.
    X = data.iloc[:, :3].to_numpy()  # Input variables: pitch, yaw, velocity
    pit = X[:, 0]   # Input variable: pitch
    yaw = X[:, 1]   # Input variable: yaw
    in_u = X[:, 2]  # Input variable: velocity
    # Assuming the 10th column (index 9) is the target variable: pressure.
    y = data.iloc[:, 9].to_numpy()

    print("Data loaded successfully!")
    print("X shape:", X.shape)
    print("y shape:", y.shape)

# Directory that holds the averaged reference data.
y_truepath = '/Users/kevin/PSUresearch/2025PSUresearch/calijetDatacsv'

# Load the reference ("true") values from the averaged file.
data = load_data([
    "averaged_0.1Ma.csv"
], y_truepath)

if data.empty:
    # NOTE(review): this only reports the problem; execution continues.
    print("No data loaded. Exiting.")
else:
    # First eight columns of the averaged file.
    # NOTE(review): the names below use columns 3..10 - confirm whether
    # Press was meant to be data.iloc[:, 3:11] instead.
    Press = data.iloc[:, :8].to_numpy()

    # Read columns 3..10 from the freshly loaded frame. The original indexed
    # X, which holds only three columns, so X[:, 3] raised IndexError.
    p1 = data.iloc[:, 3].to_numpy()
    p2 = data.iloc[:, 4].to_numpy()
    p3 = data.iloc[:, 5].to_numpy()
    p4 = data.iloc[:, 6].to_numpy()
    p5 = data.iloc[:, 7].to_numpy()
    pavg = data.iloc[:, 8].to_numpy()
    pstatic = data.iloc[:, 9].to_numpy()
    pstag = data.iloc[:, 10].to_numpy()

    # Reference values taken from the 12th column (index 11). Stored as
    # y_true so the training target `y` loaded earlier is not overwritten
    # with an array from a different file.
    # NOTE(review): confirm that index 11 is the intended pressure column.
    y_true = data.iloc[:, 11].to_numpy()
# Symbolic-regression example: configure PySR, fit it on a random 80/20
# split of (X, y), then show the table of discovered equations.
# NOTE(review): PySRRegressor and train_test_split are not imported in the
# visible code - presumably `from pysr import PySRRegressor` and
# `from sklearn.model_selection import train_test_split`; confirm.
model = PySRRegressor(
    niterations=400,
    populations=30,
    model_selection="best",
    binary_operators=["+", "-", "/", "*"],
    unary_operators=["cos", "sin", "square", "inv(x) = 1/x", "exp"],
    # Tell the sympy export how to interpret the custom `inv` operator.
    extra_sympy_mappings={"inv": lambda x: 1 / x},
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model.fit(X_train, y_train)

# The single selected equation is available via model.get_best().equation.
print(model.equations_)
def elementwise_loss(y_true, y_pred):
    """Compute a set of error metrics between targets and predictions.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        A dict with these keys:

        * ``"MSE"`` - mean squared error along axis 0.
        * ``"RMSE"`` - square root of the MSE.
        * ``"MAE"`` - mean absolute error along axis 0.
        * ``"Scatter Loss"`` - mean of ``|log(|y_pred| / |y_true|)|``.
        * ``"Sign Loss"`` - mean of ``10 * (sign(y_pred) - sign(y_true))**2``.
        * ``"Combined Loss"`` - mean of scatter loss plus sign loss.
    """
    # Cast to float so integer inputs (unsigned ones in particular) cannot
    # wrap around when subtracted.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    residual = y_true - y_pred
    squared_error = residual ** 2
    absolute_error = np.abs(residual)

    mse = np.mean(squared_error, axis=0)
    rmse = np.sqrt(mse)
    mae = np.mean(absolute_error, axis=0)

    # Log-ratio of magnitudes; the small epsilon keeps the ratio finite
    # when either value is exactly zero.
    epsilon = 1e-20
    scatter_loss = np.abs(
        np.log((np.abs(y_pred) + epsilon) / (np.abs(y_true) + epsilon))
    )

    # Penalize predictions whose sign differs from the target's sign.
    sign_loss = 10 * (np.sign(y_pred) - np.sign(y_true)) ** 2

    combined_loss = scatter_loss + sign_loss

    return {
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "Scatter Loss": np.mean(scatter_loss),
        "Sign Loss": np.mean(sign_loss),
        "Combined Loss": np.mean(combined_loss),
    }



y_pred =

#Call the function
losses = elementwise_loss(y_true, y_pred)

#Print out the results
print("Element-wise Losses:")
for key, value in losses.items():