Skip to content

Commit

Permalink
Fix synth_pref functions (#511)
Browse files Browse the repository at this point in the history
* Add .DS_Store to .gitignore

* Add missing functions

* Move .DS_Store to main .gitignore
  • Loading branch information
ljvmiranda921 authored Jan 13, 2025
1 parent 7d8fbfe commit 45e0bde
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,5 @@ dmypy.json
.pyre/

.idea/
.vscode
.vscode
.DS_Store
62 changes: 62 additions & 0 deletions scripts/synth_pref/parse_preferences.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,67 @@ def get_resp(resp: dict[str, Any]) -> str:
return pref_df


def get_rating(resp: list[dict[str, Any]]) -> list[int]:
    """Extract the integer rating from every entry of a parsed response.

    Args:
        resp: Sequence of mappings, each of which must provide a ``"Rating"``
            key (a missing key raises ``KeyError``).

    Returns:
        One integer per entry, in input order. A rating that ``int()`` cannot
        convert (non-numeric text, ``None``) is reported as ``-1``.
    """

    def _parse_number(s: Any) -> int:
        # -1 is the sentinel for "could not parse". TypeError is caught as
        # well so that a missing (None) rating is treated like malformed
        # text instead of aborting the whole run.
        try:
            return int(s)
        except (ValueError, TypeError):
            return -1

    return [_parse_number(r["Rating"]) for r in resp]


def compute_mean_rating(row: dict[str, Any]) -> list[float]:
    """Average the per-aspect rating lists of one row, position by position.

    For every name in ``aspects`` (looked up from the enclosing scope; it is
    not defined in this function) the list stored under
    ``row[f"{aspect}_ratings"]`` is collected, and the lists are averaged
    element-wise across aspects while ignoring NaN entries.

    Args:
        row: Mapping that provides a ``"<aspect>_ratings"`` entry for each
            aspect.

    Returns:
        One mean per list position. Positions missing from shorter lists are
        treated as NaN and excluded from that position's mean.

    Raises:
        KeyError: If ``row`` lacks one of the ``"<aspect>_ratings"`` entries.
    """

    def _vmeans(data: list[list[int]]) -> list[float]:
        # Column-wise NaN-aware mean over a possibly jagged list of lists.
        try:
            array = np.array(data, dtype=float)
        except ValueError:
            # Jagged input cannot form a rectangular float array: pad the
            # shorter lists with NaN so nanmean skips the missing slots.
            max_len = max(len(ratings) for ratings in data)
            padded = [
                list(ratings) + [np.nan] * (max_len - len(ratings))
                for ratings in data
            ]
            array = np.array(padded, dtype=float)
        # NOTE(review): values are averaged as-is. If upstream encodes a
        # failed parse as a sentinel such as -1, it skews the mean - confirm
        # whether such values should be mapped to NaN first.
        return list(np.nanmean(array, axis=0))

    rating_matrix = [row[f"{aspect}_ratings"] for aspect in aspects]
    return _vmeans(rating_matrix)


def binarize_pref(row):
    """Turn one row of rated completions into a chosen/rejected pair.

    Only the first four entries of ``row["mean_ratings"]`` are considered.
    The entry with the highest rating is "chosen" (``np.argmax`` picks the
    first one on ties); "rejected" is drawn with ``np.random.choice`` from
    the remaining positions. With a single rating the same completion is
    used for both sides and a warning is logged.

    Args:
        row: Mapping-like record providing ``"mean_ratings"``,
            ``"completions"`` and ``"id"``, and optionally ``"models"``.

    Returns:
        A ``pd.Series`` with ``chosen_text``, ``rejected_text``,
        ``chosen_rating`` and ``rejected_rating`` (plus ``chosen_model`` and
        ``rejected_model`` when ``"models"`` is present), or ``None`` when
        there are no ratings or the pair cannot be assembled.
    """
    ratings = row["mean_ratings"][:4]
    if len(ratings) == 0:
        # np.argmax raises on an empty sequence; report the row as unusable
        # the same way a failed lookup below does.
        logging.warning(f"No ratings found for instance id: {row['id']}")
        return None

    chosen_idx = int(np.argmax(ratings))
    if len(ratings) == 1:
        logging.warning(f"Potential parse error for instance id: {row['id']}")
        rejected_idx = chosen_idx
    else:
        candidates = [i for i in range(len(ratings)) if i != chosen_idx]
        # Draw a scalar rather than a 1-element array: converting an array
        # with ndim > 0 via int() is deprecated in recent NumPy releases.
        rejected_idx = int(np.random.choice(candidates))

    try:
        data = {
            "chosen_text": row["completions"][chosen_idx],
            "rejected_text": row["completions"][rejected_idx],
            "chosen_rating": row["mean_ratings"][chosen_idx],
            "rejected_rating": row["mean_ratings"][rejected_idx],
        }
        if "models" in row:
            data["chosen_model"] = row["models"][chosen_idx]
            data["rejected_model"] = row["models"][rejected_idx]

        return pd.Series(data)

    except Exception as err:
        # Deliberately best-effort: a malformed row is dropped rather than
        # aborting the run, but the reason is logged instead of being lost.
        logging.warning("Could not build preference pair, skipping row: %s", err)
        return None


# Invoke main() only when this file is executed directly as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()

0 comments on commit 45e0bde

Please sign in to comment.