Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,698 changes: 4,698 additions & 0 deletions example/autorate/auto-rater.ipynb
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

qq: what is this .ipynb file for?

Large diffs are not rendered by default.

Binary file added example/autorate/data/Chapter 5 Rome.docx
Binary file not shown.
497 changes: 497 additions & 0 deletions example/autorate/data/rome.txt

Large diffs are not rendered by default.

2,647 changes: 1,244 additions & 1,403 deletions example/data_generation/immigration_gen_data.ipynb
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

qq: what is this .ipynb file for?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is used to generate synthetic immigration data by rephrasing.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"source": [
"from pykoi.chat import QuestionAnswerDatabase\n",
"from pykoi.rlhf import RLHFConfig\n",
"from pykoi.rlhf import SupervisedFinetuning"
"from pykoi.rlhf import PreTraining"
]
},
{
Expand Down Expand Up @@ -762,9 +762,9 @@
}
],
"source": [
"# run supervised finetuning\n",
"# run pre-training\n",
"config = RLHFConfig(base_model_path=\"elinas/llama-7b-hf-transformers-4.29\", dataset_type=\"local_db\")\n",
"rlhf_step1_sft = SupervisedFinetuning(config)\n",
"rlhf_step1_sft = PreTraining(config)\n",
"rlhf_step1_sft.train_and_save(\"./models/rlhf_step1_sft\")\n"
]
},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Demo for the supervised fine tuning.
"""Demo for the pre-training.

python -m example.rlhf.demo_supervised_finetuning_nike
python -m example.rlhf.demo_pre_training_nike
"""

from peft import LoraConfig, TaskType

from pykoi.rlhf import RLHFConfig, SupervisedFinetuning
from pykoi.rlhf import RLHFConfig, PreTraining

base_model_path = "meta-llama/Llama-2-7b-chat-hf"
dataset_name = "./output_self_instructed_data_nike_10k_2023_FULL.csv"
Expand Down Expand Up @@ -38,7 +38,7 @@
)


# run supervised finetuning
# run pre-training
config = RLHFConfig(
base_model_path=base_model_path,
dataset_type=dataset_type,
Expand All @@ -56,5 +56,5 @@
size_valid_set=size_valid_set,
lora_config_rl=lora_config,
)
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft = PreTraining(config)
rlhf_step1_sft.train_and_save(peft_model_path)
584 changes: 584 additions & 0 deletions example/rlhf/demo_supervised_finetuning_d2l_eval.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""Demo for the supervised fine tuning.
"""Demo for the pre-training.

python -m example.rlhf.supervised_finetuning_demo
python -m example.rlhf.pre_training_demo
"""

from pykoi.chat import QuestionAnswerDatabase
from pykoi.chat.db.constants import (QA_CSV_HEADER_ANSWER, QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_VOTE_STATUS)
from pykoi.rlhf import RLHFConfig, SupervisedFinetuning
from pykoi.rlhf import RLHFConfig, PreTraining

# get data from local database
qa_database = QuestionAnswerDatabase()
Expand All @@ -25,7 +25,7 @@
print(my_data_pd)
print("My local database has {} samples in total".format(my_data_pd.shape[0]))

# run supervised finetuning
# run pre-training
config = RLHFConfig(base_model_path="databricks/dolly-v2-3b", dataset_type="local_db")
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft = PreTraining(config)
rlhf_step1_sft.train_and_save("./models/rlhf_step1_sft")
56 changes: 56 additions & 0 deletions example/rlhf/supervised_finetuning_demo_d2l.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Demo for the supervised fine tuning.

python -m example.rlhf.supervised_finetuning_demo_d2l
"""

from peft import LoraConfig
from pykoi.chat import QuestionAnswerDatabase
from pykoi.chat.db.constants import (QA_CSV_HEADER_ANSWER, QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_VOTE_STATUS)
from pykoi.rlhf import RLHFConfig, SupervisedFinetuning

# get data from local database
qa_database = QuestionAnswerDatabase()
my_data_pd = qa_database.retrieve_all_question_answers_as_pandas()
my_data_pd = my_data_pd[
[
QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_ANSWER,
QA_CSV_HEADER_VOTE_STATUS,
]
]

# analyze the data
print(my_data_pd)
print("My local database has {} samples in total".format(my_data_pd.shape[0]))

# run supervised finetuning
config = RLHFConfig(base_model_path="mistralai/Mistral-7B-Instruct-v0.1",
dataset_type="local_csv", dataset_name="data/chapter22_trnvalfromseed_data_processed.csv",
train_test_split_ratio=0, # ratio for test set DH:TODO: COBINE TRAIN AND EVAL
max_seq_length=896,
per_device_eval_batch_size=1,
log_freq=20,
# dh: NOTE: 1 EPOCH iterates the dataset once. So log freq 20 means iterating 20 entries when training batch size = 1.
# (i.e., log_freq = 0.12 epoch when the dataset has 166 entires).
save_freq=40000,
num_train_epochs=20,
max_steps=-1, # if a positive number is given, it will override num_train_epochs
device_map="auto",
lora_config_rl=LoraConfig(
r=512,
lora_alpha=1024,
lora_dropout=0.05,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj", ], # "gate_proj","up_proj","down_proj",], #"lm_head",],
bias="none",
task_type="CAUSAL_LM"
),
data_collator="DataCollatorForCompletionOnlyLM",
no_evaluation=True,
prepare_text="d2l",
split = "train[:10%]"
)
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft.train_and_save("./models/rlhf_step1_sft")
14 changes: 14 additions & 0 deletions pykoi/rlhf/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from accelerate import Accelerator
from peft import LoraConfig, TaskType
# TODO: DH: num_train_epochs=20,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: what is this commented-out code for?



@dataclass
Expand Down Expand Up @@ -119,6 +120,7 @@ class RLHFConfig:
default="./rlhf_checkpoints",
metadata={"help": "Output directory for all model weights."},
)
num_train_epochs: Optional[int] = field(default=5, metadata={"help": "supervised fine tuning training epochs"})
log_freq: Optional[int] = field(default=1, metadata={"help": "Logging frequency."})
eval_freq: Optional[int] = field(
default=1000, metadata={"help": "Evaluation frequency."}
Expand Down Expand Up @@ -182,6 +184,18 @@ class RLHFConfig:
),
metadata={"help": "LoRA configuration."},
)
data_collator: Optional[str] = field(
default=None,
metadata={"help": "The name of data collator to use for training."},
)
no_evaluation: Optional[bool] = field(
default=False,
metadata={"help": "Whether to disable evaluations during training."},
)
prepare_text: Optional[str] = field(
default="sample",
metadata={"help": "How to prepare the text for the model."},
)

# Step 2 reward modeling parameters
reward_model_path: Optional[str] = field(
Expand Down
40 changes: 40 additions & 0 deletions pykoi/rlhf/customize_data_collator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Any, Dict, List, Union
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

qq: do we still need this customized collator, per our discussion?

from transformers import DataCollatorForLanguageModeling
import numpy as np


class DataCollatorForCompletionOnlyLM(DataCollatorForLanguageModeling):
    """Collator that masks out everything up to (and including) the response key.

    Built on top of ``DataCollatorForLanguageModeling``: after the parent
    builds the batch, every label token up through the ``### Response:`` marker
    is set to -100 so the loss is computed only on the completion tokens.
    """

    def torch_call(
        self, examples: List[Union[List[int], Any, Dict[str, Any]]]) -> Dict[str, Any]:
        """Collate *examples* and mask prompt tokens in the labels.

        Raises:
            RuntimeError: if the response-key token is absent from a sequence.
        """
        batch = super().torch_call(examples)

        # The prompt ends with the response key plus a newline. Encode it and
        # locate it in the token sequence.
        # NOTE(review): only the FIRST token of the encoded key is searched
        # for; this assumes the tokenizer maps the key to a single leading
        # token that is unambiguous in the sequence — TODO confirm per
        # tokenizer.
        RESPONSE_KEY = "### Response:"
        RESPONSE_KEY_NL = f"{RESPONSE_KEY}\n"
        response_token_ids = self.tokenizer.encode(RESPONSE_KEY_NL)

        labels = batch["labels"].clone()

        for i in range(len(examples)):
            # First position where the leading response-key token appears.
            matches = np.where(batch["labels"][i] == response_token_ids[0])[0]
            if matches.size == 0:
                raise RuntimeError(
                    f'Could not find response key {response_token_ids} in token IDs {batch["labels"][i]}'
                )
            response_token_ids_start_idx = matches[0]
            response_token_ids_end_idx = response_token_ids_start_idx + 1

            # Make the PyTorch loss function ignore all tokens up through the
            # end of the response key.
            labels[i, :response_token_ids_end_idx] = -100

        batch["labels"] = labels

        return batch
Loading