Skip to content

[Prototype] Sandbox for Implementation of generate and integration of lm_eval (evaluation harness) #222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 47 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
b8f6b62
changes for debugging
Apr 3, 2025
4022249
temporal hack to save logits
Apr 8, 2025
505e658
simple generate function and less hacky way to save logits
Apr 8, 2025
a53855b
moved mkdir
Apr 8, 2025
c14ca4d
fast llm classes
bigximik Apr 8, 2025
6a72203
refactored logits saving, test and added hidden_test return from the …
Apr 11, 2025
e713aa2
added notebook to check logits and hidden states diffs
Apr 11, 2025
04e914c
Merge branch 'denis/generate' of github.com:ServiceNow/Fast-LLM into …
Apr 11, 2025
51f59f8
fix to a document mask for attention mask
Apr 14, 2025
9c01471
fix for an absent attention_mask
Apr 15, 2025
7f1ca8a
updated classes and functions naming, removed temporary param from init
Apr 17, 2025
0488fdb
updated manual test
Apr 17, 2025
c65d9ba
evaluation abstraction implementation
Apr 18, 2025
1543a56
fixes for evaluation only in trainer
Apr 18, 2025
dc2b5e0
added evaluate command
Apr 18, 2025
0cb3ad7
lm_eval integration, one gpu
Apr 20, 2025
c86ae20
fixing typos
Apr 21, 2025
145ee50
fixes to make lm_eval reporting to work with wrapper object instead o…
Apr 21, 2025
85b19d8
comments and some code formatting
Apr 22, 2025
b5603ed
merge from main
Apr 29, 2025
66a45ca
steps towards distributed inference
bigximik Apr 29, 2025
938a273
more manual tests
bigximik Apr 29, 2025
eb734d9
partial implementation of data parallel lm_eval integration
bigximik May 2, 2025
4e2175a
more communication primitives added
bigximik May 5, 2025
d1addda
temporarily create hf model wrapper in training the same as standalone
bigximik May 5, 2025
a880cd3
finished batch data parallel support for lm_eval integration
bigximik May 5, 2025
4b148e0
cleaned up lm_eval arg parser, partially wrapper and renamed wrapper …
bigximik May 6, 2025
047852e
removed HF hub params handling and tokenizer parallelism setting
bigximik May 6, 2025
0ae1b12
renamed evaluation to evaluator for relevant classes
bigximik May 6, 2025
c06d652
moved get_flops to model and made evaluator training vs evaluation on…
bigximik May 7, 2025
a535aae
more general run interface evaluator and class for return instead of …
bigximik May 7, 2025
929a1de
moved instantiation to get_evaluator, configurable cleanup
bigximik May 7, 2025
371d46b
fix to support models with different vocab_sizes in model config and …
bigximik May 9, 2025
c0569a4
refactored for wrapper to have from_model factory class
bigximik May 9, 2025
2be37ac
added TrainingEvaluatorConfig, changed api to accept separate compone…
bigximik May 9, 2025
fcba8f7
new api examples changes
bigximik May 9, 2025
8005299
refactored returning of sampling params from evaluators, renamed Tra…
bigximik May 9, 2025
7ed6375
removed done todo
bigximik May 9, 2025
9ef3f1c
removed can_generate override as default implementation works
bigximik May 9, 2025
5630571
removed outputs from a notebook and the big image
bigximik May 9, 2025
993c84e
restored HuggingfaceBaseModel class name
bigximik May 9, 2025
98bb14e
streamlined hidden_states return
bigximik May 9, 2025
3787ed3
moved evaluation stuff to engine/evaluation
bigximik May 9, 2025
45f15e0
added backward compatibility for evaluations config field
bigximik May 9, 2025
7a1e773
merge from main
bigximik May 9, 2025
720f169
update of distributed manual test
bigximik May 12, 2025
b784c98
fix for optimizer load
bigximik Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
387 changes: 387 additions & 0 deletions check_logits_hidden_layers.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,387 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fast_llm.data.dataset.gpt.memmap import GPTMemmapDataset\n",
"from pathlib import Path\n",
"import numpy as np\n",
"from transformers import AutoTokenizer\n",
"import torch\n",
"import pickle\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"files_root = Path(\"/mnt/datasets/tests/denis/tensors_f32/\")\n",
"#files_root = Path(\"/mnt/datasets/tests/denis/tensors/\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fm_files = {int(file.stem.split(\"tensor\")[1]): file for file in (files_root / \"fast_llm/logits/\").glob(\"tensor*.pt\")}\n",
"hf_files = {int(file.stem.split(\"tensor\")[1]): file for file in (files_root / \"hf/logits\").glob(\"tensor*.pt\")}\n",
"assert len(fm_files) == len(hf_files)\n",
"len(fm_files)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hf_tokens = []\n",
"fm_tokens = []\n",
"max_adiff = []\n",
"mean_adiff = []\n",
"sum_adiff = []\n",
"for i in range(len(fm_files)):\n",
" fm_data = torch.load(fm_files[i])\n",
" hf_data = torch.load(hf_files[i])\n",
" \n",
" hf_tokens.append(hf_data[0, -1, :].argmax().item())\n",
" fm_tokens.append(fm_data[0, -1, :].argmax().item())\n",
"\n",
" adiff = torch.abs(hf_data[0, -1, :] - fm_data[0, -1, :])\n",
" max_adiff.append(adiff.max().item())\n",
" mean_adiff.append(adiff.mean().item())\n",
" sum_adiff.append(adiff.sum().item())\n",
" \n",
"all(a == b for a, b in zip(hf_tokens, fm_tokens))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min(len(hf_tokens)+1 if ab[0] == ab[1] else i for i, ab in enumerate(zip(hf_tokens, fm_tokens)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True)\n",
"\n",
"# Left plot: max and mean absolute differences\n",
"axes[0].plot(max_adiff, label='max')\n",
"axes[0].plot(mean_adiff, label='mean')\n",
"axes[0].set_title('Max and Mean Absolute Difference')\n",
"axes[0].set_xlabel('Token Position Index')\n",
"axes[0].set_ylabel('Absolute Difference')\n",
"axes[0].legend()\n",
"axes[0].grid(True)\n",
"\n",
"# Right plot: sum absolute difference\n",
"axes[1].plot(sum_adiff, label='sum', color='tab:orange')\n",
"axes[1].set_title('Sum Absolute Difference')\n",
"axes[1].set_xlabel('Token Position Index')\n",
"axes[1].set_ylabel('Absolute Difference')\n",
"axes[1].legend()\n",
"axes[1].grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fm_hidden_files = {int(file.stem.split(\"data\")[1]): file for file in (files_root / \"fast_llm/hidden_states/\").glob(\"data*.pickle\")}\n",
"hf_hidden_files = {int(file.stem.split(\"data\")[1]): file for file in (files_root / \"hf/hidden_states\").glob(\"data*.pickle\")}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def mad(new_token_index, fm_hidden_files, hf_hidden_files):\n",
" with fm_hidden_files[new_token_index].open(\"rb\") as f:\n",
" fm_data = pickle.load(f)\n",
" with hf_hidden_files[new_token_index].open(\"rb\") as f:\n",
" hf_data = pickle.load(f)\n",
" max_adiffs_hidden_layers = []\n",
" for i in range(len(hf_data)):\n",
" max_adiff = torch.abs(hf_data[i][0,-1,:]-fm_data[i]['tensor'][0,-1,:]).max().item()\n",
" max_adiffs_hidden_layers.append(max_adiff)\n",
" return max_adiffs_hidden_layers\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"new_token_index = 107\n",
"new_token_index1 = 108\n",
"max_adiffs_hidden_layers = mad(0, fm_hidden_files, hf_hidden_files)\n",
"max_adiffs_hidden_layers2 = mad(new_token_index, fm_hidden_files, hf_hidden_files)\n",
"max_adiffs_hidden_layers3 = mad(new_token_index1, fm_hidden_files, hf_hidden_files)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True)\n",
"\n",
"axes[0].plot(max_adiffs_hidden_layers, label='new_token_0', color='blue')\n",
"axes[0].plot(max_adiffs_hidden_layers2, label=f'new_token_{new_token_index}', color='green')\n",
"axes[0].set_title('Max and Mean Absolute Difference')\n",
"axes[0].set_xlabel('Hidden Layer Index')\n",
"axes[0].set_ylabel('Max Absolute Difference')\n",
"axes[0].legend()\n",
"axes[0].grid(True)\n",
"\n",
"axes[1].plot(max_adiffs_hidden_layers, label='new_token_0', color='blue')\n",
"axes[1].plot(max_adiffs_hidden_layers3, label=f'new_token_{new_token_index1}', color='green')\n",
"axes[1].set_title('Max and Mean Absolute Difference')\n",
"axes[1].set_xlabel('Hidden Layer Index')\n",
"axes[1].set_ylabel('Max Absolute Difference')\n",
"axes[1].legend()\n",
"axes[1].grid(True)\n",
"\n",
"\n",
"\n",
"plt.title('Per-layer Max Absolute Differences')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(hf_tokens_bf16[106:120])\n",
"print(fm_tokens_b16[106:120])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(hf_tokens[106:120])\n",
"print(fm_tokens[106:120])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hf_tokens_bf16 = hf_tokens\n",
"fm_tokens_b16 = fm_tokens"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min(len(hf_tokens)+1 if ab[0] == ab[1] else i for i, ab in enumerate(zip(hf_tokens, fm_tokens)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min(len(hf_tokens)+1 if ab[0] == ab[1] else i for i, ab in enumerate(zip(hf_tokens, hf_tokens_bf16)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min(len(hf_tokens)+1 if ab[0] == ab[1] else i for i, ab in enumerate(zip(fm_tokens, fm_tokens_b16)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min(len(hf_tokens)+1 if ab[0] == ab[1] else i for i, ab in enumerate(zip(hf_tokens, fm_tokens)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import safetensors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# this is just to show possibility\n",
"# assumes no conversion of key names or tensors or aggregation of tensors is needed\n",
"def load(path, model):\n",
" with safetensors.safe_open(path, 'pt', device=model.distributed.device) as f:\n",
" key = 'model.embed_tokens.weight'\n",
" # this would load only part of the tensor for this tensor parallel, etc rank\n",
" # get_local_slice_ranges would return a multidimensional range object \n",
" tensor = f.get_slice(key)[model.get_local_slice_ranges(key)]\n",
" model.import_tensor(key, tensor)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fast_llm.engine.distributed.config import DistributedConfig"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"| rank | local_rank | tensor_rank | pipeline_rank | data_rank | sequence_data_rank | batch_data_rank | | | | | | |\")\n",
"print(\"| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\")\n",
"for rank in range(16):\n",
" cfg = DistributedConfig(rank=rank, world_size=16, local_world_size=8, tensor_parallel=2, pipeline_parallel=2, sequence_data_parallel=2, pipeline_first=True)\n",
" res = f\"| {cfg.rank} | {cfg.local_rank} | {cfg.tensor_rank} | {cfg.pipeline_rank} | {cfg.data_rank} | {cfg.sequence_data_rank} | {cfg.batch_data_rank} |\"\n",
" for name, dm in cfg.distributed_dims.items():\n",
" if name == 'world':\n",
" continue\n",
" res += f\"{name}_{dm.id} |\"\n",
" print(res)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = '|'\n",
"for name, dm in cfg.distributed_dims.items():\n",
" if name == 'world':\n",
" continue\n",
" res += f\"{name}_{dm.id} |\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(\"/mnt/checkpoints/test/denis/smol_eval_experiment_test/lm_eval/batch_0.pkl\", 'rb') as f:\n",
" data = pickle.load(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data[1:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading