-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Changes for v0.1.0 of the Rust version
- Loading branch information
1 parent
52c8bcc
commit 66b26b6
Showing
10 changed files
with
124 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
|
||
use llm_models::tokenizer::LlmTokenizer; | ||
use crate::messages::errors; | ||
use std::path::PathBuf; | ||
|
||
pub fn tokenize_content(text: &str) -> Result< Vec<u32>, errors::ContentError > { | ||
|
||
let wd_folder: String = "/Users/franciscome/git/iteralabs/molina".to_owned(); | ||
let in_folder: &str = "/models"; | ||
let in_subfolder: &str = "/Meta-Llama-3-8B-Instruct"; | ||
let in_file: &str = "/tokenizer.json"; | ||
let in_path = wd_folder.clone() + in_folder + in_subfolder + in_file; | ||
let path_buf: PathBuf = PathBuf::from(in_path); | ||
|
||
let llama_tokenizer = LlmTokenizer::new_from_tokenizer_json(&path_buf).unwrap(); | ||
let llama_tokens = llama_tokenizer.tokenize(text); | ||
|
||
Ok(llama_tokens) | ||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1 @@ | ||
// Placeholder | ||
|
||
pub mod loader; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
use std::fs; | ||
use std::path::Path; | ||
|
||
/// Recursively collects the paths of all `.pdf` files under `dir`.
///
/// Returns one string per file whose extension is exactly `pdf` (the match is
/// case-sensitive). Paths are converted with `to_string_lossy`, so any
/// non-UTF-8 bytes in a path are replaced with U+FFFD.
///
/// The search is best-effort: if `dir` is not a directory, or a directory or
/// entry cannot be read, it is skipped silently. The result is empty when
/// nothing is found.
pub fn load_files(dir: &str) -> Vec<String> {
    let mut pdf_files = Vec::new();
    collect_pdf_files(Path::new(dir), &mut pdf_files);
    pdf_files
}

/// Appends every `.pdf` path found under `dir` (recursively) to `found`.
///
/// Works on `&Path` so that recursion never has to round-trip a directory
/// name through `&str`, which would fail for non-UTF-8 names.
fn collect_pdf_files(dir: &Path, found: &mut Vec<String>) {
    // `read_dir` fails for non-directories and unreadable directories alike;
    // both cases contribute nothing to the result.
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return,
    };

    // Entries that fail to read are dropped rather than aborting the walk.
    for entry in entries.filter_map(Result::ok) {
        let entry_path = entry.path();

        if entry_path.is_dir() {
            // Descend into subdirectories.
            collect_pdf_files(&entry_path, found);
        } else if entry_path.extension().map_or(false, |ext| ext == "pdf") {
            found.push(entry_path.to_string_lossy().into_owned());
        }
    }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
// Placeholder | ||
pub mod loader; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters