Skip to content

Commit 9135f0c

Browse files
danieldk (Daniël de Kok)
authored and
Daniël de Kok
committed
Update to finalfusion 0.9
1 parent 45e8ff9 commit 9135f0c

File tree

7 files changed

+72
-55
lines changed

7 files changed

+72
-55
lines changed

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ features = ["extension-module"]
2121
[dependencies]
2222
itertools = "0.8"
2323
failure = "0.1"
24-
finalfusion = "0.8.2"
24+
finalfusion = "0.9"
2525
libc = "0.2"
2626
ndarray = "0.12"
2727
numpy = "0.6"

nix/crates-io.nix

Lines changed: 40 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -652,58 +652,58 @@ item that gets emitted.
652652

653653

654654
# end
655-
# finalfusion-0.8.0
655+
# finalfusion-0.9.0
656656

657-
crates.finalfusion."0.8.0" = deps: { features?(features_."finalfusion"."0.8.0" deps {}) }: buildRustCrate {
657+
crates.finalfusion."0.9.0" = deps: { features?(features_."finalfusion"."0.9.0" deps {}) }: buildRustCrate {
658658
crateName = "finalfusion";
659-
version = "0.8.0";
659+
version = "0.9.0";
660660
description = "Reader and writer for common word embedding formats";
661661
homepage = "https://github.com/finalfusion/finalfusion-rust";
662662
authors = [ "Daniël de Kok <[email protected]>" ];
663663
edition = "2018";
664-
sha256 = "1rbm5c95mxyiq07wzir2xzv0h7ryxfkwli3hnbabra8hpq05dj76";
664+
sha256 = "05p31mv12h9168cbi62b6grk7spq6v9g8qr87pxgrhpllz93a2zr";
665665
dependencies = mapFeatures features ([
666-
(crates."byteorder"."${deps."finalfusion"."0.8.0"."byteorder"}" deps)
667-
(crates."fnv"."${deps."finalfusion"."0.8.0"."fnv"}" deps)
668-
(crates."itertools"."${deps."finalfusion"."0.8.0"."itertools"}" deps)
669-
(crates."memmap"."${deps."finalfusion"."0.8.0"."memmap"}" deps)
670-
(crates."ndarray"."${deps."finalfusion"."0.8.0"."ndarray"}" deps)
671-
(crates."ordered_float"."${deps."finalfusion"."0.8.0"."ordered_float"}" deps)
672-
(crates."rand"."${deps."finalfusion"."0.8.0"."rand"}" deps)
673-
(crates."rand_xorshift"."${deps."finalfusion"."0.8.0"."rand_xorshift"}" deps)
674-
(crates."reductive"."${deps."finalfusion"."0.8.0"."reductive"}" deps)
675-
(crates."serde"."${deps."finalfusion"."0.8.0"."serde"}" deps)
676-
(crates."toml"."${deps."finalfusion"."0.8.0"."toml"}" deps)
666+
(crates."byteorder"."${deps."finalfusion"."0.9.0"."byteorder"}" deps)
667+
(crates."fnv"."${deps."finalfusion"."0.9.0"."fnv"}" deps)
668+
(crates."itertools"."${deps."finalfusion"."0.9.0"."itertools"}" deps)
669+
(crates."memmap"."${deps."finalfusion"."0.9.0"."memmap"}" deps)
670+
(crates."ndarray"."${deps."finalfusion"."0.9.0"."ndarray"}" deps)
671+
(crates."ordered_float"."${deps."finalfusion"."0.9.0"."ordered_float"}" deps)
672+
(crates."rand"."${deps."finalfusion"."0.9.0"."rand"}" deps)
673+
(crates."rand_xorshift"."${deps."finalfusion"."0.9.0"."rand_xorshift"}" deps)
674+
(crates."reductive"."${deps."finalfusion"."0.9.0"."reductive"}" deps)
675+
(crates."serde"."${deps."finalfusion"."0.9.0"."serde"}" deps)
676+
(crates."toml"."${deps."finalfusion"."0.9.0"."toml"}" deps)
677677
]);
678678
};
679-
features_."finalfusion"."0.8.0" = deps: f: updateFeatures f (rec {
680-
byteorder."${deps.finalfusion."0.8.0".byteorder}".default = true;
681-
finalfusion."0.8.0".default = (f.finalfusion."0.8.0".default or true);
682-
fnv."${deps.finalfusion."0.8.0".fnv}".default = true;
683-
itertools."${deps.finalfusion."0.8.0".itertools}".default = true;
684-
memmap."${deps.finalfusion."0.8.0".memmap}".default = true;
685-
ndarray."${deps.finalfusion."0.8.0".ndarray}".default = true;
686-
ordered_float."${deps.finalfusion."0.8.0".ordered_float}".default = true;
687-
rand."${deps.finalfusion."0.8.0".rand}".default = true;
688-
rand_xorshift."${deps.finalfusion."0.8.0".rand_xorshift}".default = true;
689-
reductive."${deps.finalfusion."0.8.0".reductive}".default = true;
679+
features_."finalfusion"."0.9.0" = deps: f: updateFeatures f (rec {
680+
byteorder."${deps.finalfusion."0.9.0".byteorder}".default = true;
681+
finalfusion."0.9.0".default = (f.finalfusion."0.9.0".default or true);
682+
fnv."${deps.finalfusion."0.9.0".fnv}".default = true;
683+
itertools."${deps.finalfusion."0.9.0".itertools}".default = true;
684+
memmap."${deps.finalfusion."0.9.0".memmap}".default = true;
685+
ndarray."${deps.finalfusion."0.9.0".ndarray}".default = true;
686+
ordered_float."${deps.finalfusion."0.9.0".ordered_float}".default = true;
687+
rand."${deps.finalfusion."0.9.0".rand}".default = true;
688+
rand_xorshift."${deps.finalfusion."0.9.0".rand_xorshift}".default = true;
689+
reductive."${deps.finalfusion."0.9.0".reductive}".default = true;
690690
serde = fold recursiveUpdate {} [
691-
{ "${deps.finalfusion."0.8.0".serde}"."derive" = true; }
692-
{ "${deps.finalfusion."0.8.0".serde}".default = true; }
691+
{ "${deps.finalfusion."0.9.0".serde}"."derive" = true; }
692+
{ "${deps.finalfusion."0.9.0".serde}".default = true; }
693693
];
694-
toml."${deps.finalfusion."0.8.0".toml}".default = true;
694+
toml."${deps.finalfusion."0.9.0".toml}".default = true;
695695
}) [
696-
(if deps."finalfusion"."0.8.0" ? "byteorder" then features_.byteorder."${deps."finalfusion"."0.8.0"."byteorder" or ""}" deps else {})
697-
(if deps."finalfusion"."0.8.0" ? "fnv" then features_.fnv."${deps."finalfusion"."0.8.0"."fnv" or ""}" deps else {})
698-
(if deps."finalfusion"."0.8.0" ? "itertools" then features_.itertools."${deps."finalfusion"."0.8.0"."itertools" or ""}" deps else {})
699-
(if deps."finalfusion"."0.8.0" ? "memmap" then features_.memmap."${deps."finalfusion"."0.8.0"."memmap" or ""}" deps else {})
700-
(if deps."finalfusion"."0.8.0" ? "ndarray" then features_.ndarray."${deps."finalfusion"."0.8.0"."ndarray" or ""}" deps else {})
701-
(if deps."finalfusion"."0.8.0" ? "ordered_float" then features_.ordered_float."${deps."finalfusion"."0.8.0"."ordered_float" or ""}" deps else {})
702-
(if deps."finalfusion"."0.8.0" ? "rand" then features_.rand."${deps."finalfusion"."0.8.0"."rand" or ""}" deps else {})
703-
(if deps."finalfusion"."0.8.0" ? "rand_xorshift" then features_.rand_xorshift."${deps."finalfusion"."0.8.0"."rand_xorshift" or ""}" deps else {})
704-
(if deps."finalfusion"."0.8.0" ? "reductive" then features_.reductive."${deps."finalfusion"."0.8.0"."reductive" or ""}" deps else {})
705-
(if deps."finalfusion"."0.8.0" ? "serde" then features_.serde."${deps."finalfusion"."0.8.0"."serde" or ""}" deps else {})
706-
(if deps."finalfusion"."0.8.0" ? "toml" then features_.toml."${deps."finalfusion"."0.8.0"."toml" or ""}" deps else {})
696+
(if deps."finalfusion"."0.9.0" ? "byteorder" then features_.byteorder."${deps."finalfusion"."0.9.0"."byteorder" or ""}" deps else {})
697+
(if deps."finalfusion"."0.9.0" ? "fnv" then features_.fnv."${deps."finalfusion"."0.9.0"."fnv" or ""}" deps else {})
698+
(if deps."finalfusion"."0.9.0" ? "itertools" then features_.itertools."${deps."finalfusion"."0.9.0"."itertools" or ""}" deps else {})
699+
(if deps."finalfusion"."0.9.0" ? "memmap" then features_.memmap."${deps."finalfusion"."0.9.0"."memmap" or ""}" deps else {})
700+
(if deps."finalfusion"."0.9.0" ? "ndarray" then features_.ndarray."${deps."finalfusion"."0.9.0"."ndarray" or ""}" deps else {})
701+
(if deps."finalfusion"."0.9.0" ? "ordered_float" then features_.ordered_float."${deps."finalfusion"."0.9.0"."ordered_float" or ""}" deps else {})
702+
(if deps."finalfusion"."0.9.0" ? "rand" then features_.rand."${deps."finalfusion"."0.9.0"."rand" or ""}" deps else {})
703+
(if deps."finalfusion"."0.9.0" ? "rand_xorshift" then features_.rand_xorshift."${deps."finalfusion"."0.9.0"."rand_xorshift" or ""}" deps else {})
704+
(if deps."finalfusion"."0.9.0" ? "reductive" then features_.reductive."${deps."finalfusion"."0.9.0"."reductive" or ""}" deps else {})
705+
(if deps."finalfusion"."0.9.0" ? "serde" then features_.serde."${deps."finalfusion"."0.9.0"."serde" or ""}" deps else {})
706+
(if deps."finalfusion"."0.9.0" ? "toml" then features_.toml."${deps."finalfusion"."0.9.0"."toml" or ""}" deps else {})
707707
];
708708

709709

nix/finalfusion-python.nix

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ rec {
120120
syn = "0.15.42";
121121
synstructure = "0.10.2";
122122
};
123-
deps.finalfusion."0.8.0" = {
123+
deps.finalfusion."0.9.0" = {
124124
byteorder = "1.3.2";
125125
fnv = "1.0.6";
126126
itertools = "0.8.0";
@@ -135,7 +135,7 @@ rec {
135135
};
136136
deps.finalfusion_python."0.4.0" = {
137137
failure = "0.1.5";
138-
finalfusion = "0.8.0";
138+
finalfusion = "0.9.0";
139139
itertools = "0.8.0";
140140
libc = "0.2.60";
141141
ndarray = "0.12.1";

src/embeddings.rs

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,24 @@ pub struct PyEmbeddings {
3434
embeddings: Rc<RefCell<EmbeddingsWrap>>,
3535
}
3636

37+
impl PyEmbeddings {
38+
/// Copy storage to an array.
39+
///
40+
/// This should only be used for storage types that do not provide
41+
/// an ndarray view that can be copied trivially, such as quantized
42+
/// storage.
43+
fn copy_storage_to_array(storage: &Storage) -> Array2<f32> {
44+
let (rows, dims) = storage.shape();
45+
46+
let mut array = Array2::<f32>::zeros((rows, dims));
47+
for idx in 0..rows {
48+
array.row_mut(idx).assign(&storage.embedding(idx).as_view());
49+
}
50+
51+
array
52+
}
53+
}
54+
3755
#[pymethods]
3856
impl PyEmbeddings {
3957
/// Load embeddings from the given `path`.
@@ -216,17 +234,16 @@ impl PyEmbeddings {
216234
StorageWrap::MmapArray(mmap) => mmap.view(),
217235
StorageWrap::NdArray(array) => array.0.view(),
218236
StorageWrap::QuantizedArray(quantized) => {
219-
let (rows, dims) = quantized.shape();
220-
let mut array = Array2::<f32>::zeros((rows, dims));
221-
for idx in 0..rows {
222-
array
223-
.row_mut(idx)
224-
.assign(&quantized.embedding(idx).as_view());
225-
}
237+
let array = Self::copy_storage_to_array(quantized);
238+
return array.to_pyarray(gil.python()).to_owned();
239+
}
240+
StorageWrap::MmapQuantizedArray(quantized) => {
241+
let array = Self::copy_storage_to_array(quantized);
226242
return array.to_pyarray(gil.python()).to_owned();
227243
}
228244
},
229245
};
246+
230247
matrix_view.to_pyarray(gil.python()).to_owned()
231248
}
232249

src/iter.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ impl PyIterProtocol for PyEmbeddingIterator {
3232
let embeddings = slf.embeddings.borrow();
3333
let vocab = embeddings.vocab();
3434

35-
if slf.idx < vocab.len() {
35+
if slf.idx < vocab.words_len() {
3636
let word = vocab.words()[slf.idx].to_string();
3737
let embed = embeddings.storage().embedding(slf.idx);
3838
let norm = embeddings.norms().map(|n| n.0[slf.idx]).unwrap_or(1.);

src/vocab.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ impl PyVocab {
6262
impl PySequenceProtocol for PyVocab {
6363
fn __len__(&self) -> PyResult<usize> {
6464
let embeds = self.embeddings.borrow();
65-
Ok(embeds.vocab().len())
65+
Ok(embeds.vocab().words_len())
6666
}
6767

6868
fn __getitem__(&self, idx: isize) -> PyResult<String> {

0 commit comments

Comments
 (0)