diff --git a/src/runtime/c/pgf/db.cxx b/src/runtime/c/pgf/db.cxx index 98641e10d..444f0295f 100644 --- a/src/runtime/c/pgf/db.cxx +++ b/src/runtime/c/pgf/db.cxx @@ -143,6 +143,9 @@ static inline ssize_t get_mmap_size(size_t init_size, size_t page_size) return mmap_size; } +PGF_INTERNAL_DECL +size_t get_next_padovan(size_t min); + static inline ssize_t get_mremap_size(size_t file_size, size_t block_size, size_t free_size, size_t page_size) { size_t n_pages = diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index cd530b8f2..10a1c35b9 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -2264,13 +2264,7 @@ class PGF_INTERNAL PgfLinBuilder : public PgfLinBuilderIface throw pgf_error(builder_error_msg); ref entry = seq_id; - - size_t len = entry->backrefs->len; - ref> backrefs = - vector_resize(entry->backrefs, len+1, PgfDB::get_txn_id()); - backrefs->data[len].container = container; - backrefs->data[len].seq_index = seq_index; - entry->backrefs = backrefs; + phrasetable_add_backref(entry,PgfDB::get_txn_id(),container,seq_index); *vector_elem(seqs, seq_index) = entry->seq; diff --git a/src/runtime/c/pgf/phrasetable.cxx b/src/runtime/c/pgf/phrasetable.cxx index 1c2506ddc..96c553f1a 100644 --- a/src/runtime/c/pgf/phrasetable.cxx +++ b/src/runtime/c/pgf/phrasetable.cxx @@ -342,48 +342,27 @@ int backref_cmp(ref backref, ref lincat, siz return cmp; } -static -ref> phrasetable_update_backrefs(PgfPhrasetable table, - ref lincat, - object container, - size_t seq_index) +PGF_INTERNAL_DECL +size_t get_next_padovan(size_t min); + +PGF_INTERNAL_DECL +void phrasetable_add_backref(ref entry, txn_t txn_id, + object container, + size_t seq_index) { - size_t len = (table->value.backrefs != 0) - ? table->value.backrefs->len - : 0; - - ref> backrefs = - vector_resize(table->value.backrefs, len+1, table->txn_id); - ssize_t i = 0; - ssize_t j = len-1; - if (table->value.seq->syms.len == 0 && len > 0) { - // The backrefs for the epsilon sequence are sorted by lincat and r - - size_t r = (lincat!=0) ? (seq_index % lincat->fields->len) : 0; - while (i <= j) { - ssize_t k = (i + j) / 2; - ref backref = vector_elem(backrefs, k); - - int cmp = backref_cmp(backref, lincat, r); - if (cmp < 0) { - while (j >= k) { - backrefs->data[j+1] = backrefs->data[j]; - j--; - } - } else if (cmp > 0) { - i = k+1; - } else { - while (j > k) { - backrefs->data[j+1] = backrefs->data[j]; - j--; - } - break; - } - } + ref> backrefs = entry->backrefs; + + size_t len = (backrefs != 0) ? backrefs->len : 0; + if (entry->n_backrefs >= len) { + size_t new_len = get_next_padovan(entry->n_backrefs+1); + backrefs = PgfDB::realloc>(backrefs,len*sizeof(PgfSequenceBackref),new_len*sizeof(PgfSequenceBackref),txn_id); + backrefs->len = new_len; } - backrefs->data[j+1].container = container; - backrefs->data[j+1].seq_index = seq_index; - return backrefs; + backrefs->data[entry->n_backrefs].container = container; + backrefs->data[entry->n_backrefs].seq_index = seq_index; + + entry->n_backrefs++; + entry->backrefs = backrefs; } PGF_INTERNAL @@ -397,6 +376,7 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, if (table == 0) { PgfPhrasetableEntry entry; entry.seq = seq; + entry.n_backrefs = 1; entry.backrefs = vector_new(1); entry.backrefs->data[0].container = container; entry.backrefs->data[0].seq_index = seq_index; @@ -427,24 +407,16 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, } else { PgfSequence::release(seq); - size_t len = (table->value.backrefs) - ? table->value.backrefs->len - : 0; - - ref> backrefs = - phrasetable_update_backrefs(table,lincat,container,seq_index); - PgfPhrasetable new_table = Node::upd_node(table, table->left, table->right); - new_table->value.backrefs = backrefs; *pentry = ref::from_ptr(&new_table->value); + phrasetable_add_backref(*pentry,table->txn_id,container,seq_index); return new_table; } } PGF_INTERNAL ref phrasetable_relink(PgfPhrasetable table, - ref lincat, object container, size_t seq_index, size_t seq_id) @@ -454,14 +426,8 @@ ref phrasetable_relink(PgfPhrasetable table, if (seq_id < left_sz) table = table->left; else if (seq_id == left_sz) { - size_t len = (table->value.backrefs == 0) - ? 0 - : table->value.backrefs->len; - - ref> backrefs = - phrasetable_update_backrefs(table,lincat,container,seq_index); - table->value.backrefs = backrefs; - + auto entry = ref::from_ptr(&table->value); + phrasetable_add_backref(entry,table->txn_id,container,seq_index); return table->value.seq; } else { table = table->right; @@ -471,6 +437,7 @@ ref phrasetable_relink(PgfPhrasetable table, return 0; } +PGF_INTERNAL PgfPhrasetable phrasetable_delete(PgfPhrasetable table, object container, size_t seq_index, @@ -494,11 +461,12 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table, return Node::balanceL(table); } else { size_t len = table->value.backrefs->len; - if (len > 1) { + size_t n_backrefs = table->value.n_backrefs; + if (n_backrefs > 1) { ref> backrefs = - vector_resize(table->value.backrefs, len-1, table->txn_id); + PgfDB::realloc>(table->value.backrefs,len*sizeof(PgfSequenceBackref),n_backrefs*sizeof(PgfSequenceBackref),table->txn_id); size_t i = 0; - while (i < len-1) { + while (i < n_backrefs) { ref backref = vector_elem(backrefs, i); if (backref->container == container && @@ -508,14 +476,16 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table, i++; } i++; - while (i < len) { + while (i < n_backrefs) { *vector_elem(backrefs, i-1) = *vector_elem(table->value.backrefs, i); i++; } + n_backrefs--; PgfPhrasetable new_table = Node::upd_node(table, table->left, table->right); - new_table->value.backrefs = backrefs; + new_table->value.n_backrefs = n_backrefs; + new_table->value.backrefs = backrefs; return new_table; } else { PgfSequence::release(table->value.seq); @@ -570,24 +540,22 @@ void phrasetable_lookup(PgfPhrasetable table, phrasetable_lookup(table->right,sentence,case_sensitive,scanner,err); } else { auto backrefs = table->value.backrefs; - if (backrefs != 0) { - for (size_t i = 0; i < backrefs->len; i++) { - PgfSequenceBackref backref = *vector_elem(backrefs,i); - switch (ref::get_tag(backref.container)) { - case PgfConcrLin::tag: { - ref lin = ref::untagged(backref.container); - if (lin->absfun->type->hypos->len == 0) { - scanner->match(lin, backref.seq_index, err); - if (err->type != PGF_EXN_NONE) - return; - } - break; - } - case PgfConcrLincat::tag: { - //ignore - break; - } + for (size_t i = 0; i < table->value.n_backrefs; i++) { + PgfSequenceBackref backref = *vector_elem(backrefs,i); + switch (ref::get_tag(backref.container)) { + case PgfConcrLin::tag: { + ref lin = ref::untagged(backref.container); + if (lin->absfun->type->hypos->len == 0) { + scanner->match(lin, backref.seq_index, err); + if (err->type != PGF_EXN_NONE) + return; } + break; + } + case PgfConcrLincat::tag: { + //ignore + break; + } } } @@ -714,7 +682,7 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state, } state->queue.push(current); - for (size_t i = 0; i < backrefs->len; i++) { + for (size_t i = 0; i < table->value.n_backrefs; i++) { PgfSequenceBackref backref = *vector_elem(backrefs,i); switch (ref::get_tag(backref.container)) { case PgfConcrLin::tag: { @@ -849,7 +817,7 @@ void phrasetable_iter(PgfConcr *concr, return; if (table->value.backrefs != 0 && res == 0 && callback != 0) { - for (size_t i = 0; i < table->value.backrefs->len; i++) { + for (size_t i = 0; i < table->value.n_backrefs; i++) { PgfSequenceBackref backref = *vector_elem(table->value.backrefs,i); switch (ref::get_tag(backref.container)) { case PgfConcrLin::tag: { diff --git a/src/runtime/c/pgf/phrasetable.h b/src/runtime/c/pgf/phrasetable.h index b5553d2b4..720716861 100644 --- a/src/runtime/c/pgf/phrasetable.h +++ b/src/runtime/c/pgf/phrasetable.h @@ -6,6 +6,11 @@ struct PgfSequenceBackref; struct PGF_INTERNAL_DECL PgfPhrasetableEntry { ref seq; + + // Here n_backrefs tells us how many actual backrefs there are in + // the vector backrefs. On the other hand, backrefs->len tells us + // how big buffer we have allocated. + size_t n_backrefs; ref> backrefs; }; @@ -64,7 +69,6 @@ PgfPhrasetable phrasetable_internalize(PgfPhrasetable table, PGF_INTERNAL_DECL ref phrasetable_relink(PgfPhrasetable table, - ref lincat, object container, size_t seq_index, size_t seq_id); @@ -124,4 +128,11 @@ int text_sequence_cmp(PgfTextSpot *spot, const uint8_t *end, ref seq, size_t *p_i, bool case_sensitive, SeqMatch sm); +// The following is used internally in the grammar builder + +PGF_INTERNAL_DECL +void phrasetable_add_backref(ref entry, txn_t txn_id, + object container, + size_t seq_index); + #endif diff --git a/src/runtime/c/pgf/reader.cxx b/src/runtime/c/pgf/reader.cxx index f0a8b8366..387ac2b32 100644 --- a/src/runtime/c/pgf/reader.cxx +++ b/src/runtime/c/pgf/reader.cxx @@ -632,14 +632,14 @@ ref PgfReader::read_seq() return seq; } -ref>> PgfReader::read_seq_ids(ref lincat, object container) +ref>> PgfReader::read_seq_ids(object container) { size_t len = read_len(); ref>> vec = vector_new>(len); for (size_t i = 0; i < len; i++) { size_t seq_id = read_len(); ref seq = phrasetable_relink(concrete->phrasetable, - lincat, container, i, + container, i, seq_id); if (seq == 0) { throw pgf_error("Invalid sequence id"); @@ -659,6 +659,7 @@ PgfPhrasetable PgfReader::read_phrasetable(size_t len) size_t half = len/2; PgfPhrasetable left = read_phrasetable(half); value.seq = read_seq(); + value.n_backrefs = 0; value.backrefs = 0; PgfPhrasetable right = read_phrasetable(len-half-1); @@ -683,7 +684,7 @@ ref PgfReader::read_lincat() auto n_lindefs = read_len(); auto args = read_vector(&PgfReader::read_parg); auto res = read_vector(&PgfReader::read_presult2); - auto seqs = read_seq_ids(0, lincat.tagged()); + auto seqs = read_seq_ids(lincat.tagged()); lincat->abscat = namespace_lookup(abstract->cats, &lincat->name); lincat->fields = fields; @@ -718,7 +719,7 @@ ref PgfReader::read_lin() auto args = read_vector(&PgfReader::read_parg); auto res = read_vector(&PgfReader::read_presult2); - auto seqs = read_seq_ids(lin->lincat, lin.tagged()); + auto seqs = read_seq_ids(lin.tagged()); lin->args = args; lin->res = res; diff --git a/src/runtime/c/pgf/reader.h b/src/runtime/c/pgf/reader.h index 34eb64744..fc859418a 100644 --- a/src/runtime/c/pgf/reader.h +++ b/src/runtime/c/pgf/reader.h @@ -78,7 +78,7 @@ class PGF_INTERNAL_DECL PgfReader ref read_presult(); PgfSymbol read_symbol(); ref read_seq(); - ref>> read_seq_ids(ref lincat, object container); + ref>> read_seq_ids(object container); PgfPhrasetable read_phrasetable(size_t len); PgfPhrasetable read_phrasetable(); ref read_lin(); diff --git a/src/runtime/c/pgf/vector.h b/src/runtime/c/pgf/vector.h index 0a03b813f..48c17c435 100644 --- a/src/runtime/c/pgf/vector.h +++ b/src/runtime/c/pgf/vector.h @@ -28,25 +28,6 @@ ref vector_new(Vector C::* field, size_t len) return res; } -PGF_INTERNAL_DECL size_t -get_next_padovan(size_t min); - -/* Resize a vector by changing its length. If there is no enough space - * the implementation will create a copy, but whenever possible it will - * return the reference to the original vector. A copy is created also - * if txn_id is different from the current transaction. In this way - * it is safe to change the length. */ -template inline PGF_INTERNAL -ref> vector_resize(ref> vec, size_t len, txn_t txn_id) -{ - size_t new_len = get_next_padovan(len); - size_t old_len = get_next_padovan(vec->len); - - vec = PgfDB::realloc>(vec,old_len*sizeof(A),new_len*sizeof(A),txn_id); - vec->len = len; - return vec; -} - template inline PGF_INTERNAL ref vector_elem(ref> v, size_t index) {