diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h index b989e89ec..b896dfef3 100644 --- a/link-grammar/connectors.h +++ b/link-grammar/connectors.h @@ -31,6 +31,14 @@ */ #define MAX_SENTENCE 254 /* Maximum number of words in a sentence */ +/* Since tracon IDs are unique per sentence, for convenience NULL + * connectors (zero-length tracons) have tracon IDs equal to the word + * number on which their disjunct resides. To that end an initial block + * of IDs is reserved. It is the maximum number of words in a sentence + * rounded up to a power of 2. + */ +#define NULL_TRACON_BLOCK 256 + /* For faster comparisons, the connector lc part is encoded into a number * and a mask. Each letter is encoded using LC_BITS bits. With 7 bits, it * is possible to encode up to 9 letters in an uint64_t. */ diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index c446c7c4b..0cc815868 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -806,12 +806,6 @@ static Disjunct *pack_disjuncts(Sentence sent, Tracon_sharing *ts, #define TLSZ 8192 /* Initial size of the tracon list table */ -/* Reserved tracon ID space for NULL connectors (zero-length tracons). - * Currently, tracons are unique per word. So this is actually the max. - * number of words in a sentence rounded up to a power of 2. - * FIXME: Derive it from MAX_SENTENCE. */ -#define WORD_OFFSET 256 - /** Create a context descriptor for disjuncts & connector memory "packing". * Allocate a memory block for all the disjuncts & connectors. * The current Connector struct size is 32 bytes, and the intention is @@ -873,7 +867,7 @@ static Tracon_sharing *pack_sentence_init(Sentence sent, bool is_pruning) ts->dblock = dblock; ts->num_connectors = ccnt; ts->num_disjuncts = dcnt; - ts->word_offset = is_pruning ? 1 : WORD_OFFSET; + ts->word_offset = is_pruning ? 1 : NULL_TRACON_BLOCK; ts->is_pruning = is_pruning; ts->next_id[0] = ts->next_id[1] = ts->word_offset; ts->last_token = (uintptr_t)-1; diff --git a/link-grammar/parse/extract-links.c b/link-grammar/parse/extract-links.c index 548391250..4eb9266ce 100644 --- a/link-grammar/parse/extract-links.c +++ b/link-grammar/parse/extract-links.c @@ -41,7 +41,7 @@ struct Parse_choice_struct Parse_choice * next; Parse_set * set[2]; Disjunct *md; /* the chosen disjunct for the middle word */ - Connector *lc, *rc; /* the connectors on the middle word */ + int32_t l_id, r_id; /* the tracon IDs used in this disjunct */ }; /* Parse_set serves as a header of Parse_choice chained elements, that @@ -49,11 +49,11 @@ struct Parse_choice_struct * tracons l_id and r_id on words lw and rw, correspondingly. */ struct Parse_set_struct { - Parse_choice *first; Connector *le, *re; + Parse_choice *first; + unsigned int num_pc; /* number of Parse_choice elements */ uint8_t lw, rw; /* left and right word index */ uint8_t null_count; /* number of island words */ - int32_t l_id, r_id; /* tracons on words lw, rw */ count_t count; /* The number of ways to parse. */ #ifdef RECOUNT @@ -105,13 +105,12 @@ make_choice(Parse_set *lset, Connector * lrc, Parse_set *rset, Connector * rlc, Disjunct *md, extractor_t* pex) { - Parse_choice *pc; - pc = pool_alloc(pex->Parse_choice_pool); + Parse_choice *pc = pool_alloc(pex->Parse_choice_pool); pc->next = NULL; pc->set[0] = lset; pc->set[1] = rset; - pc->lc = lrc; - pc->rc = rlc; + pc->l_id = (lrc == NULL) ? -1 : lrc->tracon_id; + pc->r_id = (rlc == NULL) ? -1 : rlc->tracon_id; pc->md = md; return pc; } @@ -126,6 +125,7 @@ static void record_choice( // Chain it into the parse set. pc->next = s->first; s->first = pc; + s->num_pc++; } /** @@ -201,8 +201,8 @@ extractor_t * extractor_new(Sentence sent) pex->x_table_size = (1 << log2_table_size); #ifdef DEBUG_X_TABLE - printf("Allocating x_table of size %u (nwords %d)\n", - pex->x_table_size, nwords); + printf("Allocating x_table of size %u (log2 %d)\n", + pex->x_table_size, log2_table_size); #endif /* DEBUG_X_TABLE */ pex->x_table = (Pset_bucket**) xalloc(pex->x_table_size * sizeof(Pset_bucket*)); @@ -267,23 +267,40 @@ void free_extractor(extractor_t * pex) xfree((void *) pex, sizeof(extractor_t)); } +/** + * Return a dummy connector that represents a null tracon for word \p w. + * Its purpose is to greatly simplify the condition in x_table_pointer(). + * \p w may be in the range [-1,sentence length]. + * We assume here is that an integer check and assignment is thread-safe. + */ +static Connector *dummy_null_tracon(int w) +{ + /* +1 for w+1 (see below). + * +1 for invocations with w equal to MAX_SENTENCE. */ + static Connector dnt[MAX_SENTENCE+1+1]; + + /* w+1 supports invocations with w==-1. */ + if (dnt[w+1].tracon_id != w) dnt[w+1].tracon_id = w; + return &dnt[w+1]; +} + /** * Returns the pointer to this info, NULL if not there. * Note that there is no need to use (lw, rw) as keys because tracon_id * values are not shared between words. */ -static Pset_bucket * x_table_pointer(int lw, int rw, - Connector *le, Connector *re, - unsigned int null_count, extractor_t * pex) +static Pset_bucket *x_table_pointer(int lw, int rw, + Connector *le, Connector *re, + unsigned int null_count, extractor_t * pex) { - Pset_bucket *t; int l_id = (NULL != le) ? le->tracon_id : lw; int r_id = (NULL != re) ? re->tracon_id : rw; unsigned int hash = pair_hash(lw, rw, l_id, r_id, null_count); - t = pex->x_table[hash & (pex->x_table_size-1)]; + Pset_bucket *t = pex->x_table[hash & (pex->x_table_size-1)]; - for (; t != NULL; t = t->next) { - if ((t->set.l_id == l_id) && (t->set.r_id == r_id) && + for (; t != NULL; t = t->next) + { + if ((t->set.le->tracon_id == l_id) && (t->set.re->tracon_id == r_id) && (t->set.null_count == null_count)) return t; } return NULL; @@ -296,22 +313,21 @@ static Pset_bucket * x_table_store(int lw, int rw, Connector *le, Connector *re, unsigned int null_count, extractor_t * pex) { - Pset_bucket **t, *n; - unsigned int h; + int32_t l_id = (NULL != le) ? le->tracon_id : lw; + int32_t r_id = (NULL != re) ? re->tracon_id : rw; + unsigned int h = pair_hash(lw, rw, l_id, r_id, null_count); + Pset_bucket **t = &pex->x_table[h & (pex->x_table_size -1)]; + Pset_bucket *n = pool_alloc(pex->Pset_bucket_pool); - n = pool_alloc(pex->Pset_bucket_pool); n->set.lw = lw; n->set.rw = rw; n->set.null_count = null_count; - n->set.l_id = (NULL != le) ? le->tracon_id : lw; - n->set.r_id = (NULL != re) ? re->tracon_id : rw; - n->set.le = le; - n->set.re = re; + n->set.le = (NULL != le) ? le : dummy_null_tracon(lw); + n->set.re = (NULL != re) ? re : dummy_null_tracon(rw); n->set.count = 0; n->set.first = NULL; + n->set.num_pc = 0; - h = pair_hash(lw, rw, n->set.l_id, n->set.r_id, null_count); - t = &pex->x_table[h & (pex->x_table_size -1)]; n->next = *t; *t = n; return n; @@ -716,19 +732,35 @@ bool build_parse_set(extractor_t* pex, Sentence sent, return set_overflowed(pex); } +static Connector *get_tracon_by_id(const Disjunct *d, int32_t tracon_id, + int dir) +{ + if (tracon_id < 0) return NULL; /* See make_choice() */ + for (Connector *c = dir ? d->right : d->left; c != NULL; c = c->next) + if (tracon_id == c->tracon_id) return c; + + assert(0, "tracon_id %d not found on disjunct %p in direction %d\n", + tracon_id, d, dir); +} + +static bool is_zero_tracon(Connector *c) +{ + return (c == NULL) || (c->tracon_id < NULL_TRACON_BLOCK); +} + /** * Assemble the link array and the chosen_disjuncts of a linkage. */ static void issue_link(Linkage lkg, int lr, Parse_choice *pc, const Parse_set *set) { - Connector *lc = lr ? pc->rc : set->le; - if (lc == NULL) return; /* No choice to record. */ + Connector *lc = lr ? get_tracon_by_id(pc->md, pc->r_id, 1) : set->le; + if (is_zero_tracon(lc)) return; /* No choice to record. */ lkg->chosen_disjuncts[lr ? pc->set[1]->lw : pc->set[0]->rw] = pc->md; - Connector *rc = lr ? set->re : pc->lc; - if (rc == NULL) return; /* No link to generate. */ + Connector *rc = lr ? set->re : get_tracon_by_id(pc->md, pc->l_id, 0); + if (is_zero_tracon(rc)) return; /* No choice to record. */ assert(lkg->num_links < lkg->lasz, "Linkage array too small!"); Link *link = &lkg->link_array[lkg->num_links]; @@ -792,12 +824,13 @@ static void issue_links_for_choice(Linkage lkg, Parse_choice *pc, * For S0: (Nindex % pc->set[0]->count) ranges from 0 to (S0ₘ-1). * For S1: (Nindex / pc->set[0]->count) ranges from 0 to (S1ₘ-1). */ -static void list_links(Linkage lkg, const Parse_set * set, int index) +static void list_links(Linkage lkg, Parse_set * set, int index) { Parse_choice *pc; count_t n; /* No overflow - see extract_links() and process_linkages() */ - if (set == NULL || set->first == NULL) return; + assert(set != NULL, "Unexpected NULL Parse_set"); + if (set->first == NULL) return; for (pc = set->first; pc != NULL; pc = pc->next) { n = pc->set[0]->count * pc->set[1]->count; if (index < n) break; @@ -810,32 +843,18 @@ static void list_links(Linkage lkg, const Parse_set * set, int index) } static void list_random_links(Linkage lkg, unsigned int *rand_state, - const Parse_set * set) + Parse_set * set) { - Parse_choice *pc; - int num_pc, new_index; + assert(set != NULL, "Unexpected NULL Parse_set"); + if (set->first == NULL) return; - if (set == NULL || set->first == NULL) return; + /* Avoid calling rand_r() for the common case of a single element. */ + unsigned int new_index = (set->num_pc == 1) ? 0 : + rand_r(rand_state) % set->num_pc; - /* Most of the time, there is only one list element. */ - if (set->first->next == NULL) - { - pc = set->first; - } - else - { - num_pc = 0; - for (pc = set->first; pc != NULL; pc = pc->next) { - num_pc++; - } - - new_index = rand_r(rand_state) % num_pc; - - num_pc = 0; - for (pc = set->first; new_index != num_pc; pc = pc->next) { - num_pc++; - } - } + Parse_choice *pc; + for (pc = set->first; new_index > 0; pc = pc->next) + new_index--; issue_links_for_choice(lkg, pc, set); list_random_links(lkg, rand_state, pc->set[0]);