/*************************************************************************/ /* */ /* Language Technologies Institute */ /* Carnegie Mellon University */ /* Copyright (c) 1999 */ /* All Rights Reserved. */ /* */ /* Permission is hereby granted, free of charge, to use and distribute */ /* this software and its documentation without restriction, including */ /* without limitation the rights to use, copy, modify, merge, publish, */ /* distribute, sublicense, and/or sell copies of this work, and to */ /* permit persons to whom this work is furnished to do so, subject to */ /* the following conditions: */ /* 1. The code must retain the above copyright notice, this list of */ /* conditions and the following disclaimer. */ /* 2. Any modifications must be clearly marked as such. */ /* 3. Original authors' names are not deleted. */ /* 4. The authors' names are not used to endorse or promote products */ /* derived from this software without specific prior written */ /* permission. */ /* */ /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ /* THIS SOFTWARE. */ /* */ /*************************************************************************/ /* Author: Alan W Black (awb@cs.cmu.edu) */ /* Date: December 1999 */ /*************************************************************************/ /* */ /* Lexicon related functions */ /* */ /*************************************************************************/ #ifndef _CST_LEXICON_H__ #define _CST_LEXICON_H__ #include "cst_item.h" #include "cst_lts.h" typedef struct lexicon_struct { const char *name; int num_entries; /* Entries are centered around bytes with value 255 */ /* entries and forward (compressed) pronunciations and backwards */ /* each are terminated (preceeded in pron case) by 0 */ /* This saves 4 bytes per entry for an index */ unsigned char *data; /* the entries and phone strings */ int num_bytes; /* the number of bytes in the data */ char **phone_table; cst_lts_rules *lts_rule_set; int (*syl_boundary)(const cst_item *i,const cst_val *p); cst_val *(*lts_function)(const struct lexicon_struct *l, const char *word, const char *pos, const cst_features *feats); char ***addenda; /* ngram frequency table used for packed entries */ const char * const *phone_hufftable; const char * const *entry_hufftable; cst_utterance *(*postlex)(cst_utterance *u); cst_val *lex_addenda; /* For pronunciations added at run time */ } cst_lexicon; cst_lexicon *new_lexicon(); void delete_lexicon(cst_lexicon *lex); cst_val *cst_lex_make_entry(const cst_lexicon *lex, const cst_string *entry); cst_val *cst_lex_load_addenda(const cst_lexicon *lex, const char *lexfile); cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos, const cst_features *feats); int in_lex(const cst_lexicon *l, const char *word, const char *pos, const cst_features *feats); CST_VAL_USER_TYPE_DCLS(lexicon,cst_lexicon) #endif