2
2
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4
4
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6
6
* The contents of this file are subject to the terms of either the GNU Lesser
7
7
* General Public License Version 2.1 only ("LGPL") or the Common Development and
8
8
* Distribution License ("CDDL")(collectively, the "License"). You may not use this
9
9
* file except in compliance with the License. You can obtain a copy of the CDDL at
10
10
* http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
11
* http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12
12
* specific language governing permissions and limitations under the License. When
13
13
* distributing the software, include this License Header Notice in each file and
14
14
* include the full text of the License in the License file as well as the
15
15
* following notice:
17
17
* NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19
19
* For Covered Software in this distribution, this License shall be governed by the
48
* A forget all history memory
48
* A forget all history memory
52
52
/** don't care word id, or separator word id */
53
// Declaration only: no initializer is given here, so the value is defined elsewhere.
// NOTE(review): DCWID is declared twice in this view (unsigned int and uint32_t) —
// presumably the two sides of a type migration; confirm which one is live.
static const unsigned int DCWID;
53
static const uint32_t DCWID;
55
55
// Virtual destructor; only declared here, the body is not visible in this view.
virtual ~CICHistory();
57
// Pure virtual query taking one word id and returning bool.
// NOTE(review): presumably reports whether wid occurs in the recorded history —
// confirm against an implementation. Declared twice in this view (unsigned int
// and uint32_t spellings of the same signature).
virtual bool seenBefore(unsigned int wid) = 0;
57
virtual bool seenBefore(uint32_t wid) = 0;
60
* memorize the context stream pointed by [its_wid, ite_wid)
62
// Pure virtual. Takes the half-open word-id range [its_wid, ite_wid) and returns bool.
// NOTE(review): the meaning of the bool result is not shown here — presumably
// false on failure; confirm.
virtual bool memorize(unsigned int* its_wid, unsigned int* ite_wid) = 0;
60
* memorize the context stream pointed by [its_wid, ite_wid)
62
// Pure virtual. Takes the half-open word-id range [its_wid, ite_wid) and returns bool.
// NOTE(review): the meaning of the bool result is not shown here — presumably
// false on failure; confirm.
virtual bool memorize(uint32_t* its_wid, uint32_t* ite_wid) = 0;
63
63
// Pure virtual; takes no arguments and returns nothing.
// NOTE(review): presumably discards all recorded history — confirm against an implementation.
virtual void clear() = 0;
66
* remove a word id from history cache
68
// Pure virtual: remove a single word id from the history cache.
virtual void forget(unsigned wid) = 0;
69
// Pure virtual overload taking a word-id pointer pair (its_wid, ite_wid).
// NOTE(review): presumably forgets the sequence [its_wid, ite_wid) — confirm.
virtual void forget(unsigned int* its_wid, unsigned int* ite_wid) = 0;
72
* @param its_wid is the first word pointer of the context stream
73
* @param ite_wid is the last (exclusive) word pointer of the context stream
74
* @return pr(*(ite_wid-1) | *its_wid, ..., *(ite_wid-2))
75
* The return value could be zero, i.e. no need to smooth the probabilities
77
// Pure virtual. The range [its_wid, ite_wid) is the context stream, and the
// predicted word is its last element, *(ite_wid-1). The result may be zero.
virtual double pr(unsigned int* its_wid, unsigned int* ite_wid) = 0;
80
* @param its_wid is the first word pointer of the history stream
81
* @param ite_wid is the last (exclusive) word pointer of the history stream
82
* @return pr(*wid | *its_wid, ..., *(ite_wid-1))
83
* The return value could be zero, i.e. no need to smooth the probabilities
85
// Pure virtual. The range [its_wid, ite_wid) is the history stream, and the
// predicted word is passed separately as wid. The result may be zero.
virtual double pr(unsigned int* its_wid, unsigned int* ite_wid, unsigned int wid) = 0;
88
* allocate a buffer, and put the context memory's content into it
89
* @param buf_ptr would be stored the buffer pointer
90
* @param sz would be the size in byte of the buffer allocated
91
* @return false on error
92
* Note: the caller must release the buffer with free(*buf_ptr) after use
66
* remove a word id from history cache
68
// Pure virtual: remove a single word id from the history cache.
virtual void forget(uint32_t wid) = 0;
69
// Pure virtual overload taking a word-id pointer pair (its_wid, ite_wid).
// NOTE(review): presumably forgets the sequence [its_wid, ite_wid) — confirm.
virtual void forget(uint32_t* its_wid, uint32_t* ite_wid) = 0;
72
* @param its_wid is the first word pointer of the context stream
73
* @param ite_wid is the last (exclusive) word pointer of the context stream
74
* @return pr(*(ite_wid-1) | *its_wid, ..., *(ite_wid-2))
75
* The return value could be zero, i.e. no need to smooth the probabilities
77
// Pure virtual. The range [its_wid, ite_wid) is the context stream, and the
// predicted word is its last element, *(ite_wid-1). The result may be zero.
virtual double pr(uint32_t* its_wid, uint32_t* ite_wid) = 0;
80
* @param its_wid is the first word pointer of the history stream
81
* @param ite_wid is the last (exclusive) word pointer of the history stream
82
* @return pr(*wid | *its_wid, ..., *(ite_wid-1))
83
* The return value could be zero, i.e. no need to smooth the probabilities
85
virtual double pr(uint32_t* its_wid,
90
* allocate a buffer, and put the context memory's content into it
91
* @param buf_ptr would be stored the buffer pointer
92
* @param sz would be the size in byte of the buffer allocated
93
* @return false on error
94
* Note: the caller must release the buffer with free(*buf_ptr) after use
95
97
bufferize(void** buf_ptr, size_t* sz) = 0;
98
* Load context memory according to the buf
99
* @param buf_ptr buffer pointer
100
* @param sz is the size in byte of the buffer
101
* @return false on error
102
* call with buf_ptr with NULL value would clear the context memory
100
* Load context memory according to the buf
101
* @param buf_ptr buffer pointer
102
* @param sz is the size in byte of the buffer
103
* @return false on error
104
* call with buf_ptr with NULL value would clear the context memory
105
107
loadFromBuffer(void* buf_ptr, size_t sz) = 0;
108
addStopWords(const std::set<unsigned int>& stopWords) = 0;
110
addStopWords(const std::set<uint32_t>& stopWords) = 0;
111
113
initStopWords() = 0;
120
122
// Virtual destructor; only declared here, the body is not visible in this view.
virtual ~CBigramHistory();
122
// Non-pure declaration with the same signature as the pure virtual seenBefore above.
// NOTE(review): presumably the CBigramHistory override — the class header is
// outside this view; confirm. Appears in both unsigned int and uint32_t spellings.
virtual bool seenBefore(unsigned int wid);
124
virtual bool seenBefore(uint32_t wid);
124
// Non-pure declaration taking a word-id pointer pair (its_wid, ite_wid) and returning bool.
// NOTE(review): presumably the CBigramHistory override of memorize — the class
// header is outside this view; confirm. Appears in both unsigned int and
// uint32_t spellings.
virtual bool memorize(unsigned int* its_wid, unsigned int* ite_wid);
126
virtual bool memorize(uint32_t* its_wid, uint32_t* ite_wid);
125
127
// Non-pure declaration; takes no arguments and returns nothing.
// NOTE(review): presumably discards all recorded bigram history — confirm in the implementation.
virtual void clear();
127
// Non-pure declaration taking a single word id.
// NOTE(review): presumably removes that word from the history — confirm.
virtual void forget(unsigned wid);
128
// Overload taking a word-id pointer pair (its_wid, ite_wid).
virtual void forget(unsigned int* its_wid, unsigned int* ite_wid);
131
* @param its_wid is the first word pointer of the context stream
132
* @param ite_wid is the last (exclusive) word pointer of the context stream
133
* @return pr(*(ite_wid-1) | *(ite_wid-2))
135
// Bigram form: only the last two ids of [its_wid, ite_wid) are used,
// giving pr(*(ite_wid-1) | *(ite_wid-2)).
virtual double pr(unsigned int* its_wid, unsigned int* ite_wid);
138
* @param its_wid is the first word pointer of the history stream
139
* @param ite_wid is the last (exclusive) word pointer of the history stream
140
* @return pr(*wid | *(ite_wid-1))
142
// Bigram form: conditions wid on the last id of the history stream,
// giving pr(wid | *(ite_wid-1)).
virtual double pr(unsigned int* its_wid, unsigned int* ite_wid, unsigned int wid);
129
// Non-pure declaration taking a single word id.
// NOTE(review): presumably removes that word from the history — confirm.
virtual void forget(uint32_t wid);
130
// Overload taking a word-id pointer pair (its_wid, ite_wid).
virtual void forget(uint32_t* its_wid, uint32_t* ite_wid);
133
* @param its_wid is the first word pointer of the context stream
134
* @param ite_wid is the last (exclusive) word pointer of the context stream
135
* @return pr(*(ite_wid-1) | *(ite_wid-2))
137
// Bigram form: only the last two ids of [its_wid, ite_wid) are used,
// giving pr(*(ite_wid-1) | *(ite_wid-2)).
virtual double pr(uint32_t* its_wid, uint32_t* ite_wid);
140
* @param its_wid is the first word pointer of the history stream
141
* @param ite_wid is the last (exclusive) word pointer of the history stream
142
* @return pr(*wid | *(ite_wid-1))
144
virtual double pr(uint32_t* its_wid,
145
149
bufferize(void** buf_ptr, size_t* sz);
148
152
loadFromBuffer(void* buf_ptr, size_t sz);
151
loadFromFile (const char *fname);
155
loadFromFile(const char *fname);
154
saveToFile (const char *fname = NULL);
158
saveToFile(const char *fname = NULL);
156
// Takes a set of word ids by const reference.
// NOTE(review): presumably merges them into an internal stop-word set — the
// member is not visible here; confirm.
virtual void addStopWords (const std::set<unsigned int>& stopWords);
157
// Takes no arguments.
// NOTE(review): presumably installs a default stop-word set — confirm in the implementation.
virtual void initStopWords ();
160
// uint32_t spelling of the same two declarations.
virtual void addStopWords(const std::set<uint32_t>& stopWords);
161
virtual void initStopWords();
160
// Word-id type used by the aliases below.
// NOTE(review): declared twice in this view (unsigned and uint32_t) — presumably
// the two sides of a type migration; confirm which one is live.
typedef unsigned TWordId;
164
typedef uint32_t TWordId;
161
165
// A bigram is an ordered pair of word ids.
typedef std::pair<TWordId, TWordId> TBigram;
162
// A unigram is a single word id.
typedef TWordId TUnigram;
166
typedef TWordId TUnigram;
163
167
// Ordered map from a bigram to an int.
// NOTE(review): the int is presumably an occurrence count — confirm.
typedef std::map<TBigram, int> TBigramPool;
164
168
// Ordered map from a unigram to an int.
// NOTE(review): presumably a count as well — confirm.
typedef std::map<TUnigram, int> TUnigramPool;
165
169
// Double-ended queue of word ids.
// NOTE(review): presumably the recent-context window — confirm.
typedef std::deque<TWordId> TContextMemory;