35
35
using Wintermute::Linguistics::Parser;
36
36
using Wintermute::Linguistics::English::EnglishParser;
37
37
using Wintermute::Linguistics::English::Phrase;
38
using Wintermute::Linguistics::English::NounPhrase;
39
using Wintermute::Linguistics::English::VerbPhrase;
40
38
using Wintermute::Data::Linguistics::Configuration;
41
39
using Wintermute::Data::Ontology::Instance;
43
/// Default constructor: performs no work of its own beyond invoking the
/// default constructor of the Parser base class.
EnglishParser::EnglishParser ( )
    : Parser ( )
{
}
45
const string EnglishParser::getLocale ( ) const {
49
char EnglishParser::getTextSeparator ( ) const {
53
SyntacticBranch EnglishParser::generateSyntactics ( const StringVector& tokens ) {
54
SyntacticBranch theTree;
56
for ( StringVector::const_iterator itr = tokens.begin ( ); itr != tokens.end ( ); itr ++ ) {
57
const string theRawWord = * itr;
60
// Do word sanitization.
62
for ( int i = 0; i < theRawWord.length ( ); i ++ ) {
63
char theChar = theRawWord.at ( i );
64
if ( isalpha ( theChar ) || isdigit ( theChar ) )
65
theWord += tolower ( theChar );
68
NodeVector theBranches;
69
const string theWordID = LexicalNode::toID ( theWord );
70
LexicalNode* theLexicalNode = LexicalNode::fromID ( theWordID, this->getLocale ( ) );
71
isPseudo = ( theLexicalNode == NULL );
73
SyntacticNode* theNode = SyntacticNode::buildPsuedo ( theRawWord , "B",this->getLocale ( ) );
74
theBranches.push_back ( theNode );
77
StringVector allPossibleMeanings = LexicalLink::obtainFlagsFor ( theLexicalNode,NULL );
78
paths *= allPossibleMeanings.size ( );
79
for ( StringVector::const_iterator stringItr = allPossibleMeanings.begin ( ); stringItr != allPossibleMeanings.end ( ); stringItr ++ ) {
80
const string theFlag = * stringItr;
81
SemanticNode* theMeaning = LexicalLink::getExactMeaningOf ( theLexicalNode,theFlag );
82
//cout << theFlag << " " << theMeaning->getID () << " => " << theLexicalNode->getID () << endl;
83
SyntacticNode* synNode = SyntacticNode::build ( theMeaning->getID ( ),theFlag,theRawWord,this->getLocale ( ) );
84
theBranches.push_back ( synNode );
86
//cout << "(linguistics) [EnglishParser] Discovered " << paths << " linkages for symbol '" << theRawWord << "'." << endl;
88
theTree.push_back ( theBranches );
93
Phrase* EnglishParser::formPhrase ( const NodeVector::iterator& itr, NodeVector& list ) {
94
SyntacticNode* curNode = dynamic_cast < SyntacticNode* > ( *itr );
95
Phrase* thePhrase = NULL;
96
//cout << "Word class: " << curNode->getType ( ) << ", " << curNode->getText() << endl;
97
switch ( curNode->getType ( ) ) {
101
* @todo How to determine the last noun used that refers to this?
103
if ( curNode->hasFlag ( 'x' ) ) {
104
/* found a determiner, now snag it up! */
106
NodeVector::iterator nextWord = itr + 1;
107
theWords.push_back ( curNode );
108
for ( ;nextWord != list.end ( ) ;nextWord ++ ) {
109
SyntacticNode* aNode = dynamic_cast < SyntacticNode* > ( *nextWord );
110
if ( aNode->getType ( ) == 'A' ||
111
aNode->getType ( ) == 'B' ||
112
aNode->getType ( ) == 'E' ||
113
aNode->getType ( ) == 'F' ) {
114
theWords.push_back ( curNode );
119
thePhrase = new Phrase ( theWords,curNode );
121
} else if ( curNode->hasFlag ( "rideg" ) ) {
129
* Nouns can be anything of the following:
131
* - concepts in the ontology
133
if ( curNode->isPsuedo ( ) ) {
134
/// Got a pseudo noun; link up the rest and make it a noun.
136
NodeVector::iterator nextWord = itr + 1;
137
theWords.push_back ( curNode );
138
for ( ;nextWord != list.end ( ) ;nextWord ++ ) {
139
SyntacticNode* aNode = dynamic_cast < SyntacticNode* > ( *nextWord );
140
if ( aNode->getType ( ) == 'B' && aNode->isPsuedo ( ) ) {
141
theWords.push_back ( curNode );
146
thePhrase = new Phrase ( theWords,curNode );
148
if ( curNode->hasFlag ( "coalp" ) ) {
157
if ( itr + 1 == list.end ( ) ) {
158
// Might be a reflexive subject; like "He flew." or "We ate."
160
NodeVector::iterator nextWord = itr + 1;
161
theWords.push_back ( curNode );
162
for ( ;nextWord != list.end ( ) ;nextWord ++ ) {
163
SyntacticNode* aNode = dynamic_cast < SyntacticNode* > ( *nextWord );
164
if ( aNode->getType ( ) == 'A' ||
165
aNode->getType ( ) == 'B' ) {
166
theWords.push_back ( curNode );
171
thePhrase = new Phrase ( theWords,curNode );
183
void EnglishParser::doPreparsingWork ( PhraseVector& phrase ) {
184
//cout << phrase.size ( ) << " phrases found." << endl;
187
SyntacticLink* EnglishParser::formSyntacticLink ( const PhraseVector::iterator& itr, PhraseVector& list ) {
188
Phrase* thePhrase = * itr;
189
const SyntacticNode* headNode = thePhrase->getHeadNode ( );
190
SyntacticLink* theLink = NULL;
191
switch ( headNode->getType ( ) ) {
198
if (headNode->isPsuedo ()){
199
/// This is a pseudo phrase like "John Mary", "Microsoft Kinect".
200
/// This handler right here does a bit of magic. ;)
201
/// @consider Pushing this up into the standard parser, so that _all_ parsers benefit from it?
203
/// Query the locale database for an entry associated with this term.
204
string rawText = thePhrase->toString();
205
Instance::QueryMap* results = Instance::query(rawText);
207
/// Wintermute's familiar with this concept.
208
cout << "I know about" << rawText << endl;
210
/// Wintermute has no idea what this is.
211
cout << "What's a " << rawText << endl;
220
default: // shouldn't even happen.
222
cout << "Not sure.." << endl;
b'\\ No newline at end of file'
41
namespace Wintermute {
42
namespace Linguistics {
45
/// Default constructor: performs no work of its own beyond invoking the
/// default constructor of the Parser base class.
EnglishParser::EnglishParser ( )
    : Parser ( )
{
}
47
const string EnglishParser::getLocale ( ) const {
51
char EnglishParser::getTextSeparator ( ) const {
55
SyntacticBranch EnglishParser::generateSyntactics ( const StringVector& tokens ) {
56
SyntacticBranch theTree;
58
for ( StringVector::const_iterator itr = tokens.begin ( ); itr != tokens.end ( ); itr ++ ) {
59
const string theRawWord = * itr;
62
// Do word sanitization.
64
for ( int i = 0; i < theRawWord.length ( ); i ++ ) {
65
char theChar = theRawWord.at ( i );
66
if ( isalpha ( theChar ) || isdigit ( theChar ) )
67
theWord += tolower ( theChar );
70
NodeVector theBranches;
71
const string theWordID = LexicalNode::toID ( theWord );
72
const LexicalNode* theLexicalNode = LexicalNode::fromID ( theWordID, this->getLocale ( ) );
73
isPseudo = ( theLexicalNode == NULL );
75
SyntacticNode* theNode = SyntacticNode::buildPsuedo ( theRawWord , "B",this->getLocale ( ) );
76
theBranches.push_back ( theNode );
79
StringVector allPossibleMeanings = LexicalLink::obtainFlagsFor ( theLexicalNode,NULL );
80
paths *= allPossibleMeanings.size ( );
81
for ( StringVector::const_iterator stringItr = allPossibleMeanings.begin ( ); stringItr != allPossibleMeanings.end ( ); stringItr ++ ) {
82
const string theFlag = * stringItr;
83
const SemanticNode* theMeaning = LexicalLink::getExactMeaningOf ( theLexicalNode,theFlag );
84
// cout << theFlag << " " << theMeaning->getID () << " => " << theLexicalNode->getID () << endl;
85
SyntacticNode* synNode = SyntacticNode::build ( theMeaning->getID ( ),theFlag,theRawWord,this->getLocale ( ) );
86
theBranches.push_back ( synNode );
88
// cout << "(linguistics) [EnglishParser] Discovered " << paths << " linkages for symbol '" << theRawWord << "'." << endl;
90
theTree.push_back ( theBranches );
95
Phrase* EnglishParser::formPhrase ( const NodeVector::iterator& itr, NodeVector& list ) {
96
const SyntacticNode* curNode = dynamic_cast < const SyntacticNode* > ( *itr );
97
Phrase* thePhrase = NULL;
98
//cout << "Word class: " << curNode->getType ( ) << ", " << curNode->getText() << endl;
99
switch ( curNode->getType ( ) ) {
103
* @todo How to determine the last noun used that refers to this?
105
if ( curNode->hasFlag ( 'x' ) ) {
106
/* found a determiner, now snag it up! */
108
NodeVector::iterator nextWord = itr + 1;
109
theWords.push_back ( curNode );
110
for ( ;nextWord != list.end ( ) ;nextWord ++ ) {
111
const SyntacticNode* aNode = dynamic_cast < const SyntacticNode* > ( *nextWord );
112
if ( aNode->getType ( ) == 'A' ||
113
aNode->getType ( ) == 'B' ||
114
aNode->getType ( ) == 'E' ||
115
aNode->getType ( ) == 'F' ) {
116
theWords.push_back ( curNode );
121
thePhrase = new Phrase ( theWords,curNode );
123
} else if ( curNode->hasFlag ( "rideg" ) ) {
131
* Nouns can be anything of the following:
133
* - concepts in the ontology
135
if ( curNode->isPsuedo ( ) ) {
136
/// Got a pseudo noun; link up the rest and make it a noun.
138
NodeVector::iterator nextWord = itr + 1;
139
theWords.push_back ( curNode );
140
for ( ;nextWord != list.end ( ) ;nextWord ++ ) {
141
const SyntacticNode* aNode = dynamic_cast < const SyntacticNode* > ( *nextWord );
142
if ( aNode->getType ( ) == 'B' && aNode->isPsuedo ( ) ) {
143
theWords.push_back ( curNode );
148
thePhrase = new Phrase ( theWords,curNode );
150
if ( curNode->hasFlag ( "coalp" ) ) {
159
if ( itr + 1 == list.end ( ) ) {
160
// Might be a reflexive subject; like "He flew.", "We quickly ate."
162
NodeVector::iterator nextWord = itr + 1;
163
theWords.push_back ( curNode );
164
for ( ;nextWord != list.end ( ) ;nextWord ++ ) {
165
const SyntacticNode* aNode = dynamic_cast < const SyntacticNode* > ( *nextWord );
166
if ( aNode->getType ( ) == 'A' || aNode->getType ( ) == 'B' || aNode->getType ( ) == 'D' || aNode->getType ( ) == 'E' ) {
167
theWords.push_back ( curNode );
172
thePhrase = new Phrase ( theWords,curNode );
184
void EnglishParser::doPreparsingWork ( PhraseVector& phrase ) {
185
//cout << phrase.size ( ) << " phrases found." << endl;
188
SyntacticLink* EnglishParser::formSyntacticLink ( const PhraseVector::iterator& itr, PhraseVector& list ) {
189
Phrase* thePhrase = * itr;
190
const SyntacticNode* headNode = thePhrase->getHeadNode ( );
191
SyntacticLink* theLink = NULL;
192
switch ( headNode->getType ( ) ) {
199
if ( headNode->isPsuedo ( ) ) {
200
/// This is a pseudo phrase like "John Mary", "Microsoft Kinect".
201
/// This handler right here does a bit of magic. ;)
202
/// @consider Pushing this up into the standard parser, so that _all_ parsers benefit from it?
204
/// Query the locale database for an entry associated with this term.
205
string rawText = thePhrase->toString ( );
206
Instance::QueryMap* results = Instance::query ( rawText );
208
/// Wintermute's familiar with this concept.
209
cout << "I know about" << rawText << endl;
211
/// Wintermute has no idea what this is.
212
cout << "What's a " << rawText << endl;
219
/// When it comes to linking, verbs need a subject & an object.
220
/// We only look for the object, to save effort.
221
PhraseVector::iterator aItr = itr + 1;
222
Phrase* phrObj = NULL;
223
NodeVector modifiers;
224
for ( ; aItr != list.end ( ); aItr ++ ) {
225
Phrase* aPhrase = * aItr;
226
const char theType = aPhrase->getHeadNode ( )->getType ( );
227
if ( theType == 'A' || theType == 'B' ) {
231
} else if ( theType == 'D' || theType == 'E' ) {
232
/// Grrr, it's an adverb or an adjective, hmm.
233
/// Shouldn't be bumping into this.
234
modifiers.push_back ( aPhrase );
240
theLink = SyntacticLink::build ( thePhrase,( ( phrObj == NULL )?NULL:phrObj ),"C",&modifiers );
243
default: // Shouldn't even happen; but people say crazy things :)
245
cout << "Not sure.." << endl;
253
void EnglishParser::interpretLinks ( const LinkVector& theLinks, const PhraseVector& thePhrase ) {
254
for ( LinkVector::const_iterator currentLinkItr = theLinks.begin ( ); currentLinkItr != theLinks.end ( ); currentLinkItr ++ ) {
255
const SyntacticLink* currentLink = dynamic_cast < const SyntacticLink* > ( *currentLinkItr );
256
const Phrase* srcPhrase = dynamic_cast < const Phrase* > ( currentLink->getSource ( ) );
257
const Phrase* dstPhrase = dynamic_cast < const Phrase* > ( currentLink->getDestination ( ) );
259
const char theFlag = currentLink->getFlags ( ).at ( 0 );
263
if ( dstPhrase == NULL ) {
264
PhraseVector::const_iterator itr = find ( thePhrase.begin ( ),thePhrase.end ( ),srcPhrase );
265
cout << "No subject specified for '" << srcPhrase->getHeadNode ( )->getText ( ) << "'." << endl;
271
cout << "I'm confused.." << endl;