2
# Copyright (c) 2001-2006, Len Kranendonk. All rights reserved.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License. You should have
6
# received a copy of the GPL license along with this program; if you
7
# did not, you can find it at http://www.gnu.org/
10
#-------------------------------------------------------------
11
# Sphinx Perl searchd client API
12
#-------------------------------------------------------------
21
# Constants to export.
23
SPH_MATCH_ALL SPH_MATCH_ANY SPH_MATCH_PHRASE SPH_MATCH_BOOLEAN SPH_MATCH_EXTENDED
24
SPH_SORT_RELEVANCE SPH_SORT_ATTR_DESC SPH_SORT_ATTR_ASC SPH_SORT_TIME_SEGMENTS SPH_SORT_EXTENDED
25
SPH_GROUPBY_DAY SPH_GROUPBY_WEEK SPH_GROUPBY_MONTH SPH_GROUPBY_YEAR SPH_GROUPBY_ATTR
28
# known searchd commands
29
# Command ids; packed as the first 16-bit field of each request header.
use constant SEARCHD_COMMAND_SEARCH => 0;
30
use constant SEARCHD_COMMAND_EXCERPT => 1;
32
# current client-side command implementation versions
# (major version in the high byte, minor in the low byte: 0x104 is v.1.4)
33
use constant VER_COMMAND_SEARCH => 0x104;
34
use constant VER_COMMAND_EXCERPT => 0x100;
36
# known searchd status codes
# (SEARCHD_ERROR is reported as "searchd error", SEARCHD_RETRY as
# "temporary searchd error"; any other non-OK value is "unknown status code")
37
use constant SEARCHD_OK => 0;
38
use constant SEARCHD_ERROR => 1;
39
use constant SEARCHD_RETRY => 2;
42
# known match modes (the only values accepted by the match mode check)
use constant SPH_MATCH_ALL => 0;
43
use constant SPH_MATCH_ANY => 1;
44
use constant SPH_MATCH_PHRASE => 2;
45
use constant SPH_MATCH_BOOLEAN => 3;
46
use constant SPH_MATCH_EXTENDED => 4;
49
# known sort modes (every mode except SPH_SORT_RELEVANCE requires a sortby value)
use constant SPH_SORT_RELEVANCE => 0;
50
use constant SPH_SORT_ATTR_DESC => 1;
51
use constant SPH_SORT_ATTR_ASC => 2;
52
use constant SPH_SORT_TIME_SEGMENTS => 3;
53
use constant SPH_SORT_EXTENDED => 4;
55
# known attribute types
56
use constant SPH_ATTR_INTEGER => 1;
57
use constant SPH_ATTR_TIMESTAMP => 2;
59
# known grouping functions
60
use constant SPH_GROUPBY_DAY => 0;
61
use constant SPH_GROUPBY_WEEK => 1;
62
use constant SPH_GROUPBY_MONTH => 2;
63
use constant SPH_GROUPBY_YEAR => 3;
64
use constant SPH_GROUPBY_ATTR => 4;
67
#-------------------------------------------------------------
69
#-------------------------------------------------------------
71
# create a new client object and fill defaults
79
_mode => SPH_MATCH_ALL,
81
_sort => SPH_SORT_RELEVANCE,
84
_max_id => 0xFFFFFFFF,
89
_groupfunc => SPH_GROUPBY_DAY,
98
# get last error message (string)
101
return $self->{_error};
104
# get last warning message (string)
107
return $self->{_warning};
116
croak("host is not defined") unless defined($host);
117
croak("port is not defined") unless defined($port);
119
$self->{_host} = $host;
120
$self->{_port} = $port;
123
#-------------------------------------------------------------
125
# connect to searchd server
132
socket($fp, PF_INET, SOCK_STREAM, getprotobyname('tcp')) || Carp::croak("socket: ".$!);
133
my $dest = sockaddr_in($self->{_port}, inet_aton($self->{_host}));
137
# Record the connection failure on the object. The values are concatenated
# explicitly: wrapping them as "{$self->{...}}" is not interpolation syntax
# in Perl and left literal braces around host and port in the message.
$self->{_error} = "connection to " . $self->{_host} . ":" . $self->{_port} . " failed: $!";
143
recv($fp, $buf, 4, 0) ne "" || croak("recv: ".$!);
144
my $v = unpack("N*", $buf);
147
close($fp) || croak("close: $!");
148
$self->{_error} = "expected searchd protocol version 1+, got version '$v'";
151
# All ok, send my version
152
send($fp, pack("N", 1),0);
156
#-------------------------------------------------------------
158
# get and check response packet from searchd server
162
my $client_ver = shift;
165
recv($fp, $header, 8, 0) ne "" || croak("recv: ".$!);
167
my ($status, $ver, $len ) = unpack("n2N", $header);
168
my ($chunk, $response);
169
while(defined($chunk = <$fp>)) {
175
if ( !$response || length($response) != $len ) {
176
$self->{_error} = $len
177
? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=". length($response) . ")"
178
: "received zero-sized searchd response";
183
if ( $status==SEARCHD_ERROR ) {
184
$self->{_error} = "searchd error: " . substr ( $response, 4 );
187
if ( $status==SEARCHD_RETRY ) {
188
$self->{_error} = "temporary searchd error: " . substr ( $response, 4 );
191
if ( $status!=SEARCHD_OK ) {
192
$self->{_error} = "unknown status code '$status'";
197
if ( $ver<$client_ver ) {
198
$self->{_warning} = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
199
$ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
205
#-------------------------------------------------------------
207
#-------------------------------------------------------------
209
# set match offset/limits
214
my $max = shift || 0;
215
croak("offset should be an integer >= 0") unless ($offset =~ /^\d+$/ && $offset >= 0) ;
216
# Validate $limit itself: the pattern is anchored at both ends so trailing
# garbage such as "10abc" is rejected, and the range test checks $limit
# rather than repeating the $offset test.
croak("limit should be an integer >= 0") unless ($limit =~ /^\d+$/ && $limit >= 0);
217
$self->{_offset} = $offset;
218
$self->{_limit} = $limit;
220
$self->{_maxmatches} = $max;
228
croak("Match mode not defined") unless defined($mode);
229
croak("Unknown matchmode: $mode") unless ( $mode==SPH_MATCH_ALL || $mode==SPH_MATCH_ANY
230
|| $mode==SPH_MATCH_PHRASE || $mode==SPH_MATCH_BOOLEAN || $mode==SPH_MATCH_EXTENDED );
231
$self->{_mode} = $mode;
239
croak("Sort mode not defined") unless defined($mode);
240
croak("Unknown sort mode: $mode") unless ( $mode==SPH_SORT_RELEVANCE
241
|| $mode==SPH_SORT_ATTR_DESC || $mode==SPH_SORT_ATTR_ASC
242
|| $mode==SPH_SORT_TIME_SEGMENTS || $mode==SPH_SORT_EXTENDED );
243
croak("Sortby must be defined") unless ($mode==SPH_SORT_RELEVANCE || length($sortby));
244
$self->{_sort} = $mode;
245
$self->{_sortby} = $sortby;
248
# set per-field weights
252
croak("Weights is not an array reference") unless (ref($weights) eq 'ARRAY');
253
foreach my $weight (@$weights) {
254
croak("Weight: $weight is not an integer") unless ($weight =~ /^\d+$/);
256
$self->{_weights} = $weights;
259
# set IDs range to match
260
# only match those records where document ID
261
# is between $min and $max (including $min and $max)
266
croak("min_id is not an integer") unless ($min =~ /^\d+$/);
267
croak("max_id is not an integer") unless ($max =~ /^\d+$/);
268
croak("min_id is larger than or equal to max_id") unless ($min < $max);
269
$self->{_min_id} = $min;
270
$self->{_max_id} = $max;
275
my $attribute = shift;
277
croak("attribute is not defined") unless (defined $attribute);
278
croak("values is not an array reference") unless (ref($values) eq 'ARRAY');
279
croak("values reference is empty") unless (scalar(@$values));
281
foreach my $value (@$values) {
282
croak("value $value is not an integer") unless ($value =~ /^\d+$/);
284
$self->{_filter}{$attribute} = $values;
288
# only match those records where $attribute column value
289
# is between $min and $max (including $min and $max)
292
my $attribute = shift;
295
croak("attribute is not defined") unless (defined $attribute);
296
croak("min: $min is not an integer") unless ($min =~ /^\d+$/);
297
croak("max: $max is not an integer") unless ($max =~ /^\d+$/);
298
croak("min value should be <= max") unless ($min <= $max);
300
$self->{_min}{$attribute} = $min;
301
$self->{_max}{$attribute} = $max;
308
my $attribute = shift;
310
croak("attribute is not defined") unless (defined $attribute);
311
croak("Unknown grouping function: $func") unless ($func==SPH_GROUPBY_DAY
312
|| $func==SPH_GROUPBY_WEEK
313
|| $func==SPH_GROUPBY_MONTH
314
|| $func==SPH_GROUPBY_YEAR
315
|| $func==SPH_GROUPBY_ATTR );
317
$self->{_groupby} = $attribute;
318
$self->{_groupfunc} = $func;
321
# connect to searchd server and run given search query
323
# $query is query string
324
# $index is index name to query, default is "*" which means to query all indexes
326
# returns false on failure
327
# returns hash which has the following keys on success:
329
# array containing hashes with found documents ( "doc", "weight", "group", "stamp" )
331
# total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
333
# total amount of matching documents in index
337
# hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
344
my $fp = $self->_Connect();
345
return 0 unless ($fp);
352
$req = pack ( "NNNN", $self->{_offset}, $self->{_limit}, $self->{_mode}, $self->{_sort} ); # mode and limits
353
$req .= pack ( "N", length($self->{_sortby}) ) . $self->{_sortby};
354
$req .= pack ( "N", length($query) ) . $query; # query itself
355
$req .= pack ( "N", scalar(@{$self->{_weights}}) ); # weights
356
foreach my $weight (@{$self->{_weights}}) {
357
$req .= pack ( "N", int($weight));
359
$req .= pack ( "N", length($index) ) . $index; # indexes
361
pack ( "N", int($self->{_min_id}) ) .
362
pack ( "N", int($self->{_max_id}) );
365
$req .= pack ( "N", scalar(keys %{$self->{_min}}) + scalar(keys %{$self->{_filter}}) );
367
foreach my $attr (keys %{$self->{_min}}) {
369
pack ( "N", length($attr) ) . $attr .
370
pack ( "NNN", 0, $self->{_min}{$attr}, $self->{_max}{$attr} );
373
foreach my $attr (keys %{$self->{_filter}}) {
374
my $values = $self->{_filter}{$attr};
376
pack ( "N", length($attr) ) . $attr .
377
pack ( "N", scalar(@$values) );
379
foreach my $value ( @$values ) {
380
$req .= pack ( "N", $value );
385
$req .= pack ( "NN", $self->{_groupfunc}, length($self->{_groupby}) ) . $self->{_groupby};
387
# max matches to retrieve
388
$req .= pack ( "N", $self->{_maxmatches} );
391
# send query, get response
394
my $len = length($req);
395
$req = pack ( "nnN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len ) . $req; # add header
398
my $response = $self->_GetResponse ( $fp, VER_COMMAND_SEARCH );
399
return 0 unless ($response);
405
my $result = {}; # Empty hash ref
406
$result->{matches} = []; # Empty array ref
407
my $max = length($response); # Protection from broken response
412
my (%attrs, @attr_list);
414
my $nfields = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
415
while ( $nfields-->0 && $p<$max ) {
416
my $len = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
417
push(@fields, substr ( $response, $p, $len )); $p += $len;
419
$result->{"fields"} = \@fields;
421
my $nattrs = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
422
while ( $nattrs-->0 && $p<$max ) {
423
my $len = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
424
my $attr = substr ( $response, $p, $len ); $p += $len;
425
my $type = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
426
$attrs{$attr} = $type;
427
push(@attr_list, $attr);
429
$result->{"attrs"} = \%attrs;
432
my $count = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
435
while ( $count-->0 && $p<$max ) {
437
( $data->{doc}, $data->{weight} ) = unpack("N*N*", substr($response,$p,8));
440
foreach my $attr (@attr_list) {
441
$data->{$attr} = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
443
push(@{$result->{matches}}, $data);
446
($result->{total}, $result->{total_found}, $result->{time}, $words) = unpack("N*N*N*N*", substr($response, $p, 16));
447
$result->{time} = sprintf ( "%.3f", $result->{"time"}/1000 );
450
while ( $words-->0 ) {
451
my $len = unpack ( "N*", substr ( $response, $p, 4 ) );
453
my $word = substr ( $response, $p, $len );
455
my ($docs, $hits) = unpack ("N*N*", substr($response, $p, 8));
457
$result->{words}{$word} = {
465
#-------------------------------------------------------------
466
# excerpts generation
467
#-------------------------------------------------------------
469
# connect to searchd server and generate excerpts from given documents
471
# $index is a string specifying the index whose settings will be used
472
# for stemming, lexing and case folding
473
# $docs is an array reference of strings which represent the documents' contents
474
# $words is a string which contains the words to highlight
475
# $opts is a hash which contains additional optional highlighting parameters:
477
# a string to insert before a set of matching words, default is "<b>"
479
# a string to insert after a set of matching words, default is "</b>"
481
# a string to insert between excerpts chunks, default is " ... "
483
# max excerpt size in symbols (codepoints), default is 256
485
# how many words to highlight around each match, default is 5
487
# returns false on failure
488
# returns an array of string excerpts on success
490
my ($self, $docs, $index, $words, $opts) = @_;
492
croak("BuildExcepts() called with incorrect parameters") unless (ref($docs) eq 'ARRAY'
495
&& ref($opts) eq 'HASH');
496
my $fp = $self->_Connect();
497
return 0 unless ($fp);
502
$opts->{"before_match"} ||= "<b>";
503
$opts->{"after_match"} ||= "</b>";
504
$opts->{"chunk_separator"} ||= " ... ";
505
$opts->{"limit"} ||= 256;
506
$opts->{"around"} ||= 5;
514
$req = pack ( "NN", 0, 1 ); # mode=0, flags=1 (remove spaces)
515
$req .= pack ( "N", length($index) ) . $index; # req index
516
$req .= pack ( "N", length($words) ) . $words; # req words
519
$req .= pack ( "N", length($opts->{"before_match"}) ) . $opts->{"before_match"};
520
$req .= pack ( "N", length($opts->{"after_match"}) ) . $opts->{"after_match"};
521
$req .= pack ( "N", length($opts->{"chunk_separator"}) ) . $opts->{"chunk_separator"};
522
$req .= pack ( "N", int($opts->{"limit"}) );
523
$req .= pack ( "N", int($opts->{"around"}) );
526
$req .= pack ( "N", scalar(@$docs) );
527
foreach my $doc (@$docs) {
528
# Reject only missing or zero-length documents. A plain truth test would
# also reject the non-empty document "0", which is false in Perl.
croak('BuildExcepts: Found empty document in $docs') unless (defined($doc) && length($doc));
529
$req .= pack("N", length($doc)) . $doc;
532
##########################
533
# send query, get response
534
##########################
536
my $len = length($req);
537
$req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; # add header
540
my $response = $self->_GetResponse($fp, VER_COMMAND_EXCERPT);
541
return 0 unless ($response);
544
my $res = []; # Empty array ref; one excerpt string is pushed per document
545
my $rlen = length($response);
546
for ( $i=0; $i< scalar(@$docs); $i++ ) {
547
my $len = unpack ( "N*", substr ( $response, $pos, 4 ) );
550
if ( $pos+$len > $rlen ) {
551
# Store the message in the object's _error hash field, the same slot every
# other failure path writes. The previous "$self->_error = ..." form was an
# assignment to a method call, not a write to that field.
$self->{_error} = "incomplete reply";
554
push(@$res, substr ( $response, $pos, $len ));