~zorba-coders/zorba/bug-1158052-read-pdf

« back to all changes in this revision

Viewing changes to test/Queries/read-pdf/extractText-text.xq

  • Committer: Cezar Andrei
  • Date: 2012-09-27 22:36:26 UTC
  • Revision ID: cezar.lp@cezarandrei.com-20120927223626-8k1bd1i1cwhzu7zh
Reworked tests to avoid exact comparison. Images depend on the fonts installed, and text can be split in different ways.

Show diffs side-by-side

added added

removed removed

Lines of Context:
6
6
 
7
7
 
8
8
let $pdf := file:read-binary(resolve-uri("28msec-NoSQLNow.pdf"))
9
 
let $options  := 
 
9
let $options  :=
10
10
    <rpo:extract-text-options>
11
11
        <rpo:text-kind>simple</rpo:text-kind>
12
 
    </rpo:extract-text-options> 
 
12
    </rpo:extract-text-options>
 
13
let $str := read-pdf:extract-text($pdf, $options)
13
14
return
14
 
    read-pdf:extract-text($pdf, $options)
 
15
     (fn:contains($str, "JSONiq"),
 
16
      fn:contains($str, 'Processing Language'),
 
17
      fn:contains($str, "Harnessing Flexible Data in the Cloud"),
 
18
      fn:contains($str, "SportsML"),
 
19
      fn:contains($str, "reliability, availability, performance, and scalability"),
 
20
      fn:contains($str, "FLWOR Foundation"),
 
21
      fn:contains($str, "Visit us at our booth!"),
 
22
      fn:string-length($str) > 1000 )
15
23
 
16
24