~openerp-commiter/openobject-addons/trunk-extra-addons

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import time
import os
import StringIO
import odt2txt

#
# This should be the indexer
#
def content_index(content, filename=None, content_type=None):
	fname,ext = os.path.splitext(filename)
	result = ''
	if ext == '.doc': #or content_type ?
		(stdin,stdout) = os.popen2('antiword -', 'b')
		stdin.write(content)
		stdin.close()
		result = stdout.read().decode('latin1','replace').encode('utf-8','replace')
	elif ext == '.pdf':
		fname = os.tempnam(filename)+'.pdf'
		fp = file(fname,'wb')
		fp.write(content)
		fp.close()
		fp = os.popen('pdftotext -enc UTF-8 -nopgbrk '+fname+' -', 'r')
		result = fp.read()
		fp.close()
	elif ext == '.odt':
		s = StringIO.StringIO(content)
		o = odt2txt.OpenDocumentTextFile(s)
		result = o.toString().encode('ascii','replace')
	elif ext in ('.txt','.py','.patch','.html','.csv') :
		result = content
	return result