~ubuntu-branches/ubuntu/raring/sunpinyin/raring

« back to all changes in this revision

Viewing changes to python/utils.py

  • Committer: Package Import Robot
  • Author(s): YunQiang Su
  • Date: 2012-03-30 15:31:55 UTC
  • mfrom: (1.1.3) (1.2.7 sid)
  • Revision ID: package-import@ubuntu.com-20120330153155-qgls77sogzgtg9zp
Tags: 2.0.3+git20120222-1
* Team upload: git snapshot 20120222.
   - fix breakage if LDFLAGS in environment contains
       multiple words (Closes: #646001).
   - rm patches merged to upstream:
       append-os-environ-toenv.patch
       fix-ftbfs-on-sh.patch
       remove-10-candidate-words-limitation.patch
   - refresh disable-lm-dict-compile.patch.
* Bump Standards-Version to 3.9.3: no changes needed.
* add libsunpinyin3-dbg and python-sunpinyin packages.
* debian/compat to 9, multiarch it.
* rewrite debian/rules with dh 7 format.

Show diffs side-by-side

added added

removed removed

Lines of Context:
57
57
        return "ngram: " + self.key.__str__() + " freq: " + str(self.freq)
58
58
 
59
59
def read_ch_sentences(file):
60
 
    nesting = 0
61
 
    buf = ""
 
60
    buf = []
62
61
    for line in file:
63
62
        if buf and (line[0].isspace() or len(buf) <= 40):
64
 
            yield buf
65
 
            buf, nesting = "", 0
 
63
            yield ''.join(buf)
 
64
            buf = []
66
65
 
67
66
        for ch in line:
68
 
            if ch.isspace():
 
67
            if ch.isspace() or ch == u'—':
69
68
                continue
70
69
 
71
 
            if ch in u"(“《":
72
 
                nesting +=1
73
 
            elif ch in u")”》":
74
 
                nesting -=1
75
 
 
76
 
            if ch in u";。!?…" and nesting == 0:
 
70
            if ch in u";。!?…—":
77
71
                if buf:
78
 
                    yield buf + ch
79
 
                    buf, nesting = "", 0
 
72
                    buf.append(ch)
 
73
                    yield ''.join (buf)
 
74
                    buf = []
80
75
            else:
81
 
                buf += ch
 
76
                buf.append (ch)
82
77
    if buf:
83
 
        yield buf
 
78
        yield ''.join (buf)
84
79
 
85
80
def mergesort (iters):
86
81
        heap=[]