2
# -*- coding: utf-8 -*-
3
# This program is free software: you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation, either version 3 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
# Author: marmuta <marmvta@gmail.com>
18
# Calculate keystroke saving rate
19
# -------------------------------
22
# "EFFECTS OF NGRAM ORDER AND TRAINING TEXT SIZE ON WORD PREDICTION"
23
# by Gregory W. Lesher, Ph.D., Bryan J. Moulton, M.S., and D. Jeffery
27
# ksr <language model> <typed text> <prediction limit>
28
# Calculates the number of keystrokes saved with a maximum number of
29
# <prediction limit> predictions to choose from when typing <typed text>.
32
# split_corpus moby.txt
33
# train training.txt 3 moby.lm
34
# ksr moby.lm testing.txt 10
35
# This loads language model moby.lm, uses it to create at most 10 predicted
36
# words per typed letter and simulates the typing of testing.txt.
39
from __future__ import division, print_function, unicode_literals
41
import sys, re, codecs, math
42
from pypredict import *
43
from optparse import OptionParser
44
import matplotlib.pyplot as plt
47
# Command-line interface. The usage string names a single positional
# argument ("text"); everything else is optional.
# NOTE(review): the bare integers between the statements (48, 49, ...) look
# like line-number residue from extraction, and the jumps 50 -> 52 and
# 57 -> 59 suggest one argument line is missing from each of the
# --num-choices and --order calls -- confirm against the full file.
parser = OptionParser(usage="Usage: %prog [options] text")
48
# -m: string option stored as options.language_model.
parser.add_option("-m", "--language-model", type="str", dest="language_model",
49
help="optional filename of a language model")
50
# -n: integer option whose default is written as the string "10".
# No dest= is visible here; optparse derives the destination from the long
# option name, which matches the later read of options.num_choices.
parser.add_option("-n", "--num-choices", type="int", default="10",
52
help="number of virtual word choices")
53
# -l: boolean flag stored as options.learn.
parser.add_option("-l", "--learn", action="store_true", dest="learn",
54
help="learn after each sentence")
55
# -c: boolean flag stored as options.cached.
parser.add_option("-c", "--cached-model", action="store_true", dest="cached",
56
help="use a model with recency caching")
57
# -o: integer option whose default is written as the string "3".
parser.add_option("-o", "--order", type="int", default="3",
59
help="order of the language model")
60
# -p: boolean flag stored as options.plot.
parser.add_option("-p", "--plot", action="store_true", dest="plot",
61
help="plot the result with matplotlib")
62
# options holds the parsed option values, args the positional arguments.
options, args = parser.parse_args()
65
# NOTE(review): this region is an incomplete excerpt. The bare integers are
# presumably line-number residue, and the numbering gaps (65 -> 70,
# 72 -> 74, 82 -> 84 -> 86 -> 90) mean guarding conditions and call
# arguments are not visible here.

# Message asking for the input text file; the condition that triggers it is
# not visible in this excerpt.
print("Please supply a text file as input for simulated typing.")
70
# Two alternative model constructions. The branch selecting between them and
# the definition of `order` are not visible -- presumably driven by the
# --cached-model and --order options; confirm.
model = CachedDynamicModel(order)
72
model = DynamicModel(order)
74
# Load a stored language model only when -m/--language-model was given;
# timeit(...) is used as a context manager labelled "loading model".
if options.language_model:
75
with timeit("loading model"):
76
model.load(options.language_model)
78
# Read the corpus named by the first positional argument and split it into
# sentences; `spans` is not used in the visible lines.
sentences, spans = split_sentences(read_corpus(args[0]))
79
num_choices = options.num_choices
81
# With --learn the same model is passed as the learning target, else None.
learn_model = model if options.learn else None
82
# Run the simulation; it yields the character total and the pressed-key
# count. The remaining arguments of this call are cut off in this excerpt.
total_chars, pressed_keys = simulate_typing(model, learn_model, sentences,
84
Progress(len(sentences),
86
#print get_stat_string(total_chars, pressed_keys)
90
plt.show() # blocks; allows for interaction with the chart, saving images
92
def get_stat_string(total_chars, pressed_keys):
    """
    Format the keystroke statistics as a single human-readable line.

    total_chars  -- total number of characters counted
    pressed_keys -- number of keys that had to be pressed

    Returns a string holding both counts, the number of saved keystrokes
    (total_chars - pressed_keys) and the keystroke saving rate in percent.
    """
    saved_keystrokes = total_chars - pressed_keys

    # An empty input has no meaningful rate; report 0 instead of dividing
    # by zero.
    if total_chars:
        ksr = saved_keystrokes * 100.0 / total_chars
    else:
        ksr = 0.0

    return ("characters %8d, keystrokes %8d, saved %8d, ksr %6.2f%%"
            % (total_chars, pressed_keys, saved_keystrokes, ksr))
101
# Constructor of the progress reporter: stores a PlotProgress instance and
# the number of sentences.
# NOTE(review): `plot` is not referenced in the visible lines. The bare
# integers are presumably line-number residue, and the jump 101 -> 103 means
# one source line is missing -- presumably a guard on `plot` around the
# PlotProgress creation; confirm against the full file.
def __init__(self, num_sentences, plot = False):
103
self._plot_progress = PlotProgress()
104
self._num_sentences = num_sentences
106
# Progress callback: i is used as a zero-based index (it is printed as i+1),
# n as the total count, followed by the running character and key totals.
# NOTE(review): the bare integers are presumably line-number residue; the
# gaps 106 -> 108, 111 -> 113 and 114 -> 117 mean some source lines are
# missing from this excerpt.
def __call__(self, i, n, total_chars, pressed_keys):
108
# Reporting interval: one hundredth of the sentence count, at least 1.
step = max(1, self._num_sentences // 100)
109
# Report on the first item, the last item and every step-th item.
if i == 0 or i == n-1 or (i+1) % step == 0:
110
saved_keystrokes = total_chars - pressed_keys
111
# Saving rate in percent; 0 when no characters were counted.
ksr = saved_keystrokes * 100.0 / total_chars if total_chars else 0
113
print("sentence {:6} of {:6}: {}" \
114
.format(i+1, n, get_stat_string(total_chars, pressed_keys)))
117
# Forward the one-based position and the rate to the plot helper.
# NOTE(review): source lines 115-116 are missing -- presumably a check that
# plotting is enabled; confirm.
self._plot_progress(i+1, ksr)
125
# Plot callback: records one (n, ksr) data point and redraws the chart with
# matplotlib.
# NOTE(review): the bare integers are presumably line-number residue; the
# gaps 128 -> 132 -> 135 and 139 -> 143 mean source lines are missing, and
# the final suptitle(...) call is cut off in this excerpt.
def __call__(self, n, ksr):
126
# Append the new point to the accumulated x and y series.
self.xvalues.append(n)
127
self.ksrs.append(ksr)
128
plt.ion() # interactive mode on
132
# The trailing comma only wraps the call's result in a discarded tuple.
plt.plot(self.xvalues, self.ksrs),
135
plt.xlabel("sentences")
136
plt.ylabel('ksr [%]')
137
# Raise the upper y-limit by 5% of the current y-range.
ymin, ymax = plt.ylim()
138
plt.ylim(ymin, ymax+(ymax-ymin)*0.05)
139
plt.gcf().suptitle('Keystroke savings rate',
143
if __name__ == '__main__':