1
1
"""Count the frequencies of words in a string"""
3
from __future__ import division
4
9
"""Return a dictionary of words and word counts in a string."""
7
12
for word in text.split():
8
freqs[word] = freqs.get(word, 0) + 1
14
freqs[lword] = freqs.get(lword, 0) + 1
11
18
def print_wordfreq(freqs, n=10):
    """Print the n most common words and counts in the freqs dict.

    Parameters
    ----------
    freqs : dict
        Mapping of word -> count.
    n : int, optional
        Maximum number of entries to print (default 10).

    Notes
    -----
    Entries are ordered by descending count; words with equal counts are
    ordered by descending word, because ``(count, word)`` pairs are sorted
    as tuples in reverse. An empty dict prints nothing.
    """
    # Build a real list (not a lazy zip) so it can be sorted and sliced on
    # both Python 2 and Python 3.
    items = sorted(((count, word) for word, count in freqs.items()),
                   reverse=True)
    for (count, word) in items[:n]:
        # NOTE(review): "word count" output format is assumed -- confirm
        # against the callers that consume this output.
        print("%s %s" % (word, count))
20
if __name__ == '__main__':
    # Imported here so the script entry point is self-contained.
    import gzip

    # Read the compressed file inside a context manager so the handle is
    # closed even if reading fails.
    with gzip.open('HISTORY.gz') as history:
        text = history.read()
    freqs = wordfreq(text)
b'\\ No newline at end of file'
28
def wordfreq_to_weightsize(worddict, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
    """Convert word counts into (alpha, size) display weights.

    Each count is scaled linearly to a weight ``w`` in [0, 1] between the
    smallest and the largest count in ``worddict``. That weight is then used
    to interpolate between ``minalpha``/``maxalpha`` and
    ``minsize``/``maxsize``.

    Parameters
    ----------
    worddict : dict
        Mapping of word -> numeric count.
    minsize, maxsize : number, optional
        Size assigned to the least / most frequent word.
    minalpha, maxalpha : number, optional
        Alpha assigned to the least / most frequent word.

    Returns
    -------
    dict
        Mapping of word -> ``(alpha, size)``. Empty if ``worddict`` is empty.
    """
    if not worddict:
        # min()/max() would raise ValueError on an empty sequence.
        return {}

    counts = list(worddict.values())
    mincount = min(counts)
    maxcount = max(counts)
    spread = maxcount - mincount

    weights = {}
    for k, v in worddict.items():
        if spread:
            # float() keeps this a true division even without
            # ``from __future__ import division`` on Python 2.
            w = float(v - mincount) / spread
        else:
            # All counts are equal, so there is no range to interpolate
            # over; treat every word as maximally weighted instead of
            # dividing by zero.
            w = 1.0
        alpha = minalpha + (maxalpha - minalpha) * w
        size = minsize + (maxsize - minsize) * w
        weights[k] = (alpha, size)
    return weights
40
def tagcloud(worddict, n=10, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
    """Draw the n highest-weighted words at random positions on a figure.

    The counts in ``worddict`` are converted to ``(alpha, size)`` pairs with
    ``wordfreq_to_weightsize``. The ``n`` entries with the largest
    ``(alpha, size, word)`` tuples are then drawn, lower-cased, as text on a
    matplotlib axes that fills the whole figure.

    Parameters
    ----------
    worddict : dict
        Mapping of word -> count.
    n : int, optional
        Maximum number of words to draw (default 10).
    minsize, maxsize, minalpha, maxalpha : number, optional
        Forwarded unchanged to ``wordfreq_to_weightsize``.
    """
    # Local imports keep matplotlib an optional dependency of the module.
    import random
    from matplotlib import pyplot as plt

    weights = wordfreq_to_weightsize(worddict, minsize, maxsize, minalpha, maxalpha)

    # NOTE(review): creating a fresh figure here is assumed -- confirm against
    # the full file.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_position([0.0, 0.0, 1.0, 1.0])

    # sorted() returns a list, so slicing works even where zip/dict views are
    # lazy iterators (Python 3).
    items = sorted(
        ((alpha, size, word) for word, (alpha, size) in weights.items()),
        reverse=True,
    )
    for alpha, size, word in items[:n]:
        # Positions are drawn uniformly, so words may overlap.
        xpos = random.uniform(0.0, 1.0)
        ypos = random.uniform(0.0, 1.0)
        ax.text(xpos, ypos, word.lower(), alpha=alpha, fontsize=size)
b'\\ No newline at end of file'