2
Take two graphite data files and scatter plot x vs. y.
12
from scipy import stats
16
# Parse the raw text of one data file and return
# ((name, start, end, step), data).
# NOTE(review): this view of the function is incomplete -- indentation is
# lost and several statements (the definitions of get_nums, to_float, araw,
# names and data, among others) are not visible, so the comments below only
# describe the lines that are shown.
def parse_raw(raw_lines):
17
# raw_lines is split on newlines to get one entry per dataset line.
lines = raw_lines.split("\n")
19
# Sort by numerical values found in the dataset names (if any)
21
# Sort key: every run of digits found in s, as a list of ints.
return [int(x) for x in re.findall(r'\d+', s)]
23
lines.sort(key=get_nums)
25
# print "lines", lines
32
# Visit only the lines that are not blank after stripping whitespace.
for raw in [l for l in lines if len(l.strip()) > 0]:
34
# print "raw line", raw
36
# Each line must contain exactly one "|" for this two-way unpacking to work.
(a, b) = raw.split("|")
46
# The last three fields of araw are read as floats: start, end, step.
# presumably araw is derived from the part before "|" -- TODO confirm
(start, end, step) = [float(s) for s in araw[-3:]]
47
# The part after "|" is a comma-separated list of values.
data1 = [to_float(s) for s in b.split(",")]
49
# Every line after the first must carry the same (start, end, step) triple.
if start_end_step is not None:
50
assert start_end_step == (start, end, step)
52
start_end_step = (start, end, step)
54
# The combined name is the collected names joined with ", ".
name = ", ".join(names)
55
(start, end, step) = start_end_step
56
return ((name, start, end, step), data)
58
# Read the two data files named by xfilespec and yfilespec, scatter plot
# x against y, and annotate the plot with a linear-regression label.
# NOTE(review): this view of the function is incomplete -- indentation is
# lost and several statements (including the definitions of t1, t2 and
# rawloc, and any use of label_time) are not visible, so the comments below
# only describe the lines that are shown.
def do_plot(xfilespec, yfilespec, label_time):
59
print "xfilespec", xfilespec, "yfilespec", yfilespec
61
# Returns label, scale, ticks, filename
62
def get_params(filespec):
63
params = filespec.split(":")
65
# Default: the filespec is both label and filename, scale is "1", no ticks.
retval = filespec, "1", None, filespec
67
# "label:scale:ticks:file" form: ticks becomes an int, and everything from
# the fourth field on is re-joined with ":" to form the filename.
# NOTE(review): the condition selecting between the two retval assignments
# is not visible here.
retval = params[0:2] + [int(params[2]), ":".join(params[3:])]
68
print "params", retval
71
x_label, x_scale, x_ticks, xfile = get_params(xfilespec)
72
y_label, y_scale, y_ticks, yfile = get_params(yfilespec)
74
print "x,y", (x_label, x_scale, x_ticks, xfile), (y_label, y_scale, y_ticks, yfile)
76
# NOTE(review): the file objects returned by open() are never explicitly
# closed.
xraw = open(xfile).read()
77
yraw = open(yfile).read()
79
# print "xraw", xraw, "yraw", yraw
82
# parse_raw returns ((name, start, end, step), data).
xdata = parse_raw(xraw)
83
ydata = parse_raw(yraw)
85
# print "xdata", repr(xdata), "ydata", repr(ydata)
87
print "xrange", xdata[0]
88
print "yrange", ydata[0]
90
# Divide each value by the scale; falsy values (such as None or 0) become 0.0.
# NOTE(review): the scale string comes from the filespec and is passed to
# eval(), which will execute arbitrary expressions -- confirm the filespecs
# are always trusted input.
x = array([n / float(eval(x_scale)) if n else 0.0 for n in xdata[1]])
91
y = array([n / float(eval(y_scale)) if n else 0.0 for n in ydata[1]])
93
print "len(x)", len(x)
94
print "len(y)", len(y)
96
# Both datasets must share start, end and step (index 0, the name, is skipped)
# and have the same number of points.
assert xdata[0][1:] == ydata[0][1:]
97
assert len(x) == len(y)
102
# print "t1", t1, "t2", t2
104
# t1 and t2 are interpreted as timestamps and converted to UTC datetimes.
# presumably they are the start and end of the data range -- TODO confirm
t1date = datetime.datetime.fromtimestamp(t1, pytz.timezone("UTC"))
105
t2date = datetime.datetime.fromtimestamp(t2, pytz.timezone("UTC"))
107
drange = "%s to %s" % (t1date, t2date)
109
# Linear regression of y on x.
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
111
print 'r value', r_value
112
print 'p_value', p_value
113
print 'standard error', std_err
114
print '%s = %s * %s + %s' % (y_label, slope, x_label, intercept)
116
# Fitted values; only referenced by the commented-out plot call below in the
# lines visible here.
line = slope*x+intercept
117
# plot(x,line,'r-',xi,y,'o')
120
line_label = "y = %.2g + %.2gx (r=%0.2f)" % (intercept, slope, r_value)
121
print "line_label", line_label
123
x_min, x_max, y_min, y_max = amin(x), amax(x), amin(y), amax(y)
125
# scatter([1,2,3], [2,4,5], s=1, facecolor='0.5', lw = 0)
126
# Black points at alpha 0.1.
plot(x, y, 'k.', alpha=0.1)
128
# Axis limits are set to the data extents.
axis([x_min, x_max, y_min, y_max])
130
# Keep tick locations within [min_val, max_val] and thin them out.
# NOTE(review): how rawloc is derived from t is not visible here.
def filter_ticks(t, n, min_val, max_val):
133
print "rawloc", rawloc
136
loc = [x for x in rawloc if min_val <= x <= max_val]
138
print "filtered loc", loc
140
# Stride used to thin the locations, never less than 1.
# NOTE(review): n is x_ticks/y_ticks, which is None in the default retval of
# get_params; presumably a line not visible here guards that case -- confirm.
inc = max(len(rawloc) / n, 1)
144
print "filtered loc", loc[::inc]
149
# The result of calling xticks()/yticks() with no arguments is filtered and
# passed back to the same function.
xticks(filter_ticks(xticks(), x_ticks, x_min, x_max))
151
yticks(filter_ticks(yticks(), y_ticks, y_min, y_max))
153
t = "%s vs. %s" % (y_label, x_label)
155
# Prefix the title text with the date range on its own line.
# NOTE(review): any condition controlling this line is not visible here.
t = "%s\n%s" % (drange, t)
159
# title(drange + "\nfilesync activity")
161
# Place the regression label 2% in from the left edge of the x range and 7%
# below the top of the y range.
text(.98 * x_min + .02 * x_max, .93 * y_max + .07 * y_min, line_label,
162
color="r", weight="semibold", size="9")
167
# Driver statements: pair up the command-line arguments and draw one subplot
# per (x, y) pair.
# NOTE(review): the enclosing definition's header and several statements
# (including the definitions of plotnum, i, j and label_time) are not
# visible here, so the comments below only describe the lines that are shown.
# The arguments after argv[0] must come in pairs, so len(argv) must be odd.
if len(argv) % 2 != 1:
168
print "Usage: %s file1 file2 ..." % argv[0]
169
# NOTE(review): this format string contains %s but no argument is applied,
# so a literal "%s" is printed instead of the program name.
print "Or: %s label:scale:ticks:file1..."
172
# Pair consecutive arguments: (argv[1], argv[2]), (argv[3], argv[4]), ...
pairs = zip(argv[1::2], argv[2::2])
174
print "Plotting pairs", pairs
176
# Grid layout: rows is the integer part of sqrt(number of pairs); columns is
# however many are then needed to fit all pairs.
rows = int(math.sqrt(len(pairs)))
# NOTE(review): with no file arguments, len(argv) == 1 passes the parity
# check above, pairs is empty, rows is 0 and this division raises
# ZeroDivisionError.
# NOTE(review): under Python 2, math.ceil returns a float, so columns reaches
# subplot() as a float -- confirm that is acceptable.
columns = math.ceil(len(pairs) / float(rows))
180
# figure(figsize=(17,8.8))
181
# 80% of the commented-out figure size above.
figure(figsize=(17*.8,8.8*.8))
186
print "Adding plot %s on %s x %s grid" % (plotnum, rows, columns)
188
subplot(rows, columns, plotnum)
189
do_plot(i, j, label_time)
193
subplots_adjust(hspace=.35, wspace=.35)
195
if __name__ == '__main__':