~caravone/+junk/data

« back to all changes in this revision

Viewing changes to scatter.py

Committer: Curtis Caravone
Date: 2012-08-21 22:51:30 UTC
Revision ID: curtis.caravone@canonical.com-20120821225130-er7x5smsq7e3tu24

Added scripts for generating graphs of filesync performance

files added:

README

cpu_args.txt

cpu_args_half.txt

cpu_args_raw.txt

daily_graphs.sh

fetcher.py

make_plot.py

scatter.py

vsize_args.txt

vsize_args_half.txt

vsize_args_raw.txt

Show diffs side-by-side

added added

removed removed

scatter.py

"""

Take two graphite data files and scatter plot x vs. y.

"""

import datetime

import math

import pytz

import re

import sys

from pylab import *

from scipy import stats

import fetcher

def parse_raw(raw_lines):
    """Parse graphite "raw" text into a single combined data series.

    Every non-blank line is expected to have the form
    ``name,start,end,step|v1,v2,...``.  Lines are ordered by the integers
    embedded in their dataset names (lines whose names carry the same
    integers keep their input order) and their values are concatenated in
    that order.

    Args:
        raw_lines: the complete text of a raw data file, as one string.

    Returns:
        ``((name, start, end, step), data)`` where ``name`` is the dataset
        names joined with ", ", ``start``/``end``/``step`` are floats shared
        by every line, and ``data`` is the flat list of values.  A value that
        cannot be parsed as a float (for example a literal ``None``) is
        stored as ``None``.

    Raises:
        ValueError: if there are no data lines, if a line is malformed, or
            if the lines disagree on (start, end, step).
    """
    def dataset_name(line):
        # The name may itself contain commas, so peel the three numeric
        # fields off the right-hand end of the header instead of taking the
        # first comma-separated field.
        return line.split("|")[0].rsplit(",", 3)[0]

    # Sort by numerical values found in the dataset names (if any).  Only
    # the name is scanned: timestamps and data values must not influence
    # the ordering.
    def get_nums(line):
        return [int(digits) for digits in re.findall(r'\d+', dataset_name(line))]

    def to_float(s):
        try:
            return float(s)
        except ValueError:
            return None

    lines = [l for l in raw_lines.split("\n") if l.strip()]
    if not lines:
        raise ValueError("no data lines found in raw input")
    lines.sort(key=get_nums)

    names = []
    data = []
    start_end_step = None

    for raw in lines:
        (header, values) = raw.split("|")
        (name, start, end, step) = header.rsplit(",", 3)
        current = (float(start), float(end), float(step))

        if start_end_step is None:
            start_end_step = current
        elif start_end_step != current:
            raise ValueError(
                "dataset %r covers %r but earlier datasets cover %r"
                % (name, current, start_end_step))

        names.append(name)
        data.extend(to_float(s) for s in values.split(","))

    (start, end, step) = start_end_step
    return ((", ".join(names), start, end, step), data)

def do_plot(xfilespec, yfilespec, label_time):

print "xfilespec", xfilespec, "yfilespec", yfilespec

# Returns label, scale, ticks, filename

def get_params(filespec):

params = filespec.split(":")

if len(params) < 4:

retval = filespec, "1", None, filespec

else:

retval = params[0:2] + [int(params[2]), ":".join(params[3:])]

print "params", retval

return retval

x_label, x_scale, x_ticks, xfile = get_params(xfilespec)

y_label, y_scale, y_ticks, yfile = get_params(yfilespec)

print "x,y", (x_label, x_scale, x_ticks, xfile), (y_label, y_scale, y_ticks, yfile)

xraw = open(xfile).read()

yraw = open(yfile).read()

# print "xraw", xraw, "yraw", yraw

xdata = parse_raw(xraw)

ydata = parse_raw(yraw)

# print "xdata", repr(xdata), "ydata", repr(ydata)

print "xrange", xdata[0]

print "yrange", ydata[0]

x = array([n / float(eval(x_scale)) if n else 0.0 for n in xdata[1]])

y = array([n / float(eval(y_scale)) if n else 0.0 for n in ydata[1]])

print "len(x)", len(x)

print "len(y)", len(y)

assert xdata[0][1:] == ydata[0][1:]

assert len(x) == len(y)

t1 = xdata[0][1]

100

t2 = xdata[0][2]

101

102

# print "t1", t1, "t2", t2

103

104

t1date = datetime.datetime.fromtimestamp(t1, pytz.timezone("UTC"))

105

t2date = datetime.datetime.fromtimestamp(t2, pytz.timezone("UTC"))

106

107

drange = "%s to %s" % (t1date, t2date)

108

109

slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)

110

111

print 'r value', r_value

112

print 'p_value', p_value

113

print 'standard error', std_err

114

print '%s = %s * %s + %s' % (y_label, slope, x_label, intercept)

115

116

line = slope*x+intercept

117

# plot(x,line,'r-',xi,y,'o')

118

# show()

119

120

line_label = "y = %.2g + %.2gx (r=%0.2f)" % (intercept, slope, r_value)

121

print "line_label", line_label

122

123

x_min, x_max, y_min, y_max = amin(x), amax(x), amin(y), amax(y)

124

125

# scatter([1,2,3], [2,4,5], s=1, facecolor='0.5', lw = 0)

126

plot(x, y, 'k.', alpha=0.1)

127

plot(x, line, 'r-')

128

axis([x_min, x_max, y_min, y_max])

129

130

def filter_ticks(t, n, min_val, max_val):

131

rawloc, lab = t

132

133

print "rawloc", rawloc

134

print "labels", lab

135

136

loc = [x for x in rawloc if min_val <= x <= max_val]

137

138

print "filtered loc", loc

139

140

inc = max(len(rawloc) / n, 1)

141

142

print "inc", inc

143

144

print "filtered loc", loc[::inc]

145

146

return loc[::inc]

147

148

if x_ticks:

149

xticks(filter_ticks(xticks(), x_ticks, x_min, x_max))

150

if y_ticks:

151

yticks(filter_ticks(yticks(), y_ticks, y_min, y_max))

152

153

t = "%s vs. %s" % (y_label, x_label)

154

if label_time:

155

t = "%s\n%s" % (drange, t)

156

title(t)

157

158

# if label_time:

159

# title(drange + "\nfilesync activity")

160

161

text(.98 * x_min + .02 * x_max, .93 * y_max + .07 * y_min, line_label,

162

color="r", weight="semibold", size="9")

163

xlabel(x_label)

164

ylabel(y_label)

165

166

def main(argv):

167

if len(argv) % 2 != 1:

168

print "Usage: %s file1 file2 ..." % argv[0]

169

print "Or: %s label:scale:ticks:file1..."

170

sys.exit(1)

171

172

pairs = zip(argv[1::2], argv[2::2])

173

174

print "Plotting pairs", pairs

175

176

rows = int(math.sqrt(len(pairs)))

177

columns = math.ceil(len(pairs) / float(rows))

178

plotnum = 1

179

180

# figure(figsize=(17,8.8))

181

figure(figsize=(17*.8,8.8*.8))

182

183

label_time = True

184

for i, j in pairs:

185

186

print "Adding plot %s on %s x %s grid" % (plotnum, rows, columns)

187

188

subplot(rows, columns, plotnum)

189

do_plot(i, j, label_time)

190

plotnum += 1

191

label_time = False

192

193

subplots_adjust(hspace=.35, wspace=.35)

194

195

if __name__ == '__main__':
    # Build the figure from the command-line filespecs, then display it.
    main(sys.argv)
    show()

198