1
1
#!/usr/bin/env python
2
2
#-*- coding:utf-8 -*-
3
import sys, os, datetime, MySQLdb
4
# Licensed under MOZILLA PUBLIC LICENSE Version 1.1
5
# Author: Keith Hughitt <keith.hughitt@nasa.gov>
7
"""Helioviewer.org Database Coverage Plotter"""
15
from matplotlib import pyplot
4
16
from numpy import std, median
5
import matplotlib.pyplot as plt
9
print "Incorrect number of arguments. Please specify the database name, user, and password, e.g.: "
10
print "python coverage.py dbname dbuser dbpass"
13
db = MySQLdb.connect(use_unicode=True, charset = "utf8", host="localhost", db=argv[1], user=argv[2], passwd=argv[3])
19
"""Main application"""
22
dbname, dbuser, dbpass = getDatabaseInfo()
24
db = MySQLdb.connect(use_unicode=True, charset = "utf8", host="localhost",
25
db=dbname, user=dbuser, passwd=dbpass)
14
26
cursor = db.cursor()
16
28
sources = getDataSources(cursor)
30
# Setup directory structure to write graphs to
32
createDirectories(sources)
34
print("Unable to create directories.")
37
numDays = int(raw_input("How many days per graph? "))
38
timeIncrement = datetime.timedelta(days=numDays)
40
now = datetime.datetime.now()
20
42
# For each data source
21
for name, id in sources.iteritems():
22
dates,freqs = getFrequencies(cursor, name, id)
24
plotFrequencies(name, dates, freqs)
43
for name,sourceId in sources.iteritems():
44
print("Processing: " + name)
45
date = getDataSourceStartDate(sourceId, cursor)
47
# For each n day block from the start date until present
50
date = date + timeIncrement
52
# Find and plot the number of images per day
53
dates,freqs = getFrequencies(cursor, sourceId, startDate, date)
55
filename = "%s/%s_%s-%s.svg" % (name, name,
56
startDate.strftime("%Y%m%d"), date.strftime("%Y%m%d"))
57
filename = filename.replace(" ", "_")
59
plotFrequencies(name, filename, dates, freqs)
62
print("Cleaning up and exiting...")
64
def createDirectories(sources):
65
"""Creates a directory structure to use for storing the coverage graphs."""
66
dir = "Helioviewer_Coverage_" + datetime.datetime.now().strftime("%Y%m%d")
70
for name,sourceId in sources.iteritems():
71
os.mkdir(name.replace(" ", "_"))
73
def getDatabaseInfo():
74
"""Prompts the user for database information"""
76
print ("Please enter database information")
77
dbname = raw_input("Database: ")
78
dbuser = raw_input("User: ")
79
dbpass = getpass.getpass("Password: ")
81
if not checkDBInfo(dbname, dbuser, dbpass):
82
print ("Unable to connect to the database. "
83
"Please check your login information and try again.")
85
return dbname, dbuser,dbpass
87
def checkDBInfo(dbname, dbuser, dbpass):
88
"""Validates database login information"""
90
db = MySQLdb.connect(db=dbname, user=dbuser, passwd=dbpass)
91
except MySQLdb.Error, e:
98
def getDataSourceStartDate(sourceId, cursor):
99
"""Returns a datetime object for the beginning of the first day
100
where data was available for a given source id
102
cursor.execute("""SELECT date FROM images
104
ORDER BY date ASC LIMIT 1;""" % sourceId)
106
return cursor.fetchone()[0].replace(hour=0,minute=0,second=0,microsecond=0)
26
108
def getDataSources(cursor):
27
''' Returns a list of datasources to query '''
28
cursor.execute("SELECT name, id FROM datasource")
109
"""Returns a list of datasources to query"""
110
cursor.execute("SELECT name, id FROM datasources")
31
113
# Get data sources
32
114
for ds in cursor.fetchall():
116
sourceId = int(ds[1])
36
118
# Only include datasources which for images exist in the database
37
cursor.execute("SELECT COUNT(*) FROM image WHERE sourceId=%d" % id)
119
cursor.execute("""SELECT COUNT(*) FROM images
120
WHERE sourceId=%d""" % sourceId)
38
121
count = cursor.fetchone()[0]
41
datasources[name] = id
124
datasources[name] = sourceId
43
126
return datasources
45
def getFrequencies(cursor, name, id):
46
''' Returns arrays containing the dates queried and the counts for each of those days '''
48
# Find the start and end dates of available data (set time portion to 00:00:00.000)
49
cursor.execute("SELECT date FROM image WHERE sourceId = %d ORDER BY date ASC LIMIT 1;" % id)
50
startDate = cursor.fetchone()[0].replace(hour=0,minute=0,second=0,microsecond=0)
51
cursor.execute("SELECT date FROM image WHERE sourceId = %d ORDER BY date DESC LIMIT 1;" % id)
52
endDate = cursor.fetchone()[0].replace(hour=0,minute=0,second=0,microsecond=0)
128
def getFrequencies(cursor, sourceId, startDate, endDate):
129
"""Returns arrays containing the dates queried and the counts for
56
132
# Get counts for each day
59
135
day = datetime.timedelta(days=1)
60
ms = datetime.timedelta(microseconds = 1)
62
139
while date <= endDate:
63
sql = "SELECT COUNT(*) FROM image WHERE date BETWEEN '%s' AND '%s' AND sourceId = %d;" % (date, date + day - ms, id)
140
sql = """SELECT COUNT(*) FROM images
141
WHERE date BETWEEN '%s' AND '%s'
142
AND sourceId = %d;""" % (date, date + day, sourceId)
64
143
cursor.execute(sql)
65
144
n = int(cursor.fetchone()[0])
70
149
return dates,freqs
72
def plotFrequencies(name, dates, freqs):
73
# mean, median, and standard deviation
74
avg = sum(freqs) / len(freqs)
151
def plotFrequencies(name, filename, dates, freqs):
152
"""Creates a histogram representing the data counts for each day"""
153
# Mean, median, and standard deviation
155
avg = sum(freqs) / numDays
160
fig = pyplot.figure()
80
161
ax = fig.add_subplot(111)
81
162
ax.plot(dates, freqs, color='limegreen')
82
163
fig.autofmt_xdate()
85
plt.ylabel('Number of Images (per day)')
86
plt.title(r'$\mathrm{%s\ Coverage:}\ n=%d,\ \bar{x}=%.5f,\ x_{1/2}=%.5f,\ \hat{\sigma}=%.5f$' % (name, len(freqs), avg, med, sigma))
87
#plt.axis([0, 0.05, 0, 1])
165
pyplot.xlabel('Time')
166
pyplot.ylabel('Number of Images (per day)')
168
title = r'$\mathrm{%s\ Coverage:}\ n=%d,\ \bar{x}=%.5f,\ x_{1/2}=%.5f,\ \hat{\sigma}=%.5f$' % (name, numDays, avg, med, sigma)
170
#pyplot.axis([0, 0.05, 0, 1])
91
plt.savefig(name + ".svg", format="svg")
174
pyplot.savefig(filename, format="svg")
177
"""Displays a greeting message"""
179
Helioviewer Database Coverage Plotter
181
This script scans a Helioviewer image database and creates histograms
182
depicting the data coverage across the different datasource lifetimes.
183
Each column in the graph shows the number of images that were found for
186
Author : Keith Hughitt <keith.hughitt@nasa.gov>
187
Last update: Feb 18, 2011
94
190
if __name__ == '__main__':
95
191
sys.exit(main(sys.argv))