~launchpad-pqm/launchpad/devel

10637.3.1 by Guilherme Salgado
Use the default python version instead of a hard-coded version
1
#!/usr/bin/python -S
8687.15.10 by Karl Fogel
Add the copyright header block to top-level files.
2
#
11316.12.1 by Martin Pool
tolerate EINTR in test_on_merge.py
3
# Copyright 2009, 2010 Canonical Ltd.  This software is licensed under the
8687.15.10 by Karl Fogel
Add the copyright header block to top-level files.
4
# GNU Affero General Public License version 3 (see the file LICENSE).
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
5
6
"""Tests that get run automatically on a merge."""
16900.2.4 by William Grant
Port scripts that use pg_stat_activity.procpid/current_query to use the new pid/query names on PostgreSQL >= 9.2.
7
9722.1.1 by Gary Poster
[testfix][r=salgado][ui=none] Try to fix the buildbot problem: buildbot is starting test_on_merge in a way that no-one else is.
8
import _pythonpath
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
9
14612.2.9 by William Grant
Other bits and pieces.
10
import errno
11
import os
12
import select
13
from signal import (
14
    SIGHUP,
15
    SIGINT,
16
    SIGKILL,
17
    SIGTERM,
18
    )
19
from subprocess import (
20
    PIPE,
21
    Popen,
22
    STDOUT,
23
    )
24
import sys
25
import time
26
5821.2.85 by James Henstridge
Add "make check_launchpad_storm_on_merge" target that runs the tests
27
import psycopg2
10866.2.1 by Maris Fogels
Rewrote the signal handling to move the sentinal into its own process group, where it won't be killed along with its children. Move the process group control code from the testrunner into the sentinal.
28
16900.2.5 by William Grant
activity_col() is now activity_cols(), and returns a full mapping.
29
from lp.services.database import activity_cols
16900.2.4 by William Grant
Port scripts that use pg_stat_activity.procpid/current_query to use the new pid/query names on PostgreSQL >= 9.2.
30
6393.2.1 by Joey Stanford
change the rather long timeout setting to something more reasonable and cleanup the explaination
31
# The TIMEOUT setting (expressed in seconds) affects how long a test will run
32
# before it is deemed to be hung, and then appropriately terminated.
33
# Its principal use is preventing a PQM job from hanging indefinitely and
34
# backing up the queue.
10866.2.2 by Maris Fogels
Lowered the test timeout a bit.
35
# e.g. Usage: TIMEOUT = 60 * 10
36
# This will set the timeout to 10 minutes.
37
TIMEOUT = 60 * 10
10866.2.1 by Maris Fogels
Rewrote the signal handling to move the sentinal into its own process group, where it won't be killed along with its children. Move the process group control code from the testrunner into the sentinal.
38
10866.2.7 by Maris Fogels
Split test_on_merge.py into functions, marked the tabnanny code as broken and needing fixing, and added a test process fork to address the case where the script was started directly from the command line.
39
HERE = os.path.dirname(os.path.realpath(__file__))
40
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
41
42
def main():
    """Prepare the test database, then hand over to bin/test.

    The testrunner is started with whatever arguments this script was
    given, but only once the test database has been set up successfully.

    Returns 1 if the database setup failed; otherwise returns the
    testrunner's exit code.
    """
    database_status = setup_test_database()
    if database_status == 0:
        return run_test_process()
    return 1
53
54
55
def setup_test_database():
    """Set up a test instance of our postgresql database.

    The work is done in three phases, each of which must succeed before
    the next one starts:

    1. Connect to template1, check the server version, drop any existing
       launchpad_ftest_template database and make sure nothing else is
       connected to the Launchpad databases.
    2. Run `make test` in database/schema to build the template database.
    3. Connect to the freshly built template and check its search path,
       encoding and locale.

    Every database connection opened here is closed again before the
    function returns, on failure paths as well as on success.

    Returns 0 for success, 1 for errors.
    """
    if not _prepare_template_rebuild():
        return 1

    # Build the template database. Tests duplicate this.
    schema_dir = os.path.join(HERE, 'database', 'schema')
    if _run_make_test(schema_dir) != 0:
        print('Failed to create database or load sampledata.')
        return 1

    if not _template_database_is_sane():
        return 1

    return 0


def _prepare_template_rebuild():
    """Clear the way for rebuilding launchpad_ftest_template.

    Returns True if the template can be rebuilt, False otherwise.
    Raises psycopg2.ProgrammingError if dropping the old template fails
    for any reason other than it not existing.
    """
    con = psycopg2.connect('dbname=template1')
    try:
        # Isolation level 0 is psycopg2's autocommit mode; it is set before
        # issuing DROP DATABASE below.
        con.set_isolation_level(0)
        cur = con.cursor()

        # Sanity check PostgreSQL version. No point in trying to create a
        # test database when PostgreSQL is too old.
        if not _server_version_is_supported(cur):
            return False

        # Drop the template database if it exists - the Makefile does this
        # too, but we can explicitly check for errors here.
        try:
            cur.execute('drop database launchpad_ftest_template')
        except psycopg2.ProgrammingError as x:
            if 'does not exist' not in str(x):
                raise

        return _wait_for_rogue_connections(cur)
    finally:
        # Always release the connection, including on the failure paths.
        con.close()


def _server_version_is_supported(cur):
    """Return False (after reporting) if PostgreSQL is older than 8.0."""
    cur.execute('show server_version')
    server_version = cur.fetchone()[0]
    try:
        numeric_server_version = tuple(map(int, server_version.split('.')))
    except ValueError:
        # Skip this check if the version number is more complicated than
        # we expected.
        return True
    if numeric_server_version < (8, 0):
        print('Your PostgreSQL version is too old.  You need 8.x.x')
        print('You have %s' % server_version)
        return False
    return True


def _wait_for_rogue_connections(cur):
    """Return True once no connections to the Launchpad databases remain.

    Checks twice, reporting any open connections and sleeping 20 seconds
    after each unsuccessful check.  Returns False if connections were
    found on both checks.
    """
    for attempt in range(2):
        cur.execute("""
            SELECT usename, %(query)s
            FROM pg_stat_activity
            WHERE datname IN (
                'launchpad_dev', 'launchpad_ftest_template', 'launchpad_ftest')
            """ % activity_cols(cur))
        results = list(cur.fetchall())
        if not results:
            return True
        # Rogue processes. Report, sleep for a bit, and try again.
        for usename, query in results:
            print('!! Open connection %s - %s' % (usename, query))
        print('Sleeping')
        time.sleep(20)
    print('Cannot rebuild database. There are open connections.')
    return False


def _run_make_test(schema_dir):
    """Run `make test` inside schema_dir with stdout discarded.

    Returns make's exit status, or 1 if make could not be started at all
    (for example because schema_dir does not exist).
    """
    devnull = open(os.devnull, 'w')
    try:
        try:
            # Passing cwd= avoids building a shell command from the path,
            # and never runs make in the wrong directory.
            return Popen(
                ['make', 'test'], cwd=schema_dir, stdout=devnull).wait()
        except OSError:
            return 1
    finally:
        devnull.close()


def _template_database_is_sane():
    """Check the search path, encoding and locale of the new template.

    No point running tests if the bedrock is crumbling.  Returns True if
    every check passes, False (after reporting the problem) otherwise.
    """
    con = psycopg2.connect('dbname=launchpad_ftest_template')
    try:
        cur = con.cursor()

        cur.execute('show search_path')
        search_path = cur.fetchone()[0]
        if search_path != '$user,public,ts2':
            print('Search path incorrect.')
            print('Add the following line to /etc/postgresql/postgresql.conf:')
            print("    search_path = '$user,public,ts2'")
            print("and tell postgresql to reload its configuration file.")
            return False

        cur.execute("""
            select pg_encoding_to_char(encoding) as encoding from pg_database
            where datname='launchpad_ftest_template'
            """)
        enc = cur.fetchone()[0]
        if enc not in ('UNICODE', 'UTF8'):
            print('Database encoding incorrectly set')
            return False

        cur.execute(r"""
            SELECT setting FROM pg_settings
            WHERE context='internal' AND name='lc_ctype'
            """)
        loc = cur.fetchone()[0]
        if loc != 'C':
            print('Database locale incorrectly set. Need to rerun initdb.')
            return False

        return True
    finally:
        # Explicitly close our connection - things will fail if we leave
        # open connections.
        con.close()
157
158
159
def run_test_process():
    """Start the testrunner process and return its exit code.

    bin/test is run under xvfb-run with this script's own command-line
    arguments appended.  Its combined stdout/stderr is copied to our
    stdout as it arrives.  If no output arrives for TIMEOUT seconds while
    the process is still alive, the testrunner is treated as hung and is
    shut down with cleanup_hung_testrunner().
    """
    print('Running tests.')
    os.chdir(HERE)

    # We run the test suite under a virtual frame buffer server so that the
    # JavaScript integration test suite can run.
    argv = [
        '/usr/bin/xvfb-run',
        "--error-file=/var/tmp/xvfb-errors.log",
        "--server-args='-screen 0 1024x768x24'",
        os.path.join(HERE, 'bin', 'test'),
        ]
    argv.extend(sys.argv[1:])
    command_line = ' '.join(argv)
    print("Running command: %s" % command_line)

    # Run the test suite.  Make the suite the leader of a new process group
    # so that we can signal the group without signaling ourselves.
    xvfb_proc = Popen(
        command_line,
        shell=True,
        preexec_fn=os.setpgrp,
        stdout=PIPE,
        stderr=STDOUT)

    # This is close to what Popen._communicate() does, except that we give
    # up when STDOUT has been silent for too long.  That keeps us from
    # blocking on a hung testrunner and avoids the problems
    # Popen.communicate() has with large amounts of output.
    output = xvfb_proc.stdout
    reached_eof = False
    while not reached_eof:
        # select() blocks for a long time and can easily fail with EINTR
        # <https://bugs.launchpad.net/launchpad/+bug/615740>.  Really we
        # should have EINTR protection across the whole script (other
        # syscalls might be interrupted) but this is the longest and most
        # likely to hit, and doing it perfectly in python has proved to be
        # quite hard in bzr. -- mbp 20100924
        while True:
            try:
                readable = select.select([output], [], [], TIMEOUT)[0]
            except select.error as e:
                # nb: select.error doesn't expose a named 'errno' attribute,
                # at least in python 2.6.5; see
                # <http://mail.python.org/pipermail/python-dev/2000-October/009671.html>
                if e.args[0] != errno.EINTR:
                    raise
            else:
                break

        if not readable:
            # The select() statement timed out!  Either the process we were
            # watching has already died, or it is hung and must be killed.
            if xvfb_proc.poll() is None:
                cleanup_hung_testrunner(xvfb_proc)
            break

        # Read a chunk of output from STDOUT and pass it straight through.
        data = os.read(output.fileno(), 1024)
        sys.stdout.write(data)
        # An empty read means STDOUT was closed: leave the loop gracefully.
        reached_eof = (data == "")

    rv = xvfb_proc.wait()

    print('')
    if rv == 0:
        print('Successfully ran all tests.')
    else:
        print('Tests failed (exit code %d)' % rv)

    return rv
237
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
238
10866.2.5 by Maris Fogels
Nicer function name
239
def cleanup_hung_testrunner(process):
    """Kill and clean up the testrunner process and its children.

    Prints a warning, kills the process group whose id is `process.pid`
    using nice_killpg(), and then copies any output still sitting in the
    process's stdout pipe to our own stdout.

    :param process: the Popen object of the hung testrunner.  Its stdout
        must be a pipe.
    """
    print('')
    print('')
    print("WARNING: A test appears to be hung. There has been no "
        "output for %d seconds." % TIMEOUT)
    print("Forcibly shutting down the test suite")

    # This guarantees the process will die.  In rare cases
    # a child process may survive this if they are in a different
    # process group and they ignore the signals we send their parent.
    nice_killpg(process.pid)

    # The process should absolutely be dead now.
    assert process.poll() is not None

    # Drain the subprocess's stdout and stderr.
    print("The dying processes left behind the following output:")
    print("--------------- BEGIN OUTPUT ---------------")
    # A plain read() would block until every holder of the pipe has closed
    # it, so a surviving child (see above) could hang us forever.  Instead,
    # read until EOF or until the pipe has been quiet for a few seconds.
    quiet_period = 5
    output_fd = process.stdout.fileno()
    while True:
        try:
            readable = select.select([output_fd], [], [], quiet_period)[0]
        except select.error as e:
            if e.args[0] == errno.EINTR:
                continue
            raise
        if not readable:
            # Nothing more is coming, but somebody still holds the pipe.
            break
        chunk = os.read(output_fd, 1024)
        if not chunk:
            # EOF: every writer has gone away.
            break
        sys.stdout.write(chunk)
    print('')
    print("---------------- END OUTPUT ----------------")
261
262
10866.2.12 by Maris Fogels
Simplified much of the process killing and shenanigans code.
263
def nice_killpg(pgid):
    """Kill a Unix process group using increasingly harmful signals.

    SIGTERM, SIGINT, SIGHUP and SIGKILL are sent to the group in that
    order, with a three second pause after each.  The sequence stops
    early as soon as os.killpg() reports ESRCH for the group; any other
    OSError is re-raised.
    """
    escalation = (SIGTERM, SIGINT, SIGHUP, SIGKILL)
    try:
        print("Process group %d will be killed" % pgid)

        # Work through the signals from the gentlest to the most brutal.
        for sig in escalation:
            print("Sending signal %s to process group %d" % (sig, pgid))
            os.killpg(pgid, sig)

            # Give the processes some time to shut down.
            time.sleep(3)

    except OSError as exc:
        # ESRCH means os.killpg() found the group to be empty, which is
        # what we are after.  Anything else is a genuine error.
        if exc.errno != errno.ESRCH:
            raise
    print("Process group %d is now empty." % pgid)
284
2083 by Canonical.com Patch Queue Manager
[r=jamesh] testrunner improvements (?)
285
1102 by Canonical.com Patch Queue Manager
Lucille had some XXXs which should have been NOTEs
286
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)