1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
|
#!/bin/bash
# This script will update staging. It's typically called from
# cron every 30 mins, and will only run when there's a new
# dump file from production to process. This is also the case
# even if we're not doing a DB reimport, as it helps ensure it
# happens once a day and the crontab entry doesn't need to
# be changed.
#
# There are a few options:
# - DB Import (as above)
# - Perform Remote (this includes updating code on the
#   application server)
# - Perform Nightly (run the nightly scripts on the
#   application server)
# - Run Langpack ((re)-create a langpack DB that the
#   translations team can use for testing).
# Additionally you can pass "-o db" or "-o code" to override
# part of the normal pipeline - see the usage message below.
# NOTE(review): an older version of this comment described a
# "-f" force flag, but the code below only parses "-o".
#######################
# Config Section
#######################
# Local checkout of the db-stable branch that we deploy from.
SOURCEDIR=/srv/code/db-stable/launchpad
# Root of the staging installation on this host.
ROOTDIR=/srv/staging.launchpad.net
# One logfile per day; it is cat'd at the end so cron emails it.
LOGFILE=$ROOTDIR/staging-logs/$(date +%Y-%m-%d)-staging_restore.log
LANGPACK_LOGFILE=$ROOTDIR/staging-logs/langpack.log
# Today's production dump; its presence gates the DB import.
DUMPFILE=$ROOTDIR/dumps_from_prod/launchpad_prod_3.$(date +%Y%m%d).dump
LOCK=/var/lock/staging.lock
# If you want to skip the DB restore, change this to
# anything but True
PERFORM_DB_IMPORT=True
# And if you want to skip DB updates altogether, set
# this to True
SKIP_DB_UPDATES=False
# If we want to update regardless of whether there's
# new code or not, this should be False. Note that if
# you are skipping DB import as well, it will update
# every time.
CHECK_NEW_CODE=True
# When we're ready for this to do the remote stuff
# such as updating code on asuka, change this to True
PERFORM_REMOTE=True
# When we're ready for nightly stuff to run, change
# this to True
PERFORM_NIGHTLY=True
# If we want to run the LANGPACK update set to True
# or if not, anything else
RUN_LANGPACK=False
#######################
# End Config Section
#######################
# Parse command line options.  The only supported flag is "-o":
#   -o db   - don't look for a new DB dump (quick code-only update)
#   -o code - don't look for new code (force a code+DB update)
# No arguments means a normal run that looks for a fresh dump.
#
# Print the usage message and exit non-zero.  (The old code
# duplicated this text in two branches and exited 0 on a usage
# error, which hid mistakes from cron.)
usage() {
    echo "Usage: staging_restore.sh [-o [db|code]]"
    echo ""
    echo "-o db = Override DB (i.e. don't look for new DB dump)"
    echo "-o code = Override Code (i.e. don't look for new code)"
    exit 1
}
if [ "$1" = "-o" ]
then
    if [ "$2" = "db" ]
    then
        CHECK_FOR_DUMPFILE=False
        # We want to override this since we can't be
        # sure there is a DB to import
        PERFORM_DB_IMPORT=False
        # We also want to disable this since we're
        # looking for a quick update
        PERFORM_NIGHTLY=False
    elif [ "$2" = "code" ]
    then
        CHECK_FOR_DUMPFILE=True
        PERFORM_DB_IMPORT=True
        PERFORM_NIGHTLY=False
        CHECK_NEW_CODE=False
    else
        usage
    fi
else
    # Any other first argument is an error.
    if [ "$1" ]
    then
        usage
    fi
    CHECK_FOR_DUMPFILE=True
fi
# Refuse to run under any account other than postgres - all the
# psql/createdb/make work below assumes postgres credentials.
USER=$(whoami)
if [ "$USER" != "postgres" ]
then
    echo "Must be postgres user to run this script."
    exit 1
fi
# Take the lock (considered stale after 3 days = 259200s); if another
# run already holds it, give up silently - cron will retry later.
if ! lockfile -r0 -l 259200 $LOCK > /dev/null 2>&1
then
    exit 1
fi
# Remember when we started so the total run time can be reported later.
SCRIPT_START=$(date +%s)
if [ "$CHECK_NEW_CODE" = "True" ]
then
# Now check if there's any code to update by comparing the local
# db-stable revno against what's deployed on the app server (asuka).
SOURCEREVNO=$(bzr revno ${SOURCEDIR} )
DESTREVNO=$(ssh launchpad@asuka bzr revno /srv/staging.launchpad.net/staging/launchpad )
if [ "$SOURCEREVNO" = "$DESTREVNO" ]
then
# Record that there's no code to update.  successful-updates.txt lives
# on the app server, so pull it down, append, and push it back.
scp launchpad@asuka:/srv/staging.launchpad.net/www/root/successful-updates.txt ${ROOTDIR}/www/root/successful-updates.txt
echo $(date '+%Y:%m:%d %H:%M') "No new code to update - already at $(bzr revno ${ROOTDIR}/staging/launchpad)" >> ${ROOTDIR}/www/root/successful-updates.txt
scp ${ROOTDIR}/www/root/successful-updates.txt launchpad@asuka:/srv/staging.launchpad.net/www/root/
echo $(date) "No new code to update - already at $(bzr revno ${ROOTDIR}/staging/launchpad)" >> $LOGFILE
# Drop the .done marker so today's dump is retried on the next cron
# run once new code does land.
rm -f ${ROOTDIR}/var/$(basename ${DUMPFILE}).done
rm -f $LOCK
exit
else
# New code exists: refresh the local staging tree from db-stable and
# start the librarian storage from scratch.
rsync -a --delete ${SOURCEDIR}/ ${ROOTDIR}/staging/launchpad/
rm -rf ${ROOTDIR}/staging/librarian
mkdir ${ROOTDIR}/staging/librarian
echo $(date) "Local Staging Code updated" >> $LOGFILE 2>&1
# Check if there have been any DB schema changes between the deployed
# revno and the new one, ignoring files that never require a rebuild
# (pending patches, security.cfg, Makefile, etc).
cd ${ROOTDIR}/staging/launchpad/
DBCHANGES=$(bzr status -S -r ${DESTREVNO}..${SOURCEREVNO} database/schema/ | grep -v "\(^P\|pending\|security.cfg\|Makefile\|unautovacuumable\|_pythonpath.py\)")
if [ -n "$DBCHANGES" ]; then
echo $(date) "There have been schema changes since last update" >> $LOGFILE 2>&1
else
# No schema changes, so skip all of the expensive DB work below.
echo $(date) "No schema changes since last update - skip DB restore" >> $LOGFILE 2>&1
CHECK_FOR_DUMPFILE=False
PERFORM_DB_IMPORT=False
SKIP_DB_UPDATES=True
PERFORM_NIGHTLY=False
fi
fi
fi
if [ "$CHECK_FOR_DUMPFILE" = "True" ]
then
    # A fresh production dump is required; bail out quietly if it
    # hasn't arrived yet (cron will try again in 30 minutes).
    if [ ! -f "${DUMPFILE}" ]
    then
        echo $(date) "File ${DUMPFILE} not found. Exiting." >> $LOGFILE
        rm -f $LOCK
        exit 0
    fi
    # A .done marker means this dump was already handled by an
    # earlier run today - nothing more to do.
    PROCESSED=${ROOTDIR}/var/$(basename ${DUMPFILE}).done
    if [ -f "$PROCESSED" ]
    then
        echo $(date) "We've already processed $DUMPFILE" >> $LOGFILE
        rm -f $LOCK
        exit 0
    fi
    echo $(date) "File ${DUMPFILE} found" >> $LOGFILE
    # Mark the dump as processed up front so a crashed run is not
    # endlessly retried against the same file.
    touch $PROCESSED
fi
if [ "$PERFORM_DB_IMPORT" = "True" ]
then
    # Build the tree locally, then restore the production dump into
    # the replicated *_new databases via the stagingsetup make target.
    cd ${ROOTDIR}/staging/launchpad
    make build LPCONFIG=staging >> $LOGFILE 2>&1
    make -C database/replication stagingsetup STAGING_DUMP=${DUMPFILE} PGUSER=postgres >> $LOGFILE 2>&1
    # Capture make's status before the echo resets $?.  (The old code
    # did "exit $?" after the echo, which always exited 0.)
    RV=$?
    if [ $RV -ne 0 ]
    then
        echo $(date) "There was a problem running the stagingsetup target" >> $LOGFILE
        exit $RV
    fi
    echo $(date) "Two replicated _new databases built" >> $LOGFILE
fi
if [ "$PERFORM_REMOTE" = "True" ]
then
# Now we want to bring the app server down, and copy the
# new code to the appropriate places
# Let's put a "maintenance" file in place first so nagios doesn't scream
# and cron jobs know to not process
ssh launchpad@asuka "date +%s > /srv/staging.launchpad.net/maintenance.txt"
# The importd hosts get their maintenance flag via dedicated
# restricted SSH keys (one key per permitted remote command).
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_touch_maintenance importd@strawberry touch /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_touch_maintenance importd@marambio touch /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
# Stop the lists and main app services before touching their code.
ssh launchpad@asuka /srv/lists.staging.launchpad.net/initscript stop >> $LOGFILE 2>&1
ssh launchpad@asuka /srv/staging.launchpad.net/initscript stop >> $LOGFILE 2>&1
# Temporary hack because the staging app server dies without removing the pidfile
ssh launchpad@asuka rm -f /srv/staging.launchpad.net/var/staging-launchpad.pid >> $LOGFILE 2>&1
# Let's also rotate the librarian logs since other logs are handled by the app server itself
ssh launchpad@asuka /usr/sbin/logrotate -s /srv/staging.launchpad.net/etc/.logrotate.state /srv/staging.launchpad.net/etc/librarian_logrotate.conf
# Push the new tree to every remote host...
rsync --rsh="ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_code_sync" -a --delete ${SOURCEDIR}/ importd@strawberry:/srv/importd.staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
rsync --rsh="ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_code_sync" -a --delete ${SOURCEDIR}/ importd@marambio:/srv/importd.staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
rsync -a --delete ${SOURCEDIR}/ launchpad@asuka:/srv/lists.staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
rsync -a --delete ${SOURCEDIR}/ launchpad@asuka:/srv/staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
# ...and build it in place on each of them.
ssh launchpad@asuka make -C /srv/staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
ssh launchpad@asuka make -C /srv/lists.staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_make_build importd@strawberry make -C /srv/importd.staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_make_build importd@marambio make -C /srv/importd.staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
fi
# Kill all connections to these DBs and then stop new connections

# db_update_failed STEP STATUS
# Shared fatal-error path for the in-place DB update below: log the
# failing step, dump current DB connections and the replication setup
# logs for post-mortem, cat the logfile (cron mails stdout) and exit
# with the step's real exit status.  The old inline copies of this
# code ended with "exit $?", which was always 0 because $? had been
# reset by the preceding cat - failures were reported as success.
db_update_failed() {
    echo $(date) "ERROR: Failed to run $1" >> $LOGFILE
    psql -U postgres -d template1 -c "SELECT * FROM pg_stat_activity" >> $LOGFILE 2>&1
    cp ${ROOTDIR}/staging/launchpad/database/replication/lpslon_main_master_staging-setup.log \
        ${ROOTDIR}/staging-logs/lpslon_main_master_staging-setup.$(date +%Y-%m-%d-%H-%M).log
    cp ${ROOTDIR}/staging/launchpad/database/replication/lpslon_main_slave_staging-setup.log \
        ${ROOTDIR}/staging-logs/lpslon_main_slave_staging-setup.$(date +%Y-%m-%d-%H-%M).log
    echo "Replication logs copied to ${ROOTDIR}/staging-logs" >> $LOGFILE
    # Let's email it - since it's being run from
    # cron, just cat-ing the logfile will do this
    cat $LOGFILE
    exit $2
}

if [ "$PERFORM_DB_IMPORT" = "True" ]
then
    # Swap the freshly-built _new databases into place.
    cd ${ROOTDIR}/staging/launchpad
    make -C database/replication stagingswitch PGUSER=postgres >> $LOGFILE 2>&1
    # Capture the status before echo resets $? (old code exited 0 here).
    RV=$?
    if [ $RV -ne 0 ]
    then
        echo $(date) "There was a problem running the stagingswitch target" >> $LOGFILE
        exit $RV
    fi
    echo $(date) "Existing dbs and daemons killed, _new swapped into place" >> $LOGFILE
    # Now perform any DB tasks needed before the DB is re-opened
    # 1) Let's turn off mirroring for non-bzrtools projects
    echo $(date) "Turning off mirroring for non-bzrtools projects" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/disable_mirror.sql >> $LOGFILE 2>&1
    # 2) Setup trac for checkwatches
    echo $(date) "Setting up trac for checkwatches" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/checkwatches_trac.sql >> $LOGFILE 2>&1
    # 3) Suspend code imports for staging
    echo $(date) "Suspending code imports" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/suspend_code_imports.sql >> $LOGFILE 2>&1
    # 4) Set production code import machines offline
    echo $(date) "Setting production code import machines offline" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/code_import_machines.sql >> $LOGFILE 2>&1
else
    if [ "$SKIP_DB_UPDATES" = "False" ]; then
        # No fresh dump: update the code in place and run the schema
        # upgrade scripts against the existing staging DB instead.
        rsync -a --delete ${SOURCEDIR}/ ${ROOTDIR}/staging/launchpad/
        cd ${ROOTDIR}/staging/launchpad
        make build LPCONFIG=staging >> $LOGFILE 2>&1
        cd ${ROOTDIR}/staging/launchpad/database/schema
        export LPCONFIG=staging
        # Kill any connections to the DB. New ones should be stopped by the
        # maintenance text files being in place
        psql -U postgres -d template1 -f ${ROOTDIR}/scripts/kill_staging_connections.sql >> $LOGFILE 2>&1
        echo $(date) "Applying database updates and permissions to DB" >> $LOGFILE
        ./upgrade.py -U postgres >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "upgrade.py" $RV
        fi
        echo $(date) "DB upgrades applied" >> $LOGFILE
        ./fti.py -U postgres >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "fti.py" $RV
        fi
        echo $(date) "Full text indexes rebuilt" >> $LOGFILE
        ./security.py -U postgres >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "security.py" $RV
        fi
        echo $(date) "Security applied to DB" >> $LOGFILE
        # Permissions must also be applied on the slave explicitly.
        ./security.py -U postgres -d lpmain_staging_slave >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "security.py on slave" $RV
        fi
        echo $(date) "Security applied to slave DB" >> $LOGFILE
    fi
fi
if [ "$PERFORM_REMOTE" = "True" ]
then
echo $(date) "Recreating librarian storage" >> $LOGFILE
ssh launchpad@asuka rm -rf /srv/staging.launchpad.net/staging/librarian >> $LOGFILE 2>&1
ssh launchpad@asuka mkdir /srv/staging.launchpad.net/staging/librarian >> $LOGFILE 2>&1
# The ls just records the fresh (empty) directory in the log.
ssh launchpad@asuka ls -l /srv/staging.launchpad.net/staging/librarian >> $LOGFILE 2>&1
echo $(date) "Librarian storage recreated" >> $LOGFILE
# Now we want to restart the app server
ssh launchpad@asuka /srv/staging.launchpad.net/initscript start >> $LOGFILE 2>&1
ssh launchpad@asuka /srv/lists.staging.launchpad.net/initscript start >> $LOGFILE 2>&1
# Sync staging mailman to production
echo $(date) "About to sync mailman to production" >> $LOGFILE
ssh launchpad@asuka /srv/lists.staging.launchpad.net/scripts/list_sync.sh >> $LOGFILE 2>&1
echo $(date) "Mailman synced to production" >> $LOGFILE
# Create a resource in the librarian that we can check for
ssh launchpad@asuka /srv/staging.launchpad.net/scripts/upload_librarian_nagios_check.sh >> $LOGFILE 2>&1
# Restart the import servers by removing their maintenance flags
# (again via per-command restricted SSH keys).
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_rm_maintenance importd@strawberry rm /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_rm_maintenance importd@marambio rm /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
# Total wall-clock duration of this run, in seconds.
SCRIPT_END=$(date +%s)
let TIME_TAKEN=${SCRIPT_END}-${SCRIPT_START}
# Record that the update happened
scp launchpad@asuka:/srv/staging.launchpad.net/www/root/successful-updates.txt ${ROOTDIR}/www/root/successful-updates.txt
# And remove our maintenance text files
ssh launchpad@asuka rm -f /srv/staging.launchpad.net/maintenance.txt
if [ "$PERFORM_DB_IMPORT" = "True" ]
then
echo $(date '+%Y:%m:%d %H:%M') "Full update with DB reimport: bzr revno $(bzr revno ${ROOTDIR}/staging/launchpad)" >> ${ROOTDIR}/www/root/successful-updates.txt
echo "staging_restore_with_db_duration:${TIME_TAKEN}@${SCRIPT_END}" > ${ROOTDIR}/staging-logs/staging_restore.dat
else
echo $(date '+%Y:%m:%d %H:%M') "Code update without DB reimport: bzr revno $(bzr revno ${ROOTDIR}/staging/launchpad)" >> ${ROOTDIR}/www/root/successful-updates.txt
echo "staging_restore_no_db_duration:${TIME_TAKEN}@${SCRIPT_END}" > ${ROOTDIR}/staging-logs/staging_restore.dat
fi
scp ${ROOTDIR}/www/root/successful-updates.txt launchpad@asuka:/srv/staging.launchpad.net/www/root/
# Load timing into graphing system
scp -q -i /var/lib/postgresql/.ssh/id_rsa_copy_stg_timing ${ROOTDIR}/staging-logs/staging_restore.dat launchpad@loganberry:/srv/lpstats.canonical.com/data/sourcherry/staging_restore.dat && ssh -i /var/lib/postgresql/.ssh/id_rsa_load_stg_timing launchpad@loganberry /srv/lpstats.canonical.com/scripts/load_data.sh sourcherry/staging_restore.dat
fi
####################
# Langpack DB
####################
# (Re-)create a copy of the production dump as launchpad_langpack so
# the translations team has a database to test against.  Only worth
# doing when we actually imported a fresh dump this run.
if [ "$RUN_LANGPACK" = "True" ] && [ "$PERFORM_DB_IMPORT" = "True" ]
then
    DIR=$(dirname ${DUMPFILE})
    FILE=$(basename ${DUMPFILE})
    cd ${DIR}
    # Remove existing DB (pgmassacre kills its connections first)
    echo $(date) "Starting Langpack Restore" > $LANGPACK_LOGFILE
    echo $(date) "About to destroy launchpad_langpack" >> $LANGPACK_LOGFILE
    $ROOTDIR/scripts/pgmassacre.py launchpad_langpack >> $LANGPACK_LOGFILE 2>&1
    # Give any stragglers a moment to die before recreating the DB.
    sleep 10
    echo $(date) "Destroyed launchpad_langpack" >> $LANGPACK_LOGFILE
    # Create new DB
    createdb -U postgres -E UNICODE launchpad_langpack >> $LANGPACK_LOGFILE 2>&1
    echo $(date) "New database launchpad_langpack created" >> $LANGPACK_LOGFILE
    # Restore to new DB
    echo $(date) "About to restore production database dump to launchpad_langpack" >> $LANGPACK_LOGFILE
    pg_restore -U postgres --exit-on-error --dbname=launchpad_langpack $FILE >> $LANGPACK_LOGFILE 2>&1
    echo $(date) "Ending Langpack Restore" >> $LANGPACK_LOGFILE
    # Send email to launchpad_error_reports list (feed the logfile to
    # mail directly instead of the old "cat | mail" pipeline)
    mail -s "Langpack restore" "launchpad-error-reports@lists.canonical.com" < $LANGPACK_LOGFILE
    # Since this is a separate logfile, let's add it to the main
    # one for clarity
    cat $LANGPACK_LOGFILE >> $LOGFILE
fi
####################
# End Langpack DB
####################
if [ "$PERFORM_NIGHTLY" = "True" ]
then
# Nightly processes (run remotely on the app server)
ssh launchpad@asuka /srv/staging.launchpad.net/scripts/nightly_processes.sh >> $LOGFILE 2>&1
fi
# Testing URLs - sanity-check that the staging sites respond.
echo $(date) "Testing Staging URLs" >> $LOGFILE
${ROOTDIR}/scripts/check_staging_urls.py >> $LOGFILE 2>&1
# Finished - release the lock taken at the top of the script.
rm -f $LOCK
echo $(date) "Lock file deleted. Script finished" >> $LOGFILE
# Let's email it - since it's being run from
# cron, just cat-ing the logfile will do this
cat $LOGFILE
|