1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
|
#!/bin/bash
# This script will update staging. It's typically called from
# cron every 30 mins, and will only run when there's a new
# dump file from production to process. This is also the case
# even if we're not doing a DB reimport, as it helps ensure it
# happens once a day and the crontab entry doesn't need to
# be changed.
#
# There are a few options:
# - DB Import (as above)
# - Perform Remote (this includes updating code on the
#   application server)
# - Perform Nightly (run the nightly scripts on the
#   application server)
# - Run Langpack ((re)-create a langpack DB that the
#   translations team can use for testing).
# Additionally you can pass "-o db" or "-o code" to override
# part of the normal pipeline - see the usage message below.
# NOTE(review): an older version of this comment described a
# "-f" force flag, but the code below only parses "-o".
#######################
# Config Section
#######################
# Local checkout of the db-stable branch that we deploy from.
SOURCEDIR=/srv/code/db-stable/launchpad
# Root of the staging installation on this host.
ROOTDIR=/srv/staging.launchpad.net
# One logfile per day; it is cat'd at the end so cron emails it.
LOGFILE=$ROOTDIR/staging-logs/$(date +%Y-%m-%d)-staging_restore.log
LANGPACK_LOGFILE=$ROOTDIR/staging-logs/langpack.log
# Today's production dump; its presence gates the DB import.
DUMPFILE=$ROOTDIR/dumps_from_prod/launchpad_prod_3.$(date +%Y%m%d).dump
LOCK=/var/lock/staging.lock
# If you want to skip the DB restore, change this to
# anything but True
PERFORM_DB_IMPORT=True
# And if you want to skip DB updates altogether, set
# this to True
SKIP_DB_UPDATES=False
# If we want to update regardless of whether there's
# new code or not, this should be False. Note that if
# you are skipping DB import as well, it will update
# every time.
CHECK_NEW_CODE=True
# When we're ready for this to do the remote stuff
# such as updating code on asuka, change this to True
PERFORM_REMOTE=True
# When we're ready for nightly stuff to run, change
# this to True
PERFORM_NIGHTLY=True
# If we want to run the LANGPACK update set to True
# or if not, anything else
RUN_LANGPACK=False
#######################
# End Config Section
#######################
# Parse command line options.  The only supported flag is "-o":
#   -o db   - don't look for a new DB dump (quick code-only update)
#   -o code - don't look for new code (force a code+DB update)
# No arguments means a normal run that looks for a fresh dump.
#
# Print the usage message and exit non-zero.  (The old code
# duplicated this text in two branches and exited 0 on a usage
# error, which hid mistakes from cron.)
usage() {
    echo "Usage: staging_restore.sh [-o [db|code]]"
    echo ""
    echo "-o db = Override DB (i.e. don't look for new DB dump)"
    echo "-o code = Override Code (i.e. don't look for new code)"
    exit 1
}
if [ "$1" = "-o" ]
then
    if [ "$2" = "db" ]
    then
        CHECK_FOR_DUMPFILE=False
        # We want to override this since we can't be
        # sure there is a DB to import
        PERFORM_DB_IMPORT=False
        # We also want to disable this since we're
        # looking for a quick update
        PERFORM_NIGHTLY=False
    elif [ "$2" = "code" ]
    then
        CHECK_FOR_DUMPFILE=True
        PERFORM_DB_IMPORT=True
        PERFORM_NIGHTLY=False
        CHECK_NEW_CODE=False
    else
        usage
    fi
else
    # Any other first argument is an error.
    if [ "$1" ]
    then
        usage
    fi
    CHECK_FOR_DUMPFILE=True
fi
# Refuse to run under any account other than postgres - all the
# psql/createdb/make work below assumes postgres credentials.
USER=$(whoami)
if [ "$USER" != "postgres" ]
then
    echo "Must be postgres user to run this script."
    exit 1
fi
# Take the lock (considered stale after 3 days = 259200s); if another
# run already holds it, give up silently - cron will retry later.
if ! lockfile -r0 -l 259200 $LOCK > /dev/null 2>&1
then
    exit 1
fi
# Remember when we started so the total run time can be reported later.
SCRIPT_START=$(date +%s)
if [ "$CHECK_NEW_CODE" = "True" ]
then
# Now check if there's any code to update by comparing the local
# db-stable revno against what's deployed on the app server (asuka).
SOURCEREVNO=$(bzr revno ${SOURCEDIR} )
DESTREVNO=$(ssh launchpad@asuka bzr revno /srv/staging.launchpad.net/staging/launchpad )
if [ "$SOURCEREVNO" = "$DESTREVNO" ]
then
# Record that there's no code to update.  successful-updates.txt lives
# on the app server, so pull it down, append, and push it back.
scp launchpad@asuka:/srv/staging.launchpad.net/www/root/successful-updates.txt ${ROOTDIR}/www/root/successful-updates.txt
echo $(date '+%Y:%m:%d %H:%M') "No new code to update - already at $(bzr revno ${ROOTDIR}/staging/launchpad)" >> ${ROOTDIR}/www/root/successful-updates.txt
scp ${ROOTDIR}/www/root/successful-updates.txt launchpad@asuka:/srv/staging.launchpad.net/www/root/
echo $(date) "No new code to update - already at $(bzr revno ${ROOTDIR}/staging/launchpad)" >> $LOGFILE
# Drop the .done marker so today's dump is retried on the next cron
# run once new code does land.
rm -f ${ROOTDIR}/var/$(basename ${DUMPFILE}).done
rm -f $LOCK
exit
else
# New code exists: refresh the local staging tree from db-stable and
# start the librarian storage from scratch.
rsync -a --delete ${SOURCEDIR}/ ${ROOTDIR}/staging/launchpad/
rm -rf ${ROOTDIR}/staging/librarian
mkdir ${ROOTDIR}/staging/librarian
echo $(date) "Local Staging Code updated" >> $LOGFILE 2>&1
# Check if there have been any DB schema changes between the deployed
# revno and the new one, ignoring files that never require a rebuild
# (pending patches, security.cfg, Makefile, etc).
cd ${ROOTDIR}/staging/launchpad/
DBCHANGES=$(bzr status -S -r ${DESTREVNO}..${SOURCEREVNO} database/schema/ | grep -v "\(^P\|pending\|security.cfg\|Makefile\|unautovacuumable\|_pythonpath.py\)")
if [ -n "$DBCHANGES" ]; then
echo $(date) "There have been schema changes since last update" >> $LOGFILE 2>&1
else
# No schema changes, so skip all of the expensive DB work below.
echo $(date) "No schema changes since last update - skip DB restore" >> $LOGFILE 2>&1
CHECK_FOR_DUMPFILE=False
PERFORM_DB_IMPORT=False
SKIP_DB_UPDATES=True
PERFORM_NIGHTLY=False
fi
fi
fi
if [ "$CHECK_FOR_DUMPFILE" = "True" ]
then
    # A fresh production dump is required; bail out quietly if it
    # hasn't arrived yet (cron will try again in 30 minutes).
    if [ ! -f "${DUMPFILE}" ]
    then
        echo $(date) "File ${DUMPFILE} not found. Exiting." >> $LOGFILE
        rm -f $LOCK
        exit 0
    fi
    # A .done marker means this dump was already handled by an
    # earlier run today - nothing more to do.
    PROCESSED=${ROOTDIR}/var/$(basename ${DUMPFILE}).done
    if [ -f "$PROCESSED" ]
    then
        echo $(date) "We've already processed $DUMPFILE" >> $LOGFILE
        rm -f $LOCK
        exit 0
    fi
    echo $(date) "File ${DUMPFILE} found" >> $LOGFILE
    # Mark the dump as processed up front so a crashed run is not
    # endlessly retried against the same file.
    touch $PROCESSED
fi
if [ "$PERFORM_DB_IMPORT" = "True" ]
then
    # Build the tree locally, then restore the production dump into
    # the replicated *_new databases via the stagingsetup make target.
    cd ${ROOTDIR}/staging/launchpad
    make build LPCONFIG=staging >> $LOGFILE 2>&1
    make -C database/replication stagingsetup STAGING_DUMP=${DUMPFILE} PGUSER=postgres >> $LOGFILE 2>&1
    # Capture make's status before the echo resets $?.  (The old code
    # did "exit $?" after the echo, which always exited 0.)
    RV=$?
    if [ $RV -ne 0 ]
    then
        echo $(date) "There was a problem running the stagingsetup target" >> $LOGFILE
        exit $RV
    fi
    echo $(date) "Two replicated _new databases built" >> $LOGFILE
fi
if [ "$PERFORM_REMOTE" = "True" ]
then
# Now we want to bring the app server down, and copy the
# new code to the appropriate places
# Let's put a "maintenance" file in place first so nagios doesn't scream
# and cron jobs know to not process
ssh launchpad@asuka "date +%s > /srv/staging.launchpad.net/maintenance.txt"
# The importd hosts get their maintenance flag via dedicated
# restricted SSH keys (one key per permitted remote command).
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_touch_maintenance importd@strawberry touch /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_touch_maintenance importd@marambio touch /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
# Stop the lists and main app services before touching their code.
ssh launchpad@asuka /srv/lists.staging.launchpad.net/initscript stop >> $LOGFILE 2>&1
ssh launchpad@asuka /srv/staging.launchpad.net/initscript stop >> $LOGFILE 2>&1
# Temporary hack because the staging app server dies without removing the pidfile
ssh launchpad@asuka rm -f /srv/staging.launchpad.net/var/staging-launchpad.pid >> $LOGFILE 2>&1
# Let's also rotate the librarian logs since other logs are handled by the app server itself
ssh launchpad@asuka /usr/sbin/logrotate -s /srv/staging.launchpad.net/etc/.logrotate.state /srv/staging.launchpad.net/etc/librarian_logrotate.conf
# Push the new tree to every remote host...
rsync --rsh="ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_code_sync" -a --delete ${SOURCEDIR}/ importd@strawberry:/srv/importd.staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
rsync --rsh="ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_code_sync" -a --delete ${SOURCEDIR}/ importd@marambio:/srv/importd.staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
rsync -a --delete ${SOURCEDIR}/ launchpad@asuka:/srv/lists.staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
rsync -a --delete ${SOURCEDIR}/ launchpad@asuka:/srv/staging.launchpad.net/staging/launchpad/ >> $LOGFILE 2>&1
# ...and build it in place on each of them.
ssh launchpad@asuka make -C /srv/staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
ssh launchpad@asuka make -C /srv/lists.staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_make_build importd@strawberry make -C /srv/importd.staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_make_build importd@marambio make -C /srv/importd.staging.launchpad.net/staging/launchpad build LPCONFIG=staging >> $LOGFILE 2>&1
fi
# Kill all connections to these DBs and then stop new connections

# db_update_failed STEP STATUS
# Shared fatal-error path for the in-place DB update below: log the
# failing step, dump current DB connections and the replication setup
# logs for post-mortem, cat the logfile (cron mails stdout) and exit
# with the step's real exit status.  The old inline copies of this
# code ended with "exit $?", which was always 0 because $? had been
# reset by the preceding cat - failures were reported as success.
db_update_failed() {
    echo $(date) "ERROR: Failed to run $1" >> $LOGFILE
    psql -U postgres -d template1 -c "SELECT * FROM pg_stat_activity" >> $LOGFILE 2>&1
    cp ${ROOTDIR}/staging/launchpad/database/replication/lpslon_main_master_staging-setup.log \
        ${ROOTDIR}/staging-logs/lpslon_main_master_staging-setup.$(date +%Y-%m-%d-%H-%M).log
    cp ${ROOTDIR}/staging/launchpad/database/replication/lpslon_main_slave_staging-setup.log \
        ${ROOTDIR}/staging-logs/lpslon_main_slave_staging-setup.$(date +%Y-%m-%d-%H-%M).log
    echo "Replication logs copied to ${ROOTDIR}/staging-logs" >> $LOGFILE
    # Let's email it - since it's being run from
    # cron, just cat-ing the logfile will do this
    cat $LOGFILE
    exit $2
}

if [ "$PERFORM_DB_IMPORT" = "True" ]
then
    # Swap the freshly-built _new databases into place.
    cd ${ROOTDIR}/staging/launchpad
    make -C database/replication stagingswitch PGUSER=postgres >> $LOGFILE 2>&1
    # Capture the status before echo resets $? (old code exited 0 here).
    RV=$?
    if [ $RV -ne 0 ]
    then
        echo $(date) "There was a problem running the stagingswitch target" >> $LOGFILE
        exit $RV
    fi
    echo $(date) "Existing dbs and daemons killed, _new swapped into place" >> $LOGFILE
    # Now perform any DB tasks needed before the DB is re-opened
    # 1) Let's turn off mirroring for non-bzrtools projects
    echo $(date) "Turning off mirroring for non-bzrtools projects" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/disable_mirror.sql >> $LOGFILE 2>&1
    # 2) Setup trac for checkwatches
    echo $(date) "Setting up trac for checkwatches" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/checkwatches_trac.sql >> $LOGFILE 2>&1
    # 3) Suspend code imports for staging
    echo $(date) "Suspending code imports" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/suspend_code_imports.sql >> $LOGFILE 2>&1
    # 4) Set production code import machines offline
    echo $(date) "Setting production code import machines offline" >> $LOGFILE
    psql -U postgres -d lpmain_staging -f ${ROOTDIR}/scripts/code_import_machines.sql >> $LOGFILE 2>&1
else
    if [ "$SKIP_DB_UPDATES" = "False" ]; then
        # No fresh dump: update the code in place and run the schema
        # upgrade scripts against the existing staging DB instead.
        rsync -a --delete ${SOURCEDIR}/ ${ROOTDIR}/staging/launchpad/
        cd ${ROOTDIR}/staging/launchpad
        make build LPCONFIG=staging >> $LOGFILE 2>&1
        cd ${ROOTDIR}/staging/launchpad/database/schema
        export LPCONFIG=staging
        # Kill any connections to the DB. New ones should be stopped by the
        # maintenance text files being in place
        psql -U postgres -d template1 -f ${ROOTDIR}/scripts/kill_staging_connections.sql >> $LOGFILE 2>&1
        echo $(date) "Applying database updates and permissions to DB" >> $LOGFILE
        ./upgrade.py -U postgres >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "upgrade.py" $RV
        fi
        echo $(date) "DB upgrades applied" >> $LOGFILE
        ./fti.py -U postgres >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "fti.py" $RV
        fi
        echo $(date) "Full text indexes rebuilt" >> $LOGFILE
        ./security.py -U postgres >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "security.py" $RV
        fi
        echo $(date) "Security applied to DB" >> $LOGFILE
        # Permissions must also be applied on the slave explicitly.
        ./security.py -U postgres -d lpmain_staging_slave >> $LOGFILE 2>&1
        RV=$?
        if [ $RV -ne 0 ]; then
            db_update_failed "security.py on slave" $RV
        fi
        echo $(date) "Security applied to slave DB" >> $LOGFILE
    fi
fi
if [ "$PERFORM_REMOTE" = "True" ]
then
echo $(date) "Recreating librarian storage" >> $LOGFILE
ssh launchpad@asuka rm -rf /srv/staging.launchpad.net/staging/librarian >> $LOGFILE 2>&1
ssh launchpad@asuka mkdir /srv/staging.launchpad.net/staging/librarian >> $LOGFILE 2>&1
# The ls just records the fresh (empty) directory in the log.
ssh launchpad@asuka ls -l /srv/staging.launchpad.net/staging/librarian >> $LOGFILE 2>&1
echo $(date) "Librarian storage recreated" >> $LOGFILE
# Now we want to restart the app server
ssh launchpad@asuka /srv/staging.launchpad.net/initscript start >> $LOGFILE 2>&1
ssh launchpad@asuka /srv/lists.staging.launchpad.net/initscript start >> $LOGFILE 2>&1
# Sync staging mailman to production
echo $(date) "About to sync mailman to production" >> $LOGFILE
ssh launchpad@asuka /srv/lists.staging.launchpad.net/scripts/list_sync.sh >> $LOGFILE 2>&1
echo $(date) "Mailman synced to production" >> $LOGFILE
# Create a resource in the librarian that we can check for
ssh launchpad@asuka /srv/staging.launchpad.net/scripts/upload_librarian_nagios_check.sh >> $LOGFILE 2>&1
# Restart the import servers by removing their maintenance flags
# (again via per-command restricted SSH keys).
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_rm_maintenance importd@strawberry rm /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
ssh -i /var/lib/postgresql/.ssh/id_rsa_straw_rm_maintenance importd@marambio rm /srv/importd.staging.launchpad.net/maintenance.txt >> $LOGFILE 2>&1
# Total wall-clock duration of this run, in seconds.
SCRIPT_END=$(date +%s)
let TIME_TAKEN=${SCRIPT_END}-${SCRIPT_START}
# Record that the update happened
scp launchpad@asuka:/srv/staging.launchpad.net/www/root/successful-updates.txt ${ROOTDIR}/www/root/successful-updates.txt
# And remove our maintenance text files
ssh launchpad@asuka rm -f /srv/staging.launchpad.net/maintenance.txt
if [ "$PERFORM_DB_IMPORT" = "True" ]
then
echo $(date '+%Y:%m:%d %H:%M') "Full update with DB reimport: bzr revno $(bzr revno ${ROOTDIR}/staging/launchpad)" >> ${ROOTDIR}/www/root/successful-updates.txt
echo "staging_restore_with_db_duration:${TIME_TAKEN}@${SCRIPT_END}" > ${ROOTDIR}/staging-logs/staging_restore.dat
else
echo $(date '+%Y:%m:%d %H:%M') "Code update without DB reimport: bzr revno $(bzr revno ${ROOTDIR}/staging/launchpad)" >> ${ROOTDIR}/www/root/successful-updates.txt
echo "staging_restore_no_db_duration:${TIME_TAKEN}@${SCRIPT_END}" > ${ROOTDIR}/staging-logs/staging_restore.dat
fi
scp ${ROOTDIR}/www/root/successful-updates.txt launchpad@asuka:/srv/staging.launchpad.net/www/root/
# Load timing into graphing system
scp -q -i /var/lib/postgresql/.ssh/id_rsa_copy_stg_timing ${ROOTDIR}/staging-logs/staging_restore.dat launchpad@loganberry:/srv/lpstats.canonical.com/data/sourcherry/staging_restore.dat && ssh -i /var/lib/postgresql/.ssh/id_rsa_load_stg_timing launchpad@loganberry /srv/lpstats.canonical.com/scripts/load_data.sh sourcherry/staging_restore.dat
fi
####################
# Langpack DB
####################
# (Re-)create a copy of the production dump as launchpad_langpack so
# the translations team has a database to test against.  Only worth
# doing when we actually imported a fresh dump this run.
if [ "$RUN_LANGPACK" = "True" ] && [ "$PERFORM_DB_IMPORT" = "True" ]
then
    DIR=$(dirname ${DUMPFILE})
    FILE=$(basename ${DUMPFILE})
    cd ${DIR}
    # Remove existing DB (pgmassacre kills its connections first)
    echo $(date) "Starting Langpack Restore" > $LANGPACK_LOGFILE
    echo $(date) "About to destroy launchpad_langpack" >> $LANGPACK_LOGFILE
    $ROOTDIR/scripts/pgmassacre.py launchpad_langpack >> $LANGPACK_LOGFILE 2>&1
    # Give any stragglers a moment to die before recreating the DB.
    sleep 10
    echo $(date) "Destroyed launchpad_langpack" >> $LANGPACK_LOGFILE
    # Create new DB
    createdb -U postgres -E UNICODE launchpad_langpack >> $LANGPACK_LOGFILE 2>&1
    echo $(date) "New database launchpad_langpack created" >> $LANGPACK_LOGFILE
    # Restore to new DB
    echo $(date) "About to restore production database dump to launchpad_langpack" >> $LANGPACK_LOGFILE
    pg_restore -U postgres --exit-on-error --dbname=launchpad_langpack $FILE >> $LANGPACK_LOGFILE 2>&1
    echo $(date) "Ending Langpack Restore" >> $LANGPACK_LOGFILE
    # Send email to launchpad_error_reports list (feed the logfile to
    # mail directly instead of the old "cat | mail" pipeline)
    mail -s "Langpack restore" "launchpad-error-reports@lists.canonical.com" < $LANGPACK_LOGFILE
    # Since this is a separate logfile, let's add it to the main
    # one for clarity
    cat $LANGPACK_LOGFILE >> $LOGFILE
fi
####################
# End Langpack DB
####################
if [ "$PERFORM_NIGHTLY" = "True" ]
then
# Nightly processes (run remotely on the app server)
ssh launchpad@asuka /srv/staging.launchpad.net/scripts/nightly_processes.sh >> $LOGFILE 2>&1
fi
# Testing URLs - sanity-check that the staging sites respond.
echo $(date) "Testing Staging URLs" >> $LOGFILE
${ROOTDIR}/scripts/check_staging_urls.py >> $LOGFILE 2>&1
# Finished - release the lock taken at the top of the script.
rm -f $LOCK
echo $(date) "Lock file deleted. Script finished" >> $LOGFILE
# Let's email it - since it's being run from
# cron, just cat-ing the logfile will do this
cat $LOGFILE
|