#!/bin/bash

# Periodically check if the primary database is reachable. If not,
# promote this node to be the primary database.

# This script is NOT SUPPORTED by Red Hat Global Support Services.

# Strict mode: stop at the first failing command (errexit) and treat any
# reference to an unset variable as an error (nounset).
set -o errexit -o nounset

# Load the shared replication library.
# NOTE(review): presumably this is what provides log, iptables_open and
# the *_HOST / PGSQL_* / MAX_PRIMARY_FAILCOUNT settings used below --
# none of them are defined in this script.
source /usr/libexec/pgsql-replication.sh

# From this point on, append everything the script prints (stdout and
# stderr alike) to its own log file.
exec 1>>/var/log/pgsql-standby-check-promote.log 2>&1

# Only the configured standby node may act. On any other host, log the
# fact and leave with success. The pattern is quoted so it is compared
# literally, not as a glob.
case "$HOSTNAME" in
    "$STANDBY_HOST")
        ;;
    *)
        log INFO "Not on $STANDBY_HOST, exiting"
        exit 0
        ;;
esac

# Refuse to continue without a password file for the postgres account.
# NOTE(review): presumably required because the psql probe further down
# is run with -w (never prompt for a password) -- confirm.
PGPASS_FILE=/var/lib/pgsql/.pgpass
if [[ ! -e "$PGPASS_FILE" ]]; then
    log ERROR "Please create /var/lib/pgsql/.pgpass"
    exit 1
fi

# The local PostgreSQL service must be up before a promotion can be
# considered. Capture the status explicitly so a non-zero result is
# handled here rather than tripping errexit.
local_pg_status=0
/sbin/service postgresql92-postgresql status >/dev/null || local_pg_status=$?
if (( local_pg_status != 0 )); then
    log WARNING "PostgreSQL is not running locally, exiting."
    exit 1
fi

# Nothing to do if we are the primary already.
#
# The presence of recovery.conf in the data directory is used as the
# "this node is still a standby" marker; when it is absent we treat the
# node as primary and stop.
#
# The path is quoted on purpose: unquoted, a PGSQL_HOME that word-splits
# would make the test itself fail with a syntax error, which the negation
# would turn into a bogus "already the primary" result and a silent
# exit 0 -- i.e. the standby would never fail over.
if [ ! -e "$PGSQL_HOME/data/recovery.conf" ]; then
    log INFO "I'm already the primary, nothing to do."
    exit 0
fi

# Check if we can connect to the database on the first node.
# It's either up in primary mode, or down.
#
# If it's down, we become the new primary and allow outside
# connections.
#
# To prevent short network outages from causing a failover, we only
# fail over once MAX_PRIMARY_FAILCOUNT consecutive checks have failed.
# The running count is kept in $PGSQL_HOME/primary-failcount between
# invocations (as a single line "PRIMARY_FAILCOUNT=<n>") and the file is
# removed as soon as the primary answers again.

FAILCOUNT_FILE="$PGSQL_HOME/primary-failcount"

# Probe the primary with a trivial query, run as the postgres user.
# -w makes psql fail instead of prompting when no password is available.
if ! su -l -c "scl enable postgresql92 -- psql -h $PRIMARY_HOST -U $PGSQL_SQL_USER -w -c 'select 1' vmdb_production" postgres >/dev/null; then
    log INFO "$PRIMARY_HOST is down"

    # Load the previous failure count. The file is parsed, not sourced:
    # sourcing would execute whatever the file contains, and a damaged
    # value would abort every later run under "set -eu" so the failover
    # could never happen.
    # NOTE(review): presumably this script runs as root while the file
    # lives in a postgres-writable directory -- confirm.
    PRIMARY_FAILCOUNT=0
    if [ -e "$FAILCOUNT_FILE" ]; then
        stored_line=
        # read returns non-zero on a missing trailing newline or an
        # unreadable file; neither should abort the script.
        IFS= read -r stored_line <"$FAILCOUNT_FILE" || true
        stored_count=${stored_line#PRIMARY_FAILCOUNT=}
        case "$stored_count" in
            '' | *[!0-9]*)
                log WARNING "Ignoring malformed $FAILCOUNT_FILE, restarting failure count at 0"
                ;;
            *)
                # Force base 10 so a leading zero is not read as octal.
                PRIMARY_FAILCOUNT=$(( 10#$stored_count ))
                ;;
        esac
    fi

    PRIMARY_FAILCOUNT=$(( PRIMARY_FAILCOUNT + 1 ))

    if [ "$PRIMARY_FAILCOUNT" -lt "$MAX_PRIMARY_FAILCOUNT" ]; then
        log INFO "Current failure count $PRIMARY_FAILCOUNT less than threshold $MAX_PRIMARY_FAILCOUNT, waiting"

        # Remember the count for the next invocation.
        printf 'PRIMARY_FAILCOUNT=%s\n' "$PRIMARY_FAILCOUNT" >"$FAILCOUNT_FILE"
    else
        log INFO "$MAX_PRIMARY_FAILCOUNT failures reached, promoting myself to primary"

        # errexit (set at the top of the script) aborts here if the
        # promote command fails, so the steps below only run after a
        # successful promotion.
        su -l -c "scl enable postgresql92 -- pg_ctl -D $PGSQL_HOME/data promote" postgres

        # NOTE(review): presumably opens the firewall for outside
        # connections, per the description above -- defined elsewhere.
        iptables_open

        # -f: the file does not exist when the threshold is reached on
        # the very first failure (e.g. MAX_PRIMARY_FAILCOUNT=1); a plain
        # rm would then fail and make the script exit non-zero after a
        # successful promotion.
        rm -f -- "$FAILCOUNT_FILE"
    fi
else
    log DEBUG "Primary $PRIMARY_HOST is up, nothing to do."

    # The failures were not consecutive after all: start over.
    if [ -e "$FAILCOUNT_FILE" ]; then
        log INFO "Resetting failure count to 0"
        rm -f -- "$FAILCOUNT_FILE"
    fi
fi
