#!/bin/bash
#
#
# OpenStack Neutron DHCP Service
#
# Description:  Manages an OpenStack Neutron DHCP Service process as an HA resource
#
# Authors: Emilien Macchi
# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han
#
# Support:      openstack@lists.launchpad.net
# License:      Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
#   OCF_RESKEY_binary
#   OCF_RESKEY_config
#   OCF_RESKEY_plugin_config
#   OCF_RESKEY_log_file
#   OCF_RESKEY_check_state_reports
#   OCF_RESKEY_state_reports_file
#   OCF_RESKEY_state_reports_timeout
#   OCF_RESKEY_user
#   OCF_RESKEY_pid
#   OCF_RESKEY_remove_artifacts_on_stop_start
#######################################################################
# Initialization:

# Load the shared OCF shell helper library. The library directory defaults to
# ${OCF_ROOT}/lib/heartbeat unless OCF_FUNCTIONS_DIR is already set.
# NOTE(review): OCF_ROOT is presumably exported by the cluster manager that
# invokes this agent -- confirm.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Load the Fuel-specific helper library the same way, defaulting to
# ${OCF_ROOT}/resource.d/fuel.
# NOTE(review): proc_stop (used by the stop action) is not defined in this
# file, so it presumably comes from one of these two libraries -- confirm.
: ${OCF_FUEL_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/fuel}
. ${OCF_FUEL_FUNCTIONS_DIR}/ocf-fuel-funcs

#######################################################################

# Fill in some defaults if no values are specified

# Restrict command lookup to the system directories.
PATH=/sbin:/usr/sbin:/bin:/usr/bin

# Logging identity; only applied when the caller did not set it already.
: "${HA_LOGTAG="ocf-neutron-dhcp-agent"}"
: "${HA_LOGFACILITY="daemon"}"

# Each instance parameter below is declared as a *_default value followed by
# its application. The '=' form (not ':=') only fills in parameters that are
# unset, so an explicitly empty parameter is left empty.

# Agent executable
OCF_RESKEY_binary_default="neutron-dhcp-agent"
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"

# Main Neutron configuration file
OCF_RESKEY_config_default="/etc/neutron/neutron.conf"
: "${OCF_RESKEY_config=${OCF_RESKEY_config_default}}"

# DHCP agent configuration file
OCF_RESKEY_plugin_config_default="/etc/neutron/dhcp_agent.ini"
: "${OCF_RESKEY_plugin_config=${OCF_RESKEY_plugin_config_default}}"

# Account the agent runs under
OCF_RESKEY_user_default="neutron"
: "${OCF_RESKEY_user=${OCF_RESKEY_user_default}}"

# PID file maintained by this resource agent
OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid"
: "${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}"

# Agent log file
OCF_RESKEY_log_file_default="/var/log/neutron/dhcp-agent.log"
: "${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}}"

# Optional monitoring based on the agent's local state reports
OCF_RESKEY_check_state_reports_default=false
: "${OCF_RESKEY_check_state_reports=${OCF_RESKEY_check_state_reports_default}}"

OCF_RESKEY_state_reports_file_default="/var/lib/neutron/dhcp_agent_report.log"
: "${OCF_RESKEY_state_reports_file=${OCF_RESKEY_state_reports_file_default}}"

OCF_RESKEY_state_reports_timeout_default=60
: "${OCF_RESKEY_state_reports_timeout=${OCF_RESKEY_state_reports_timeout_default}}"

# Whether start/stop also clean up namespaces, interfaces and child processes
OCF_RESKEY_remove_artifacts_on_stop_start_default='true'
: "${OCF_RESKEY_remove_artifacts_on_stop_start=${OCF_RESKEY_remove_artifacts_on_stop_start_default}}"



#######################################################################

# Print the command-line help text to stdout.
# The heredoc delimiter is unquoted, so $0 and ${OCF_RESKEY_binary} are
# expanded when the text is printed.
usage() {
    cat <<UEND
        usage: $0 (start|stop|reload|validate-all|meta-data|status|monitor)

        $0 manages an OpenStack DHCP Service (${OCF_RESKEY_binary}) process as an HA resource

        The 'start' operation starts the networking service.
        The 'stop' operation stops the networking service.
        The 'reload' operation restarts the networking service without removing any artifacts
        The 'validate-all' operation reports whether the parameters are valid
        The 'meta-data' operation reports this RA's meta-data information
        The 'status' operation reports whether the networking service is running
        The 'monitor' operation reports whether the networking service seems to be working

UEND
}

# Print the resource agent meta-data XML to stdout.
# The heredoc delimiter is unquoted, so the ${OCF_RESKEY_*} references are
# expanded when the document is printed. Every real instance parameter
# advertises the same *_default value that the script applies when the
# parameter is not set.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="neutron-dhcp-agent">
<version>1.0</version>

<longdesc lang="en">
Resource agent for the OpenStack Neutron DHCP Service (${OCF_RESKEY_binary})
May manage a neutron-dhcp-agent instance or a clone set that
creates a distributed neutron-dhcp-agent cluster.
</longdesc>
<shortdesc lang="en">Manages the OpenStack DHCP Service (${OCF_RESKEY_binary})</shortdesc>
<parameters>

<parameter name="dummy" unique="1">
<longdesc lang="en">
This is a dummy parameter.
Pacemaker needs it to enable reload operation for the resource
</longdesc>
<shortdesc lang="en">Dummy parameter</shortdesc>
<content type="boolean" default="false" />
</parameter>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack DHCP Agent server binary (${OCF_RESKEY_binary})
</longdesc>
<shortdesc lang="en">OpenStack DHCP Agent server binary (${OCF_RESKEY_binary})</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Neutron Service (neutron-server) configuration file
</longdesc>
<shortdesc lang="en">OpenStack DHCP Agent (neutron-server) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="plugin_config" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack DHCP Service (${OCF_RESKEY_binary}) configuration file
</longdesc>
<shortdesc lang="en">OpenStack DHCP Agent (${OCF_RESKEY_binary}) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_plugin_config_default}" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running OpenStack DHCP Service (${OCF_RESKEY_binary})
</longdesc>
<shortdesc lang="en">OpenStack DHCP Service (${OCF_RESKEY_binary}) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this OpenStack DHCP Service (${OCF_RESKEY_binary}) instance
</longdesc>
<shortdesc lang="en">OpenStack DHCP Service (${OCF_RESKEY_binary}) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>

<parameter name="log_file" unique="0" required="0">
<longdesc lang="en">
The log file to use for this OpenStack DHCP Service (${OCF_RESKEY_binary}) instance
</longdesc>
<shortdesc lang="en">OpenStack DHCP Service (${OCF_RESKEY_binary}) log file</shortdesc>
<content type="string" default="${OCF_RESKEY_log_file_default}" />
</parameter>

<parameter name="check_state_reports" unique="0" required="0">
<longdesc lang="en">
The flag, which enables or disables additional monitoring
based on agent's local state reports
</longdesc>
<shortdesc lang="en">Enable or disable local state reports based monitoring</shortdesc>
<content type="string" default="${OCF_RESKEY_check_state_reports_default}" />
</parameter>

<parameter name="state_reports_file" unique="0" required="0">
<longdesc lang="en">
This file contains DHCP agent local state report information.
There're three section in it: STARTUP, RPC_STATE_REPORT and SYNC_STATE.
</longdesc>
<shortdesc lang="en">DHCP agent local state report file</shortdesc>
<content type="string" default="${OCF_RESKEY_state_reports_file_default}" />
</parameter>

<parameter name="state_reports_timeout" unique="0" required="0">
<longdesc lang="en">
The timeout value for DHCP agent to update its local state report.
If it takes more time than the specified value agent is considered to be dead.
</longdesc>
<shortdesc lang="en">DHCP agent local state reports timeout</shortdesc>
<content type="string" default="${OCF_RESKEY_state_reports_timeout_default}" />
</parameter>

<parameter name="remove_artifacts_on_stop_start" unique="0" required="0">
<longdesc lang="en">
Clean up all resources created by Neutron DHCP agent, such as additional processes,
network namespaces, created interfaces, on agent stop and start.
</longdesc>
<shortdesc lang="en">Clean up all resources created by DHCP agent on its start and stop</shortdesc>
<content type="string" default="${OCF_RESKEY_remove_artifacts_on_stop_start_default}" />
</parameter>



</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="reload" timeout="30" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

# Print the PID of the oldest process owned by ${OCF_RESKEY_user} whose
# command line matches ${OCF_RESKEY_binary}; prints an empty line when no
# such process exists.
get_worker_pid() {
    local pgrep_flags='flo'
    local worker

    # FIXME: Drop the version probe and always use 'falo' once Fuel
    # discontinues support of Ubuntu 12.04 and CentOS 6.x, whose pgrep
    # does not know the -a option.
    # The awk program exits 1 when the reported version sorts below 3.3.
    if pgrep -V | awk 'match($0, /[0-9]\.[0-9].*/) {if (substr($0, RSTART, RLENGTH) < 3.3) {exit 1}}'; then
        pgrep_flags='falo'
    fi

    # pgrep prints "PID command line"; keep the PID column only.
    worker=$(pgrep -u ${OCF_RESKEY_user} -${pgrep_flags} ${OCF_RESKEY_binary} | awk '{print $1}')
    echo $worker
}

#######################################################################
# Functions invoked by resource manager actions

# Check that the agent can run on this node.
# Globals: OCF_RESKEY_binary, OCF_RESKEY_config, OCF_RESKEY_user (read)
# Returns: OCF_SUCCESS when the checks pass;
#          OCF_ERR_INSTALLED when the config file is missing outside of a
#          probe, or when the configured user does not exist.
neutron_dhcp_agent_validate() {
    local rc

    check_binary "$OCF_RESKEY_binary"
    check_binary netstat

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f "$OCF_RESKEY_config" ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        # Logged through ocf_log like every other message in this agent.
        ocf_log warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    getent passwd "$OCF_RESKEY_user" >/dev/null 2>&1
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    return ${OCF_SUCCESS}
}

# Report whether the agent process is running and keep the PID file in sync.
# Globals: OCF_RESKEY_pid, OCF_RESKEY_user, OCF_RESKEY_binary (read)
# Side effects: creates the PID file directory when it is missing and
#               (re)writes the PID file when it is absent or stale.
# Returns: OCF_NOT_RUNNING when no worker process is found;
#          OCF_SUCCESS when a worker runs and the PID file matches it (or
#          was just rewritten to match);
#          OCF_ERR_GENERIC when the PID file names a different live process.
neutron_dhcp_agent_status() {
    local pid
    local f_pid
    local pid_dir

    # Check and make the PID file dir. All path expansions are quoted so a
    # PID file location containing whitespace is handled correctly.
    pid_dir=$(dirname "${OCF_RESKEY_pid}")
    if [ ! -d "${pid_dir}" ] ; then
        ocf_log debug "Create pid file dir: ${pid_dir} and chown to ${OCF_RESKEY_user}"
        mkdir -p "${pid_dir}"
        chown -R "${OCF_RESKEY_user}" "${pid_dir}"
        chmod 755 "${pid_dir}"
    fi

    pid=$(get_worker_pid)
    if [ -z "$pid" ] ; then
        ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running."
        return $OCF_NOT_RUNNING
    fi

    # Check the PID file and create it if needed
    if [ ! -f "$OCF_RESKEY_pid" ] ; then
        ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) was run, but no PID file found."
        ocf_log warn "Writing PID='$pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..."
        echo "$pid" > "$OCF_RESKEY_pid"
        return $OCF_SUCCESS
    fi

    # Compare the PID from the file (its first whitespace-separated token)
    # with the PID found by get_worker_pid
    f_pid=$(tr '\n' ' ' < "$OCF_RESKEY_pid" | awk '{print $1}')
    if [ "$pid" = "$f_pid" ]; then
        return $OCF_SUCCESS
    fi

    # At this point the PID file exists and the PID stored in it
    # differs from the PID of the running worker.
    if [ -z "$f_pid" ] || [ ! -d "/proc/$f_pid" ] ; then
        # No process with the PID from the PID file was found
        ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running processes with PID=$f_pid found."
        ocf_log warn "PID-file will be re-created (with PID=$pid)."
        echo "$pid" > "$OCF_RESKEY_pid"
        return $OCF_SUCCESS
    fi

    # At this point the PID file belongs to another, still running process.
    ocf_log warn "Another daemon (with PID=$f_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$pid"
    return $OCF_ERR_GENERIC
}

# Print the value stored under a key inside one section of the state
# reports file (${OCF_RESKEY_state_reports_file}).
# Arguments: $1 - section name, used as an awk pattern
#            $2 - key name, used as a grep pattern
# Outputs:   the matching line(s) with the first field, double quotes and
#            commas removed and whitespace squeezed; an empty line when
#            nothing matches.
get_local_reports_value() {
    local section_name=$1
    local key_name=$2
    local extracted

    # 1. keep the lines from the section header up to the next '}'
    # 2. keep the lines mentioning the key
    # 3. blank out the first field (the key itself)
    # 4. strip the double quotes and commas
    extracted=$(
        awk "/$section_name/,/}/" $OCF_RESKEY_state_reports_file \
            | grep $key_name \
            | awk '{$1=""; print $0}' \
            | tr -d "\","
    )

    # Left unquoted on purpose: word splitting trims the leading blank
    # left behind by step 3.
    echo $extracted
}

# Judge the agent's health from its local state reports file.
# Globals: OCF_RESKEY_state_reports_file, OCF_RESKEY_state_reports_timeout (read)
# Returns: OCF_SUCCESS when the file is missing (a warning is logged), or
#          when the RPC timestamp is fresh and no section has been in
#          failure for longer than the timeout;
#          OCF_ERR_GENERIC when the RPC timestamp is too old, when a section
#          has been in failure for too long, or when a needed time value
#          cannot be read.
check_local_reports() {

    local SECTIONS
    local SECTION
    local STATUS
    local SINCE
    local SYSTIME
    local RPC_STATE_TIMESTAMP

    if [ ! -f "$OCF_RESKEY_state_reports_file" ] ; then
        ocf_log warn "State reports file wasn't found"
        return $OCF_SUCCESS
    fi

    SECTIONS="STARTUP RPC_STATE_REPORT SYNC_STATE"
    SYSTIME=$(date +%s)
    RPC_STATE_TIMESTAMP=$(get_local_reports_value RPC_STATE_REPORT Timestamp)
    # Drop the fractional part of the timestamp
    RPC_STATE_TIMESTAMP=${RPC_STATE_TIMESTAMP%.*}

    # An empty or non-numeric timestamp would make the arithmetic below a
    # syntax error, so fail explicitly instead.
    if [[ ! $RPC_STATE_TIMESTAMP =~ ^[0-9]+$ ]] ; then
        ocf_log err "Can't read RPC state report timestamp from $OCF_RESKEY_state_reports_file"
        return $OCF_ERR_GENERIC
    fi

    if (( SYSTIME - RPC_STATE_TIMESTAMP > OCF_RESKEY_state_reports_timeout )) ; then
        ocf_log err "Agent is not reporting for too long"
        return $OCF_ERR_GENERIC
    fi

    for SECTION in $SECTIONS ; do
        STATUS=$(get_local_reports_value $SECTION Status)
        if [[ $STATUS =~ failure ]] ; then
            # Age of the failure is taken from the section being checked.
            SINCE=$(date --date="$(get_local_reports_value $SECTION Since)" +%s)
            if [[ ! $SINCE =~ ^[0-9]+$ ]] ; then
                ocf_log err "Can't read the time $SECTION report entered failure status"
                return $OCF_ERR_GENERIC
            fi
            if (( SYSTIME - SINCE > OCF_RESKEY_state_reports_timeout )) ; then
                ocf_log err "$SECTION report is in failure status for too long"
                return $OCF_ERR_GENERIC
            fi
        fi
    done
    return $OCF_SUCCESS
}

# Monitor action: process status check, optionally followed by the local
# state reports check.
# Returns: whatever neutron_dhcp_agent_status returned when that is not
#          OCF_SUCCESS; otherwise the failing result of check_local_reports
#          when ${OCF_RESKEY_check_state_reports} is true; otherwise
#          OCF_SUCCESS.
neutron_dhcp_agent_monitor() {
    local rc pid network_amqp_check

    # Anything but success from the status check is reported as is.
    neutron_dhcp_agent_status
    rc=$?
    [ $rc -eq $OCF_SUCCESS ] || return $rc

    # State-report based monitoring is opt-in.
    if ocf_is_true "$OCF_RESKEY_check_state_reports" ; then
        check_local_reports
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] || return $rc
    fi

    ocf_log debug "OpenStack DHCP Agent (neutron-dhcp-agent) monitor succeeded"
    return $OCF_SUCCESS
}


# Start action: launch the agent as ${OCF_RESKEY_user} and wait until the
# monitor reports success.
# Arguments: $1 - optional override of remove_artifacts_on_stop_start
#                 (the reload action passes "false" for a fast restart)
# Side effects: optionally runs neutron-netns-cleanup, removes the state
#               reports file, writes the PID file.
# Returns: OCF_SUCCESS once the agent is running (or already was).
#          Exits the script with OCF_ERR_GENERIC when the monitor reports
#          anything other than success or "not running" while waiting.
neutron_dhcp_agent_start() {
    local rc
    # This variable is overridden by reload operation
    # to perform fast resource restart
    local remove_artifacts_on_stop_start=${1:-$OCF_RESKEY_remove_artifacts_on_stop_start}
    neutron_dhcp_agent_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) already running"
        return $OCF_SUCCESS
    fi

    if ocf_is_true "$remove_artifacts_on_stop_start"; then
        neutron-netns-cleanup --agent-type=dhcp --force --config-file "${OCF_RESKEY_config}"
    fi
    rm -f "$OCF_RESKEY_state_reports_file"

    # Run the agent detached in the background as a daemon.
    # Don't use ocf_run as we're sending the tool's output to /dev/null.
    # The whole command is one double-quoted string: the file paths are
    # expanded here, while \$! is left for the inner shell, which prints the
    # PID of the backgrounded agent. That PID is the only thing su writes to
    # stdout, and this shell redirects it into the PID file.
    su "${OCF_RESKEY_user}" -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
        --config-file=$OCF_RESKEY_plugin_config --log-file=$OCF_RESKEY_log_file \
        >> /dev/null 2>&1 & echo \$!" > "$OCF_RESKEY_pid"
    ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat "${OCF_RESKEY_pid}")"

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while true; do
        neutron_dhcp_agent_monitor
        rc=$?
        [ $rc -eq $OCF_SUCCESS ] && break
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 3
    done

    ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) started"
    return $OCF_SUCCESS
}


# Stop action: terminate the agent process and remove the PID file.
# Arguments: $1 - optional override of remove_artifacts_on_stop_start
#                 (the reload action passes "false" for a fast restart)
# Returns: OCF_SUCCESS when the agent is stopped (or already was);
#          OCF_ERR_GENERIC when proc_stop reports OCF_ERR_GENERIC.
neutron_dhcp_agent_stop() {
    local rc
    local worker
    local grace_seconds
    # The positional argument, when given, wins over the instance parameter.
    local wipe_artifacts=${1:-$OCF_RESKEY_remove_artifacts_on_stop_start}

    # The operation timeout is given in milliseconds; fall back to 15 seconds.
    if [ -z "$OCF_RESKEY_CRM_meta_timeout" ]; then
        grace_seconds=15
    else
        grace_seconds=$(( ($OCF_RESKEY_CRM_meta_timeout/1000) ))
    fi

    neutron_dhcp_agent_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        # Nothing to kill, but leftovers may still need cleaning.
        if ocf_is_true "$wipe_artifacts"; then
            neutron-netns-cleanup --agent-type=dhcp --force --config-file ${OCF_RESKEY_config}
        fi
        ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) already stopped"
        return $OCF_SUCCESS
    fi

    # Terminate the agent daemon
    worker=$(get_worker_pid)
    proc_stop "${worker}" "${OCF_RESKEY_binary}" $grace_seconds
    rc=$?
    if [ "${rc}" -eq "${OCF_ERR_GENERIC}" ]; then
        ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) stop failed"
        return $OCF_ERR_GENERIC
    fi

    ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})"
    rm -f $OCF_RESKEY_pid

    if ocf_is_true "$wipe_artifacts"; then
        neutron-netns-cleanup --agent-type=dhcp --force --config-file ${OCF_RESKEY_config}
    fi

    return $OCF_SUCCESS
}


# Reload action: restart the agent without removing any artifacts.
# Returns: the failing result of the stop step when stopping did not
#          succeed (the start step is then skipped, because it would find
#          the old process still running and report success without
#          restarting anything); otherwise the result of the start step.
neutron_dhcp_agent_reload() {
    local rc

    # Call stop and start without removing artifacts
    neutron_dhcp_agent_stop false
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) reload failed: stop returned $rc"
        return $rc
    fi

    neutron_dhcp_agent_start false
}

#######################################################################

# Entry point: $1 is the requested action.
# Informational actions are answered first so that they work even on a
# node where validation would fail.
case "$1" in
  meta-data)    meta_data
                exit $OCF_SUCCESS;;
  usage|help)   usage
                exit $OCF_SUCCESS;;
esac

# Anything except meta-data and help must pass validation
# (a validation failure ends the script with the validator's return code)
neutron_dhcp_agent_validate || exit $?
umask 0022

# What kind of method was invoked?
# The script's exit status is the return code of the function run here.
# validate-all has nothing left to do because validation already passed;
# unknown actions print the usage text and exit with OCF_ERR_UNIMPLEMENTED.
case "$1" in
  start)        neutron_dhcp_agent_start;;
  stop)         neutron_dhcp_agent_stop;;
  reload)       neutron_dhcp_agent_reload;;
  status)       neutron_dhcp_agent_status;;
  monitor)      neutron_dhcp_agent_monitor;;
  validate-all) ;;
  *)            usage
                exit $OCF_ERR_UNIMPLEMENTED;;
esac
