/*
 * bgp_med.c
 */

/* Gated Release 3.6 */

#include "include.h"
#include "inet.h"
#include "bgp_proto.h"
#include "bgp.h"
#include "bgp_var.h"

/*
 * BGP MED support.  The BGP MULTI_EXIT_DISC attribute is only comparable
 * between BGP routes from the same neighboring AS.  Since gated uses
 * pairwise comparisons between routes, gated needs help to know whether
 * MED values are "superior" (best MED seen from an AS) or "inferior".
 * This module allows BGP to set the "RTS_INFERIOR_MED" bit when a BGP
 * route is known to have an "inferior" MED.  In this manner, gated
 * can "compare" MEDs between BGP routes from different neighbor ASes.
 *
 * The gated function rt_change_aspath() doesn't reflect a changed state
 * bit too well.  Therefore, we use the tag field (not used elsewhere in
 * BGP) to force the RTS_INFERIOR_MED flag change.
 *
 * The most difficult case for BGP MED support is a BGP route change.
 * If the route change has changed the neighbor AS, then two MED elections
 * are required: one for the old neighbor AS, one for the new neighbor AS.
 * If the neighbor AS has not changed, we try to avoid running two elections.
 *
 * The three entry points into this module are bgp_med_rt_add(),
 * bgp_med_rt_change(), and bgp_med_rt_delete().  Both EBGP and IBGP routes
 * are handled by these functions.
 *
 * Note that the function of this module is independent of the relative
 * significance of route MED comparisons, among other tie-breakers.
 */

/*
 * BGP_HAS_AS_NEIGHBOR(asp) - true when asp is non-NULL, its path_len is
 * greater than zero, and PATH_ISSET() is false for the first path segment
 * (presumably meaning the path starts with an AS sequence rather than an
 * AS set -- confirm against the PATH_ISSET definition).  Routes whose
 * path fails this test are treated by this module as having no neighbor
 * AS, and therefore no comparable MED.
 *
 * BGP_AS_NEIGHBOR(asp) - the value found at PATH_SHORT_PTR(asp), which
 * this module uses as the neighbor AS number.  It does no checking of
 * its own; test BGP_HAS_AS_NEIGHBOR(asp) first.
 */
#define BGP_HAS_AS_NEIGHBOR(asp) \
	((asp) && (asp)->path_len > 0 && !PATH_ISSET(*PATH_SEG_PTR(asp)))
#define BGP_AS_NEIGHBOR(asp)	(*PATH_SHORT_PTR(asp))

/*
 * BGP_MED_RT_CHANGE(rt, tag) - re-submit an existing route with a new tag.
 *
 * The route's current metric, metric2, preference2 and AS path are passed
 * back unchanged; only the tag differs.  Callers in this file set or
 * clear RTS_INFERIOR_MED in rt->rt_state immediately before invoking the
 * macro, and pass rt->rt_metric as the tag when demoting and 0 when
 * promoting.
 *
 * The peer is recovered from the route's gateway task data.  If the
 * peer's group type is BGPG_INTERNAL_RT, the change goes through
 * bgp_sync_rt_change() with the preference saved in the route's
 * bsy_ibgp_rt data (asserted non-NULL) and a single next hop built from
 * that data's bsyi_dest address.  Otherwise rt_change_aspath() is called
 * with the route's own preference and router list.
 *
 * The result of either call is discarded.  `rt' is evaluated several
 * times, so it must be an expression without side effects.
 */
#define BGP_MED_RT_CHANGE(rt, tag) \
    do { \
	bgpPeer *Xbnp = (bgpPeer *) (rt)->rt_gwp->gw_task->task_data; \
	if (Xbnp->bgp_group->bgpg_type == BGPG_INTERNAL_RT) { \
	    bgp_sync *Xbsp = (bgp_sync *) ((rt)->rt_gwp->gw_data); \
	    bsy_ibgp_rt *Xbrt = (bsy_ibgp_rt *) ((rt)->rt_data); \
	    sockaddr_un Xibgp_nexthop, *Xnexthops; \
	    assert(Xbrt); \
	    sockclear_in(&Xibgp_nexthop); \
	    sock2ip(&Xibgp_nexthop) = Xbrt->bsyb_nh->bsyn_ibgp_rti->bsyi_dest; \
	    Xnexthops = &Xibgp_nexthop; \
	    (void) bgp_sync_rt_change(Xbsp, \
				      Xbnp, \
				      (rt), \
				      (rt)->rt_metric, \
				      (rt)->rt_metric2, \
				      (tag), \
				      Xbrt->bsyb_pref, \
				      (rt)->rt_preference2, \
				      1, \
				      &Xnexthops, \
				      (rt)->rt_aspath); \
	} else { \
	    (void) rt_change_aspath((rt), \
				(rt)->rt_metric, \
				(rt)->rt_metric2, \
				(tag), \
				(rt)->rt_preference, \
				(rt)->rt_preference2, \
				(rt)->rt_n_gw, \
				(rt)->rt_routers, \
				(rt)->rt_aspath); \
	} \
    } while (0)

/*
 * Values of the `check_state' local used by the insert/modify/remove
 * loops below.  Each loop starts in BGP_MED_COMPARE, where the first
 * elected route is compared against the route being processed; it then
 * either stops or switches to DEMOTE (set RTS_INFERIOR_MED on every
 * elected route) or PROMOTE (clear it on every elected route).
 */
#define	BGP_MED_COMPARE		0
#define	BGP_MED_DEMOTE		1
#define	BGP_MED_PROMOTE		2

/*
 * CISCO_MED_COMPAT is defined unconditionally here, so the first form of
 * MED_NORMALIZE below is the one in effect.
 */
#define	CISCO_MED_COMPAT

/*
 * MED_NORMALIZE(med) - the MED value actually used in comparisons.
 * With CISCO_MED_COMPAT a MED of (metric_t) -1 is treated as 0; any other
 * value is used as is.  ((metric_t) -1 is presumably the "no MED present"
 * marker -- confirm against the BGP receive code.)  Without
 * CISCO_MED_COMPAT the MED is used unchanged.
 */
#ifdef	CISCO_MED_COMPAT
#define MED_NORMALIZE(med) \
	((metric_t) (((med) == (metric_t) -1) ? 0 : (med)))
#else	/* CISCO_MED_COMPAT */
#define MED_NORMALIZE(med)	(med)
#endif	/* CISCO_MED_COMPAT */
	
/*
 * aspath_med_elect - gather the routes under rth that carry the lowest
 *                    normalized MED among those learned through gw_proto
 *                    from neighbor AS nas.
 *
 * skip_rt, when non-NULL, is excluded from the election.  The returned
 * list is NULL if no route qualified; callers in this file release it
 * with RTLIST_RESET().  (This routine arguably belongs in aspath.c.)
 */
rt_list *
aspath_med_elect __PF4(rth, rt_head *,
			skip_rt, rt_entry *,
			nas, as_t,
			gw_proto, proto_t)
{
    rt_list *winners = (rt_list *) NULL;
    metric_t best_med = (metric_t) -1;
    metric_t med;
    as_path *path;
    rt_entry *rt;

    /*
     * MEDs are comparable between any two routes from the same neighbor
     * AS, no matter whether each was heard from an external peer or
     * relayed by an internal one.  Hidden routes take part as well: they
     * lost route selection, but not necessarily because of their MED.
     */
    RT_ALLRT(rt, rth) {
	/* The walk ends at the first route marked for deletion. */
	if (BIT_TEST(rt->rt_state, RTS_DELETE)) {
	    break;
	}

	/* Leave out the route the caller asked us to ignore. */
	if (rt == skip_rt) {
	    continue;
	}

	/* Only routes from the requested neighbor AS are candidates. */
	path = rt->rt_aspath;
	if (!BGP_HAS_AS_NEIGHBOR(path)) {
	    continue;
	}
	if (nas != BGP_AS_NEIGHBOR(path)) {
	    continue;
	}

	/* ... and only those learned through the requested protocol. */
	if (rt->rt_gwp->gw_proto != gw_proto) {
	    continue;
	}

	med = MED_NORMALIZE(rt->rt_metric);
	if (med > best_med) {
	    continue;
	}

	/*
	 * A strictly better MED throws away the winners collected so
	 * far; a tie simply joins them.
	 */
	if (med < best_med) {
	    best_med = med;
	    RTLIST_RESET(winners);
	}
	RTLIST_ADD(winners, rt);
    } RT_ALLRT_END(rt, rth);

    return winners;
}

/*
 * bgp_med_rt_insert - determine the MED standing of a route that is about
 * to appear under rth with neighbor AS nas and MED `metric'.
 *
 * Returns RTS_INFERIOR_MED when an existing BGP route from nas has a
 * better normalized MED, and 0 otherwise (including when rth is NULL).
 * If the new MED strictly beats the current winners, every one of them
 * is marked RTS_INFERIOR_MED and pushed through BGP_MED_RT_CHANGE.
 */
flag_t
bgp_med_rt_insert __PF3(rth, rt_head *,
			nas, as_t,
			metric, metric_t)
{
    rt_list *winners;
    rt_entry *rt;
    metric_t new_med, cur_med;
    flag_t result = 0;
    int phase = BGP_MED_COMPARE;

    if (!rth) {
	return 0;
    }

    /*
     * Normalize the incoming MED and find out who currently holds the
     * best MED for this neighbor AS.
     */
    new_med = MED_NORMALIZE(metric);
    winners = aspath_med_elect(rth, 0, nas, RTPROTO_BGP);

    RT_LIST(rt, winners, rt_entry) {
	if (phase == BGP_MED_COMPARE) {
	    /*
	     * First winner decides the outcome.  Elected routes are
	     * always superior, so the new route is either:
	     *   worse   -> report it as inferior, touch nothing;
	     *   tied    -> report it as superior, touch nothing;
	     *   better  -> report it as superior, demote all winners.
	     */
	    assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
	    cur_med = MED_NORMALIZE(rt->rt_metric);
	    if (new_med >= cur_med) {
		if (new_med > cur_med) {
		    result = RTS_INFERIOR_MED;
		}
		break;
	    }
	    phase = BGP_MED_DEMOTE;
	}

	/* Demote this former winner; tag it with its own metric. */
	rt->rt_state |= RTS_INFERIOR_MED;
	BGP_MED_RT_CHANGE(rt, rt->rt_metric);
    } RT_LIST_END(rt, winners, rt_entry);

    RTLIST_RESET(winners);

    return result;
}

/*
 * bgp_med_rt_modify - keep RTS_INFERIOR_MED flags correct when the MED of
 * an existing route changes and its neighbor AS stays the same.
 *
 *   rth         route head holding modify_rt and its competitors
 *               (asserted non-NULL)
 *   modify_rt   route whose MED is changing.  Its RTS_INFERIOR_MED bit
 *               may be set or cleared here, but it is never passed to
 *               BGP_MED_RT_CHANGE by this function; only the competing
 *               routes are.
 *   nas         neighbor AS shared by the old and new path
 *   old_metric  MED before the change
 *   new_metric  MED after the change
 *
 * Both MEDs are normalized with MED_NORMALIZE before use.  The
 * competitors are the routes elected by aspath_med_elect() with
 * modify_rt excluded.  The first elected route is examined in the
 * BGP_MED_COMPARE state to decide whether the loop stops or whether it
 * and all remaining elected routes are demoted or promoted.
 */
void
bgp_med_rt_modify __PF5(rth, rt_head *,
			modify_rt, rt_entry *,
			nas, as_t,
			old_metric, metric_t,
			new_metric, metric_t)
{
    rt_entry *rt;
    rt_list *change_list;
    metric_t metric2;
    int check_state;

    assert(rth);

    /*
     * Adjust metrics. Metrics must change for us to care.
     */
    old_metric = MED_NORMALIZE(old_metric);
    new_metric = MED_NORMALIZE(new_metric);
    if (old_metric == new_metric) {
	return;
    }

    /*
     * An inferior MED route can't get any worse with an increasing MED.
     * Note that a superior route *can* get better with a decreasing MED --
     * it can break a previous tie.
     */
    if (old_metric < new_metric
      && BIT_TEST(modify_rt->rt_state, RTS_INFERIOR_MED)) {
	return;
    }
    /*
     * Elect the best routes among the *other* routes from this neighbor
     * AS; modify_rt itself is skipped.
     */
    change_list = aspath_med_elect(rth, modify_rt, nas, RTPROTO_BGP);
    check_state = BGP_MED_COMPARE;

    RT_LIST(rt, change_list, rt_entry) {
	if (check_state == BGP_MED_COMPARE) {
	    /* metric2 is the (normalized) best MED among the other routes. */
	    metric2 = MED_NORMALIZE(rt->rt_metric);

	    if (old_metric < new_metric) {
		assert(old_metric <= metric2);
		/*
		 * Cases for superior MED route with increasing (worsening) MED:
		 * 1. If the MED is still clearly the best, quit.
		 * 2. If the MED has increased to tie for best, promote
		 *    other best routes.
		 * 3. If the MED is no longer tied for best, set INFERIOR
		 *    flag and quit.
		 * 4. If the MED has otherwise increased to lose, set INFERIOR
		 *    flag and promote other best routes.
		 */
		if (new_metric < metric2) {	/* case 1 */
		    assert(BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
		    break;
		}

		if (old_metric == metric2) {	/* case 3 */
		    assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
		    modify_rt->rt_state |= RTS_INFERIOR_MED;
		    break;
		}

		/* cases 2 and 4 */
		check_state = BGP_MED_PROMOTE;
		assert(BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
		rt->rt_state &= ~RTS_INFERIOR_MED;
		BGP_MED_RT_CHANGE(rt, 0);

		if (new_metric > metric2) {	/* case 4 */
		    modify_rt->rt_state |= RTS_INFERIOR_MED;
		}
	    } else if (BIT_TEST(modify_rt->rt_state, RTS_INFERIOR_MED)) {
		assert(old_metric > metric2);
		/*
		 * Cases for inferior MED route with decreasing (improving) MED:
		 * 1. If the MED is still inferior, quit.
		 * 2. If the MED has changed to tie for best, clear INFERIOR
		 *    flag and quit.
		 * 3. If the MED has changed to be clearly best, clear INFERIOR
		 *    flag and demote the old winners.
		 */
		if (new_metric > metric2) {	/* case 1 */
		    break;
		}

		/* cases 2 and 3 */
		modify_rt->rt_state &= ~RTS_INFERIOR_MED;
		if (new_metric == metric2) {	/* case 2 */
		    break;
		}

		/* case 3 */
		check_state = BGP_MED_DEMOTE;
		assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
		rt->rt_state |= RTS_INFERIOR_MED;
		BGP_MED_RT_CHANGE(rt, rt->rt_metric);
	    } else {
		/*
		 * Cases for superior MED route with decreasing (improving) MED:
		 * 1. If the MED was previously tied for best, demote
		 *    the old winners.
		 * 2. Otherwise, the MED was clearly best, quit.
		 */
		if (old_metric == metric2) {	/* case 1 */
		    check_state = BGP_MED_DEMOTE;
		    assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
		    rt->rt_state |= RTS_INFERIOR_MED;
		    BGP_MED_RT_CHANGE(rt, rt->rt_metric);
		} else {	/* case 2 */
		    assert(old_metric < metric2);
		    break;
		}
	    }
	} else if (check_state == BGP_MED_DEMOTE) {
	    /* Remaining elected routes: demote, tagging with own metric. */
	    assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
	    rt->rt_state |= RTS_INFERIOR_MED;
	    BGP_MED_RT_CHANGE(rt, rt->rt_metric);
	} else if (check_state == BGP_MED_PROMOTE) {
	    /* Remaining elected routes: promote, clearing the tag. */
	    assert(BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
	    rt->rt_state &= ~RTS_INFERIOR_MED;
	    BGP_MED_RT_CHANGE(rt, 0);
	} else {
	    /* check_state only ever holds one of the three values above. */
	    assert(FALSE);
	}
    } RT_LIST_END(rt, change_list, rt_entry);

    RTLIST_RESET(change_list);
}

/*
 * bgp_med_rt_remove - fix up MED flags of the remaining routes from
 * neighbor AS nas when remove_rt (with MED `metric') is going away.
 *
 * Nothing happens if remove_rt was itself RTS_INFERIOR_MED.  Otherwise
 * the best of the remaining routes are elected; if they were not already
 * tied with the departing route they are promoted (RTS_INFERIOR_MED
 * cleared, tag reset to 0 via BGP_MED_RT_CHANGE).  rth must be non-NULL.
 */
void
bgp_med_rt_remove __PF4(rth, rt_head *,
			remove_rt, rt_entry *,
			nas, as_t,
			metric, metric_t)
{
    rt_list *heirs;
    rt_entry *rt;
    metric_t gone_med, heir_med;
    int phase = BGP_MED_COMPARE;

    assert(rth);

    /* Losing an inferior route cannot change who the winners are. */
    if (BIT_TEST(remove_rt->rt_state, RTS_INFERIOR_MED)) {
	return;
    }

    gone_med = MED_NORMALIZE(metric);
    heirs = aspath_med_elect(rth, remove_rt, nas, RTPROTO_BGP);

    RT_LIST(rt, heirs, rt_entry) {
	if (phase == BGP_MED_COMPARE) {
	    /*
	     * The departing route was superior, so the new winners
	     * either shared its MED (nothing to do) or were inferior
	     * until now (promote every one of them).
	     */
	    heir_med = MED_NORMALIZE(rt->rt_metric);
	    if (gone_med == heir_med) {
		assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
		break;
	    }
	    phase = BGP_MED_PROMOTE;
	    assert(gone_med < heir_med);
	    assert(BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
	}

	/* Promote this route and clear its tag. */
	rt->rt_state &= ~RTS_INFERIOR_MED;
	BGP_MED_RT_CHANGE(rt, 0);
    } RT_LIST_END(rt, heirs, rt_entry);

    RTLIST_RESET(heirs);
}

#ifdef	BGP_MED_RT_SANITY
/*
 * bgp_med_rt_sanity - debugging cross-check of the MED flags under rth
 * for neighbor AS nas.  Asserts that no elected route is marked
 * RTS_INFERIOR_MED, that every flagged BGP route from nas has a strictly
 * worse normalized MED than the elected one, and that every unflagged
 * BGP route from nas has exactly the elected MED.
 */
void
bgp_med_rt_sanity __PF2(rth, rt_head *,
			nas, as_t)
{
    rt_list *winners;
    rt_entry *rt;
    as_path *path;
    metric_t best_med = (metric_t) -1;
    metric_t med;

    winners = aspath_med_elect(rth, 0, nas, RTPROTO_BGP);

    /*
     * Every elected route must be superior; remember the winning MED.
     */
    RT_LIST(rt, winners, rt_entry) {
	assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
	best_med = MED_NORMALIZE(rt->rt_metric);
    } RT_LIST_END(rt, winners, rt_entry);

    /*
     * Walk all routes from this neighbor AS and check each flag against
     * the winning MED.
     */
    RT_ALLRT(rt, rth) {
	if (BIT_TEST(rt->rt_state, RTS_DELETE)) {
	    break;
	}

	path = rt->rt_aspath;
	if (!BGP_HAS_AS_NEIGHBOR(path)
	  || nas != BGP_AS_NEIGHBOR(path)
	  || rt->rt_gwp->gw_proto != RTPROTO_BGP) {
	    continue;
	}

	med = MED_NORMALIZE(rt->rt_metric);
	if (BIT_TEST(rt->rt_state, RTS_INFERIOR_MED)) {
	    assert(best_med < med);
	} else {
	    assert(best_med == med);
	}
    } RT_ALLRT_END(rt, rth);

    RTLIST_RESET(winners);
}
#endif	/* BGP_MED_RT_SANITY */

/*
 * bgp_med_rt_add - add a BGP route, first settling its MED standing.
 *
 * If the path in rtp has a neighbor AS, bgp_med_rt_insert() decides
 * whether the new route is inferior (and demotes existing winners if the
 * new route beats them); the resulting flag is OR'ed into rtp->rtp_state
 * for the duration of the add.  The route is then added through
 * bgp_sync_rt_add() when bsp is non-NULL, or rt_add() otherwise.
 * RTS_INFERIOR_MED is cleared from rtp->rtp_state before returning so the
 * parameter block can be reused.  Returns the result of the add routine.
 */
rt_entry *
bgp_med_rt_add __PF4(bsp, bgp_sync *,
		     bnp, bgpPeer *,
		     rth, rt_head *,
		     rtp, rt_parms *)
{
    as_path *path = rtp->rtp_asp;
    rt_entry *new_rt;
    as_t neighbor_as;

    /*
     * Only paths with a neighbor AS have a comparable MED.
     */
    if (BGP_HAS_AS_NEIGHBOR(path)) {
	neighbor_as = BGP_AS_NEIGHBOR(path);
	rtp->rtp_state |= bgp_med_rt_insert(rth, neighbor_as, rtp->rtp_metric);
    }

    if (!bsp) {
	new_rt = rt_add(rtp);
    } else {
	new_rt = bgp_sync_rt_add(bsp, bnp, rth, rtp);
    }

    /*
     * Do not let the flag leak into the next route built from rtp.
     */
    rtp->rtp_state &= ~RTS_INFERIOR_MED;

#ifdef	BGP_MED_RT_SANITY
    if (new_rt && BGP_HAS_AS_NEIGHBOR(new_rt->rt_aspath))
	bgp_med_rt_sanity(new_rt->rt_head, neighbor_as);
#endif	/* BGP_MED_RT_SANITY */

    return new_rt;
}

/*
 * bgp_med_rt_change - change a BGP route while keeping MED state coherent.
 *
 * Before the change is handed on, the RTS_INFERIOR_MED flags of this
 * route and of its competitors are brought up to date:
 *   - old and new path both have a neighbor AS and they differ:
 *     handled as a removal from the old AS followed by an insertion
 *     into the new AS;
 *   - both have the same neighbor AS: bgp_med_rt_modify() handles the
 *     MED change;
 *   - only the old path has a neighbor AS: handled as a removal;
 *   - only the new path has a neighbor AS: handled as an insertion.
 *
 * The caller-supplied tag is not used: the tag is overwritten with the
 * new metric when the route ends up RTS_INFERIOR_MED, and with 0
 * otherwise, so that it always matches the MED state of the route.
 *
 *   bsp        if non-NULL the change goes through bgp_sync_rt_change(),
 *              otherwise through rt_change_aspath() plus rt_refresh()
 *   bnp        peer, passed through to bgp_sync_rt_change()
 *   rt         route being changed
 *   metric     new MED
 *   localpref, pref, pref2, n_gw, nexthops
 *              passed through unchanged to the change routine
 *   asp        new AS path
 *
 * Returns whatever the underlying change routine returned; callers
 * should be prepared for NULL.
 */
rt_entry *
bgp_med_rt_change __PF11(bsp, bgp_sync *,
			  bnp, bgpPeer *,
			  rt, rt_entry *,
			  metric, metric_t,
			  localpref, metric_t,
			  tag, tag_t,
			  pref, pref_t,
			  pref2, pref_t,
			  n_gw, int,
			  nexthops, sockaddr_un **,
			  asp, as_path *)
{
    as_path *aspo;
    as_t naso, nas;
#ifdef	BGP_MED_RT_SANITY
    int check_old = 0, check_new = 0;
#endif	/* BGP_MED_RT_SANITY */

    /*
     * Get the old neighbor AS of route, if any.
     */
    aspo = rt->rt_aspath;
    if (BGP_HAS_AS_NEIGHBOR(aspo)) {
	/*
	 * Have old neighbor AS; check for new neighbor AS.
	 */
	naso = BGP_AS_NEIGHBOR(aspo);
	if (BGP_HAS_AS_NEIGHBOR(asp)) {
	    /*
	     * If we have different old and new neighbor ASes, then act
	     * as if the old route was deleted, and new route was added.
	     * Neighbor ASes change when IBGP peers inject a different
	     * BGP external route into the mesh, or when a route server
	     * (omitting its own AS) advertises a different route.
	     * If the neighbor AS remains the same, optimize by calling
	     * bgp_med_rt_modify.
	     */
	    nas = BGP_AS_NEIGHBOR(asp);
	    if (naso != nas) {
#ifdef	BGP_MED_RT_SANITY
		check_old = check_new = 1;
#endif	/* BGP_MED_RT_SANITY */
		bgp_med_rt_remove(rt->rt_head, rt, naso, rt->rt_metric);
		rt->rt_state &= ~RTS_INFERIOR_MED;
		rt->rt_state |= bgp_med_rt_insert(rt->rt_head, nas, metric);
	    } else {
#ifdef	BGP_MED_RT_SANITY
		check_new = 1;
#endif	/* BGP_MED_RT_SANITY */
		bgp_med_rt_modify(rt->rt_head, rt, nas, rt->rt_metric, metric);
	    }
	} else {
	    /*
	     * Handle case when new neighbor is incomparable.
	     */
#ifdef	BGP_MED_RT_SANITY
	    check_old = 1;
#endif	/* BGP_MED_RT_SANITY */
	    bgp_med_rt_remove(rt->rt_head, rt, naso, rt->rt_metric);
	    rt->rt_state &= ~RTS_INFERIOR_MED;
	}
    } else {
	/*
	 * Handle case when old neighbor was incomparable.
	 */
	assert(!BIT_TEST(rt->rt_state, RTS_INFERIOR_MED));
	if (BGP_HAS_AS_NEIGHBOR(asp)) {
#ifdef	BGP_MED_RT_SANITY
	    check_new = 1;
#endif	/* BGP_MED_RT_SANITY */
	    nas = BGP_AS_NEIGHBOR(asp);
	    rt->rt_state |= bgp_med_rt_insert(rt->rt_head, nas, metric);
	}
    }

    /*
     * The BGP tag must match the MED state of the route.
     */
    tag = (BIT_TEST(rt->rt_state, RTS_INFERIOR_MED)) ? metric : 0;
    if (bsp) {
	rt = bgp_sync_rt_change(bsp,
				bnp,
				rt,
				metric,
				localpref,
				tag,
				pref,
				pref2,
				n_gw,
				nexthops,
				asp);
    } else {
	rt = rt_change_aspath(rt,
			      metric,
			      localpref,
			      tag,
			      pref,
			      pref2,
			      n_gw,
			      nexthops,
			      asp);
	/*
	 * Only refresh a route we actually got back.  The sanity code
	 * below already allows for a NULL result from the change
	 * routines, so do not dereference one here.
	 */
	if (rt) {
	    rt_refresh(rt);
	}
    }

#ifdef	BGP_MED_RT_SANITY
    if (rt) {
	if (check_old)
	    bgp_med_rt_sanity(rt->rt_head, naso);
	if (check_new)
	    bgp_med_rt_sanity(rt->rt_head, nas);
    }
#endif	/* BGP_MED_RT_SANITY */

    return rt;
}

/*
 * bgp_med_rt_delete - delete a BGP route, first letting the remaining
 * routes from the same neighbor AS take over its MED standing.
 *
 * If the route's path has a neighbor AS, bgp_med_rt_remove() promotes
 * the next-best routes as needed.  The route is then deleted through
 * bgp_sync_rt_delete() when bsp is non-NULL, or rt_delete() otherwise.
 * With BGP_MED_RT_SANITY the destination is copied beforehand so the
 * route head can be looked up again and cross-checked after the delete.
 */
void
bgp_med_rt_delete __PF2(bsp, bgp_sync *,
			 rt, rt_entry *)
{
    as_path *path = rt->rt_aspath;
    as_t neighbor_as;
#ifdef	BGP_MED_RT_SANITY
    rt_head *rth;
    sockaddr_un *dest = (sockaddr_un *) NULL;
    sockaddr_un *dest_mask = (sockaddr_un *) NULL;
#endif	/* BGP_MED_RT_SANITY */

    /*
     * Only routes with a neighbor AS take part in MED elections.
     */
    if (BGP_HAS_AS_NEIGHBOR(path)) {
	neighbor_as = BGP_AS_NEIGHBOR(path);
	bgp_med_rt_remove(rt->rt_head, rt, neighbor_as, rt->rt_metric);

#ifdef	BGP_MED_RT_SANITY
	/* Remember where the route lived; rt is gone after the delete. */
	dest = sockdup(rt->rt_dest);
	dest_mask = rt->rt_dest_mask;
#endif	/* BGP_MED_RT_SANITY */
    }

    if (!bsp) {
	rt_delete(rt);
    } else {
	bgp_sync_rt_delete(bsp, rt);
    }

#ifdef	BGP_MED_RT_SANITY
    if (dest) {
	rth = rt_table_locate(dest, dest_mask);
	if (rth) {
	    bgp_med_rt_sanity(rth, neighbor_as);
	}
	sockfree(dest);
    }
#endif	/* BGP_MED_RT_SANITY */
}
