Revision a6b51dd0

View differences:

.gitignore
1
*.aux
2
*.bbl
3
*.blg
4
*.fdb_latexmk
5
*.log
6
*.synctex.gz
7
*.fls
Readme.md
1
# Notes and Specs
2

  
3
This repo is about insights, models and specification for our analysis and code.
bgp_model/bgp_model.tex
1
\documentclass[twocolumn, 10pt]{article}
2
\usepackage{acronym} 
3
\usepackage{tikz} 
4
\usepackage{cleveref} 
5
\usetikzlibrary{calc,shapes,arrows}
6

  
7
\title{BGP model and other oddities}
8
\begin{document}
9
\maketitle
10

  
11
\acrodef{ADV}{advertisement}
12
\acrodef{AS}{autonomous system}
13
\acrodef{MiCe}{Milani Centrality}
14

  
15
We model a eBGP speaker graph; in the following \ac{AS}, speaker and node are used interchangeably.
16

  
17
\section{Preliminaries}
18
Nodes form a communication network to propagate BGP \acp{ADV}, the BGP messages.
19
\acp{ADV} can be of several types, the two related to prefix/destinations are 
20
\begin{itemize}
21
\item UPDATE;
22
\item WITHDRAWAL.
23
\end{itemize}
24
We are not interested in the others, devoted to connection management.
25

  
26
We use the terms prefix and destination interchangeably and they indicate an IP subnet or a collection of IP subnets.
27

  
28
A route is a pair $r=(d,\xi)$ of destination and attributes.
29
$\xi$ contains information such as:
30
\begin{itemize}
31
\item path, which is a list of \acp{AS};
32
\item originator id, which is the IP address for the entry point of the originating \ac{AS}.
33
\end{itemize}
34

  
35
\section{BGP graph}
36
Nodes form a strongly connected bidirectional graph $G(V,E)$.
37
We indicate as $G^k(V^k, E^k)$ the fully connected subgraph of tier-one BGP nodes, such that, $V^k\subset V,\ E^K = V^k\times V^k$.
38

  
39
Links can be either peer-to-peer or customer-provider depending on the role involved nodes are playing.
40
We indicate with $\Lambda=\{\pi,c,s\}$ the labels indicating peer, consumer and provider respectively.
41
The function $\lambda:E\to \Lambda$ assigns to a node edge $(i,j)$ the role $i$ has with respect to $j$.
42
Hence, $\lambda(i,j) = \pi \iff \lambda(j,i)=\pi$ and $\lambda(i,j) = c \iff \lambda(j,i)=s$
43

  
44
Note that, $\lambda(i,j) = \pi\ \forall i,j\in V^k$.
45

  
46
We call $N_i = \{j\in V: \exists (i,j)\in E\}$ the set of neighbours for node $i$ and $C_i=\{j\in N_i: \lambda(i,j)=s\}$ the set of customers of node $i$.
47
\section{Advertisement propagation}
48
When a node $i\in V$ generates an \ac{ADV} for the route $r$, it is propagated to all nodes using the links of $G(V,E)$.
49
Propagation considers:
50
\begin{itemize}
51
\item $\lambda()$;
52
\item node policies blocking \ac{ADV} forwarding.
53
\end{itemize}
54

  
55
In the following we neglect blocking policies for the ease of analysis but it should not change it too much.
56
\begin{figure}
57
\centering
58
\begin{tikzpicture}
59
\tikzset{edge/.style = {->,> = latex',line width=0.2mm}}
60

  
61
\node[rectangle,text width=5em,align=center,draw=black] (b1) {$j$ receives $r$ from $i$};
62
\node[diamond,text width=4em,align=center,draw=black,below of=b1,node distance=7em] (b2) {$\lambda(i,j)==c?$};
63
\node[rectangle,text width=5em,align=center,draw=black] at ($(b2.west)+(-1.0,-1.5)$) (b3) {$j$ propagates to $C_j$};
64
\node[rectangle,text width=5em,align=center,draw=black] at ($(b2.east)+(+1.0,-1.5)$) (b4) {$j$ propagates to $N_j\setminus \{i\}$};
65

  
66
\draw[edge] (b1)->(b2);
67
\draw[edge] (b2)-|(b3) node[midway, draw=none, above] {n} ;
68
\draw[edge] (b2)-|(b4) node[midway, draw=none, above] {y} ;
69

  
70
\end{tikzpicture}
71
\caption{Flow chart for \ac{ADV} forwarding.}
72
\label{fig:adv_forwarding}
73
\end{figure}
74

  
75
\cref{fig:adv_forwarding} represents the deciding policy for \ac{ADV} forwarding using the standard policy of \textit{no-valley and prefer-customer}~\cite{elmokashfi2010scalability}.
76

  
77
Propagation policies and $G$ structure (it includes a full meshed graph $G^k$ of nodes with peer links), implies a very specific pattern of route propagation from one node $i\in V\setminus V^k$ to the rest of the network.
78
We model this pattern by sub-dividing the graph $G$ in three components as the propagation happens in three phases:
79
\begin{itemize}
80
\item Phase 1: node $i\in V\setminus V^k$ generates an \ac{ADV} for route $r$ and this spreads toward a node $z\in V^k$; we call the resulting sub-graph $G^{(1)}(V^{(1)},E^{(1)})$;
81
\item Phase 2: \ac{ADV} propagates toward tier one nodes $V^k$; the resulting sub-graph is $G^{(2)}=G^k$;
82
\item Phase 3: \ac{ADV} propagates from $z\in V^k$ to all the nodes $j\in V\setminus V^k\setminus V^{(1)}$; we call the resulting sub-graph $G^{(3)}(V^{(3)},E^{(3)})$.
83
\end{itemize}
84

  
85
\subsection{Phase 1 graph}
86
Let $i$ be the \ac{ADV} originator and $j\in V$ another node in $G$. 
87
$j\in V^{(1)} \iff \exists p_{ij}$ a path in $G$ between $i$ and $j$, with $p_{ij}=(i,k_0)\dots (k_l,j)$ such that:
88
\begin{itemize}
89
\item $k_x\notin V^k,\ \forall x=0,\dots,l$;
90
\item we have one of the following conditions:
91
	\begin{itemize}
92
	\item $\lambda(\rho,t)=c\ \forall (\rho,t)\in p_{ij}$;
93
	\item $\exists z\leq l: \lambda(\rho,t)=c\ \forall (\rho,t)\in p_{ik_z}, \lambda(k_z,k_{z+1})=\pi, \lambda(\rho,t)=s\ \forall (\rho,t)\in p_{k_{z+1},j}$
94
	\end{itemize}
95
\end{itemize}
96
\subsection{Phase 3 graph}
97
Phase 3 graph is the one connecting the nodes $V^k$ to all the nodes $V^{(3)}=V\setminus V^k\setminus V^{(1)}$.
98
$j\in V^{(3)} \iff \exists p_{ij}$ a path in $G$ between $i\in V^k$ and $j$, with $p_{ij}=(i,k_0)\dots (k_l,j)$ such that:
99
\begin{itemize}
100
	\item $j,k_x\notin V^k\cup V^{(1)},\ \forall x=0,\dots,l$;
101
	\item $\lambda(\rho,t)=s\ \forall (\rho,t)\in p_{ij}$;
102
\end{itemize}
103

  
104
\subsection{Propagation graph}
105
Given an originator $i\in V\setminus V^k$, we call propagation graph $G^P=(V^{(1)}\cup V^{(2)}\cup V^{(3)},E^{(1)}\cup E^{(2)}\cup E^{(3)})$ the graph comprising all the feasible links for \ac{ADV} propagation.
106

  
107
\section{MRAI timers}
108
For each neighbour $j$, every node $i$ consider an MRAI timer value $T_{ij}$.
109
The typical value for it is 30 seconds.
110

  
111
Each node will consider route advertisements separately. 
112
Routes can differ for the destination $d$ or for attributes $\xi$.
113
If two routes share the same destination $d$ but they include different attributes $\xi_1\neq \xi_2$ then node $i$ will process two different UPDATES messages.
114

  
115
With respect to the destination $d$, node $i$ will not send more than one message every $T_{ij}$ seconds.
116
If multiple route messages $r_1,\dots,r_m$ arrive to $i$ for the same destination $d$ but with different attributes $\xi_1,\dots,\xi_m$, node $i$ will propagate them separately, waiting $T_{ij}$ seconds between one transmission and the other\footnote{Note: this behaviour is mandated by the RFC, section 9.2.1.1 which foresees anyway some overhead issues in actual implementation. Solution to overhead is left to implementers.}.
117

  
118
For each destination $d$, node $i$ has a queue of route advertisements $r_1,\dots,r_m$, and, for each $j\in N_i$ it keeps a function $\tau(i,j,d): V\times V\times D\to [0,T_{ij}]$ expressing the minimum remaining time for another transmission.
119

  
120
\subsection{Fabrikant issue}
121
In the Fabrikant et al. paper~\cite{fabrikant2011something} it is describe a possible issue related to timers and path exploration.
122

  
123
This issue happens when there are multipaths in the propagation graph $G^P$ and a specific timer allocation $T_{ij}$.
124
In the following we suppose that all nodes $i$ have $\tau(i,j,d)=0,\ \forall j\in N_i$ (ready to fire).
125

  
126
\textbf{Duplicate ADV.} Let $i\in V$ originate a route advertisement $r$ and let $j\in V$ receives it at certain time $t_0$.
127
$j$ will process the \ac{ADV} and propagate it at time $t_0+\epsilon$; if $j$ finds more advantageous, given the new advertisement, to reach $d$ through another node, let's say $z\in V$, it will implement that in its routing table and will propagate the \ac{ADV} with the modified $\xi$.
128
However, if $z$ also receives the \ac{ADV} from $i$ at $t_0$, it has to change its routing table and propagate the \ac{ADV} accordingly but, if it does that at time greater than $t_0+\epsilon$, it happens that $j$ indeed receives the updated route from $z$ which may change again its routing table but it cannot propagate the new information as $\tau(j,k,d)>0,\ \forall k\in N_j$.
129

  
130
This behaviour makes $j$ send two \acp{ADV} for the same destination $d$, the first of which was computed on outdated information.
131
Worse than that, the updated \ac{ADV} cannot be propagated before $T_{ik}$ seconds to the neighbours $k\in N_j$.
132

  
133
\textbf{Duplicate duplication.} Suppose that $k\in N_j$ receives the \ac{ADV} at time $t_0+\epsilon$.
134
$k$ generates hence an \ac{ADV} and propagates the wrong information. 
135
The duplication can happen again and $k$ receives at $t_1$ an \ac{ADV} from a node $h$ in a path from $k$ and $j$.
136
So $k$ has to wait $\tau(j,x,d)$ seconds before sending the updated information to its neighbour $x\in N_k$.
137
Now, if $T_{kx} > T_{jk}$ then $k$ will receive the update from $j$ and send only one duplicate \ac{ADV}.
138
But, if $T_{kx} < T_{jk}$ it means that the forementioned step can be repeated twice and $k$ will send a total of 4 \ac{ADV} messages.
139

  
140
\textbf{Exponential explosion.} In general and in presence of multipaths, if $p_{ij}=(i,k_0),\dots,(k_l,j)$ is a propagation path (a path in $G^P$), then the number of \ac{ADV} messages (and route oscillation) is exponential on the overall number of decreasing timers on the path.
141

  
142
\section{Solution sketch}
143
Ideally, we would like to have increasing $T_{ij}$ along the propagation paths in $G^P$.
144
Because of path aggregation\footnote{It is an important feature but Bird does not perform it.}, $\xi$ does not describe the exact path but just a sequence of node subsets.
145
Hence, it is difficult for a node to select its timer wrt its neighbours.
146

  
147
Instead we can use \ac{MiCe} to infer the position of a node\footnote{We would need the position on the path but \ac{MiCe} is computed graph-wide.} and tune the timers $T_{ij}$ accordingly.
148

  
149
Since, two nodes $j_1,j_2$ at the periphery, hence with similar centrality, could be one in $G^{(1)}$ and the other in $G^{(3)}$ respectively, we need to consider their case separately.
150

  
151
Considering a graph-wide maximum timer $T=30$ and \ac{MiCe} centrality $c_i\in [0,1]$ for node $i$, one strategy could be to assign:
152
\begin{itemize}
153
	\item $T_{ij}=\frac{Tc_i}{2},\ \forall i\in V^{(1)}$
154
	\item $T_{ij}=\frac{T}{2},\ \forall i\in V^k$
155
	\item $T_{ij}=\frac{T(1-c_i)}{2}+\frac{T}{2}=\frac{T(2-c_i)}{2},\ \forall i\in V^{(3)}$
156
\end{itemize}
157

  
158
\textbf{Note:} we are not sure about \ac{MiCe} normalization in the interval $[0,1]$ applicability.
159

  
160
\bibliographystyle{IEEEtran}
161
\bibliography{biblio}
162

  
163
\end{document}
bgp_model/biblio.bib
1
@ARTICLE{elmokashfi2010scalability,
2
author={A. {Elmokashfi} and A. {Kvalbein} and C. {Dovrolis}},
3
journal={IEEE Journal on Selected Areas in Communications},
4
title={On the Scalability of BGP: The Role of Topology Growth},
5
year={2010},
6
volume={28},
7
number={8},
8
pages={1250-1261},
9
keywords={Internet;routing protocols;telecommunication network reliability;telecommunication network topology;BGP scalability;topology growth;BGP routing;Internet;multihoming;peering;routing protocol;Peer to peer computing;Topology;Routing;Internet topology;Scalability;Network topology;Internetworking;Routing;Topology;BGP},
10
doi={10.1109/JSAC.2010.101003},
11
ISSN={0733-8716},
12
month={October},}
13

  
14
@article{fabrikant2011something,
15
  title={There's something about MRAI: Timing diversity can exponentially worsen BGP convergence},
16
  author={Alex Fabrikant and Umar Syed and Jennifer Rexford},
17
  journal={2011 Proceedings IEEE INFOCOM},
18
  year={2011},
19
  pages={2975-2983}
20
}
specs/bgp_graphml.tex
1
\documentclass[twocolumn, 10pt]{article}
2
\usepackage{acronym} 
3
\usepackage{tikz} 
4
\usepackage{cleveref} 
5
\usetikzlibrary{calc,shapes,arrows}
6

  
7
\title{BGP graphml specification}
8
\begin{document}
9
\maketitle
10

  
11
\newcommand{\mandatory}{\textcolor{red}}
12

  
13
\acrodef{AS}{autonomous system}
14
\acrodef{ADV}{advertisement}
15

  
16
This document describes the graphml representation of a BGP network.
17
Graphml is a standard for representing graphs enabling attribute embedding.
18

  
19
A BGP network is a simple undirected graph $G(V, E)$ (no self-loops, no multi-edges) modeling BGP \ac{ADV} communication.
20

  
21

  
22
\begin{itemize}
23
\item \textbf{A node} is an eBGP speaker node representing an \ac{AS};
24
\item \textbf{An edge} represents a BGP \ac{ADV} communication link (hence undirected).
25
\end{itemize}
26

  
27
\section{Attributes}
28

  
29
We indicate with \textit{destination\_string} a concatenation of IP subnets, in the form of: {\footnotesize{\verb[<ip_address>/<mask_bits>,<ip_address>/<mask_bits>,...[ }}
30
\\
31

  
32
Node attributes (mandatory in red):
33
\begin{itemize}
34
\item \mandatory{type}: \{T,M,C,CP\}, which respectively indicates if a node is tier-1, mid-level, customer or content provider;
35
\item destinations: [destination\_string], it indicates the destinations exposed by the node.
36
\end{itemize}
37

  
38
Edge attributes (mandatory in red):
39
\begin{itemize}
40
\item \mandatory{type}: \{transit, peer\}, which for a couple $(i, j)\in V\times V$, indicates whether there is a customer-provider or peer-to-peer relationship among $i$ and $j$.
41
\item \mandatory{customer}: $z\in V$, in case the edge $(i,j)$ type is \textit{transit}, $z$ identifies the customer among $i,j$ (the relationship provider is hence the other node). It is undefined in the case the edge type is not \textit{transit};
42
\item node\_a: $i\in (i, j)$;
43
\item node\_b: $j\in (i, j)$;
44
\item preference\_a: [integer], it indicates preference of node\_a to pick edge $(i,j)$ among node\_a neighbouring links;
45
\item preference\_b: [integer], it indicates preference of node\_b to pick edge $(i,j)$ among node\_b neighbouring links;
46

  
47
\item mrai: [interval] in seconds, it indicates the per link $(i,j)$ minimum route \ac{ADV} interval. Note, node-based mrai setup (Fabrikant gadget) can be described by setting all its mrai edge intervals to the same value.
48
\end{itemize}
49

  
50
\section{Example}
51
The following is a basic graphml example with mandatory attributes only.
52

  
53
\footnotesize
54
\begin{verbatim}
55
<?xml version='1.0' encoding='utf-8'?>
56
<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
57
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
58
xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
59
http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
60
<key attr.name="type" attr.type="string" for="edge" id="d2"/>
61
<key attr.name="customer" attr.type="long" for="edge" id="d1"/>
62
<key attr.name="type" attr.type="string" for="node" id="d0"/>
63
<graph edgedefault="undirected">
64
<node id="1">
65
  <data key="d0">T</data>
66
</node>
67
<node id="2">
68
  <data key="d0">M</data>
69
</node>
70
<node id="3">
71
  <data key="d0">C</data>
72
</node>
73
<node id="4">
74
  <data key="d0">CP</data>
75
</node>
76
<edge source="2" target="1">
77
  <data key="d2">transit</data>
78
  <data key="d1">2</data>
79
</edge>
80
<edge source="3" target="2">
81
  <data key="d2">transit</data>
82
  <data key="d1">3</data>
83
</edge>
84
<edge source="4" target="2">
85
  <data key="d2">transit</data>
86
  <data key="d1">4</data>
87
</edge>
88
<edge source="3" target="4">
89
  <data key="d2">peer</data>
90
</edge>
91

  
92
</graph></graphml>
93
\end{verbatim}
94

  
95
%\bibliographystyle{IEEEtran}
96
%\bibliography{biblio}
97

  
98
\end{document}

Also available in: Unified diff