%!TEX root = main.tex
\chapter{Computation of Betweenness Centrality} \label{sec:computation_betweenness}
Recall the definition of betweenness centrality:
\begin{equation*}
\label{eq:betweenness_centrality}
c_B(v) = \sum_{s \neq t \neq v \in V} \delta_{st}(v) = \sum_{s \neq t \neq v \in V} \frac{\sigma_{st}(v)}{\sigma_{st}}
\end{equation*}
Traditionally, just by looking at the equation, we normally divide the task of computing betweenness centrality into 2 steps:
\begin{itemize}
\item First we need to compute tables with length and number of shortest paths between all pairs of vertices in the network.
\item Second for each vertex $v$, we need to consider all the pair $s, t$, we use the tables calculated above to determine the fraction of shortest paths between $s, t$ that are passing through $v$. Summing all the fraction for every possible $s, t$ and we arrive at the $c_B(v)$. This way of summing is referred to by the term \emph{naive summation of pair-dependencies}
\end{itemize}
The chapter starts with presenting some important concept in \cref{sec:computation_important_concepts}. Then \cref{sec:computation_counting_shortest_paths} gives an overview on classic ways to find the shortest paths. And \cref{sec:computation_summing_pair_dependencies} introduces 2 ways to sum all the pair-dependencies: (i) the naive way, (ii) the faster approach to accummulate pair-dependencies gradually by \cite{Brandes01afaster}. Up until now, the approach in \cite{Brandes01afaster} is the state-of-the-art in calculating the exact betweenness centrality, and it is implemented in many standard graph libraries such as
\emph{networkx} \footnote{https://networkx.github.io/},
\emph{Boost Graph Library} \footnote{www.boost.org/libs/graph/doc/}
The \cref{sec:computation_summary} intends to summarize the result of two previous section with \cref{tab:complexity_of_algorithms} recaping the complexity of some algorithms used in determining betweenness centrality.
After having a throughout background, \cref{sec:computation_heuristic} presents the heuristic way to calculate also the exact betweenness centrality by \cite{PuzisZEDB12}. However, the complexity of the Heuristic Betweenness Centrality is not guaranteed to perform better than the algorithm of \cite{Brandes01afaster}, its performance depends on the topology.
\section{Important concepts} \label{sec:computation_important_concepts}
Define the set of \emph{predecessors} of a vertex $v$ on the shortest path from $s$ as:
\begin{equation} \label{eq:predecessor}
P_s(v) = \{ u \in V : {u, v} \in E, d(s, v) = d(s, u) + \omega(u, v) \}
\end{equation}
where $d(s,u)$ is the minimum length of any path connecting $s$ and $u$. And $\omega(u, v)$ is the weight of the edge ${u, v}$.
Given $P_s(v)$, then we have the following relation:
\begin{equation}
\sigma_{st} = \sum_{u \in P_s(v)}{\sigma_{su}}
\end{equation}
\section{Counting the number of shortest paths} \label{sec:computation_counting_shortest_paths}
Before we begin, there are 2 types of shortest paths problem. The first one is called Single Source Shortest Paths (SSSP), where we compute the shortest paths between one given source vertex $s$ to all other vertices. The second one is All-Pairs Shortest Paths (APSP) where we compute the shortest paths between all vertex pairs.
The SSSP is a classical problem in Graph Theory, and it is usually solved by Breadth-First Search (BFS) for unweighted graphs in $\mathcal{O}(m + n)$, and Dijkstra's algorithm for the weighted graphs in $\mathcal{O}(m + n \log n)$.
The APSP can be computed straightforwardly by applying algorithm for SSSP $n$ times, for all vertex $s \in V$. The resulting time complexity thus will be $\mathcal{O}(mn + n^2)$, and $\mathcal{O}(mn + n^2 \log n)$ respectively. In the sparse graph, it is a good practice to solving the SSSP multiple times. However, in non-sparse graph, where the number of edges is close to the maximal number of edges, i.e. $m \simeq n(n-1) \simeq n^2$, then it is better to use Floyd/Warshall algorithm with the complexity of $\mathcal{O}(n^3)$.
\textbf{XXX What is the input}: the source vertex $s$
\textbf{XXX What is the output?}:
\subsection{Breadth-First Search for unweighted graph}
I understood the algorithm, but how can I explain it to another person?
\subsection{Dijkstra's algorithm for weighted graph}
The Dijkstra's algorithm can be modified so that when given a source vertex $s$, the output will be the set of predecessors and the number of shortest paths from $s$ to other $t \neq s \in V$
\begin{algorithm}
\caption{Dijkstra's algorithm} \label{algo:dijkstra}
\begin{algorithmic}[1]
\Procedure{Dijkstra for SSSP with source vertex s}{}
\State $S \leftarrow$ empty stack
\State $P[w] \leftarrow$ empty list, $w \in V$
\State $\sigma[t] \leftarrow 0, t \in V$; $\sigma[s] \leftarrow 1$
\State $d[t] \leftarrow -1, t \in V$; $d[s] \leftarrow 0$
\State $Q \leftarrow $ empty queue
\State enqueue $s \rightarrow Q$
\While{$Q$ \emph{not empty}}
dequeue $v \leftarrow Q$
push $v \rightarrow S$
\EndWhile
\EndProcedure
\end{algorithmic}
\end{algorithm}
\section{Summing all the pair-dependencies} \label{sec:computation_summing_pair_dependencies}
\subsubsection{Naive ways}
\subsubsection{Gradually accummulation of pair-dependencies} \label{sec:computation_accumulation_pair_dependencies}
\section{Complexity of determining betweenness centrality as a whole} \label{sec:computation_summary}
\begin{table} [h]
\caption{Complexity for the 1st step: counting the number and length of shortest paths}
\label{tab:complexity_of_algorithms}
\begin{tabular}{l|l|l}
\toprule
Algorithm name & Unweighted graphs & Weighted graph \\
\midrule
Breadth-First Search & $\mathcal{O}(mn + n^2)$ & not applicable\\
Dijkstra & & $\mathcal{O}(mn + n^2 \log n)$ \\
Floyd/Warshall & & $\mathcal{O}(n^3)$ \\
\bottomrule
\end{tabular}
\end{table}
\begin{table} [h]
\caption{Complexity for the 2nd step: summing all pair-dependencies}
\label{tab:complexity_of_algorithms}
\begin{tabular}{l|l|l}
\toprule
Algorithm name & Complexity \\
\midrule
Naive summation & $\mathcal{O}(n^3)$ \\
\cite{Brandes01afaster} & $\mathcal{O}(mn)$\\
\bottomrule
\end{tabular}
\end{table}
\section{Heuristic algorithm for the exact betweenness centrality} \label{sec:computation_heuristic}