# mobicen / plotterBCrealization.py @ revision 23c7ab1e
#
# Plots betweenness-centrality (BC) realizations and core-persistence
# statistics from per-node stats<ID>.csv files.
# Usage: python plotterBCrealization.py <folder> [interval]
 1 ```import code # code.interact(local=dict(globals(), **locals())) ``` ```import operator ``` ```from scipy import stats ``` ```from collections import defaultdict ``` ```import os ``` ```import sys ``` ```from statsmodels.graphics.tsaplots import plot_acf, acf ``` ```from matplotlib.colors import LinearSegmentedColormap ``` ```import pandas as pd ``` ```from pprint import pprint ``` ```import numpy as np ``` ```import glob ``` ```import matplotlib ``` ```# matplotlib.use('Agg') ``` ```import matplotlib.pyplot as plt ``` ```import seaborn as sns ``` ```sns.set() ``` ```# https://en.wikipedia.org/wiki/Sample_entropy ``` ```def SampEn(U, m, r): ``` ``` """Compute Sample entropy""" ``` ``` def _maxdist(x_i, x_j): ``` ``` return max([abs(ua - va) for ua, va in zip(x_i, x_j)]) ``` ``` def _phi(m): ``` ``` x = [[U[j] for j in range(i, i + m - 1 + 1)] for i in range(N - m + 1)] ``` ``` C = [len([1 for j in range(len(x)) if i != j and _maxdist(x[i], x[j]) <= r]) for i in range(len(x))] ``` ``` return sum(C) ``` ``` N = len(U) ``` ``` return -np.log(_phi(m+1) / _phi(m)) ``` ```ss = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/sunspotarea.csv') ``` ```a10 = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/a10.csv') ``` ```rand_small = np.random.randint(0, 100, size=36) ``` ```rand_big = np.random.randint(0, 100, size=136) ``` ```print(SampEn(ss.value, m=2, r=0.2*np.std(ss.value))) # 0.78 ``` ```print(SampEn(a10.value, m=2, r=0.2*np.std(a10.value))) # 0.41 ``` ```print(SampEn(rand_small, m=2, r=0.2*np.std(rand_small))) # 1.79 ``` ```print(SampEn(rand_big, m=2, r=0.2*np.std(rand_big))) # 2.42 ``` ```folder = sys.argv[1] ``` ```interval = 100 ``` ```if len(sys.argv) > 2: ``` ``` interval = int(sys.argv[2]) ``` ```nick = folder.split('/')[-2].split('_')[0]+"_" ``` ```os.chdir(folder) ``` ```dfn = pd.DataFrame() # columns=node, rows= BC at row-index time-instant ``` ```print "Loading data from", folder, "..." 
# Each ./stats<ID>.csv holds one node's time series (columns: time, BC);
# concatenate them into dfn, one column per node.
for snap in sorted(glob.glob('./stats*')):
    # print snap
    # Extract the numeric node id from "./stats<ID>.csv".
    # (The original used str.strip, which removes a *character set*, not a
    # prefix/suffix, and only worked by accident for digit-only ids.)
    node_id = int(os.path.basename(snap).replace('stats', '').replace('.csv', ''))
    df = pd.read_csv(snap, names=['time', str(node_id)], skiprows=1)
    dfn = pd.concat([dfn, df[str(node_id)]], axis=1)
# Debug REPL left in by the author; disabled so the script runs unattended.
# code.interact(local=dict(globals(), **locals()))
print("Processing and plotting...")
if not os.path.exists("plots"+nick):
    os.makedirs("plots"+nick)
os.chdir("plots"+nick)
nodes = range(len(dfn.columns))
initialCentrality = {}
for n in nodes:
    initialCentrality[n] = dfn.iloc[0][n]
# Realization of the first node: y[t] = BC of node 0 at time t.
n0 = dfn.iloc[:, 0]
y = n0.values
'''
#Batch Means of ACF
print "Bacth Means of ACF..."
nlg=15
memo=50
batMeans = []
for i in range(0, len(y)-memo, memo):
    bacf = acf(y[i:i+memo], nlags=nlg)
    batMeans.append(np.mean(bacf))

pd.Series(batMeans).plot()
plt.ylabel("Mean ACF for lags [0...15]")
plt.xlabel("Batches of 50 samples")
plt.savefig(nick+"batchMeansACF.pdf", format='pdf')
plt.clf()'''
# BC realization of a random node
print("BC realization of a random node...")
# Plot the BC realization of node 0 in consecutive windows of `interval` s.
if not os.path.exists("BCreal"):
    os.makedirs("BCreal")
os.chdir("BCreal")
for i in range(0, len(y)-interval, interval):
    plt.plot(range(i, i+interval, 1), y[i:i+interval])
    plt.ylim(min(y), max(y))  # common y-scale so windows are comparable
    plt.xlabel("Time [s]")
    plt.ylabel("Betweenness Centrality (NON-norm)")
    plt.savefig(nick+"BCrealization["+str(i) +
                "-"+str(i+interval)+"].pdf", format='pdf')
    plt.clf()
os.chdir("./..")
'''
# BC Heatmaps for consecutive time-frames
print "BC Heatmaps for consecutive time-frames"
if not os.path.exists("TimeFramesHeatmaps"):
    os.makedirs("TimeFramesHeatmaps")

os.chdir("TimeFramesHeatmaps")
sns.set(font_scale=0.5)

for i in range(0, len(y)-interval, interval):
    xticks = range(i, i+interval)
    #yticks=range(0, len(dfn),5)
    sns.heatmap(dfn.iloc[xticks].T, cmap="Spectral",
                xticklabels=xticks, cbar_kws={'label': 'BC'})
    #ax.set_xticks(range(i, i+interval))
    plt.xlabel("Time [sec]")
    plt.ylabel("Nodes")
    plt.yticks(rotation=0)
    plt.savefig(nick+"BCrealization["+str(i) +
                "-"+str(i+interval)+"].pdf", format='pdf')
    plt.clf()
os.chdir("./..")
sns.set(font_scale=1)
'''


def coreNodesAtTime(t, perc):
    """Return the top `perc`% of nodes by BC at time instant t.

    Returns a 3-tuple:
      coreDict  -- {column label: BC value} for the core nodes
      coreRank  -- {column label: rank}, 0 = highest BC
      coreNodes -- core node ids as ints, ordered by decreasing BC
    """
    BCd = dict(dfn.iloc[t])
    srtd_BC = sorted(BCd.items(), key=operator.itemgetter(1), reverse=True)
    upto = int(len(srtd_BC) * (perc/100.0))
    coreNodes = [int(e[0]) for e in srtd_BC[:upto]]
    coreDict = {k: v for k, v in srtd_BC[:upto]}
    coreRank = {}
    for i in range(upto):
        coreRank[srtd_BC[i][0]] = i
    return coreDict, coreRank, coreNodes


print("CoreResistence...")
'''dfCoreResist = pd.DataFrame()
for t in range(len(dfn.iloc[0])):
    coreT, coreRankT, coreNodes = coreNodesAtTime(t, 20)
    corePD = pd.DataFrame(coreNodes)
    dfCoreResist = pd.concat([dfCoreResist, corePD], axis=1)'''
# Hysteresis core membership: a node ENTERS the core when it reaches the
# top half of the top-20% (i.e. the top 10%), and LEAVES only when it
# drops out of the whole top-20%.
activeMap = defaultdict(bool)   # node -> currently in the core?
coreResistMap = [{}]            # one {node: bool} snapshot per time step
firstCore = coreNodesAtTime(0, 20)[2]
for n in nodes:
    flag = n in firstCore
    activeMap[n] = flag
    coreResistMap[0][n] = flag
print("\tComputing ResistMap...")
for t in range(1, len(dfn.iloc[:, 0])):
    coreNodes = coreNodesAtTime(t, 20)[2]
    old_Actives = [k for k, v in activeMap.items() if v]
    # code.interact(local=dict(globals(), **locals()))
    # Drop nodes that are no longer in the top 20%.
    for n in old_Actives:
        if n not in coreNodes:
            activeMap[n] = False
    # Admit newcomers that reached the top half of the core (top 10%).
    # `//` keeps the slice bound an int; Py2's `/` would be a float here.
    for n in coreNodes[:len(coreNodes) // 2]:
        activeMap[n] = True
    # Snapshot: for each active node record whether it is still in the
    # top 20% at this instant (inactive nodes are simply omitted).
    resistings = {}
    for n in nodes:
        if activeMap[n]:
            resistings[n] = n in coreNodes
    coreResistMap.append(resistings)
print("\tPlotting ResistMap...")
# Two-color map: white = not in core, blue = in core.
cmap1 = LinearSegmentedColormap.from_list('mycmap1', ['white', 'blue'], 2)
# Rows = nodes, columns = time. Snapshots after t=0 only list the active
# nodes, so missing entries become NaN; fill them with False — the
# original applymap(int) crashed on NaN.
resDF = pd.DataFrame(coreResistMap).T.fillna(False)
plt.ylabel("Nodes")
plt.xlabel("Time")
#sns.heatmap(resDF, cmap=cmap1, xticklabels=range(10000), yticklabels=range(650), cbar_kws={
#    'label': '\"Core Or Not\" (Blue or White)'})
small = pd.DataFrame(resDF.iloc[:, 0:1000])  # first 1000 time steps only
#code.interact(local=dict(globals(), **locals()))
sns.heatmap(small.applymap(int), cmap=cmap1, xticklabels=range(1000),
            yticklabels=range(len(small)), cbar_kws={
                'label': '\"Core Or Not\" (Blue or White)'})
plt.savefig(nick+"coreResistMap-EntryTOP10LeavingTOP20.pdf", format='pdf')
plt.clf()


def activeIntervals(v):
    """Return the lengths of the maximal runs of truthy values in v.

    NOTE: a run still open at the end of v is NOT reported — same as the
    original implementation, which only counted completed intervals.
    """
    retval = []
    current = 0
    for flag in v:
        if flag:
            current += 1
        elif current:
            retval.append(current)
            current = 0
    return retval


#code.interact(local=dict(globals(), **locals()))
print("Distribuzione tempo permanenza nel core...")
nodes2interval = {}
for n in nodes:
    nodes2interval[n] = activeIntervals(resDF.iloc[n])
# Pool every node's intervals into one sample.
allint = []
for e in nodes2interval.values():
    allint.extend(e)  # extend is O(len(e)); `allint = allint+e` was quadratic
#code.interact(local=dict(globals(), **locals()))
# `normed` was removed in matplotlib >= 3.1; `density` is the replacement.
pd.DataFrame(allint).hist(bins=50, density=True)
plt.xlabel("Intervals of Persistence in the core [sec]")
plt.ylabel("Normalized Frequency")
plt.savefig(
    nick+"PersistenceDistributionEntryTOP10LeavingTOP20.pdf", format='pdf')
plt.clf()
# `with` guarantees the stats file is closed even if describe() raises.
with open(nick + "stats.txt", 'w') as f:
    f.write(str(pd.DataFrame(allint).describe()))