Statistics
| Branch: | Revision:

mobicen / plotterBCrealization.py @ 9c01cdd6

History | View | Annotate | Download (5.5 KB)

1
import pandas as pd
2
from pprint import pprint
3
import numpy as np
4
import glob
5
import matplotlib
6
matplotlib.use('Agg')
7
import matplotlib.pyplot as plt
8
import seaborn as sns; sns.set()
9
from statsmodels.graphics.tsaplots import plot_acf, acf
10
import sys
11
import os
12
from collections import defaultdict
13
from scipy import stats
14
import operator
15
import code  # code.interact(local=dict(globals(), **locals()))
16

    
17
# --- Command-line arguments -------------------------------------------------
# argv[1]: folder that holds the BC* snapshot files (required)
# argv[2]: number of samples per plotted time window (optional, default 100)
if len(sys.argv) < 2:
    sys.exit("Usage: %s <folder> [interval]" % sys.argv[0])
folder = sys.argv[1]
interval = int(sys.argv[2]) if len(sys.argv) > 2 else 100
if interval <= 0:
    # a zero step would make the range() calls below raise, a negative one
    # would silently produce no plots
    sys.exit("interval must be a positive integer")

# Prefix used for the output directory and every output file: the part of the
# folder's last path component that precedes its first underscore.
# normpath() removes a trailing slash, so "run_x/" and "run_x" both give
# "run_"; the previous split('/')[-2] only worked with the trailing slash.
nick = os.path.basename(os.path.normpath(folder)).split('_')[0] + "_"
22

    
23

    
24
os.chdir(folder)

# dfn: rows = nodes, columns = BC at the time instant given by the column
# position (one column per snapshot file, in sorted file-name order).
print("Loading data from %s ..." % folder)
snapshots = sorted(glob.glob('./BC*'))
if not snapshots:
    sys.exit("No BC* snapshot files found in %s" % folder)
# Read every snapshot first and concatenate once: growing the frame inside
# the loop copies all previously loaded columns on each iteration.
dfn = pd.concat(
    [pd.read_csv(snap, names=['BC'], skiprows=1) for snap in snapshots],
    axis=1)

print("Processing and plotting...")
plotDir = "plots" + nick
if not os.path.exists(plotDir):
    os.makedirs(plotDir)
# All output below is written relative to this directory
os.chdir(plotDir)

nodes = dfn.index.tolist()
# BC of every node in the first snapshot.
# NOTE(review): n is an index label used as a row *position*; this assumes
# the labels are 0..N-1 in order -- confirm against the snapshot format.
# Every column is named 'BC', so the column is addressed by position too.
initialCentrality = {n: dfn.iloc[n, 0] for n in nodes}
# Time series of the node stored in row 0, used by the per-window plots
n0 = dfn.iloc[0]
y = n0.values
44

    
45
# Batch means of the ACF -- disabled experiment.
# It used to be parked in a bare triple-quoted string; it is kept as ordinary
# comments so it cannot be mistaken for a docstring or for live code.
#
# print("Batch Means of ACF...")
# nlg = 15
# memo = 50
# batMeans = []
# for i in range(0, len(y)-memo, memo):
#     bacf = acf(y[i:i+memo], nlags=nlg)
#     batMeans.append(np.mean(bacf))
#
# pd.Series(batMeans).plot()
# plt.ylabel("Mean ACF for lags [0...15]")
# plt.xlabel("Batches of 50 samples")
# plt.savefig(nick+"batchMeansACF.pdf", format='pdf')
# plt.clf()
60

    
61
# BC realization of a single node, one PDF per window of `interval` samples.
# The series plotted is y, i.e. the row-0 node of dfn.
print("BC realization of a random node...")
if not os.path.exists("BCreal"):
    os.makedirs("BCreal")
os.chdir("BCreal")

# Same y-range on every page so that consecutive windows are comparable;
# computed once because it does not change between windows.
yLow, yHigh = min(y), max(y)
# "+ 1" keeps the final window when len(y) is an exact multiple of interval
# (the previous bound len(y)-interval skipped it).
for start in range(0, len(y) - interval + 1, interval):
    stop = start + interval
    plt.plot(range(start, stop), y[start:stop])
    plt.ylim(yLow, yHigh)
    plt.xlabel("Time [s]")
    plt.ylabel("Betweenness Centrality (NON-norm)")
    plt.savefig(nick+"BCrealization["+str(start)+"-"+str(stop)+"].pdf", format='pdf')
    plt.clf()
os.chdir("./..")
75

    
76
# BC heatmaps (all nodes x time) for consecutive time-frames of `interval`
# samples, one PDF per frame.
print("BC Heatmaps for consecutive time-frames")
if not os.path.exists("TimeFramesHeatmaps"):
    os.makedirs("TimeFramesHeatmaps")
os.chdir("TimeFramesHeatmaps")
sns.set(font_scale=0.5)  # smaller font while the heatmaps are drawn
# "+ 1" keeps the final frame when len(y) is an exact multiple of interval
# (the previous bound len(y)-interval skipped it).
for start in range(0, len(y) - interval + 1, interval):
    stop = start + interval
    # A real list works both as positional indexer and as tick labels on
    # Python 2 and Python 3 alike.
    xticks = list(range(start, stop))
    sns.heatmap(dfn.iloc[:, xticks], cmap="Spectral", xticklabels=xticks, cbar_kws={'label': 'BC'})
    plt.xlabel("Time [sec]")
    plt.ylabel("Nodes")
    plt.yticks(rotation=0)
    plt.savefig(nick+"BCrealization["+str(start)+"-"+str(stop)+"].pdf", format='pdf')
    plt.clf()
os.chdir("./..")
sns.set(font_scale=1)  # restore the default font scale for later plots
94

    
95

    
96
def coreNodesAtTime(t, perc, df=None):
    """Return the top ``perc`` percent of nodes by BC at time index ``t``.

    Args:
        t: positional index of the column (time instant) to rank.
        perc: percentage of the nodes to keep. The resulting count is
            truncated with int() and clamped to [0, number of nodes].
        df: optional DataFrame to rank (one row per node, one column per
            time instant). When omitted, the module-level ``dfn`` is used.

    Returns:
        A tuple ``(coreDict, coreRank, coreNodes)`` where ``coreDict`` maps
        node -> BC value, ``coreRank`` maps node -> 0-based rank (0 is the
        highest BC) and ``coreNodes`` lists the nodes by decreasing BC.
    """
    if df is None:
        df = dfn
    BCd = dict(df.iloc[:, t])
    # sorted() is stable: nodes with equal BC keep the dict's iteration order
    ranked = sorted(BCd.items(), key=operator.itemgetter(1), reverse=True)
    upto = int(len(ranked) * (perc / 100.0))
    # Clamp so that an out-of-range percentage cannot index past the ranking
    upto = max(0, min(len(ranked), upto))
    top = ranked[:upto]
    coreNodes = [node for node, _ in top]
    coreDict = dict(top)
    coreRank = {node: rank for rank, node in enumerate(coreNodes)}
    return coreDict, coreRank, coreNodes
106

    
107
print("CoreResistence...")
# Disabled alternative, previously parked in a bare string literal:
# dfCoreResist = pd.DataFrame()
# for t in range(len(dfn.iloc[0])):
#     coreT, coreRankT, coreNodes = coreNodesAtTime(t, 20)
#     corePD = pd.DataFrame(coreNodes)
#     dfCoreResist = pd.concat([dfCoreResist, corePD], axis=1)

# Core membership over time with two thresholds: a node becomes active when
# it is in the upper half of the top 20% and stays active until it drops out
# of the top 20%. At time 0 the whole top 20% is active.
# activeMap:     node -> currently active?
# coreResistMap: one {node: active?} dict per time instant
activeMap = defaultdict(bool)
firstCore = coreNodesAtTime(0, 20)[2]
firstCoreSet = set(firstCore)  # O(1) membership tests
for n in nodes:
    activeMap[n] = n in firstCoreSet
coreResistMap = [{n: activeMap[n] for n in nodes}]

for t in range(1, dfn.shape[1]):
    coreNodes = coreNodesAtTime(t, 20)[2]
    coreSet = set(coreNodes)
    # remove whoever is no longer in the top 20%
    for n in [k for k, v in activeMap.items() if v]:
        if n not in coreSet:
            activeMap[n] = False
    # add the newcomers that are in the upper half of the top 20%
    # (// keeps the slice bound an integer on Python 3 as well)
    for n in coreNodes[:len(coreNodes) // 2]:
        activeMap[n] = True
    # update coreResistMap. After the two steps above every active node is
    # in coreNodes, so the snapshot is simply the current activeMap state.
    coreResistMap.append({n: activeMap[n] for n in nodes})
140

    
141
from matplotlib.colors import LinearSegmentedColormap

# Two-colour map for the boolean core flag (white -> blue)
cmap1 = LinearSegmentedColormap.from_list('mycmap1', ['white', 'blue'], 2)
# coreResistMap has one dict per time instant; transposing gives
# rows = nodes, columns = time instants.
resDF = pd.DataFrame(coreResistMap).transpose()
# A real list so the tick labels behave the same on Python 2 and Python 3
xticks = list(range(resDF.shape[1]))
sns.heatmap(resDF, cmap=cmap1, xticklabels=xticks, cbar_kws={'label': '\"Core Or Not\" (Blue or White)'})

plt.ylabel("Nodes")
plt.xlabel("Time")
plt.savefig(nick+"coreResistMap-EntryTOP10LeavingTOP20.pdf", format='pdf')
plt.clf()
152

    
153
def activeIntervals(v, includeTrailing=False):
    """Return the lengths of the consecutive runs of truthy values in ``v``.

    Args:
        v: iterable of flags, one per time instant; a truthy value means
            "active", a falsy value ends the current run.
        includeTrailing: when False (the default, and the historical
            behaviour) a run that is still open when ``v`` ends is dropped,
            because its real length is unknown. Pass True to report it too.

    Returns:
        A list with the length of every run, in order of appearance.
    """
    retval = []
    current = 0  # length of the run currently being counted (0 = no run)
    for flag in v:
        if flag:
            current += 1
        elif current:
            # first inactive sample after a run: the run is complete
            retval.append(current)
            current = 0
    if includeTrailing and current:
        retval.append(current)
    return retval
172

    
173

    
174
# Lengths of the completed core-persistence runs of every node
nodes2interval = {n: activeIntervals(resDF.iloc[n]) for n in nodes}

# Pool the run lengths of all nodes into one flat sample
allint = []
for runs in nodes2interval.values():
    allint.extend(runs)

if allint:
    # 'density' is the current name of the removed 'normed' keyword
    pd.DataFrame(allint).hist(bins=50, density=True)
    plt.xlabel("Intervals of Persistence in the core [sec]")
    plt.ylabel("Normalized Frequency")
    plt.savefig(nick+"PersistenceDistributionEntryTOP10LeavingTOP20.pdf", format='pdf')
    plt.clf()

    # Summary statistics of the pooled sample; 'with' closes the file even
    # if describe() or write() raises.
    with open(nick + "stats.txt", 'w') as f:
        f.write(str(pd.DataFrame(allint).describe()))
else:
    # hist() and describe() both fail on an empty sample
    print("No completed core-persistence intervals; skipping histogram and stats.")