Statistics
| Branch: | Revision:

mobicen / plotterBCrealization.py @ 1ef4948a

History | View | Annotate | Download (5.49 KB)

1
import pandas as pd
2
from pprint import pprint
3
import numpy as np
4
import glob
5
from matplotlib import pyplot
6
import seaborn as sns; sns.set()
7
from statsmodels.graphics.tsaplots import plot_acf, acf
8
import sys
9
import os
10
from collections import defaultdict
11
import matplotlib.pyplot as plt
12
from scipy import stats
13
import operator
14
import code  # code.interact(local=dict(globals(), **locals()))
15

    
16
# ---------------------------------------------------------------------------
# Command line: <folder> [interval]
#   folder   directory holding one BC* snapshot file per time instant
#   interval number of time instants per plot window (default 100)
# ---------------------------------------------------------------------------
if len(sys.argv) < 2:
    sys.exit("usage: %s <folder> [interval]" % sys.argv[0])
folder = sys.argv[1]
interval = 100
if len(sys.argv) > 2:
    interval = int(sys.argv[2])
# Prefix for every output file: the part of the folder's own name that comes
# before the first underscore. normpath/basename gives the same answer whether
# or not the argument carries a trailing slash.
nick = os.path.basename(os.path.normpath(folder)).split('_')[0] + "_"

os.chdir(folder)

# dfn: rows = nodes, columns = BC value at the time instant given by the
# column position (snapshots are taken in sorted file-name order).
print("Loading data from %s ..." % folder)
snapshots = [pd.read_csv(snap, names=['BC'], skiprows=1)
             for snap in sorted(glob.glob('./BC*'))]
if not snapshots:
    sys.exit("no BC* snapshot files found in %s" % folder)
# A single concat avoids re-copying the growing frame once per snapshot.
dfn = pd.concat(snapshots, axis=1)

print("Processing and plotting...")
plots_dir = "plots" + nick
if not os.path.exists(plots_dir):
    os.makedirs(plots_dir)
# All later output paths are relative to this directory.
os.chdir(plots_dir)

nodes = dfn.index.tolist()
# BC of every node at the first time instant. Every column is labelled 'BC',
# so the value is addressed purely by position.
initialCentrality = {n: dfn.iloc[n, 0] for n in nodes}
# Time series of the first node, used for the realization plots.
n0 = dfn.iloc[0]
y = n0.values
43

    
44
'''
#Batch Means of ACF
print "Batch Means of ACF..."
nlg=15
memo=50
batMeans = []
for i in range(0, len(y)-memo, memo):
    bacf = acf(y[i:i+memo], nlags=nlg)
    batMeans.append(np.mean(bacf))

pd.Series(batMeans).plot()
plt.ylabel("Mean ACF for lags [0...15]")
plt.xlabel("Batches of 50 samples")
plt.savefig(nick+"batchMeansACF.pdf", format='pdf')
plt.clf()'''
59

    
60
# BC realization of one node (the series in `y`), one PDF per window of
# `interval` consecutive time instants, written under ./BCreal.
print("BC realization of a random node...")
if not os.path.exists("BCreal"):
    os.makedirs("BCreal")

# Same y-range on every window so the plots are directly comparable; the
# extremes do not change between windows, so compute them once.
if len(y):
    y_low, y_high = min(y), max(y)

# "+ 1" keeps the last full window when len(y) is an exact multiple of
# `interval`; a trailing partial window is still skipped.
for i in range(0, len(y) - interval + 1, interval):
    plt.plot(range(i, i + interval, 1), y[i:i + interval])
    plt.ylim(y_low, y_high)
    plt.xlabel("Time [s]")
    plt.ylabel("Betweenness Centrality (NON-norm)")
    out_name = nick + "BCrealization[" + str(i) + "-" + str(i + interval) + "].pdf"
    # Writing through a joined path leaves the working directory untouched.
    plt.savefig(os.path.join("BCreal", out_name), format='pdf')
    plt.clf()
74

    
75
# BC heatmaps (nodes x time) for consecutive windows of `interval` time
# instants, one PDF per window, written under ./TimeFramesHeatmaps.
print("BC Heatmaps for consecutive time-frames")
if not os.path.exists("TimeFramesHeatmaps"):
    os.makedirs("TimeFramesHeatmaps")
# Smaller font so the per-instant tick labels stay legible.
sns.set(font_scale=0.5)
# "+ 1" keeps the last full window when len(y) is an exact multiple of
# `interval`; a trailing partial window is still skipped.
for i in range(0, len(y) - interval + 1, interval):
    # Concrete list: tick labels are the absolute time instants of the window.
    xticks = list(range(i, i + interval))
    sns.heatmap(dfn.iloc[:, i:i + interval], cmap="Spectral",
                xticklabels=xticks, cbar_kws={'label': 'BC'})
    plt.xlabel("Time [sec]")
    plt.ylabel("Nodes")
    plt.yticks(rotation=0)
    out_name = nick + "BCrealization[" + str(i) + "-" + str(i + interval) + "].pdf"
    # Writing through a joined path leaves the working directory untouched.
    plt.savefig(os.path.join("TimeFramesHeatmaps", out_name), format='pdf')
    plt.clf()
# Restore the default font scale for the plots that follow.
sns.set(font_scale=1)
93

    
94

    
95
def coreNodesAtTime(t, perc, data=None):
    """Return the top ``perc`` percent of nodes by BC at time instant ``t``.

    Args:
        t: positional column index of the time instant.
        perc: percentage (0-100) of nodes to keep; the count is truncated
            towards zero.
        data: frame with one row per node and one column per time instant.
            Defaults to the module-level ``dfn``.

    Returns:
        A tuple ``(coreDict, coreRank, coreNodes)``:
        ``coreDict`` maps node -> BC value, ``coreRank`` maps node -> rank
        (0 is the highest BC) and ``coreNodes`` lists the nodes by
        decreasing BC.
    """
    if data is None:
        data = dfn
    # Sorting the (node, BC) pairs straight from the column keeps the sort
    # stable: nodes with equal BC stay in index order instead of depending on
    # dict iteration order.
    ranked = sorted(data.iloc[:, t].items(),
                    key=operator.itemgetter(1), reverse=True)
    upto = int(len(ranked) * (perc / 100.0))
    core = ranked[:upto]
    coreDict = dict(core)
    coreRank = {node: rank for rank, (node, _) in enumerate(core)}
    coreNodes = [node for node, _ in core]
    return coreDict, coreRank, coreNodes
105

    
106
# Core membership with hysteresis: a node ENTERS the core when it reaches the
# upper half of the top 20% (i.e. the top 10%) and LEAVES it only when it
# drops out of the top 20% altogether.
print("CoreResistence...")
# Disabled alternative, kept for reference: one column of core node ids per
# time instant.
# dfCoreResist = pd.DataFrame()
# for t in range(len(dfn.iloc[0])):
#     coreT, coreRankT, coreNodes = coreNodesAtTime(t, 20)
#     corePD = pd.DataFrame(coreNodes)
#     dfCoreResist = pd.concat([dfCoreResist, corePD], axis=1)

# activeMap[n] is True while node n is currently counted as a core member.
activeMap = defaultdict(bool)
# At t = 0 every node of the top 20% starts as a member.
firstCore = coreNodesAtTime(0, 20)[2]
first_core_set = set(firstCore)
for n in nodes:
    activeMap[n] = n in first_core_set
# coreResistMap[t][n] is the membership flag of node n at time instant t.
coreResistMap = [{n: activeMap[n] for n in nodes}]

for t in range(1, dfn.shape[1]):
    coreNodes = coreNodesAtTime(t, 20)[2]
    core_set = set(coreNodes)  # O(1) membership tests
    # Remove members that are no longer in the top 20%.
    for n in nodes:
        if activeMap[n] and n not in core_set:
            activeMap[n] = False
    # Admit newcomers found in the upper half of the core. Floor division
    # keeps the slice bound an integer under true division as well.
    for n in coreNodes[:len(coreNodes) // 2]:
        activeMap[n] = True
    # After the two passes above every active node is in the current top
    # 20%, so the membership flag is exactly the resistance flag.
    coreResistMap.append({n: activeMap[n] for n in nodes})
139

    
140
from matplotlib.colors import LinearSegmentedColormap

# Two-colour map for the boolean membership flags: white = not in the core,
# blue = in the core.
cmap1 = LinearSegmentedColormap.from_list('mycmap1', ['white', 'blue'], 2)
# coreResistMap is a list (time) of dicts (node -> flag); after the transpose
# the rows are nodes and the columns are time instants.
resDF = pd.DataFrame(coreResistMap).transpose()
# One tick label per time instant, as a concrete list.
xticks = list(range(0, len(resDF.iloc[0]), 1))
sns.heatmap(resDF, cmap=cmap1, xticklabels=xticks,
            cbar_kws={'label': '\"Core Or Not\" (Blue or White)'})

plt.ylabel("Nodes")
plt.xlabel("Time")
plt.savefig(nick+"coreResistMap-EntryTOP10LeavingTOP20.pdf", format='pdf')
plt.clf()
151

    
152
def activeIntervals(v, include_trailing=True):
    """Return the lengths of the consecutive runs of truthy values in ``v``.

    Args:
        v: iterable of flags, one per time instant.
        include_trailing: when True (default) a run that is still active at
            the end of ``v`` is reported as well; otherwise such a run is
            dropped. Pass False to count only runs that are closed by a
            falsy value.

    Returns:
        List of run lengths, in order of occurrence.
    """
    intervals = []
    run = 0
    for flag in v:
        if flag:
            run += 1
        elif run:
            # A falsy value closes the run in progress.
            intervals.append(run)
            run = 0
    # Without this, a node that stays active up to the last sample would
    # contribute no interval at all.
    if run and include_trailing:
        intervals.append(run)
    return intervals
171

    
172

    
173
# Persistence of every node in the core: lengths of its consecutive runs of
# membership, read from its row of resDF.
nodes2interval = {n: activeIntervals(resDF.iloc[n]) for n in nodes}

# Pool the run lengths of all nodes; extend() avoids rebuilding the list
# once per node.
allint = []
for node_intervals in nodes2interval.values():
    allint.extend(node_intervals)

#code.interact(local=dict(globals(), **locals()))
# `density` is the supported spelling of the normalisation switch; the older
# `normed` keyword is no longer accepted by matplotlib.
pd.DataFrame(allint).hist(bins=50, density=True)
plt.xlabel("Intervals of Persistence in the core [sec]")
plt.ylabel("Normalized Frequency")
plt.savefig(nick+"PersistenceDistributionEntryTOP10LeavingTOP20.pdf", format='pdf')
plt.clf()

# Summary statistics (count, mean, std, quartiles) of the pooled intervals.
with open(nick + "stats.txt", 'w') as f:
    f.write(str(pd.DataFrame(allint).describe()))