Statistics
| Branch: | Revision:

mobicen / plotterBCrealization.py @ 23c7ab1e

History | View | Annotate | Download (7.17 KB)

1
import code  # code.interact(local=dict(globals(), **locals()))
2
import operator
3
from scipy import stats
4
from collections import defaultdict
5
import os
6
import sys
7
from statsmodels.graphics.tsaplots import plot_acf, acf
8
from matplotlib.colors import LinearSegmentedColormap
9
import pandas as pd
10
from pprint import pprint
11
import numpy as np
12
import glob
13
import matplotlib
14
# matplotlib.use('Agg')
15
import matplotlib.pyplot as plt
16
import seaborn as sns
17
sns.set()
18

    
19

    
20

    
21
# https://en.wikipedia.org/wiki/Sample_entropy
22
def SampEn(U, m, r):
23
    """Compute Sample entropy"""
24
    def _maxdist(x_i, x_j):
25
        return max([abs(ua - va) for ua, va in zip(x_i, x_j)])
26

    
27
    def _phi(m):
28
        x = [[U[j] for j in range(i, i + m - 1 + 1)] for i in range(N - m + 1)]
29
        C = [len([1 for j in range(len(x)) if i != j and _maxdist(x[i], x[j]) <= r]) for i in range(len(x))]
30
        return sum(C)
31

    
32
    N = len(U)
33
    return -np.log(_phi(m+1) / _phi(m))
34

    
35
ss = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/sunspotarea.csv')
36
a10 = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/a10.csv')
37
rand_small = np.random.randint(0, 100, size=36)
38
rand_big = np.random.randint(0, 100, size=136)
39

    
40
print(SampEn(ss.value, m=2, r=0.2*np.std(ss.value)))      # 0.78
41
print(SampEn(a10.value, m=2, r=0.2*np.std(a10.value)))    # 0.41
42
print(SampEn(rand_small, m=2, r=0.2*np.std(rand_small)))  # 1.79
43
print(SampEn(rand_big, m=2, r=0.2*np.std(rand_big)))      # 2.42
44

    
45

    
46

    
47
folder = sys.argv[1]
48
interval = 100
49
if len(sys.argv) > 2:
50
    interval = int(sys.argv[2])
51
nick = folder.split('/')[-2].split('_')[0]+"_"
52

    
53

    
54
os.chdir(folder)
55

    
56
dfn = pd.DataFrame()  # columns=node, rows= BC at row-index time-instant
57
print "Loading data from", folder, "..."
58
for snap in sorted(glob.glob('./stats*')):
59
    # print snap
60
    node_id = int(snap.strip('.csv').strip('./stats'))
61
    df = pd.read_csv(snap, names=['time', str(node_id)], skiprows=1)
62
    dfn = pd.concat([dfn, df[str(node_id)]], axis=1)
63

    
64
code.interact(local=dict(globals(), **locals()))
65

    
66
print "Processing and plotting..."
67
if not os.path.exists("plots"+nick):
68
    os.makedirs("plots"+nick)
69
os.chdir("plots"+nick)
70

    
71
nodes = range(len(dfn.columns))
72
initialCentrality = {}
73
for n in nodes:
74
    initialCentrality[n] = dfn.iloc[0][n]
75

    
76
n0 = dfn.iloc[:, 0]
77
y = n0.values
78

    
79
'''
80
#Batch Means of ACF
81
print "Bacth Means of ACF..."
82
nlg=15
83
memo=50
84
batMeans = []
85
for i in range(0, len(y)-memo, memo):
86
    bacf = acf(y[i:i+memo], nlags=nlg)
87
    batMeans.append(np.mean(bacf))
88

89
pd.Series(batMeans).plot()
90
plt.ylabel("Mean ACF for lags [0...15]")
91
plt.xlabel("Batches of 50 samples")
92
plt.savefig(nick+"batchMeansACF.pdf", format='pdf')
93
plt.clf()'''
94

    
95
# BC realization of a random node
96
print "BC realization of a random node..."
97
if not os.path.exists("BCreal"):
98
    os.makedirs("BCreal")
99
os.chdir("BCreal")
100

    
101

    
102
for i in range(0, len(y)-interval, interval):
103
    plt.plot(range(i, i+interval, 1), y[i:i+interval])
104
    plt.ylim(min(y), max(y))
105
    plt.xlabel("Time [s]")
106
    plt.ylabel("Betweenness Centrality (NON-norm)")
107
    plt.savefig(nick+"BCrealization["+str(i) +
108
                "-"+str(i+interval)+"].pdf", format='pdf')
109
    plt.clf()
110
os.chdir("./..")
111

    
112

    
113
'''
114
# BC Heatmaps for consecutive time-frames
115
print "BC Heatmaps for consecutive time-frames"
116
if not os.path.exists("TimeFramesHeatmaps"):
117
    os.makedirs("TimeFramesHeatmaps")
118

119
os.chdir("TimeFramesHeatmaps")
120
sns.set(font_scale=0.5)
121

122
for i in range(0, len(y)-interval, interval):
123
    xticks = range(i, i+interval)
124
    #yticks=range(0, len(dfn),5)
125
    sns.heatmap(dfn.iloc[xticks].T, cmap="Spectral",
126
                xticklabels=xticks, cbar_kws={'label': 'BC'})
127
    #ax.set_xticks(range(i, i+interval))
128
    plt.xlabel("Time [sec]")
129
    plt.ylabel("Nodes")
130
    plt.yticks(rotation=0)
131
    plt.savefig(nick+"BCrealization["+str(i) +
132
                "-"+str(i+interval)+"].pdf", format='pdf')
133
    plt.clf()
134
os.chdir("./..")
135
sns.set(font_scale=1)
136
'''
137

    
138
def coreNodesAtTime(t, perc):
139
    BCd = dict(dfn.iloc[t])
140
    srtd_BC = sorted(BCd.items(), key=operator.itemgetter(1), reverse=True)
141
    upto = int(len(srtd_BC) * (perc/100.0))
142
    coreNodes = [int(e[0]) for e in srtd_BC[:upto]]
143
    coreDict = {k: v for k, v in srtd_BC[:upto]}
144
    coreRank = {}
145
    for i in range(upto):
146
        coreRank[srtd_BC[i][0]] = i
147
    return coreDict, coreRank, coreNodes
148

    
149

    
150
print "CoreResistence..."
151
'''dfCoreResist = pd.DataFrame()
152
for t in range(len(dfn.iloc[0])):
153
    coreT, coreRankT, coreNodes = coreNodesAtTime(t, 20)
154
    corePD = pd.DataFrame(coreNodes)
155
    dfCoreResist = pd.concat([dfCoreResist, corePD], axis=1)'''
156
activeMap = defaultdict(bool)
157
coreResistMap = [{}]
158
firstCore = coreNodesAtTime(0, 20)[2]
159
for n in nodes:
160
    flag = n in firstCore
161
    activeMap[n] = flag
162
    coreResistMap[0][n] = flag
163

    
164
print "\tComputing ResistMap..."
165
for t in range(1, len(dfn.iloc[:,0])):
166
    coreNodes = coreNodesAtTime(t, 20)[2]
167
    old_Actives = [k for k, v in activeMap.items() if v]
168
    #code.interact(local=dict(globals(), **locals()))
169
    # rimuovi chi non e' piu' nella top20
170
    for n in old_Actives:
171
        if n not in coreNodes:
172
            activeMap[n] = False
173
    # aggiungi i nuovi arrivatim chi si trova nella meta' alta
174
    for n in coreNodes[:len(coreNodes)/2]:
175
        activeMap[n] = True
176
    # aggiorna la coreResistMap
177
    resistings = {}
178
    for n in nodes:
179
        if activeMap[n]:
180
            if n in coreNodes:
181
                resistings[n] = True
182
        else:
183
            resistings[n] = False
184
    coreResistMap.append(resistings)
185

    
186
print "\tPlotting ResistMap..."
187
cmap1 = LinearSegmentedColormap.from_list('mycmap1', ['white', 'blue'], 2)
188
resDF = pd.DataFrame(coreResistMap).T
189

    
190
plt.ylabel("Nodes")
191
plt.xlabel("Time")
192
#sns.heatmap(resDF, cmap=cmap1, xticklabels=range(10000), yticklabels=range(650), cbar_kws={
193
#            'label': '\"Core Or Not\" (Blue or White)'})
194

    
195
small=pd.DataFrame(resDF.iloc[:,0:1000])
196
#code.interact(local=dict(globals(), **locals()))
197
sns.heatmap(small.applymap(int), cmap=cmap1, xticklabels=range(1000), yticklabels=range(len(small)), cbar_kws={
198
            'label': '\"Core Or Not\" (Blue or White)'})
199

    
200
plt.savefig(nick+"coreResistMap-EntryTOP10LeavingTOP20.pdf", format='pdf')
201
plt.clf()
202

    
203

    
204
def activeIntervals(v):
205
    retval = []
206
    current = 0
207
    prev = False
208
    for i in range(0, len(v)):
209
        if v[i]:
210
            if prev == False:
211
                current += 1
212
                prev = True
213
            elif prev == True:
214
                current += 1
215
        elif v[i] == False:
216
            if prev == False:
217
                continue
218
            elif prev == True:
219
                retval.append(current)
220
                current = 0
221
                prev = False
222
    return retval
223

    
224
#code.interact(local=dict(globals(), **locals()))
225
print "Distribuzione tempo permanenza nel core..."
226
nodes2interval = {}
227
for n in nodes:
228
    nodes2interval[n] = activeIntervals(resDF.iloc[n])
229

    
230
allint = []
231
for e in nodes2interval.values():
232
    allint = allint+e
233
np.mean(allint)
234

    
235
#code.interact(local=dict(globals(), **locals()))
236
pd.DataFrame(allint).hist(bins=50,normed=True)
237
plt.xlabel("Intervals of Persistence in the core [sec]")
238
plt.ylabel("Normalized Frequency")
239
plt.savefig(
240
    nick+"PersistenceDistributionEntryTOP10LeavingTOP20.pdf", format='pdf')
241
plt.clf()
242

    
243
f = open(nick + "stats.txt", 'w')
244
f.write(str(pd.DataFrame(allint).describe()))
245
f.close()