Statistics
| Branch: | Revision:

mobicen / timeAnalysis.py @ 9f4f3f35

History | View | Annotate | Download (7.6 KB)

1
import code  # code.interact(local=dict(globals(), **locals()))
2
from collections import deque
3
from scipy import stats
4
import matplotlib.pyplot as plt
5
from collections import defaultdict
6
import os
7
import sys
8
from statsmodels.graphics.tsaplots import plot_acf, acf
9
import operator
10
from mpl_toolkits import mplot3d
11
import pandas as pd
12
from pprint import pprint
13
import numpy as np
14
import glob
15
import matplotlib
16

    
17

    
18

    
19
# Command line: <script> <folder> [lags]
#   folder : directory holding the per-node stats* CSV files
#   lags   : number of time-lags to analyse (defaults to 100)
if len(sys.argv) < 2:
    sys.exit("usage: %s <folder> [lags]" % sys.argv[0])
folder = sys.argv[1]
lags = int(sys.argv[2]) if len(sys.argv) > 2 else 100

# Short run name: the part of the folder's own name that precedes the first
# '_'.  normpath drops a trailing separator, so "a/run_1/" and "a/run_1"
# both give "run" (splitting on '/' and taking [-2] only worked with the
# trailing slash present).
nick = os.path.basename(os.path.normpath(folder)).split('_')[0]

# Every relative path used from here on is resolved inside the data folder.
os.chdir(folder)
25

    
26
# Load one CSV per node (files matching ./stats*) and build one table per
# metric: rows = time instants, columns = nodes.  Columns follow the sorted
# file-name order, which is lexicographic (e.g. "stats10" sorts before
# "stats2"), and every column keeps the metric's name as its label, so the
# tables must be addressed by position (iloc).
print("Loading data from %s ..." % folder)
bcSeries, degSeries, kcoreSeries = [], [], []
for snap in sorted(glob.glob('./stats*')):
    # The first row of each file is skipped and replaced by fixed names.
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
    bcSeries.append(df['bc'])
    degSeries.append(df['deg'])
    kcoreSeries.append(df['kcore'])

if not bcSeries:
    sys.exit("No stats* files found in " + folder)

# Concatenate once per metric instead of growing each table inside the
# loop, which re-copied the whole table for every file.
bcdf = pd.concat(bcSeries, axis=1)  # rows=time-instants columns=nodes (BC)
degdf = pd.concat(degSeries, axis=1)  # rows=time-instants columns=nodes (DEG)
kcoredf = pd.concat(kcoreSeries, axis=1)  # rows=time-instants columns=nodes (KCORE)
37

    
38
# Nodes are identified by column position in the metric tables.
nodes = range(len(bcdf.columns))

# BC of every node at the first time instant.  iloc[0, n] addresses the
# cell purely by position; the previous iloc[0][n] looked up an integer key
# in a row whose labels are column names, relying on positional fallback.
initialCentrality = {int(n): bcdf.iloc[0, n] for n in nodes}

# (node, BC) pairs and node indices ordered by decreasing initial BC.
sorted_x = sorted(initialCentrality.items(),
                  key=operator.itemgetter(1), reverse=True)
srtNodes = [e[0] for e in sorted_x]
48

    
49
def _acf_table(metricdf):
    """Return the per-node autocorrelation table of one metric.

    metricdf holds one column per node (rows are time instants).  The
    result has one row per time-lag in range(lags) and one column per node
    in `nodes`, in the same order; every column is labelled 0, as produced
    by concatenating the single-column per-node frames.
    """
    perNode = [
        pd.DataFrame([metricdf.iloc[:, node].autocorr(lag)
                      for lag in range(lags)])
        for node in nodes
    ]
    if not perNode:
        # pd.concat rejects an empty list; keep the empty-table result.
        return pd.DataFrame()
    # Single concat instead of growing the table once per node.
    return pd.concat(perNode, axis=1)


print("Processing data...")
bcACF = _acf_table(bcdf)  # rows=Time-Lags, columns = nodes
degACF = _acf_table(degdf)  # rows=Time-Lags, columns = nodes
kcoreACF = _acf_table(kcoredf)  # rows=Time-Lags, columns = nodes
64

    
65
'''
X ==> time-lag
Y ==> the nodes, ordered by centrality at time t_0
Z ==> the ACF of node y at time-lag x
'''
70

    
71
# All output files go into a "plots<nick>" directory, created on first use
# relative to the current working directory, which then becomes the cwd.
plotsDir = "plots" + nick
if not os.path.exists(plotsDir):
    os.makedirs(plotsDir)

os.chdir(plotsDir)
75
# Plotting
76

    
77

    
78
# ACF boxplots
def _save_acf_boxplot(acfdf, metric):
    """Save the boxplot of one metric's ACF over all nodes as a PDF.

    acfdf  : ACF table (rows = time-lags, columns = nodes)
    metric : short metric name used in the y label and in the file name

    One box is drawn for lag 1 and for every fifth lag from 5 up to (but
    excluding) `lags`.  The file is written to
    <nick>autoCorrBOXPLOT-<metric>.pdf and the current figure is cleared.
    """
    # list(...) keeps the concatenation valid where range is not a list.
    shownLags = [1] + list(range(5, lags, 5))
    acfdf.T.boxplot(column=shownLags)
    plt.ylabel("ACF of %s for all nodes" % metric)
    plt.xlabel("Time-lag")
    plt.xticks(rotation="vertical")
    # Dashed horizontal reference lines at 0 and +/-0.2.
    plt.axhline(y=0.2, color='k', linestyle='--', lw=1.2)
    plt.axhline(y=0.0, color='k', linestyle='--', lw=2)
    plt.axhline(y=-0.2, color='k', linestyle='--', lw=1.2)
    plt.ylim(-0.4, 1.0)
    plt.yticks(np.arange(-0.4, 1.0, step=0.1))
    plt.savefig(nick + "autoCorrBOXPLOT-%s.pdf" % metric, format='pdf')
    plt.clf()


for acfTable, metricName in ((bcACF, "BC"),
                             (degACF, "DEG"),
                             (kcoreACF, "KCORE")):
    _save_acf_boxplot(acfTable, metricName)
114

    
115

    
116

    
117
# Mean AutoCorrelation and Rank-Correlation
# lags=20
firstRank = bcdf.iloc[0, :]  # BC of every node at the first time instant
x = range(0, lags)
numInstants = len(bcdf)

# Mean over all nodes of each metric's ACF, one value per time-lag.
meanbcACF = [np.mean(bcACF.iloc[i]) for i in x]
meandegACF = [np.mean(degACF.iloc[i]) for i in x]
meankcoreACF = [np.mean(kcoreACF.iloc[i]) for i in x]

# Spearman correlation between the BC values at time 0 and at time i.
# This series is indexed by time instant, not by lag, so instants beyond
# the end of the data are filled with NaN instead of raising IndexError
# when lags exceeds the number of instants.
rankCorr = [
    stats.spearmanr(firstRank, bcdf.iloc[i, :])[0] if i < numInstants
    else np.nan
    for i in x
]
# Placeholder for the weighted-rank (stats.weightedtau) variant, which is
# currently disabled and therefore neither computed nor plotted.
weightedRankCorr = []

for curve, curveLabel in (
        (meanbcACF, 'Mean BC Autocorrelation'),
        (meandegACF, 'Mean DEG Autocorrelation'),
        (meankcoreACF, 'Mean KCORE Autocorrelation'),
        (rankCorr, 'Rank-Correlation (with rank at t_0)')):
    # Line width given as a number rather than the string "1.5".
    plt.plot(x, curve, lw=1.5, label=curveLabel)
plt.ylabel('Correlation indexes')
plt.xlabel('Time-lags / Time')
plt.grid()
plt.legend()
# plt.ylim(-1.0,1.0)
plt.xlim(0, lags)
plt.savefig(nick+"autoCorrMean-RankSpearman.pdf", format='pdf')
plt.clf()
148

    
149
# Export the mean ACF curves: columns 0, 1, 2 are BC, DEG and KCORE.  The
# first row (lag 0) is dropped and the row index is not written.
toWrite = pd.concat(
    [pd.Series(meanbcACF), pd.Series(meandegACF), pd.Series(meankcoreACF)],
    axis=1).iloc[1:, :]
# The with-block closes the file even if to_csv raises.
with open("meanAC"+nick+".csv", 'w') as fout:
    toWrite.to_csv(fout, index=False)
154

    
155
# For every node, correlate its three metric time series with each other
# (DataFrame.corr with its default method) and collect the three pairwise
# coefficients.
bc_deg, bc_kcore, deg_kcore = [], [], []
for n in nodes:
    perNode = pd.concat(
        [bcdf.iloc[:, n], degdf.iloc[:, n], kcoredf.iloc[:, n]], axis=1)
    corrMatrix = perNode.corr()
    bc_deg.append(corrMatrix.loc['deg', 'bc'])
    bc_kcore.append(corrMatrix.loc['kcore', 'bc'])
    deg_kcore.append(corrMatrix.loc['kcore', 'deg'])

# Average each coefficient over the nodes and arrange the averages as a
# symmetric 3x3 matrix in BC, DEG, KCORE order with ones on the diagonal.
a, b, c = np.mean(bc_deg), np.mean(bc_kcore), np.mean(deg_kcore)
toplotdf = pd.DataFrame([
    [1, a, b],
    [a, 1, c],
    [b, c, 1],
])

#code.interact(local=dict(globals(), **locals()))
# seaborn is imported and its theme applied only at this point, after the
# earlier figures have been saved.
import seaborn as sns
sns.set()
179

    
180

    
181
'''f250=bcdf.iloc[750:1000,:]
182
f250.columns=map(str, range(0, len(f250.columns)))
183
sns.heatmap(f250.corr(), cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0,
184
    cbar_kws={"label": "Cross-nodes BC Pearson Correlation"})
185
plt.xlabel("Nodes")
186
plt.ylabel("Nodes")
187
plt.savefig(nick+"bcNodesCorrHM.pdf", format='pdf')
188
plt.clf()
189

190
cg=sns.clustermap(f250.corr(), cmap="RdBu_r", robust=True)
191
cg.ax_row_dendrogram.set_visible(False)
192
cg.ax_col_dendrogram.set_visible(False)
193
cg.savefig(nick+"bcNodesClusteredCorr.pdf", format='pdf')
194
plt.clf()'''
195

    
196
# Heatmap of the mean cross-metric correlation matrix, on a diverging
# colour map centred at 0 and spanning [-1, 1].
sns.heatmap(toplotdf, cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0)
plt.savefig(nick+"meanMetricsCorrelation.pdf", format='pdf')
plt.clf()

# Same matrix as CSV (row index not written); the with-block closes the
# file even if to_csv raises.
with open("meanMetricsCorr"+nick+".csv", 'w') as fout:
    toplotdf.to_csv(fout, index=False)
203

    
204
'''
205

206

207

208
nodes2coreInst = defaultdict(list)
209
#nodes2rankInst = defaultdict(list)
210

211
for t in range(len(dfn.iloc[0])):
212
    coreT, coreRankT = coreNodesAtTime(dfn, t, 5)
213
    for n in coreT:
214
        nodes2coreInst[n].append((t,coreT[n],coreRankT[n]))
215

216

217

218
for n in [5,38,59,92]:
219
    points = nodes2coreInst[n]
220
    x = [p[0] for p in points]
221
    y = [len(nodes2coreInst)-p[2] for p in points]
222
    color = n / float(len(nodes2coreInst.keys()))
223
    #rgba = cmap(color)
224
    #plt.scatter(x,y, rgba)
225
    plt.plot(x,y, 'o')
226
plt.ylim(0, len(nodes2coreInst))
227
plt.show()
228
code.interact(local=dict(globals(), **locals()))
229
plt.show()
230
exit()
231
# Core Persitence
232
plags=100
233
x = range(0, plags)
234
y = []
235

236
for i in x:
237
    print "cacca"
238

239
plt.clf()
240

241
'''
242

    
243
'''X, Y, Z = [], [], []
244
for node in srtNodes:
245
    #print "n:", node
246
    for lag in range(lags):
247
        #print "\tn:%d  lag:%d" % (node,lag)
248
        #code.interact(local=dict(globals(), **locals()))
249
        X.append(lag)
250
        Y.append(node)
251
        Z.append(list(bcACF.iloc[lag])[node])
252

253

254
fig = plt.figure()
255
ax = plt.axes(projection='3d')
256
ax.set_xlabel('Time-Lag')
257
ax.set_ylabel('Nodes sorted by BC at t_0')
258
ax.set_zlabel('ACF at time-lag x of node y')
259
ax.plot_trisurf(X, Y, Z, linewidth=0.2, antialiased=True)
260
ax.set_xlim(0, lags)
261
ax.set_ylim(0, len(srtNodes))
262
#ax.set_zlim(-1.0, 1.0)
263
plt.savefig(nick+"autoBC-3d.pdf", format="pdf")'''
264

    
265
# Completion marker; the parenthesised single-argument form prints the same
# text whether print is a statement or a function.
print("THE END")