Statistics
| Branch: | Revision:

mobicen / timeAnalysis.py @ 36331ad0

History | View | Annotate | Download (7.71 KB)

1
import seaborn as sns
2
import code  # code.interact(local=dict(globals(), **locals()))
3
from collections import deque
4
from scipy import stats
5
import matplotlib.pyplot as plt
6
from collections import defaultdict
7
import os
8
import sys
9
from statsmodels.graphics.tsaplots import plot_acf
10
from statsmodels.tsa.stattools import acf
11
import operator
12
from mpl_toolkits import mplot3d
13
import pandas as pd
14
from pprint import pprint
15
import numpy as np
16
import glob
17
from tqdm import tqdm
18
import matplotlib
19

    
20
#mys.rank(method='first', ascending=False)
21

    
22
# Command line: <script> <stats-folder> [lags]
#   stats-folder : directory holding the per-node stats* CSV files
#   lags         : number of time-lags to analyse (default 100)
if len(sys.argv) < 2:
    sys.exit("usage: %s <stats-folder> [lags]" % sys.argv[0])
folder = sys.argv[1]
lags = 100
if len(sys.argv) > 2:
    lags = int(sys.argv[2])
# nick is the folder's own name up to its first underscore; it is used to
# name the output directory and files.  normpath() drops trailing slashes, so
# the result is the same whether or not the folder is given with one (the
# previous split('/')[-2] silently picked the parent directory otherwise).
nick = os.path.basename(os.path.normpath(folder)).split('_')[0]
os.chdir(folder)
28

    
29
def _node_id(path):
    """Return the integer node id embedded in a ``stats<ID>[.ext]`` file name.

    Raises ValueError when the part after ``stats`` is not an integer.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    return int(stem[len('stats'):])


print("Loading data from %s ..." % folder)
# Sort by numeric node id so that column k belongs to the k-th smallest id
# (a plain string sort would put stats10 before stats2).
snapshots = sorted(glob.glob('./stats*'), key=_node_id)
if not snapshots:
    sys.exit("No stats* files found in %s" % folder)

# One Series per node, concatenated once at the end instead of growing the
# frames inside the loop (which re-copies all previous columns every time).
bc_columns = []
deg_columns = []
kcore_columns = []
for snap in snapshots:
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
    bc_columns.append(df['bc'])
    deg_columns.append(df['deg'])
    kcore_columns.append(df['kcore'])

# Each frame: rows = time instants (CSV rows), columns = nodes (one per file)
bcdf = pd.concat(bc_columns, axis=1)  # BC of every node over time
degdf = pd.concat(deg_columns, axis=1)  # DEG of every node over time
kcoredf = pd.concat(kcore_columns, axis=1)  # KCORE of every node over time

# Nodes are addressed by column position from here on
nodes = range(len(bcdf.columns))
42

    
43
'''initialCentrality = {}
44
for n in nodes:
45
    initialCentrality[int(n)] = bcdf.iloc[0][n]
46

47

48
sorted_x = sorted(initialCentrality.items(),
49
                  key=operator.itemgetter(1), reverse=True)
50
srtNodes = [e[0] for e in sorted_x]'''
51

    
52
def _acf_by_node(metric_df, node_positions, max_lag):
    """Return the autocorrelation table of one metric.

    metric_df      -- frame with one column per node (rows are time instants)
    node_positions -- column positions of the nodes to process
    max_lag        -- lags 0 .. max_lag-1 are evaluated

    The result has one row per time-lag and one column per node; it is an
    empty DataFrame when there are no nodes.
    """
    per_node = [
        pd.DataFrame([metric_df.iloc[:, pos].autocorr(lag)
                      for lag in range(max_lag)])
        for pos in node_positions
    ]
    if not per_node:
        return pd.DataFrame()
    # Single concat: avoids re-copying the growing frame once per node
    return pd.concat(per_node, axis=1)


print("Processing data...")
bcACF = _acf_by_node(bcdf, nodes, lags)  # rows=Time-Lags, columns = nodes
degACF = _acf_by_node(degdf, nodes, lags)  # rows=Time-Lags, columns = nodes
kcoreACF = _acf_by_node(kcoredf, nodes, lags)  # rows=Time-Lags, columns = nodes
67

    
68
'''
X ==> time-lag
Y ==> the nodes, ordered by centrality at time_0
Z ==> the ACF of node y at time-lag x
'''
73

    
74
# All figures and CSV exports below are written inside "plots<nick>",
# created on first use inside the data folder.
plots_dir = "plots" + nick
if not os.path.exists(plots_dir):
    os.makedirs(plots_dir)
os.chdir(plots_dir)
78
# Plotting
79

    
80
def _save_acf_boxplot(acf_df, metric, max_lag, prefix):
    """Save a boxplot of the per-node ACF distribution of one metric.

    acf_df  -- ACF table with rows = time-lags and columns = nodes
    metric  -- metric tag used in the axis label and file name (e.g. "BC")
    max_lag -- number of lags available in acf_df
    prefix  -- string prepended to the output file name

    One box is drawn for lag 1 and for every 5th lag below max_lag; the
    figure goes to "<prefix>autoCorrBOXPLOT-<metric>.pdf" and the current
    figure is cleared afterwards.
    """
    # list(...) keeps the concatenation valid where range() is not a list
    acf_df.T.boxplot(column=[1] + list(range(5, max_lag, 5)))
    plt.ylabel("ACF of %s for all nodes" % metric)
    plt.xlabel("Time-lag")
    plt.xticks(rotation="vertical")
    # Dashed reference lines at +0.2, 0 and -0.2 (the zero line is thicker)
    for level, width in ((0.2, 1.2), (0.0, 2), (-0.2, 1.2)):
        plt.axhline(y=level, color='k', linestyle='--', lw=width)
    plt.ylim(-0.4, 1.0)
    plt.yticks(np.arange(-0.4, 1.0, step=0.1))
    plt.savefig(prefix + "autoCorrBOXPLOT-%s.pdf" % metric, format='pdf')
    plt.clf()


# ACF boxplots
_save_acf_boxplot(bcACF, "BC", lags, nick)
_save_acf_boxplot(degACF, "DEG", lags, nick)
_save_acf_boxplot(kcoreACF, "KCORE", lags, nick)
116

    
117

    
118
# Mean autocorrelation per time-lag (averaged over nodes) together with the
# Spearman rank correlation between the BC values of the first row of bcdf
# and those of row i, for i in 0 .. lags-1.
# lags=20
firstRank = bcdf.iloc[0, :]
x = range(0, lags)
meanbcACF = [np.mean(bcACF.iloc[lag]) for lag in x]
meandegACF = [np.mean(degACF.iloc[lag]) for lag in x]
meankcoreACF = [np.mean(kcoreACF.iloc[lag]) for lag in x]
rankCorr = [stats.spearmanr(firstRank, bcdf.iloc[row, :])[0] for row in x]
# The weighted rank correlation (stats.weightedtau) is currently disabled,
# so this list stays empty and is not plotted.
weightedRankCorr = []

curves = (
    (meanbcACF, 'Mean BC Autocorrelation'),
    (meandegACF, 'Mean DEG Autocorrelation'),
    (meankcoreACF, 'Mean KCORE Autocorrelation'),
    (rankCorr, 'Rank-Correlation (with rank at t_0)'),
)
for values, caption in curves:
    plt.plot(x, values, lw="1.5", label=caption)
plt.ylabel('Correlation indexes')
plt.xlabel('Time-lags / Time')
plt.grid()
plt.legend()
# plt.ylim(-1.0,1.0)
plt.xlim(0, lags)
plt.savefig(nick + "autoCorrMean-RankSpearman.pdf", format='pdf')
plt.clf()
149

    
150
# Export the mean ACF curves, one column per metric (BC, DEG, KCORE in that
# order) and one row per time-lag; .iloc[1:] leaves out the first row (lag 0).
toWrite = pd.concat(
    [pd.Series(meanbcACF), pd.Series(meandegACF), pd.Series(meankcoreACF)],
    axis=1).iloc[1:, :]
# The context manager closes the file even if to_csv() raises
with open("meanAC"+nick+".csv", 'w') as fout:
    toWrite.to_csv(fout, index=False)
155

    
156
# Pairwise correlation (DataFrame.corr defaults) between the BC, DEG and
# KCORE time series of each node, collected per metric pair.
bc_deg = []
bc_kcore = []
deg_kcore = []
for n in nodes:
    cm = pd.concat(
        [bcdf.iloc[:, n], degdf.iloc[:, n], kcoredf.iloc[:, n]],
        axis=1).corr()
    bc_deg.append(cm['bc']['deg'])
    bc_kcore.append(cm['bc']['kcore'])
    deg_kcore.append(cm['deg']['kcore'])

# Average over nodes.  A node whose metric never changes has an undefined
# (NaN) correlation; nanmean ignores such nodes instead of letting a single
# one turn the whole average into NaN.
a = np.nanmean(bc_deg)
b = np.nanmean(bc_kcore)
c = np.nanmean(deg_kcore)
# Symmetric 3x3 matrix of mean correlations, row/column order: BC, DEG, KCORE
toplotdf = pd.DataFrame([[1, a, b],
                         [a, 1, c],
                         [b, c, 1]])
176

    
177
#code.interact(local=dict(globals(), **locals()))
178
# Apply seaborn's default plot styling to the figures drawn after this point.
sns.set()
179

    
180

    
181
'''f250=bcdf.iloc[750:1000,:]
182
f250.columns=map(str, range(0, len(f250.columns)))
183
sns.heatmap(f250.corr(), cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0,
184
    cbar_kws={"label": "Cross-nodes BC Pearson Correlation"})
185
plt.xlabel("Nodes")
186
plt.ylabel("Nodes")
187
plt.savefig(nick+"bcNodesCorrHM.pdf", format='pdf')
188
plt.clf()
189

190
cg=sns.clustermap(f250.corr(), cmap="RdBu_r", robust=True)
191
cg.ax_row_dendrogram.set_visible(False)
192
cg.ax_col_dendrogram.set_visible(False)
193
cg.savefig(nick+"bcNodesClusteredCorr.pdf", format='pdf')
194
plt.clf()'''
195

    
196
# Heatmap of the mean metric-to-metric correlation matrix on a fixed
# [-1, 1] diverging colour scale centred on zero.
heatmap_style = dict(cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0)
sns.heatmap(toplotdf, **heatmap_style)
plt.savefig(nick + "meanMetricsCorrelation.pdf", format='pdf')
plt.clf()
199

    
200
# Export the mean correlation matrix; the context manager closes the file
# even if to_csv() raises.
with open("meanMetricsCorr"+nick+".csv", 'w') as fout:
    toplotdf.to_csv(fout, index=False)
203

    
204
'''
205

206

207

208
nodes2coreInst = defaultdict(list)
209
#nodes2rankInst = defaultdict(list)
210

211
for t in range(len(dfn.iloc[0])):
212
    coreT, coreRankT = coreNodesAtTime(dfn, t, 5)
213
    for n in coreT:
214
        nodes2coreInst[n].append((t,coreT[n],coreRankT[n]))
215

216

217

218
for n in [5,38,59,92]:
219
    points = nodes2coreInst[n]
220
    x = [p[0] for p in points]
221
    y = [len(nodes2coreInst)-p[2] for p in points]
222
    color = n / float(len(nodes2coreInst.keys()))
223
    #rgba = cmap(color)
224
    #plt.scatter(x,y, rgba)
225
    plt.plot(x,y, 'o')
226
plt.ylim(0, len(nodes2coreInst))
227
plt.show()
228
code.interact(local=dict(globals(), **locals()))
229
plt.show()
230
exit()
231
# Core Persitence
232
plags=100
233
x = range(0, plags)
234
y = []
235

236
for i in x:
237
    print "cacca"
238

239
plt.clf()
240

241
'''
242

    
243
'''X, Y, Z = [], [], []
244
for node in srtNodes:
245
    #print "n:", node
246
    for lag in range(lags):
247
        #print "\tn:%d  lag:%d" % (node,lag)
248
        #code.interact(local=dict(globals(), **locals()))
249
        X.append(lag)
250
        Y.append(node)
251
        Z.append(list(bcACF.iloc[lag])[node])
252

253

254
fig = plt.figure()
255
ax = plt.axes(projection='3d')
256
ax.set_xlabel('Time-Lag')
257
ax.set_ylabel('Nodes sorted by BC at t_0')
258
ax.set_zlabel('ACF at time-lag x of node y')
259
ax.plot_trisurf(X, Y, Z, linewidth=0.2, antialiased=True)
260
ax.set_xlim(0, lags)
261
ax.set_ylim(0, len(srtNodes))
262
#ax.set_zlim(-1.0, 1.0)
263
plt.savefig(nick+"autoBC-3d.pdf", format="pdf")'''
264

    
265
# Completion marker: every plot and CSV export has been written.
print("THE END")