Statistics
| Branch: | Revision:

mobicen / timeAnalysis.py @ 0a4aa24d

History | View | Annotate | Download (7.16 KB)

1 1ef4948a LoreBz
import code  # code.interact(local=dict(globals(), **locals()))
2
from collections import deque
3
from scipy import stats
4
import matplotlib.pyplot as plt
5
from collections import defaultdict
6
import os
7
import sys
8
from statsmodels.graphics.tsaplots import plot_acf, acf
9
import operator
10
from mpl_toolkits import mplot3d
11 fa4a0a42 LoreBz
import pandas as pd
12
from pprint import pprint
13
import numpy as np
14
import glob
15 1ef4948a LoreBz
import matplotlib
16 0a4aa24d LoreBz
17 1ef4948a LoreBz
18 fa4a0a42 LoreBz
19
folder = sys.argv[1]
20 c457778e LoreBz
lags = 100
21
if len(sys.argv) > 2:
22
    lags = int(sys.argv[2])
23 0a4aa24d LoreBz
nick = folder.split('/')[-2].split('_')[0]
24 fa4a0a42 LoreBz
os.chdir(folder)
25
26 c457778e LoreBz
bcdf = pd.DataFrame()  # rows=time instants, columns=nodes (the 'bc' column of each stats file)
27
degdf = pd.DataFrame()  # rows=time instants, columns=nodes (the 'deg' column of each stats file)
28
kcoredf = pd.DataFrame()  # rows=time instants, columns=nodes (the 'kcore' column of each stats file)
29 fa4a0a42 LoreBz
print "Loading data from", folder, "..."
30 c16015e2 LoreBz
for snap in sorted(glob.glob('./stats*')):
31 c457778e LoreBz
    # print "",snap
32 c16015e2 LoreBz
    # Extract the numeric id from "stats<ID>.csv".  str.strip() removes a
    # *set of characters* from both ends, not a prefix/suffix, so drop the
    # extension and the "stats" prefix explicitly instead.
    node_id = int(os.path.splitext(os.path.basename(snap))[0][len('stats'):])
33 c457778e LoreBz
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
34
    bcdf = pd.concat([bcdf, df['bc']], axis=1)
35
    degdf = pd.concat([degdf, df['deg']], axis=1)
36
    kcoredf = pd.concat([kcoredf, df['kcore']], axis=1)
37 cf03b931 LoreBz
38 c457778e LoreBz
nodes = range(len(bcdf.columns))
39 fa4a0a42 LoreBz
40 d1ed760e LoreBz
# BC value of every node at the first time instant (row 0 of bcdf).
# iloc[0, n] indexes purely by position; the chained iloc[0][n] form relies
# on an integer key falling back to positional lookup on a Series whose
# labels are all 'bc'.
initialCentrality = {int(n): bcdf.iloc[0, n] for n in nodes}

# Node indexes ordered by decreasing initial BC value.
sorted_x = sorted(initialCentrality.items(),
                  key=operator.itemgetter(1), reverse=True)
srtNodes = [e[0] for e in sorted_x]
48
49 c457778e LoreBz
bcACF = pd.DataFrame()  # rows=Time-Lags, columns = nodes
50
degACF = pd.DataFrame()  # rows=Time-Lags, columns = nodes
51
kcoreACF = pd.DataFrame()  # rows=Time-Lags, columns = nodes
52 fa4a0a42 LoreBz
print "Processing data..."
53 cf03b931 LoreBz
for node in nodes:
54 c457778e LoreBz
    # print "Autocorr of node", node
55
    nodebcACF = pd.DataFrame([bcdf.iloc[:, node].autocorr(lag)
56
                              for lag in range(lags)])
57
    bcACF = pd.concat([bcACF, nodebcACF], axis=1)
58
    nodedegACF = pd.DataFrame(
59
        [degdf.iloc[:, node].autocorr(lag) for lag in range(lags)])
60
    degACF = pd.concat([degACF, nodedegACF], axis=1)
61
    nodekcoreACF = pd.DataFrame(
62
        [kcoredf.iloc[:, node].autocorr(lag) for lag in range(lags)])
63
    kcoreACF = pd.concat([kcoreACF, nodekcoreACF], axis=1)
64 fa4a0a42 LoreBz
65 d1ed760e LoreBz
'''
66
X ==> time-lag
67
Y ==> i nodi in ordine di centralita a tempo_0
68
Z ==> l'acf del nodo y al time-lag x
69
'''
70 fa4a0a42 LoreBz
71 1ef4948a LoreBz
if not os.path.exists("plots"+nick):
72
    os.makedirs("plots"+nick)
73 c16015e2 LoreBz
74 1ef4948a LoreBz
os.chdir("plots"+nick)
75 fa4a0a42 LoreBz
# Plotting
76 0a4aa24d LoreBz
# Debugging hook, kept disabled: when active it opens an interactive console
# and blocks the script until that console is exited, so no plot is produced
# in a non-interactive run.  Uncomment to inspect the computed DataFrames.
# code.interact(local=dict(globals(), **locals()))
77
78
# ACF boxplots
def _plot_acf_boxplot(acf_df, metric):
    """Save a boxplot of the per-node ACF distribution at selected time-lags.

    acf_df -- DataFrame with one row per time-lag and one column per node.
    metric -- label used in the y-axis text and in the output file name
              ("BC", "DEG" or "KCORE").

    The figure is written to <nick>autoCorrBOXPLOT-<metric>.pdf in the
    current working directory, then the current figure is cleared.
    """
    # After transposing, columns are time-lags: plot lag 1 and every 5th lag.
    # list() keeps the concatenation valid even when range() is not a list.
    acf_df.T.boxplot(column=[1] + list(range(5, lags, 5)))
    plt.ylabel("ACF of " + metric + " for all nodes")
    plt.xlabel("Time-lag")
    plt.xticks(rotation="vertical")
    # Dashed reference lines at 0 and +/-0.2.
    plt.axhline(y=0.2, color='k', linestyle='--', lw=1.2)
    plt.axhline(y=0.0, color='k', linestyle='--', lw=2)
    plt.axhline(y=-0.2, color='k', linestyle='--', lw=1.2)
    plt.ylim(-0.4, 1.0)
    plt.yticks(np.arange(-0.4, 1.0, step=0.1))
    plt.savefig(nick + "autoCorrBOXPLOT-" + metric + ".pdf", format='pdf')
    plt.clf()


_plot_acf_boxplot(bcACF, "BC")
_plot_acf_boxplot(degACF, "DEG")
_plot_acf_boxplot(kcoreACF, "KCORE")
114
115
116
117 1ef4948a LoreBz
# Mean AutoCorrelation and Rank-Correlation
118
# lags=20
119 c457778e LoreBz
firstRank = bcdf.iloc[0, :]
120 fa4a0a42 LoreBz
x = range(0, lags)
121 c457778e LoreBz
meanbcACF = []
122
meandegACF = []
123
meankcoreACF = []
124 1ef4948a LoreBz
rankCorr = []
125
weightedRankCorr = []
126
for i in x:
127 c16015e2 LoreBz
    #code.interact(local=dict(globals(), **locals()))
128 c457778e LoreBz
    meanbcACF.append(np.mean(bcACF.iloc[i]))
129
    meandegACF.append(np.mean(degACF.iloc[i]))
130
    meankcoreACF.append(np.mean(kcoreACF.iloc[i]))
131
    rankCorr.append(stats.spearmanr(firstRank, bcdf.iloc[i, :])[0])
132 c16015e2 LoreBz
    #weightedRankCorr.append(stats.weightedtau(firstRank, dfn.iloc[i,:])[0])
133
134 c457778e LoreBz
plt.plot(x, meanbcACF, lw="1.5", label='Mean BC Autocorrelation')
135
plt.plot(x, meandegACF, lw="1.5", label='Mean DEG Autocorrelation')
136
plt.plot(x, meankcoreACF, lw="1.5", label='Mean KCORE Autocorrelation')
137 1ef4948a LoreBz
plt.plot(x, rankCorr, lw="1.5", label='Rank-Correlation (with rank at t_0)')
138 c457778e LoreBz
# plt.plot(x, weightedRankCorr, lw="1.5",
139 c16015e2 LoreBz
#         label='Weighted-Rank-Correlation (with rank at t_0)')
140 0a4aa24d LoreBz
plt.ylabel('Correlation indexes')
141 1ef4948a LoreBz
plt.xlabel('Time-lags / Time')
142
plt.grid()
143
plt.legend()
144
# plt.ylim(-1.0,1.0)
145
plt.xlim(0, lags)
146
plt.savefig(nick+"autoCorrMean-RankSpearman.pdf", format='pdf')
147
plt.clf()
148 c16015e2 LoreBz
149 c457778e LoreBz
toWrite = pd.concat([pd.Series(meanbcACF), pd.Series(
150
    meandegACF), pd.Series(meankcoreACF)], axis=1).iloc[1:, :]
151 0a4aa24d LoreBz
# Write the mean ACF table; the context manager closes the file even if
# to_csv() raises.
with open("meanAC"+nick+".csv", 'w') as fout:
    toWrite.to_csv(fout, index=False)
154
155
bc_deg = []
156
bc_kcore = []
157
deg_kcore = []
158
for n in nodes:
159
    bcn = bcdf.iloc[:, n]
160
    degn = degdf.iloc[:, n]
161
    kn = kcoredf.iloc[:, n]
162
    cordf = pd.concat([bcn, degn, kn], axis=1)
163
    cm = cordf.corr()
164
    bc_deg.append(cm['bc']['deg'])
165
    bc_kcore.append(cm['bc']['kcore'])
166
    deg_kcore.append(cm['deg']['kcore'])
167
168
169
a = np.mean(bc_deg)
170
b = np.mean(bc_kcore)
171
c = np.mean(deg_kcore)
172
toplotdf = pd.DataFrame([[1, a, b],
173
                         [a, 1, c],
174
                         [b, c, 1]])
175
import seaborn as sns
176
sns.set()
177
178
#TODO
179
#sns.heatmap(degdf.corr(), cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0)
180
181
182
sns.heatmap(toplotdf, cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0)
183
plt.savefig(nick+"meanMetricsCorrelation.pdf", format='pdf')
184
plt.clf()
185
186
# Write the mean metric-correlation matrix; the context manager closes the
# file even if to_csv() raises.
with open("meanMetricsCorr"+nick+".csv", 'w') as fout:
    toplotdf.to_csv(fout, index=False)
189 c16015e2 LoreBz
190 1ef4948a LoreBz
'''
191

192

193

194
nodes2coreInst = defaultdict(list)
195
#nodes2rankInst = defaultdict(list)
196

197
for t in range(len(dfn.iloc[0])):
198
    coreT, coreRankT = coreNodesAtTime(dfn, t, 5)
199
    for n in coreT:
200
        nodes2coreInst[n].append((t,coreT[n],coreRankT[n]))
201

202

203

204
for n in [5,38,59,92]:
205
    points = nodes2coreInst[n]
206
    x = [p[0] for p in points]
207
    y = [len(nodes2coreInst)-p[2] for p in points]
208
    color = n / float(len(nodes2coreInst.keys()))
209
    #rgba = cmap(color)
210
    #plt.scatter(x,y, rgba)
211
    plt.plot(x,y, 'o')
212
plt.ylim(0, len(nodes2coreInst))
213
plt.show()
214
code.interact(local=dict(globals(), **locals()))
215
plt.show()
216
exit()
217
# Core Persitence
218
plags=100
219
x = range(0, plags)
220 fa4a0a42 LoreBz
y = []
221 1ef4948a LoreBz

222 fa4a0a42 LoreBz
for i in x:
223 1ef4948a LoreBz
    print "cacca"
224

225 fa4a0a42 LoreBz
plt.clf()
226 d1ed760e LoreBz

227 c16015e2 LoreBz
'''
228 cf03b931 LoreBz
229 c457778e LoreBz
'''X, Y, Z = [], [], []
230 cf03b931 LoreBz
for node in srtNodes:
231 23c7ab1e LoreBz
    #print "n:", node
232 cf03b931 LoreBz
    for lag in range(lags):
233 23c7ab1e LoreBz
        #print "\tn:%d  lag:%d" % (node,lag)
234 c16015e2 LoreBz
        #code.interact(local=dict(globals(), **locals()))
235 cf03b931 LoreBz
        X.append(lag)
236
        Y.append(node)
237 c457778e LoreBz
        Z.append(list(bcACF.iloc[lag])[node])
238 1ef4948a LoreBz

239 d1ed760e LoreBz

240
fig = plt.figure()
241
ax = plt.axes(projection='3d')
242
ax.set_xlabel('Time-Lag')
243
ax.set_ylabel('Nodes sorted by BC at t_0')
244 1ef4948a LoreBz
ax.set_zlabel('ACF at time-lag x of node y')
245
ax.plot_trisurf(X, Y, Z, linewidth=0.2, antialiased=True)
246 d1ed760e LoreBz
ax.set_xlim(0, lags)
247
ax.set_ylim(0, len(srtNodes))
248
#ax.set_zlim(-1.0, 1.0)
249 c457778e LoreBz
plt.savefig(nick+"autoBC-3d.pdf", format="pdf")'''
250 d1ed760e LoreBz
251 fa4a0a42 LoreBz
print "THE END"