mobicen / timeAnalysis.py @ 36331ad0
History | View | Annotate | Download (7.71 KB)
1 |
import seaborn as sns |
---|---|
2 |
import code # code.interact(local=dict(globals(), **locals())) |
3 |
from collections import deque |
4 |
from scipy import stats |
5 |
import matplotlib.pyplot as plt |
6 |
from collections import defaultdict |
7 |
import os |
8 |
import sys |
9 |
from statsmodels.graphics.tsaplots import plot_acf |
10 |
from statsmodels.tsa.stattools import acf |
11 |
import operator |
12 |
from mpl_toolkits import mplot3d |
13 |
import pandas as pd |
14 |
from pprint import pprint |
15 |
import numpy as np |
16 |
import glob |
17 |
from tqdm import tqdm |
18 |
import matplotlib |
19 |
|
20 |
#mys.rank(method='first', ascending=False)
|
21 |
|
22 |
# Command line: <script> FOLDER [LAGS]
#   FOLDER  directory holding the per-node "stats*" CSV files
#   LAGS    number of time-lags to analyse (defaults to 100)
folder = sys.argv[1]
lags = int(sys.argv[2]) if len(sys.argv) > 2 else 100

# Short label used as prefix/suffix of every output file: the part of the
# data folder's own name that precedes the first underscore.
# normpath() drops any trailing separator, so the label is the same whether
# or not FOLDER ends with "/" (the previous split('/')[-2] silently picked
# the parent directory when the trailing slash was missing).
nick = os.path.basename(os.path.normpath(folder)).split('_')[0]

# All following relative paths (input glob, output directory) are resolved
# from inside the data folder.
os.chdir(folder)
28 |
|
29 |
def _node_id(path):
    """Return the integer node id embedded in a ``stats<id>[.ext]`` path.

    Raises ValueError when the file name does not follow that pattern, so a
    stray file matching the glob is reported instead of being loaded as a
    node.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    return int(stem[len('stats'):])


print("Loading data from %s ..." % folder)

# Order the snapshots by numeric node id (a plain lexicographic sort would
# put "stats10" before "stats2"); the path itself breaks ties.
snapshots = sorted(glob.glob('./stats*'), key=lambda p: (_node_id(p), p))

# One Series per node and per metric; they are joined once after the loop
# instead of re-concatenating an ever growing frame at every iteration.
bc_columns, deg_columns, kcore_columns = [], [], []
for snap in snapshots:
    # The column names are supplied explicitly and the first line of the
    # file is skipped (presumably a header row -- confirm against the
    # files' producer).
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
    bc_columns.append(df['bc'])
    deg_columns.append(df['deg'])
    kcore_columns.append(df['kcore'])

# Layout of the three frames: one row per sample (CSV line), one column per
# node. Columns keep the metric name ('bc', 'deg', 'kcore') as label, which
# the per-node correlation further down relies on.
bcdf = pd.concat(bc_columns, axis=1) if bc_columns else pd.DataFrame()
degdf = pd.concat(deg_columns, axis=1) if deg_columns else pd.DataFrame()
kcoredf = pd.concat(kcore_columns, axis=1) if kcore_columns else pd.DataFrame()

# Positional node indices, valid for all three frames.
nodes = range(len(bcdf.columns))
42 |
|
43 |
'''initialCentrality = {}
|
44 |
for n in nodes:
|
45 |
initialCentrality[int(n)] = bcdf.iloc[0][n]
|
46 |
|
47 |
|
48 |
sorted_x = sorted(initialCentrality.items(),
|
49 |
key=operator.itemgetter(1), reverse=True)
|
50 |
srtNodes = [e[0] for e in sorted_x]'''
|
51 |
|
52 |
def _acf_table(metric_df, node_indices, max_lag):
    """Build the autocorrelation table of one metric.

    metric_df    -- frame with one column per node and one row per sample
    node_indices -- positional column indices to process
    max_lag      -- lags 0 .. max_lag-1 are evaluated

    Returns a frame with one row per time-lag and one column per node
    (every column is labelled 0, as each per-node frame is unnamed).
    An empty frame is returned when there are no nodes.
    """
    per_node = [
        pd.DataFrame([metric_df.iloc[:, node].autocorr(lag)
                      for lag in range(max_lag)])
        for node in node_indices
    ]
    # Single concat: joining inside the loop copies the whole table at
    # every step and is quadratic in the number of nodes.
    return pd.concat(per_node, axis=1) if per_node else pd.DataFrame()


print("Processing data...")
bcACF = _acf_table(bcdf, nodes, lags)        # rows=Time-Lags, columns=nodes
degACF = _acf_table(degdf, nodes, lags)      # rows=Time-Lags, columns=nodes
kcoreACF = _acf_table(kcoredf, nodes, lags)  # rows=Time-Lags, columns=nodes
|
67 |
|
68 |
'''
|
69 |
X ==> time-lag
|
70 |
Y ==> the nodes, ordered by centrality at time_0
|
71 |
Z ==> the ACF of node y at time-lag x
|
72 |
'''
|
73 |
|
74 |
# Every figure and CSV written from here on goes into a "plots<nick>"
# directory (relative to the current working directory), created on demand.
plots_dir = "plots" + nick
if not os.path.exists(plots_dir):
    os.makedirs(plots_dir)
os.chdir(plots_dir)
|
78 |
# Plotting
|
79 |
|
80 |
# ACF boxplots
|
81 |
def _save_acf_boxplot(acf, metric, max_lag, prefix):
    """Draw and save the per-lag boxplot of one metric's autocorrelation.

    acf     -- ACF table (rows = time-lags, columns = nodes)
    metric  -- metric tag used in the y-label and file name ("BC", ...)
    max_lag -- number of lags held by ``acf``
    prefix  -- string prepended to the output file name

    One box is drawn for lag 1 and for every fifth lag from 5 up to (but
    excluding) ``max_lag``; each box summarises that lag over all nodes.
    The figure is written to "<prefix>autoCorrBOXPLOT-<metric>.pdf" and the
    current figure is cleared afterwards.
    """
    # list(...) keeps the concatenation valid where range() is not a list.
    acf.T.boxplot(column=[1] + list(range(5, max_lag, 5)))
    plt.ylabel("ACF of %s for all nodes" % metric)
    plt.xlabel("Time-lag")
    plt.xticks(rotation="vertical")
    # Dashed guide lines: a thicker one at zero, thinner ones at +/-0.2.
    for level, width in ((0.2, 1.2), (0.0, 2), (-0.2, 1.2)):
        plt.axhline(y=level, color='k', linestyle='--', lw=width)
    plt.ylim(-0.4, 1.0)
    plt.yticks(np.arange(-0.4, 1.0, step=0.1))
    plt.savefig("%sautoCorrBOXPLOT-%s.pdf" % (prefix, metric), format='pdf')
    plt.clf()


for _acf, _metric in ((bcACF, "BC"), (degACF, "DEG"), (kcoreACF, "KCORE")):
    _save_acf_boxplot(_acf, _metric, lags, nick)
116 |
|
117 |
|
118 |
# Mean AutoCorrelation and Rank-Correlation
|
119 |
# lags=20
|
120 |
# BC value of every node in the first row (first sample) of bcdf; used as
# the reference for the rank correlation below.
firstRank = bcdf.iloc[0, :]
x = range(0, lags)

# Mean over all nodes of the autocorrelation at each lag.
meanbcACF = [np.mean(bcACF.iloc[k]) for k in x]
meandegACF = [np.mean(degACF.iloc[k]) for k in x]
meankcoreACF = [np.mean(kcoreACF.iloc[k]) for k in x]

# Spearman coefficient between the BC values in row 0 and those in row k
# of bcdf (k indexes samples here, not lags).
rankCorr = [stats.spearmanr(firstRank, bcdf.iloc[k, :])[0] for k in x]

# Weighted variant is currently disabled; the list is left empty.
weightedRankCorr = []
|
134 |
|
135 |
# One line per curve: the three mean autocorrelations and the Spearman
# rank correlation, all drawn against the same x axis.
curves = (
    (meanbcACF, 'Mean BC Autocorrelation'),
    (meandegACF, 'Mean DEG Autocorrelation'),
    (meankcoreACF, 'Mean KCORE Autocorrelation'),
    (rankCorr, 'Rank-Correlation (with rank at t_0)'),
)
for values, caption in curves:
    plt.plot(x, values, lw="1.5", label=caption)

plt.ylabel('Correlation indexes')
plt.xlabel('Time-lags / Time')
plt.grid()
plt.legend()
plt.xlim(0, lags)
plt.savefig(nick + "autoCorrMean-RankSpearman.pdf", format='pdf')
plt.clf()
149 |
|
150 |
# Table of the mean autocorrelations: columns 0, 1, 2 hold BC, DEG and
# KCORE respectively; the lag-0 row is dropped by the iloc[1:] slice.
toWrite = pd.concat(
    [pd.Series(meanbcACF), pd.Series(meandegACF), pd.Series(meankcoreACF)],
    axis=1).iloc[1:, :]

# The context manager closes the file even if to_csv() raises.
with open("meanAC" + nick + ".csv", 'w') as fout:
    toWrite.to_csv(fout, index=False)
155 |
|
156 |
# For every node, correlate its three metric series with each other and
# collect the three pairwise coefficients.
bc_deg, bc_kcore, deg_kcore = [], [], []
for n in nodes:
    # Columns are labelled 'bc', 'deg', 'kcore' (the names of the Series).
    per_node = pd.concat(
        [bcdf.iloc[:, n], degdf.iloc[:, n], kcoredf.iloc[:, n]], axis=1)
    cm = per_node.corr()
    bc_deg.append(cm.loc['deg', 'bc'])
    bc_kcore.append(cm.loc['kcore', 'bc'])
    deg_kcore.append(cm.loc['kcore', 'deg'])

# Average each pairwise coefficient over all nodes and arrange the result
# as a symmetric 3x3 matrix in the order BC, DEG, KCORE.
mean_bc_deg = np.mean(bc_deg)
mean_bc_kcore = np.mean(bc_kcore)
mean_deg_kcore = np.mean(deg_kcore)
toplotdf = pd.DataFrame([
    [1, mean_bc_deg, mean_bc_kcore],
    [mean_bc_deg, 1, mean_deg_kcore],
    [mean_bc_kcore, mean_deg_kcore, 1],
])

# Apply seaborn's default theme to the figures drawn after this point.
sns.set()
179 |
|
180 |
|
181 |
'''f250=bcdf.iloc[750:1000,:]
|
182 |
f250.columns=map(str, range(0, len(f250.columns)))
|
183 |
sns.heatmap(f250.corr(), cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0,
|
184 |
cbar_kws={"label": "Cross-nodes BC Pearson Correlation"})
|
185 |
plt.xlabel("Nodes")
|
186 |
plt.ylabel("Nodes")
|
187 |
plt.savefig(nick+"bcNodesCorrHM.pdf", format='pdf')
|
188 |
plt.clf()
|
189 |
|
190 |
cg=sns.clustermap(f250.corr(), cmap="RdBu_r", robust=True)
|
191 |
cg.ax_row_dendrogram.set_visible(False)
|
192 |
cg.ax_col_dendrogram.set_visible(False)
|
193 |
cg.savefig(nick+"bcNodesClusteredCorr.pdf", format='pdf')
|
194 |
plt.clf()'''
|
195 |
|
196 |
# Heatmap of the mean cross-metric correlation matrix, colour scale fixed
# to [-1, 1] and centred on zero.
sns.heatmap(toplotdf, cmap="RdBu_r", center=0.0, vmin=-1.0, vmax=1.0)
plt.savefig(nick + "meanMetricsCorrelation.pdf", format='pdf')
plt.clf()

# Same matrix as CSV; the context manager closes the file even if
# to_csv() raises.
with open("meanMetricsCorr" + nick + ".csv", 'w') as fout:
    toplotdf.to_csv(fout, index=False)
203 |
|
204 |
'''
|
205 |
|
206 |
|
207 |
|
208 |
nodes2coreInst = defaultdict(list)
|
209 |
#nodes2rankInst = defaultdict(list)
|
210 |
|
211 |
for t in range(len(dfn.iloc[0])):
|
212 |
coreT, coreRankT = coreNodesAtTime(dfn, t, 5)
|
213 |
for n in coreT:
|
214 |
nodes2coreInst[n].append((t,coreT[n],coreRankT[n]))
|
215 |
|
216 |
|
217 |
|
218 |
for n in [5,38,59,92]:
|
219 |
points = nodes2coreInst[n]
|
220 |
x = [p[0] for p in points]
|
221 |
y = [len(nodes2coreInst)-p[2] for p in points]
|
222 |
color = n / float(len(nodes2coreInst.keys()))
|
223 |
#rgba = cmap(color)
|
224 |
#plt.scatter(x,y, rgba)
|
225 |
plt.plot(x,y, 'o')
|
226 |
plt.ylim(0, len(nodes2coreInst))
|
227 |
plt.show()
|
228 |
code.interact(local=dict(globals(), **locals()))
|
229 |
plt.show()
|
230 |
exit()
|
231 |
# Core Persistence
|
232 |
plags=100
|
233 |
x = range(0, plags)
|
234 |
y = []
|
235 |
|
236 |
for i in x:
|
237 |
print "cacca"
|
238 |
|
239 |
plt.clf()
|
240 |
|
241 |
'''
|
242 |
|
243 |
'''X, Y, Z = [], [], []
|
244 |
for node in srtNodes:
|
245 |
#print "n:", node
|
246 |
for lag in range(lags):
|
247 |
#print "\tn:%d lag:%d" % (node,lag)
|
248 |
#code.interact(local=dict(globals(), **locals()))
|
249 |
X.append(lag)
|
250 |
Y.append(node)
|
251 |
Z.append(list(bcACF.iloc[lag])[node])
|
252 |
|
253 |
|
254 |
fig = plt.figure()
|
255 |
ax = plt.axes(projection='3d')
|
256 |
ax.set_xlabel('Time-Lag')
|
257 |
ax.set_ylabel('Nodes sorted by BC at t_0')
|
258 |
ax.set_zlabel('ACF at time-lag x of node y')
|
259 |
ax.plot_trisurf(X, Y, Z, linewidth=0.2, antialiased=True)
|
260 |
ax.set_xlim(0, lags)
|
261 |
ax.set_ylim(0, len(srtNodes))
|
262 |
#ax.set_zlim(-1.0, 1.0)
|
263 |
plt.savefig(nick+"autoBC-3d.pdf", format="pdf")'''
|
264 |
|
265 |
# Completion marker; the parenthesised form prints the same text whether
# print is a statement or a function.
print("THE END")