Statistics
| Branch: | Revision:

mobicen / timeAnalysis.py @ c457778e

History | View | Annotate | Download (5.07 KB)

1
import code  # code.interact(local=dict(globals(), **locals()))
2
from collections import deque
3
from scipy import stats
4
import matplotlib.pyplot as plt
5
from collections import defaultdict
6
import os
7
import sys
8
from statsmodels.graphics.tsaplots import plot_acf, acf
9
import operator
10
from mpl_toolkits import mplot3d
11
import pandas as pd
12
from pprint import pprint
13
import numpy as np
14
import glob
15
import matplotlib
16
import seaborn as sns
17
sns.set()
18

    
19

    
20
# --- Command-line arguments -------------------------------------------------
# argv[1]: folder containing the per-node "stats*" CSV files (required).
# argv[2]: number of time-lags to analyse (optional, defaults to 100).
if len(sys.argv) < 2:
    sys.exit("usage: %s <stats-folder> [lags]" % sys.argv[0])
folder = sys.argv[1]
lags = 100
if len(sys.argv) > 2:
    lags = int(sys.argv[2])
# Output prefix: the part of the folder's own name before the first '_'.
# normpath() drops a trailing '/', so "run_x" and "run_x/" both give "run_".
# Splitting on '/' and taking [-2] only worked with a trailing slash; without
# it the parent directory was used, and a bare name raised IndexError.
nick = os.path.basename(os.path.normpath(folder)).split('_')[0] + "_"
# Every relative path used later (input glob, plots directory) resolves
# against this folder.
os.chdir(folder)
26

    
27
def _node_id(path):
    """Return the integer embedded in a snapshot path like './stats12.csv'."""
    stem = os.path.splitext(os.path.basename(path))[0]
    return int(stem[len('stats'):])


# --- Load the per-node statistics --------------------------------------------
# Each "stats*" file is read with the column names time, bc, deg, kcore; its
# first line is skipped (presumably a header row -- confirm against the data).
print("Loading data from %s ..." % folder)
_bc_cols, _deg_cols, _kcore_cols = [], [], []
# Sort by numeric id: a plain string sort puts stats10 before stats2, which
# would break the "column position == node id" assumption made downstream.
for snap in sorted(glob.glob('./stats*'), key=_node_id):
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
    _bc_cols.append(df['bc'])
    _deg_cols.append(df['deg'])
    _kcore_cols.append(df['kcore'])

# Resulting frames: rows = time instants, columns = nodes (one per file).
# A single concat per metric replaces re-copying the frame for every file;
# an empty frame is kept when no file matched.
bcdf = pd.concat(_bc_cols, axis=1) if _bc_cols else pd.DataFrame()
degdf = pd.concat(_deg_cols, axis=1) if _deg_cols else pd.DataFrame()
kcoredf = pd.concat(_kcore_cols, axis=1) if _kcore_cols else pd.DataFrame()
38

    
39

    
40
# Column positions of bcdf stand in for node identifiers from here on.
nodes = range(len(bcdf.columns))

# BC value of every node at the first time instant (row 0).
# .iloc[0, n] is purely positional. The chained form .iloc[0][n] looked the
# integer n up by label on a row indexed by column names and only worked
# through pandas' deprecated integer-to-position fallback.
initialCentrality = {int(n): bcdf.iloc[0, n] for n in nodes}

# (node, centrality) pairs ordered from most to least central at row 0.
sorted_x = sorted(initialCentrality.items(),
                  key=operator.itemgetter(1), reverse=True)
# Node ids only, in that same order.
srtNodes = [entry[0] for entry in sorted_x]
50

    
51
def _autocorr_by_node(frame, node_positions, n_lags):
    """Return per-node autocorrelations: rows = time-lags, columns = nodes.

    frame          -- DataFrame whose column at position p is node p's series
    node_positions -- iterable of column positions to process, in output order
    n_lags         -- Series.autocorr is evaluated for lags 0 .. n_lags - 1

    Returns an empty DataFrame when node_positions is empty.
    """
    per_node = []
    for pos in node_positions:
        series = frame.iloc[:, pos]  # looked up once per node, not per lag
        per_node.append(pd.Series(
            [series.autocorr(lag) for lag in range(n_lags)], dtype=float))
    if not per_node:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the frame for every node.
    return pd.concat(per_node, axis=1)


# --- Autocorrelation of each metric, for every node and time-lag -------------
print("Processing data...")
bcACF = _autocorr_by_node(bcdf, nodes, lags)        # rows=Time-Lags, columns=nodes
degACF = _autocorr_by_node(degdf, nodes, lags)      # rows=Time-Lags, columns=nodes
kcoreACF = _autocorr_by_node(kcoredf, nodes, lags)  # rows=Time-Lags, columns=nodes
66
#code.interact(local=dict(globals(), **locals()))
67

    
68
'''
69
X ==> time-lag
70
Y ==> the nodes, ordered by centrality at time t_0
71
Z ==> the ACF of node y at time-lag x
72
'''
73

    
74
# Output directory for this run: "plots" followed by the run prefix. It is
# created on first use and becomes the working directory, so the files saved
# afterwards with relative names land inside it.
_plots_dir = "plots" + nick
_plots_dir_missing = not os.path.exists(_plots_dir)
if _plots_dir_missing:
    os.makedirs(_plots_dir)
os.chdir(_plots_dir)
78
# --- Plot: mean autocorrelation per metric and Spearman rank-correlation -----
# x serves both as the time-lag axis (ACF curves) and as the time axis
# (rank-correlation curve), hence the combined x-label below.
firstRank = bcdf.iloc[0, :]  # BC of every node at the first time instant
x = range(0, lags)
# Kept as an empty placeholder: the weighted variant (stats.weightedtau) is
# currently disabled.
weightedRankCorr = []

# Mean over all nodes of the autocorrelation at each lag, one list per metric.
meanbcACF = [np.mean(bcACF.iloc[i]) for i in x]
meandegACF = [np.mean(degACF.iloc[i]) for i in x]
meankcoreACF = [np.mean(kcoreACF.iloc[i]) for i in x]

# Spearman rho between the BC values at time 0 and at time t. When fewer
# time instants than lags are available the tail is padded with NaN (drawn
# as a gap) instead of raising IndexError on bcdf.iloc[t, :].
_n_times = len(bcdf)
rankCorr = [
    stats.spearmanr(firstRank, bcdf.iloc[t, :])[0] if t < _n_times else np.nan
    for t in x
]

# Line width is passed as a number rather than the string "1.5".
for _curve, _label in (
        (meanbcACF, 'Mean BC Autocorrelation'),
        (meandegACF, 'Mean DEG Autocorrelation'),
        (meankcoreACF, 'Mean KCORE Autocorrelation'),
        (rankCorr, 'Rank-Correlation (with rank at t_0)')):
    plt.plot(x, _curve, lw=1.5, label=_label)

plt.ylabel('Corr coeff: [ACF, Spearman rho]')
plt.xlabel('Time-lags / Time')
plt.grid()
plt.legend()
plt.xlim(0, lags)
plt.savefig(nick+"autoCorrMean-RankSpearman.pdf", format='pdf')
# Clear the figure so a later plot does not draw on top of this one.
plt.clf()
110

    
111
# --- Export the mean autocorrelations ----------------------------------------
# One column per metric, in the order BC, DEG, KCORE; .iloc[1:] drops the
# lag-0 row before writing.
toWrite = pd.concat(
    [pd.Series(meanbcACF), pd.Series(meandegACF), pd.Series(meankcoreACF)],
    axis=1).iloc[1:, :]
# The with-block closes (and flushes) the file; it used to be left open.
with open("meanAC.csv", 'w') as fout:
    toWrite.to_csv(fout, index=False)
115

    
116
'''
117

118

119

120
nodes2coreInst = defaultdict(list)
121
#nodes2rankInst = defaultdict(list)
122

123
for t in range(len(dfn.iloc[0])):
124
    coreT, coreRankT = coreNodesAtTime(dfn, t, 5)
125
    for n in coreT:
126
        nodes2coreInst[n].append((t,coreT[n],coreRankT[n]))
127

128

129

130
for n in [5,38,59,92]:
131
    points = nodes2coreInst[n]
132
    x = [p[0] for p in points]
133
    y = [len(nodes2coreInst)-p[2] for p in points]
134
    color = n / float(len(nodes2coreInst.keys()))
135
    #rgba = cmap(color)
136
    #plt.scatter(x,y, rgba)
137
    plt.plot(x,y, 'o')
138
plt.ylim(0, len(nodes2coreInst))
139
plt.show()
140
code.interact(local=dict(globals(), **locals()))
141
plt.show()
142
exit()
143
# Core Persistence
144
plags=100
145
x = range(0, plags)
146
y = []
147

148
for i in x:
149
    print "cacca"
150

151
plt.clf()
152

153
'''
154

    
155
'''X, Y, Z = [], [], []
156
for node in srtNodes:
157
    #print "n:", node
158
    for lag in range(lags):
159
        #print "\tn:%d  lag:%d" % (node,lag)
160
        #code.interact(local=dict(globals(), **locals()))
161
        X.append(lag)
162
        Y.append(node)
163
        Z.append(list(bcACF.iloc[lag])[node])
164

165

166
fig = plt.figure()
167
ax = plt.axes(projection='3d')
168
ax.set_xlabel('Time-Lag')
169
ax.set_ylabel('Nodes sorted by BC at t_0')
170
ax.set_zlabel('ACF at time-lag x of node y')
171
ax.plot_trisurf(X, Y, Z, linewidth=0.2, antialiased=True)
172
ax.set_xlim(0, lags)
173
ax.set_ylim(0, len(srtNodes))
174
#ax.set_zlim(-1.0, 1.0)
175
plt.savefig(nick+"autoBC-3d.pdf", format="pdf")'''
176

    
177
# Completion marker; the single-argument call form prints the same text
# under both Python 2 and Python 3.
print("THE END")