Revision 0412dba8 sampleACF.py
sampleACF.py  

1 
import seaborn as sns 

2  1 
import code # code.interact(local=dict(globals(), **locals())) 
3  2 
from collections import deque 
4  3 
from scipy import stats 
...  ...  
9  8 
from statsmodels.graphics.tsaplots import plot_acf 
10  9 
from statsmodels.tsa.stattools import acf 
11  10 
import operator 
12 
from mpl_toolkits import mplot3d 

13  11 
import pandas as pd 
14  12 
from pprint import pprint 
15  13 
import numpy as np 
16  14 
import glob 
17  15 
from tqdm import tqdm 
18 
import matplotlib 

19  16 
from multiprocessing import Pool 
20  17  
21  18  
19 
# mys.rank(method='first', ascending=False) 

20  
21 
# Command line: <folder> [num_workers] [lags]
folder = sys.argv[1]
# Optional second argument: worker count, 1 when omitted.
num_workers = int(sys.argv[2]) if len(sys.argv) > 2 else 1
# Optional third argument: number of lags, 100 when omitted.
lags = int(sys.argv[3]) if len(sys.argv) > 3 else 100
# Prefix for output file names: third '/'-separated component of the
# folder path, cut at its first '_'.
nick = folder.split('/')[2].partition('_')[0]
# Paths used from here on are resolved relative to the data folder.
os.chdir(folder)

30  
31  
32  
33 
'''def nistRH(v, h): 

34 
N, mu, var = len(v), np.mean(v), np.var(v) 

35 
Ch = 0.0 

36 
for t in range(0, Nh): 

37 
Ch += (v[t]mu)*(v[t+h]mu) 

38 
return (Ch/(Nh))/var 

39  
22  40  
23  41 
def sampleACF(params): 
24 
v=params['v'] 

25 
nlags=params['nlags'] 

26 
timeDepth=params['timeDepth'] 

27 
v=v[:timeDepth].to_list() 

28 
acf_t=defaultdict(list) 

42 
v = params['v'] 

43 
nlags = params['nlags'] 

44 
timeDepth = params['timeDepth'] 

45 
v = v[:timeDepth].to_list() 

46 
assert len(v) == timeDepth 

47 
acf_t = defaultdict(list) 

29  48 
for tau in range(0, nlags): 
30  49 
for k in range(0, timeDepthtau): 
31 
if (v[k] >0 and v[k+tau]>0):


50 
if (v[k] > 0 and v[k+tau] > 0):


32  51 
acf_kt = v[k] * v[k+tau] / (np.mean([v[k], v[k+tau]])**2) 
33  52 
acf_t[tau].append(acf_kt) 
34  53 
return map(np.mean, acf_t.values()) 
35  54  
55  
36  56 
def sampleACF2(v, nlags=40, timeDepth=500): 
37 
v=v[:timeDepth].to_list()


38 
return pd.Series(acf(v, nlags=nlags, fft=True))


57 
v = v[:timeDepth].to_list()


58 
return pd.Series(acf(v, nlags=nlags, fft=True, unbiased=True))'''


39  59  
40 
#mys.rank(method='first', ascending=False) 

41  
42 
folder = sys.argv[1] 

43 
num_workers = int(sys.argv[2]) 

44 
lags = 100 

45 
if len(sys.argv) > 3: 

46 
lags = int(sys.argv[3]) 

47 
nick = folder.split('/')[2].split('_')[0] 

48 
os.chdir(folder) 

49  60  
50  61 
bcdf = pd.DataFrame() # rows=nodes columns=BC at columnindex timeinstant 
51  62 
degdf = pd.DataFrame() # rows=nodes columns=DEG at columnindex timeinstant 
...  ...  
59  70 
degdf = pd.concat([degdf, df['deg']], axis=1) 
60  71 
kcoredf = pd.concat([kcoredf, df['kcore']], axis=1) 
61  72  
73 
# Row-wise ranking of bcdf: within each row the largest value gets rank 1
# and ties are broken by position (method='first').  Each ranked row is
# re-indexed 0..n-1, so rankbcdf has positional column labels, and the
# rows are renumbered 0..len(bcdf)-1.
# The rows are collected first and the frame is assembled once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
_ranked_rows = [
    bcdf.iloc[t].rank(method='first', ascending=False).reset_index(drop=True)
    for t in range(len(bcdf))
]
rankbcdf = pd.DataFrame(_ranked_rows).reset_index(drop=True)

77  
78 


79  
62  80 
nodes = range(len(bcdf.columns)) 
63  81  
64  82  
...  ...  
90  108 
coreNodes = [int(e[0]) for e in srtd_BC[:upto]] 
91  109 
return coreNodes 
92  110  
111 
def jaccard_top_CF(maxt=100, nlags=20, perc=5):
    """Average Jaccard similarity between top-node sets, per time lag.

    For every lag ``tau`` in ``0 .. nlags-1`` the result holds the mean of
    ``jaccard_similarity(topNodes(t, perc), topNodes(t + tau, perc))``
    taken over ``t = 0 .. maxt - tau - 1``.

    Args:
        maxt: number of time instants (``0 .. maxt-1``) whose node sets
            are compared.
        nlags: number of lags to evaluate, starting from lag 0.
        perc: passed unchanged as the second argument of ``topNodes``.

    Returns:
        A list with ``nlags`` entries, one averaged similarity per lag.

    Raises:
        ZeroDivisionError: when ``nlags > maxt``, because lag ``maxt``
            has no (t, t + tau) pair to average.
    """
    N = maxt
    # Compute the node set of every instant once; all lags reuse them.
    memoSet = {i: topNodes(i, perc) for i in range(N)}
    retval = []
    for tau in range(nlags):
        # Number of (t, t + tau) pairs that fit inside the first N instants.
        pairs = N - tau
        jtau = 0.0
        for t in range(pairs):
            jtau += jaccard_similarity(memoSet[t], memoSet[t + tau])
        retval.append(jtau / pairs)
    return retval

124  
125 
def jaccard_tail_CF(maxt=100, nlags=20, perc=5):
    """Average Jaccard similarity between tail-node sets, per time lag.

    For every lag ``tau`` in ``0 .. nlags-1`` the result holds the mean of
    ``jaccard_similarity(tailNodes(t, perc), tailNodes(t + tau, perc))``
    taken over ``t = 0 .. maxt - tau - 1``.

    Args:
        maxt: number of time instants (``0 .. maxt-1``) whose node sets
            are compared.
        nlags: number of lags to evaluate, starting from lag 0.
        perc: passed unchanged as the second argument of ``tailNodes``.

    Returns:
        A list with ``nlags`` entries, one averaged similarity per lag.

    Raises:
        ZeroDivisionError: when ``nlags > maxt``, because lag ``maxt``
            has no (t, t + tau) pair to average.
    """
    N = maxt
    # Compute the node set of every instant once; all lags reuse them.
    memoSet = {i: tailNodes(i, perc) for i in range(N)}
    retval = []
    for tau in range(nlags):
        # Number of (t, t + tau) pairs that fit inside the first N instants.
        pairs = N - tau
        jtau = 0.0
        for t in range(pairs):
            jtau += jaccard_similarity(memoSet[t], memoSet[t + tau])
        retval.append(jtau / pairs)
    return retval

138 
# Jaccard similarity versus lag for the top and tail node sets (perc=p),
# drawn on one figure and written to a PDF in the current directory.
p = 2
top_profile = jaccard_top_CF(perc=p)
pd.Series(top_profile).plot(label='Top')
tail_profile = jaccard_tail_CF(perc=p)
pd.Series(tail_profile).plot(label='Tail')
plt.legend()
jaccard_pdf = nick + "Jaccardtau1perc=" + str(p) + ".pdf"
plt.savefig(jaccard_pdf, format='pdf')
plt.clf()

# Similarity between the node sets of consecutive instants (t, t + 1)
# for t = 0 .. 99, with 15 as the second argument of topNodes / tailNodes.
jtop1 = [
    jaccard_similarity(topNodes(t, 15), topNodes(t + 1, 15))
    for t in range(100)
]

jtail1 = [
    jaccard_similarity(tailNodes(t, 15), tailNodes(t + 1, 15))
    for t in range(100)
]

156  
157  
158  
93  159 
'''for i in range(k, k+memoryMax): 
94  160 
bcktop, bcitop = bcdf.iloc[k, top], bcdf.iloc[i, top] 
95  161 
acTop[ik].append(bcktop * bcitop / (np.mean([bcktop, bcitop])**2)) 
...  ...  
99  165 
acTail[ik].append(bcktail*bcitail / 
100  166 
(np.mean([bcktail, bcitail])**2))''' 
101  167  
168  
102  169 
perc = 5 
103 
memoryMax = 40


170 
memoryMax = 70


104  171 
klim = 200 
105 
acTop = defaultdict(list) 

106 
acTail = defaultdict(list) 

107 
# Per tanti istanti di inizio detti k 

172 
# ACF arrays keyed by the start instant t: one pair of dicts for the raw
# BC series and one pair for the rank series.
acTop, acTail = {}, {}
acTopRank, acTailRank = {}, {}

# Options shared by every acf() call below.
# NOTE(review): recent statsmodels releases spell the `unbiased` keyword
# `adjusted` -- confirm against the installed version.
acf_opts = dict(nlags=memoryMax, unbiased=True, fft=True)

for t in tqdm(range(0, 900, 10)):
    # First entry returned by topNodes / tailNodes for instant t.
    topn = topNodes(t, perc)[0]
    tailn = tailNodes(t, perc)[0]

    # Slice t:t+klim of the selected column, for values and for ranks.
    rows = slice(t, t + klim)
    topSeries = bcdf.iloc[:, topn][rows]
    tailSeries = bcdf.iloc[:, tailn][rows]
    topRSeries = rankbcdf.iloc[:, topn][rows]
    tailRSeries = rankbcdf.iloc[:, tailn][rows]

    acTop[t] = acf(topSeries, **acf_opts)
    acTail[t] = acf(tailSeries, **acf_opts)
    acTopRank[t] = acf(topRSeries, **acf_opts)
    acTailRank[t] = acf(tailRSeries, **acf_opts)

199  
200 
# Mean ACF of the BC series, top node versus tail node.
# acTop / acTail map a start instant to an ACF array, so each frame has
# one column per start instant and one row per lag; transposing and
# averaging yields the mean over start instants at every lag.
acTopDF = pd.DataFrame(acTop)
acTailDF = pd.DataFrame(acTail)

acTopDF.T.mean().plot(label='Top')
acTailDF.T.mean().plot(label='Tail')
# Symmetric range around zero; identical lower and upper limits would
# give a degenerate axis.
plt.ylim(-1, 1)
plt.ylabel('ACF...')
plt.xlabel('Tau')
plt.legend()
plt.grid()
plt.savefig(nick+"ACFtopVSTail.pdf", format='pdf')
plt.clf()
# plt.show()

# Same figure for the rank series.
acTopRDF = pd.DataFrame(acTopRank)
acTailRDF = pd.DataFrame(acTailRank)

acTopRDF.T.mean().plot(label='TopR')
acTailRDF.T.mean().plot(label='TailR')
plt.ylim(-1, 1)
plt.ylabel('ACF_rank...')
plt.xlabel('Tau')
plt.legend()
plt.grid()
plt.savefig(nick+"ACFrank_topVSTail.pdf", format='pdf')
plt.clf()

226  
227  
228 
# Example traces: BC values of the top / tail series as last assigned
# above, saved to a PDF.
topSeries.plot(label='Top')
tailSeries.plot(label='Tail')
plt.xlabel('Time')
plt.ylabel('BC')
plt.grid()
plt.legend()
plt.savefig(nick+"topVStailExample.pdf", format='pdf')
plt.clf()
# plt.show()

# Rank traces of the same two columns over the slice t:t+klim, using the
# current values of t, topn and tailn.
example_rows = slice(t, t + klim)
rankbcdf.iloc[:, topn][example_rows].plot(label='TopRank')
rankbcdf.iloc[:, tailn][example_rows].plot(label='TailRank')
plt.xlabel('Time')
plt.ylabel('Rank (1==HighestBC)')
plt.grid()
plt.legend()
plt.savefig(nick+"topVStailRankExample.pdf", format='pdf')
plt.clf()
# plt.show()

# Empty containers; nothing in the visible code fills them yet.
time2top, time2tail = {}, {}

251  
252 
#code.interact(local=dict(globals(), **locals())) 

253  
254 
'''# Per tanti istanti di inizio detti k 

108  255 
for k in tqdm(range(1, klim)): 
109  256 
# Prendi i top e tail nodi a quell'istante k 
110  257 
topn = topNodes(k, perc) 
...  ...  
112  259 
# Calcola la Normalized Istantaneous Correlation, 
113  260 
p = Pool(num_workers) 
114  261 
timeSeries = [] 
115 
#code.interact(local=dict(globals(), **locals())) 

262 
# code.interact(local=dict(globals(), **locals()))


116  263 
for top in topn: 
117 
params = {'v': bcdf.iloc[:,top][k:], 'nlags':memoryMax, 'timeDepth': klim} 

264 
params = {'v': bcdf.iloc[:, top][k:], 

265 
'nlags': memoryMax, 'timeDepth': klim} 

118  266 
timeSeries.append(params) 
119 
#tacf = sampleACF(bcdf.iloc[:,top][k:], nlags=memoryMax, timeDepth=100) 

267 
# tacf = sampleACF(bcdf.iloc[:,top][k:], nlags=memoryMax, timeDepth=100)


120  268 
res = p.map(sampleACF, timeSeries) 
121  269 
for r in res: 
122  270 
for lag in range(0, len(r)): 
123  271 
acTop[lag].append(r[lag]) 
124  272  
125  273 
for tail in tailn: 
126 
params = {'v': bcdf.iloc[:,tail][k:], 'nlags':memoryMax, 'timeDepth': klim} 

274 
params = {'v': bcdf.iloc[:, tail][k:], 

275 
'nlags': memoryMax, 'timeDepth': klim} 

127  276 
timeSeries.append(params) 
128 
#tacf = sampleACF(bcdf.iloc[:,top][k:], nlags=memoryMax, timeDepth=100) 

277 
# tacf = sampleACF(bcdf.iloc[:,top][k:], nlags=memoryMax, timeDepth=100)


129  278 
res = p.map(sampleACF, timeSeries) 
130  279 
for r in res: 
131  280 
for lag in range(0, len(r)): 
132  281 
acTail[lag].append(r[lag]) 
133  282 
p.close() 
134 


135 
#code.interact(local=dict(globals(), **locals())) 

283  
136  284 
pd.Series(map(np.mean, acTop.values())).plot(label='Top'+str(perc)+'%') 
137 
pd.Series(map(np.mean,acTail.values())).plot(label='Tail'+str(perc)+'%') 

285 
pd.Series(map(np.mean, acTail.values())).plot(label='Tail'+str(perc)+'%')


138  286 
plt.legend() 
139  287 
plt.xlabel('Tau') 
140  288 
plt.ylabel('Sample ACF') 
141 
plt.savefig(nick+"sampleACF.pdf", format='pdf') 

289 
plt.savefig(nick+"sampleACF.pdf", format='pdf')''' 
Also available in: Unified diff