Statistics
| Branch: | Revision:

mobicen / sampleACF.py @ 0412dba8

History | View | Annotate | Download (7.98 KB)

1
import code  # code.interact(local=dict(globals(), **locals()))
2
from collections import deque
3
from scipy import stats
4
import matplotlib.pyplot as plt
5
from collections import defaultdict
6
import os
7
import sys
8
from statsmodels.graphics.tsaplots import plot_acf
9
from statsmodels.tsa.stattools import acf
10
import operator
11
import pandas as pd
12
from pprint import pprint
13
import numpy as np
14
import glob
15
from tqdm import tqdm
16
from multiprocessing import Pool
17

    
18

    
19
# mys.rank(method='first', ascending=False)
20

    
21
# Command line: <folder> [num_workers] [lags]
#   folder       directory the script changes into before reading stats* files
#   num_workers  optional int, default 1 (only referenced by the disabled
#                Pool-based code further down in this file)
#   lags         optional int, default 100 (not used by the active code
#                visible in this file)
folder = sys.argv[1]
num_workers = int(sys.argv[2]) if len(sys.argv) > 2 else 1
lags = int(sys.argv[3]) if len(sys.argv) > 3 else 100

# Nickname = last path component of `folder`, up to its first underscore.
# normpath() drops a trailing slash, so "runs/foo_1/" and "runs/foo_1" both
# give "foo"; the former split('/')[-2] only worked with the trailing slash.
nick = os.path.basename(os.path.normpath(folder)).split('_')[0]
os.chdir(folder)
30

    
31

    
32

    
33
'''def nistRH(v, h):
34
    N, mu, var = len(v), np.mean(v), np.var(v)
35
    Ch = 0.0
36
    for t in range(0, N-h):
37
        Ch += (v[t]-mu)*(v[t+h]-mu)
38
    return (Ch/(N-h))/var
39

40

41
def sampleACF(params):
42
    v = params['v']
43
    nlags = params['nlags']
44
    timeDepth = params['timeDepth']
45
    v = v[:timeDepth].to_list()
46
    assert len(v) == timeDepth
47
    acf_t = defaultdict(list)
48
    for tau in range(0, nlags):
49
        for k in range(0, timeDepth-tau):
50
            if (v[k] > 0 and v[k+tau] > 0):
51
                acf_kt = v[k] * v[k+tau] / (np.mean([v[k], v[k+tau]])**2)
52
                acf_t[tau].append(acf_kt)
53
    return map(np.mean, acf_t.values())
54

55

56
def sampleACF2(v, nlags=40, timeDepth=500):
57
    v = v[:timeDepth].to_list()
58
    return pd.Series(acf(v, nlags=nlags, fft=True, unbiased=True))'''
59

    
60

    
61
# Load every ./stats* CSV of the current directory (sorted by file name).
# Each file contributes one column to each table; rows follow the CSV rows.
#   bcdf    : values of the 'bc' column
#   degdf   : values of the 'deg' column
#   kcoredf : values of the 'kcore' column
# Single-argument print() call: same output on Python 2 and Python 3.
print("Loading data from %s ..." % folder)
bc_cols, deg_cols, kcore_cols = [], [], []
for snap in sorted(glob.glob('./stats*')):
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
    bc_cols.append(df['bc'])
    deg_cols.append(df['deg'])
    kcore_cols.append(df['kcore'])

# Concatenate once instead of once per file (the per-file concat re-copied
# the growing table every iteration). No input files -> empty tables, as before.
bcdf = pd.concat(bc_cols, axis=1) if bc_cols else pd.DataFrame()
degdf = pd.concat(deg_cols, axis=1) if deg_cols else pd.DataFrame()
kcoredf = pd.concat(kcore_cols, axis=1) if kcore_cols else pd.DataFrame()

# rankbcdf[t][i] = rank of column i within row t of bcdf (1 == highest BC,
# ties broken by column order). Ranking row-wise in one call replaces the
# row-by-row DataFrame.append loop, which is quadratic and no longer exists
# in pandas 2.x. Columns are relabelled 0..n-1 so they are positional.
rankbcdf = bcdf.rank(axis=1, method='first', ascending=False)
rankbcdf.columns = range(len(rankbcdf.columns))
77

    
78
    
79

    
80
# Positional identifiers of the loaded nodes (one per bcdf column).
nodes = range(len(bcdf.columns))

# All figures are written inside "plots<nick>", created on first use.
plots_dir = "plots"+nick
if not os.path.exists(plots_dir):
    os.makedirs(plots_dir)
os.chdir(plots_dir)
87
# Plotting
88

    
89

    
90
def jaccard_similarity(x, y):
    """Return the Jaccard index of two collections of hashable items.

    Both arguments are converted to sets, so duplicates and ordering are
    ignored. The result is len(intersection) / len(union) as a float in
    the range [0.0, 1.0].

    Two empty collections are treated as identical and give 1.0. The
    earlier version raised ZeroDivisionError in that case, which is hit as
    soon as a top/tail percentage selects no node at all.
    """
    first, second = set(x), set(y)
    union_cardinality = len(first | second)
    if union_cardinality == 0:
        return 1.0
    return len(first & second) / float(union_cardinality)
94

    
95

    
96
def _nodesByValue(t, perc, descending, df=None):
    """Return the first perc% of column positions of row t, ordered by value.

    Shared implementation of topNodes() and tailNodes().

    t          -- row (time instant) to inspect
    perc       -- percentage (0-100) of the columns to keep; the count is
                  truncated towards zero, so small values may select nothing
    descending -- True for largest values first, False for smallest first
    df         -- table to rank; defaults to the module-level bcdf
    """
    table = bcdf if df is None else df
    # reset_index() relabels the columns 0..n-1 so the result is positional.
    values = table.iloc[t].reset_index(drop=True).to_dict()
    ordered = sorted(values.items(), key=operator.itemgetter(1),
                     reverse=descending)
    upto = int(len(ordered) * (perc/100.0))
    return [int(node) for node, _ in ordered[:upto]]


def topNodes(t, perc, df=None):
    """Return the positions of the perc% highest-valued nodes at time t.

    Nodes are ordered from the highest value downwards. `df` selects the
    table to rank and defaults to the module-level bcdf.
    """
    return _nodesByValue(t, perc, True, df)


def tailNodes(t, perc, df=None):
    """Return the positions of the perc% lowest-valued nodes at time t.

    Nodes are ordered from the lowest value upwards. `df` selects the
    table to rank and defaults to the module-level bcdf.
    """
    return _nodesByValue(t, perc, False, df)
110

    
111
def _jaccard_CF(select, maxt, nlags, perc):
    """Average Jaccard similarity of select(t, perc) against itself at lag tau.

    For every lag tau the similarity between the node sets selected at
    times t and t+tau is averaged over t in [0, maxt - tau).

    select -- callable (t, perc) -> collection of nodes
    Returns one float per lag, for tau = 0 .. min(nlags, maxt) - 1. Lags
    are capped at maxt because a lag of maxt has no (t, t+tau) pair inside
    the window and used to end in a ZeroDivisionError.
    """
    N = maxt
    # Select each time instant once; sets avoid re-hashing on every compare.
    memoSet = dict((i, set(select(i, perc))) for i in range(N))
    retval = []
    for tau in range(min(nlags, N)):
        pairs = N - tau
        total = sum((jaccard_similarity(memoSet[t], memoSet[t+tau])
                     for t in range(pairs)), 0.0)
        retval.append(total / pairs)
    return retval


def jaccard_top_CF(maxt=100, nlags=20, perc=5):
    """Lagged Jaccard similarity of the top-perc% node sets (see topNodes).

    maxt  -- number of time instants (0 .. maxt-1) taken into account
    nlags -- number of lags to evaluate, starting from lag 0
    perc  -- percentage of nodes forming the top set
    Returns a list with one averaged similarity per lag.
    """
    return _jaccard_CF(topNodes, maxt, nlags, perc)


def jaccard_tail_CF(maxt=100, nlags=20, perc=5):
    """Lagged Jaccard similarity of the tail-perc% node sets (see tailNodes).

    maxt  -- number of time instants (0 .. maxt-1) taken into account
    nlags -- number of lags to evaluate, starting from lag 0
    perc  -- percentage of nodes forming the tail set
    Returns a list with one averaged similarity per lag.
    """
    return _jaccard_CF(tailNodes, maxt, nlags, perc)
138
# Lagged Jaccard similarity of the top and tail p% node sets, one curve each.
p = 2
for curve_label, curve_fn in (('Top', jaccard_top_CF),
                              ('Tail', jaccard_tail_CF)):
    pd.Series(curve_fn(perc=p)).plot(label=curve_label)
plt.legend()
plt.savefig(nick+"Jaccard-tau1-perc="+str(p)+".pdf", format='pdf')
plt.clf()
144

    
145
# Jaccard similarity between the 15% top (resp. tail) node sets of each pair
# of consecutive time instants t, t+1 for t = 0..99.
jtop1 = [jaccard_similarity(topNodes(t, 15), topNodes(t+1, 15))
         for t in range(0, 100)]

jtail1 = [jaccard_similarity(tailNodes(t, 15), tailNodes(t+1, 15))
          for t in range(0, 100)]
156

    
157

    
158

    
159
'''for i in range(k, k+memoryMax):
160
            bcktop, bcitop = bcdf.iloc[k, top], bcdf.iloc[i, top]
161
            acTop[i-k].append(bcktop * bcitop / (np.mean([bcktop, bcitop])**2))
162
for i in range(k, k+memoryMax):
163
            bcktail, bcitail = bcdf.iloc[k, tail], bcdf.iloc[i, tail]
164
            if (bcitail > 0 and bcktail > 0):
165
                acTail[i-k].append(bcktail*bcitail /
166
                                   (np.mean([bcktail, bcitail])**2))'''
167

    
168

    
169
perc = 5          # percentage of nodes forming the top / tail sets
memoryMax = 70    # largest lag passed to acf()
klim = 200        # length of the time window handed to acf()

# Start instant t -> ACF of the BC series (acTop/acTail) and of the BC-rank
# series (acTopRank/acTailRank) of the first top / tail node at instant t.
acTop, acTail = {}, {}
acTopRank, acTailRank = {}, {}

for t in tqdm(range(0, 900, 10)):
    # Only the first node of each set is followed.
    topn = topNodes(t, perc)[0]
    tailn = tailNodes(t, perc)[0]
    window = slice(t, t+klim)

    # Raw BC and BC-rank series of both nodes over the window [t, t+klim).
    topSeries = bcdf.iloc[:, topn][window]
    tailSeries = bcdf.iloc[:, tailn][window]
    topRSeries = rankbcdf.iloc[:, topn][window]
    tailRSeries = rankbcdf.iloc[:, tailn][window]

    acTop[t] = topacf = acf(topSeries, nlags=memoryMax,
                            unbiased=True, fft=True)
    acTail[t] = tailacf = acf(tailSeries, nlags=memoryMax,
                              unbiased=True, fft=True)
    acTopRank[t] = topRacf = acf(topRSeries, nlags=memoryMax,
                                 unbiased=True, fft=True)
    acTailRank[t] = tailRacf = acf(tailRSeries, nlags=memoryMax,
                                   unbiased=True, fft=True)
199

    
200
# One column per start instant, one row per lag; the plotted curve is the
# per-lag mean over all start instants.
acTopDF = pd.DataFrame(acTop)
acTailDF = pd.DataFrame(acTail)

acTopDF.mean(axis=1).plot(label='Top')
acTailDF.mean(axis=1).plot(label='Tail')
plt.ylim(-1, 1)
plt.xlabel('Tau')
plt.ylabel('ACF...')
plt.grid()
plt.legend()
plt.savefig(nick+"ACFtopVSTail.pdf", format='pdf')
plt.clf()
#plt.show()

# Same figure for the ACF of the BC-rank series.
acTopRDF = pd.DataFrame(acTopRank)
acTailRDF = pd.DataFrame(acTailRank)

acTopRDF.mean(axis=1).plot(label='TopR')
acTailRDF.mean(axis=1).plot(label='TailR')
plt.ylim(-1, 1)
plt.xlabel('Tau')
plt.ylabel('ACF_rank...')
plt.grid()
plt.legend()
plt.savefig(nick+"ACFrank_topVSTail.pdf", format='pdf')
plt.clf()
226

    
227

    
228
# Example figure: raw BC series of the top and tail node left over from the
# last iteration of the ACF loop.
topSeries.plot(label='Top')
tailSeries.plot(label='Tail')
plt.xlabel('Time')
plt.ylabel('BC')
plt.grid()
plt.legend()
plt.savefig(nick+"topVStailExample.pdf", format='pdf')
plt.clf()
#plt.show()

# Same example for the BC rank of those two nodes over the same window.
exampleWindow = slice(t, t+klim)
exampleTopRank = rankbcdf.iloc[:, topn][exampleWindow]
exampleTailRank = rankbcdf.iloc[:, tailn][exampleWindow]
exampleTopRank.plot(label='TopRank')
exampleTailRank.plot(label='TailRank')
plt.xlabel('Time')
plt.ylabel('Rank (1==HighestBC)')
plt.grid()
plt.legend()
plt.savefig(nick+"topVStailRankExample.pdf", format='pdf')
plt.clf()
#plt.show()

time2top, time2tail = {}, {}
251

    
252
#code.interact(local=dict(globals(), **locals()))
253

    
254
'''# Per tanti istanti di inizio detti k
255
for k in tqdm(range(1, klim)):
256
    # Prendi i top e tail nodi a quell'istante k
257
    topn = topNodes(k, perc)
258
    tailn = tailNodes(k, perc)
259
    # Calcola la Normalized Istantaneous Correlation,
260
    p = Pool(num_workers)
261
    timeSeries = []
262
    # code.interact(local=dict(globals(), **locals()))
263
    for top in topn:
264
        params = {'v': bcdf.iloc[:, top][k:],
265
                  'nlags': memoryMax, 'timeDepth': klim}
266
        timeSeries.append(params)
267
        # tacf = sampleACF(bcdf.iloc[:,top][k:], nlags=memoryMax, timeDepth=100)
268
    res = p.map(sampleACF, timeSeries)
269
    for r in res:
270
        for lag in range(0, len(r)):
271
            acTop[lag].append(r[lag])
272

273
    for tail in tailn:
274
        params = {'v': bcdf.iloc[:, tail][k:],
275
                  'nlags': memoryMax, 'timeDepth': klim}
276
        timeSeries.append(params)
277
        # tacf = sampleACF(bcdf.iloc[:,top][k:], nlags=memoryMax, timeDepth=100)
278
    res = p.map(sampleACF, timeSeries)
279
    for r in res:
280
        for lag in range(0, len(r)):
281
            acTail[lag].append(r[lag])
282
    p.close()
283

284
pd.Series(map(np.mean, acTop.values())).plot(label='Top-'+str(perc)+'%')
285
pd.Series(map(np.mean, acTail.values())).plot(label='Tail-'+str(perc)+'%')
286
plt.legend()
287
plt.xlabel('Tau')
288
plt.ylabel('Sample ACF')
289
plt.savefig(nick+"sampleACF.pdf", format='pdf')'''