Revision 05763cfb sampleACF.py

View differences:

sampleACF.py
29 29
os.chdir(folder)
30 30

  
31 31

  
32

  
33 32
'''def nistRH(v, h):
34 33
    N, mu, var = len(v), np.mean(v), np.var(v)
35 34
    Ch = 0.0
......
72 71

  
73 72
rankbcdf = pd.DataFrame()
74 73
for t in range(0, len(bcdf)):
75
    r=bcdf.iloc[t].rank(method='first', ascending=False).reset_index(drop=True)
74
    r = bcdf.iloc[t].rank(
75
        method='first', ascending=False).reset_index(drop=True)
76 76
    rankbcdf = rankbcdf.append(r, ignore_index=True)
77 77

  
78
    
79 78

  
80 79
nodes = range(len(bcdf.columns))
81 80

  
......
108 107
    coreNodes = [int(e[0]) for e in srtd_BC[:upto]]
109 108
    return coreNodes
110 109

  
111
def jaccard_top_CF(maxt=100, nlags=20, perc=5):
112
    N = maxt
113
    memoSet = {}
114
    for i in range(0, N):
115
        memoSet[i] = topNodes(i, perc)
116
    retval = []
117
    for tau in range(0, nlags):
118
        jtau = 0.0
119
        for t in range(0, N - tau):
120
            jtau += jaccard_similarity(memoSet[t], memoSet[t+tau])
121
        jtau /= N - tau
122
        retval.append(jtau)
123
    return retval
124 110

  
125
def jaccard_tail_CF(maxt=100, nlags=20, perc=5):
111
def jaccard_CF(start=0, maxt=100, nlags=20, perc=5, top=True):
126 112
    N = maxt
127 113
    memoSet = {}
128 114
    for i in range(0, N):
129
        memoSet[i] = tailNodes(i, perc)
115
        if top:
116
            memoSet[i] = topNodes(start+i, perc)
117
        else:
118
            memoSet[i] = tailNodes(start+i, perc)
130 119
    retval = []
131
    for tau in range(0, nlags):
120
    for tau in range(0, nlags+1):
132 121
        jtau = 0.0
133 122
        for t in range(0, N - tau):
134 123
            jtau += jaccard_similarity(memoSet[t], memoSet[t+tau])
135 124
        jtau /= N - tau
136 125
        retval.append(jtau)
137 126
    return retval
138
p = 2
127

  
128

  
129
#code.interact(local=dict(globals(), **locals()))
130

  
131
colors = iter(['k', 'r', 'b', 'g', 'y', 'c'])
132
styles = iter(['s', 'o', '^', '*', 'p'])
133
p = 5
134
for klen in [25, 50, 100, 200, 500]:
135
    pd.Series(jaccard_CF(start=0, maxt=klen, perc=p, top=True)).plot.line(
136
        label='N='+str(klen), style=next(colors)+next(styles)+'-')
137

  
138
plt.legend()
139
plt.xticks(range(0,21))
140
plt.yticks(np.arange(0,1.0,0.1))
141
plt.grid()
142
plt.ylim(0, 1.0)
143
plt.title("start=t0")
144
plt.xlabel('Tau')
145
plt.ylabel('Jaccard Correlation')
146
plt.savefig(nick+"jaccard_ACF_changingN.pdf", format='pdf')
147
plt.clf()
148
#plt.show()
149

  
150
colors = iter(['k', 'r', 'b', 'g', 'y', 'c'])
151
styles = iter(['s', 'o', '^', '*', 'p'])
152
p = 5
153
for s in [0, 50, 100, 200, 500]:
154
    pd.Series(jaccard_CF(start=s, maxt=50, perc=p, top=True)).plot.line(
155
        label='start='+str(s), style=next(colors)+next(styles)+'-')
156

  
157
plt.legend()
158
plt.xticks(range(0,21))
159
plt.yticks(np.arange(0,1.0,0.1))
160
plt.grid()
161
plt.title("N=50")
162
plt.xlabel('Tau')
163
plt.ylabel('Jaccard Correlation')
164
plt.ylim(0, 1.0)
165
plt.savefig(nick+"jaccard_ACF_changingSTART.pdf", format='pdf')
166
plt.clf()
167

  
168
perc = 20
169
t0 = topNodes(0, perc)
170
l0 = [jaccard_similarity(t0, topNodes(t, perc)) for t in range(1, 70)]
171
t100 = topNodes(100, perc)
172
l100 = [jaccard_similarity(t100, topNodes(100+t, perc)) for t in range(1, 70)]
173
t500 = topNodes(500, perc)
174
l500 = [jaccard_similarity(t500, topNodes(500+t, perc)) for t in range(1, 70)]
175
t750 = topNodes(750, perc)
176
l750 = [jaccard_similarity(t750, topNodes(750+t, perc)) for t in range(1, 70)]
177

  
178
pd.Series(l0).plot(label="start = t0", linewidth=1.5)
179
pd.Series(l100).plot(label="start = t100", linewidth=1.5)
180
pd.Series(l500).plot(label="start = t500", linewidth=1.5)
181
pd.Series(l750).plot(label="start = t750", linewidth=1.5)
182
plt.ylabel("Jaccard Similarity")
183
plt.xlabel("tau")
184
plt.title("Top Set size = "+str(len(t0))+", "+str(perc)+"% of nodes")
185
plt.legend()
186
plt.xticks(range(0,70,5))
187
plt.yticks(np.arange(0,1.1,0.1))
188
plt.ylim(-0.02,1.02)
189
plt.grid()
190
plt.savefig(nick+"jaccard_scaletta.pdf", format='pdf')
191
plt.clf()
192
#t0 = [jaccard_similarity(topNodes(t, perc), topNodes(t+1, perc)) for t in range(0,100)]
193

  
194

  
195
'''p = 80
139 196
pd.Series(jaccard_top_CF(perc=p)).plot(label='Top')
140 197
pd.Series(jaccard_tail_CF(perc=p)).plot(label='Tail')
198
plt.ylabel("Jaccard Similarity")
141 199
plt.legend()
142 200
plt.savefig(nick+"Jaccard-tau1-perc="+str(p)+".pdf", format='pdf')
143 201
plt.clf()
......
152 210

  
153 211
for t in range(0,100):
154 212
    x,y=tailNodes(t,15), tailNodes(t+1,15)
155
    jtail1.append(jaccard_similarity(x,y))
156

  
213
    jtail1.append(jaccard_similarity(x,y))'''
157 214

  
158 215

  
159 216
'''for i in range(k, k+memoryMax):
......
192 249
    tailRSeries = rankbcdf.iloc[:, tailn][t:t+klim]
193 250
    tailRacf = acf(tailRSeries, nlags=memoryMax, unbiased=True, fft=True)
194 251

  
195
    acTop[t]=topacf
196
    acTail[t]=tailacf
197
    acTopRank[t]=topRacf
198
    acTailRank[t]=tailRacf
252
    acTop[t] = topacf
253
    acTail[t] = tailacf
254
    acTopRank[t] = topRacf
255
    acTailRank[t] = tailRacf
199 256

  
200
acTopDF=pd.DataFrame(acTop)
201
acTailDF=pd.DataFrame(acTail)
257
acTopDF = pd.DataFrame(acTop)
258
acTailDF = pd.DataFrame(acTail)
202 259

  
203 260
acTopDF.T.mean().plot(label='Top')
204 261
acTailDF.T.mean().plot(label='Tail')
205
plt.ylim(-1,1)
262
plt.ylim(-1, 1)
206 263
plt.ylabel('ACF...')
207 264
plt.xlabel('Tau')
208 265
plt.legend()
209 266
plt.grid()
210 267
plt.savefig(nick+"ACFtopVSTail.pdf", format='pdf')
211 268
plt.clf()
212
#plt.show()
269
# plt.show()
213 270

  
214
acTopRDF=pd.DataFrame(acTopRank)
215
acTailRDF=pd.DataFrame(acTailRank)
271
acTopRDF = pd.DataFrame(acTopRank)
272
acTailRDF = pd.DataFrame(acTailRank)
216 273

  
217 274
acTopRDF.T.mean().plot(label='TopR')
218 275
acTailRDF.T.mean().plot(label='TailR')
219
plt.ylim(-1,1)
276
plt.ylim(-1, 1)
220 277
plt.ylabel('ACF_rank...')
221 278
plt.xlabel('Tau')
222 279
plt.legend()
......
233 290
plt.xlabel('Time')
234 291
plt.savefig(nick+"topVStailExample.pdf", format='pdf')
235 292
plt.clf()
236
#plt.show()
293
# plt.show()
237 294

  
238
rankbcdf.iloc[:,topn][t:t+klim].plot(label='TopRank')
239
rankbcdf.iloc[:,tailn][t:t+klim].plot(label='TailRank')
295
rankbcdf.iloc[:, topn][t:t+klim].plot(label='TopRank')
296
rankbcdf.iloc[:, tailn][t:t+klim].plot(label='TailRank')
240 297
plt.legend()
241 298
plt.ylabel('Rank (1==HighestBC)')
242 299
plt.xlabel('Time')
243 300
plt.grid()
244 301
plt.savefig(nick+"topVStailRankExample.pdf", format='pdf')
245 302
plt.clf()
246
#plt.show()
303
# plt.show()
247 304

  
248 305

  
249 306
time2top = {}
250 307
time2tail = {}
251 308

  
252
#code.interact(local=dict(globals(), **locals()))
253 309

  
254 310
'''# Per tanti istanti di inizio detti k
255 311
for k in tqdm(range(1, klim)):

Also available in: Unified diff