Statistics
| Branch: | Revision:

mobicen / sampleACF.py @ 36331ad0

History | View | Annotate | Download (4.63 KB)

1
import seaborn as sns
2
import code  # code.interact(local=dict(globals(), **locals()))
3
from collections import deque
4
from scipy import stats
5
import matplotlib.pyplot as plt
6
from collections import defaultdict
7
import os
8
import sys
9
from statsmodels.graphics.tsaplots import plot_acf
10
from statsmodels.tsa.stattools import acf
11
import operator
12
from mpl_toolkits import mplot3d
13
import pandas as pd
14
from pprint import pprint
15
import numpy as np
16
import glob
17
from tqdm import tqdm
18
import matplotlib
19
from multiprocessing import Pool
20

    
21

    
22

    
23
def sampleACF(params):
    """Return the per-lag mean of a normalized lag product for one series.

    For every lag ``tau`` in ``range(nlags)`` and every start index ``k``
    inside the analysed window, the term
    ``v[k] * v[k+tau] / np.mean([v[k], v[k+tau]])**2`` is computed, but only
    for pairs in which both samples are strictly positive.  The terms are
    then averaged per lag.

    The arguments arrive packed in one dict so the function can be handed
    directly to ``Pool.map``.

    Args:
        params: dict with the keys
            ``'v'``: the series; it is sliced with ``[:timeDepth]`` and must
            offer ``.to_list()`` (the caller in this file passes a pandas
            Series).
            ``'nlags'``: number of lags to evaluate (``0 .. nlags-1``).
            ``'timeDepth'``: maximum number of leading samples to use.

    Returns:
        list: the mean term of each lag, in increasing lag order.  A lag for
        which no strictly-positive pair exists is omitted, so the list can be
        shorter than ``nlags`` and list positions then no longer coincide
        with lag numbers.
    """
    nlags = params['nlags']
    timeDepth = params['timeDepth']
    v = params['v'][:timeDepth].to_list()
    # The series may hold fewer than timeDepth samples; bound the window by
    # the data actually available so v[k + tau] never runs past the end.
    depth = min(timeDepth, len(v))
    acf_t = defaultdict(list)
    for tau in range(nlags):
        for k in range(depth - tau):
            first, second = v[k], v[k + tau]
            if first > 0 and second > 0:
                acf_t[tau].append(
                    first * second / (np.mean([first, second]) ** 2))
    # Build a real list (callers use len() and indexing) and fix the order
    # by lag instead of relying on dict iteration order.
    return [np.mean(acf_t[tau]) for tau in sorted(acf_t)]
35

    
36
def sampleACF2(v, nlags=40, timeDepth=500):
    """Compute the autocorrelation of the leading part of a series.

    Args:
        v: series that supports ``[:timeDepth]`` slicing and ``.to_list()``.
        nlags: forwarded to ``acf`` as ``nlags``.
        timeDepth: number of leading samples of ``v`` to analyse.

    Returns:
        pandas.Series wrapping the result of ``acf(..., fft=True)``.
    """
    window = v[:timeDepth].to_list()
    autocorrelation = acf(window, nlags=nlags, fft=True)
    return pd.Series(autocorrelation)
39

    
40
#mys.rank(method='first', ascending=False)
41

    
42
# Command line: <folder> <num_workers> [lags]
if len(sys.argv) < 3:
    sys.exit("usage: %s <folder> <num_workers> [lags]" % sys.argv[0])
folder = sys.argv[1]
num_workers = int(sys.argv[2])
# Optional third argument overrides the default of 100.
lags = int(sys.argv[3]) if len(sys.argv) > 3 else 100
# nick is the text before the first underscore of the folder's last path
# component.  Normalising the path first makes this work with or without a
# trailing slash; indexing split('/')[-2] needed the trailing slash.
nick = os.path.basename(os.path.normpath(folder)).split('_')[0]
# Everything below uses paths relative to the data folder.
os.chdir(folder)
49

    
50
# Load every ./stats* CSV of the current directory.  Each file contributes
# one column to each of the three frames below, so in all of them
# rows = CSV rows (the file's 'time' column is read but not used as index)
# and columns = one per stats file, in sorted-filename order.
#   bcdf    <- 'bc' column of every file
#   degdf   <- 'deg' column of every file
#   kcoredf <- 'kcore' column of every file
# Written as a single formatted string so the statement prints the same text
# under Python 2 and Python 3.
print("Loading data from %s ..." % folder)
bc_columns = []
deg_columns = []
kcore_columns = []
for snap in sorted(glob.glob('./stats*')):
    # str.strip() removes *sets of characters*, not a prefix/suffix; it
    # leaves the numeric id here because digits belong to neither set.
    # The value is not used further down; int() still rejects file names
    # that do not reduce to a number.
    node_id = int(snap.strip('.csv').strip('./stats'))
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
    bc_columns.append(df['bc'])
    deg_columns.append(df['deg'])
    kcore_columns.append(df['kcore'])

# Concatenate once instead of growing the frames file by file, which copied
# all previously loaded data on every iteration.  With no matching file the
# frames stay empty, as before.
bcdf = pd.concat(bc_columns, axis=1) if bc_columns else pd.DataFrame()
degdf = pd.concat(deg_columns, axis=1) if deg_columns else pd.DataFrame()
kcoredf = pd.concat(kcore_columns, axis=1) if kcore_columns else pd.DataFrame()
61

    
62
# One positional identifier per column of bcdf.
nodes = range(len(bcdf.columns))

# Output goes into "plots<nick>" below the current directory; create the
# directory on first use and make it the working directory.
plots_dir = "plots"+nick
if not os.path.exists(plots_dir):
    os.makedirs(plots_dir)
os.chdir(plots_dir)
69
# Plotting
70

    
71

    
72
def jaccard_similarity(x, y):
    """Return the Jaccard index |X & Y| / |X | Y| of two collections.

    Both arguments are converted to sets first, so duplicates and ordering
    are ignored.

    Args:
        x: iterable of hashable items.
        y: iterable of hashable items.

    Returns:
        float in [0, 1].  Two empty inputs are treated as identical and give
        1.0; previously this case raised ZeroDivisionError.
    """
    first, second = set(x), set(y)
    union = first | second
    if not union:
        return 1.0
    # float() keeps true division under Python 2 as well.
    return len(first & second) / float(len(union))
76

    
77

    
78
def topNodes(t, perc):
    """Return the column positions of ``bcdf`` with the largest values in row ``t``.

    Args:
        t: positional row index into the module-level ``bcdf``.
        perc: percentage of the columns to keep; the count is
            ``int(number_of_columns * perc / 100.0)``.

    Returns:
        list of int column positions, ordered by decreasing value.
    """
    values = bcdf.iloc[t].tolist()
    # sorted() is stable, so equal values keep their column order.
    ranked = sorted(enumerate(values), key=operator.itemgetter(1),
                    reverse=True)
    count = int(len(ranked) * (perc/100.0))
    return [int(position) for position, _ in ranked[:count]]
84

    
85

    
86
def tailNodes(t, perc):
    """Return the column positions of ``bcdf`` with the smallest values in row ``t``.

    Args:
        t: positional row index into the module-level ``bcdf``.
        perc: percentage of the columns to keep; the count is
            ``int(number_of_columns * perc / 100.0)``.

    Returns:
        list of int column positions, ordered by increasing value.
    """
    values = bcdf.iloc[t].tolist()
    # sorted() is stable, so equal values keep their column order.
    ranked = sorted(enumerate(values), key=operator.itemgetter(1),
                    reverse=False)
    count = int(len(ranked) * (perc/100.0))
    return [int(position) for position, _ in ranked[:count]]
92

    
93
# Disabled code, previously parked in a bare triple-quoted string; kept for
# reference only.
# for i in range(k, k+memoryMax):
#     bcktop, bcitop = bcdf.iloc[k, top], bcdf.iloc[i, top]
#     acTop[i-k].append(bcktop * bcitop / (np.mean([bcktop, bcitop])**2))
# for i in range(k, k+memoryMax):
#     bcktail, bcitail = bcdf.iloc[k, tail], bcdf.iloc[i, tail]
#     if (bcitail > 0 and bcktail > 0):
#         acTail[i-k].append(bcktail*bcitail /
#                            (np.mean([bcktail, bcitail])**2))

perc = 5        # percentage handed to topNodes() / tailNodes()
memoryMax = 40  # passed to sampleACF as 'nlags'
klim = 200      # exclusive upper bound for k; also passed as 'timeDepth'
acTop = defaultdict(list)   # position in sampleACF result -> values, top nodes
acTail = defaultdict(list)  # position in sampleACF result -> values, tail nodes

# One worker pool for the whole run, instead of a new pool per start instant
# that was closed but never joined.
pool = Pool(num_workers)
try:
    # For many start instants, called k
    for k in tqdm(range(1, klim)):
        # Take the top and tail nodes at that instant k
        topn = topNodes(k, perc)
        tailn = tailNodes(k, perc)
        # Compute the Normalized Instantaneous Correlation of each group.
        # Each group gets its own job list: reusing a single list made the
        # tail pass recompute the top nodes and add them to acTail.
        for selected, collected in ((topn, acTop), (tailn, acTail)):
            jobs = [{'v': bcdf.iloc[:, node][k:],
                     'nlags': memoryMax,
                     'timeDepth': klim}
                    for node in selected]
            for node_acf in pool.map(sampleACF, jobs):
                for lag, value in enumerate(node_acf):
                    collected[lag].append(value)
finally:
    pool.close()
    pool.join()

# Average over all (k, node) samples.  Keys are visited in sorted order and
# real lists are built, so the curves do not depend on dict iteration order
# or on map() returning a list.
top_curve = [np.mean(acTop[lag]) for lag in sorted(acTop)]
tail_curve = [np.mean(acTail[lag]) for lag in sorted(acTail)]
pd.Series(top_curve).plot(label='Top-'+str(perc)+'%')
pd.Series(tail_curve).plot(label='Tail-'+str(perc)+'%')
plt.legend()
plt.xlabel('Tau')
plt.ylabel('Sample ACF')
plt.savefig(nick+"sampleACF.pdf", format='pdf')