Revision 36331ad0

View differences:

sampleACF.py
1
import seaborn as sns
2
import code  # code.interact(local=dict(globals(), **locals()))
3
from collections import deque
4
from scipy import stats
5
import matplotlib.pyplot as plt
6
from collections import defaultdict
7
import os
8
import sys
9
from statsmodels.graphics.tsaplots import plot_acf
10
from statsmodels.tsa.stattools import acf
11
import operator
12
from mpl_toolkits import mplot3d
13
import pandas as pd
14
from pprint import pprint
15
import numpy as np
16
import glob
17
from tqdm import tqdm
18
import matplotlib
19
from multiprocessing import Pool
20

  
21

  
22

  
23
def sampleACF(params):
    """Return the mean normalized instantaneous correlation for each lag.

    The single dict argument keeps the function usable with ``Pool.map``.

    Args:
        params: dict with the keys
            'v'         -- pandas Series holding the time series,
            'nlags'     -- number of lags to evaluate (tau = 0 .. nlags-1),
            'timeDepth' -- maximum number of leading samples of 'v' to use.

    Returns:
        A list with one mean value per lag, in increasing lag order.  For a
        lag tau, every pair (v[k], v[k+tau]) whose two samples are both > 0
        contributes ``v[k] * v[k+tau] / mean(v[k], v[k+tau]) ** 2``.
        Lags for which no such pair exists are omitted, so in that case the
        position in the list does not coincide with the lag.
    """
    nlags = params['nlags']
    timeDepth = params['timeDepth']
    series = np.asarray(params['v'][:timeDepth].to_list(), dtype=float)
    # Use the number of samples actually available: the series may be
    # shorter than timeDepth, and indexing up to timeDepth would overflow.
    n = len(series)

    means = []
    for tau in range(0, nlags):
        if tau >= n:
            break  # no pair (k, k+tau) fits in the series any more
        head = series[:n - tau]
        shifted = series[tau:]
        # Only strictly positive pairs are meaningful for the normalization.
        valid = (head > 0) & (shifted > 0)
        if not valid.any():
            continue
        a = head[valid]
        b = shifted[valid]
        means.append(np.mean(a * b / ((a + b) / 2.0) ** 2))
    # A concrete list (not a lazy map) so the result can be pickled back
    # from worker processes and supports len()/indexing.
    return means
35

  
36
def sampleACF2(v, nlags=40, timeDepth=500):
    """Autocorrelation of the first ``timeDepth`` samples of ``v``.

    The leading slice of the Series ``v`` is converted to a plain list and
    handed to ``acf`` (called with ``fft=True``); whatever ``acf`` returns
    is wrapped in a pandas Series.
    """
    window = v[:timeDepth].to_list()
    autocorr = acf(window, nlags=nlags, fft=True)
    return pd.Series(autocorr)
39

  
40
#mys.rank(method='first', ascending=False)
41

  
42
# Command line: <folder> <num_workers> [lags]
folder = sys.argv[1]
num_workers = int(sys.argv[2])
lags = 100
if len(sys.argv) > 3:
    lags = int(sys.argv[3])
# Nickname = part of the data directory's own name before the first '_'.
# normpath/basename give the same answer with or without a trailing slash
# (splitting on '/' and taking [-2] only worked with a trailing slash).
nick = os.path.basename(os.path.normpath(folder)).split('_')[0]
os.chdir(folder)

# Each stats* file contributes one column to every frame below, so:
#   rows = time instants (file rows), columns = one per stats* file (node).
print("Loading data from %s ..." % folder)
bc_columns = []
deg_columns = []
kcore_columns = []
for snap in sorted(glob.glob('./stats*')):
    df = pd.read_csv(snap, names=['time', 'bc', 'deg', 'kcore'], skiprows=1)
    bc_columns.append(df['bc'])
    deg_columns.append(df['deg'])
    kcore_columns.append(df['kcore'])

# Concatenate once instead of growing the frames inside the loop (which
# re-copies every previously loaded column on each iteration).
bcdf = pd.concat(bc_columns, axis=1) if bc_columns else pd.DataFrame()
degdf = pd.concat(deg_columns, axis=1) if deg_columns else pd.DataFrame()
kcoredf = pd.concat(kcore_columns, axis=1) if kcore_columns else pd.DataFrame()

# Nodes are addressed by column position.
nodes = range(len(bcdf.columns))

# All figures are written into a "plots<nick>" directory inside the folder.
if not os.path.exists("plots"+nick):
    os.makedirs("plots"+nick)

os.chdir("plots"+nick)
# Plotting
69
# Plotting
70

  
71

  
72
def jaccard_similarity(x, y):
    """Return the Jaccard similarity |X & Y| / |X | Y| of two iterables.

    Both arguments are converted to sets, so duplicates and ordering are
    ignored.  Two empty inputs are treated as identical and yield 1.0
    instead of raising ZeroDivisionError.
    """
    first, second = set(x), set(y)
    union = first | second
    if not union:
        # Both inputs are empty: the ratio is undefined (0/0).
        return 1.0
    return len(first & second) / float(len(union))
76

  
77

  
78
def topNodes(t, perc):
    """Return the positions of the highest-valued columns of ``bcdf`` at row ``t``.

    Row ``t`` of the module-level ``bcdf`` is relabelled 0..n-1, sorted by
    value in descending order, and the first ``int(n * perc / 100.0)``
    positions are returned as a list of ints.
    """
    snapshot = bcdf.iloc[t].reset_index(drop=True)
    ranked = sorted(snapshot.to_dict().items(),
                    key=lambda pair: pair[1], reverse=True)
    cutoff = int(len(ranked) * (perc/100.0))
    return [int(node) for node, _ in ranked[:cutoff]]
84

  
85

  
86
def tailNodes(t, perc):
    """Return the positions of the lowest-valued columns of ``bcdf`` at row ``t``.

    Row ``t`` of the module-level ``bcdf`` is relabelled 0..n-1, sorted by
    value in ascending order, and the first ``int(n * perc / 100.0)``
    positions are returned as a list of ints.
    """
    snapshot = bcdf.iloc[t].reset_index(drop=True)
    ranked = sorted(snapshot.to_dict().items(),
                    key=lambda pair: pair[1], reverse=False)
    cutoff = int(len(ranked) * (perc/100.0))
    return [int(node) for node, _ in ranked[:cutoff]]
92

  
93
'''for i in range(k, k+memoryMax):
94
            bcktop, bcitop = bcdf.iloc[k, top], bcdf.iloc[i, top]
95
            acTop[i-k].append(bcktop * bcitop / (np.mean([bcktop, bcitop])**2))
96
for i in range(k, k+memoryMax):
97
            bcktail, bcitail = bcdf.iloc[k, tail], bcdf.iloc[i, tail]
98
            if (bcitail > 0 and bcktail > 0):
99
                acTail[i-k].append(bcktail*bcitail /
100
                                   (np.mean([bcktail, bcitail])**2))'''
101

  
102
perc = 5          # percentage of nodes taken as "top" / "tail"
memoryMax = 40    # number of lags passed to sampleACF
klim = 200        # number of start instants, also passed as timeDepth
acTop = defaultdict(list)   # lag -> values collected from top nodes
acTail = defaultdict(list)  # lag -> values collected from tail nodes


def _collectACF(pool, node_ids, start, store):
    """Run sampleACF in ``pool`` for each node and append results per lag."""
    # A fresh job list per call: reusing one list for the top and the tail
    # pass would recompute the top nodes and mix them into the tail results.
    jobs = [{'v': bcdf.iloc[:, node][start:], 'nlags': memoryMax,
             'timeDepth': klim} for node in node_ids]
    for curve in pool.map(sampleACF, jobs):
        for lag, value in enumerate(curve):
            store[lag].append(value)


# One worker pool for the whole run instead of a new one per iteration.
pool = Pool(num_workers)
try:
    # For many starting instants, called k
    for k in tqdm(range(1, klim)):
        # Take the top and tail nodes at that instant k
        topn = topNodes(k, perc)
        tailn = tailNodes(k, perc)
        # Compute the Normalized Instantaneous Correlation
        _collectACF(pool, topn, k, acTop)
        _collectACF(pool, tailn, k, acTail)
finally:
    pool.close()
    pool.join()

# Average over all start instants and nodes, in explicit lag order.
pd.Series([np.mean(acTop[lag]) for lag in sorted(acTop)]).plot(label='Top-'+str(perc)+'%')
pd.Series([np.mean(acTail[lag]) for lag in sorted(acTail)]).plot(label='Tail-'+str(perc)+'%')
plt.legend()
plt.xlabel('Tau')
plt.ylabel('Sample ACF')
plt.savefig(nick+"sampleACF.pdf", format='pdf')
timeAnalysis.py
1
import seaborn as sns
1 2
import code  # code.interact(local=dict(globals(), **locals()))
2 3
from collections import deque
3 4
from scipy import stats
......
5 6
from collections import defaultdict
6 7
import os
7 8
import sys
8
from statsmodels.graphics.tsaplots import plot_acf, acf
9
from statsmodels.graphics.tsaplots import plot_acf
10
from statsmodels.tsa.stattools import acf
9 11
import operator
10 12
from mpl_toolkits import mplot3d
11 13
import pandas as pd
12 14
from pprint import pprint
13 15
import numpy as np
14 16
import glob
17
from tqdm import tqdm
15 18
import matplotlib
16 19

  
17

  
20
#mys.rank(method='first', ascending=False)
18 21

  
19 22
folder = sys.argv[1]
20 23
lags = 100
......
37 40

  
38 41
nodes = range(len(bcdf.columns))
39 42

  
40
initialCentrality = {}
43
'''initialCentrality = {}
41 44
for n in nodes:
42 45
    initialCentrality[int(n)] = bcdf.iloc[0][n]
43 46

  
44 47

  
45 48
sorted_x = sorted(initialCentrality.items(),
46 49
                  key=operator.itemgetter(1), reverse=True)
47
srtNodes = [e[0] for e in sorted_x]
50
srtNodes = [e[0] for e in sorted_x]'''
48 51

  
49 52
bcACF = pd.DataFrame()  # rows=Time-Lags, columns = nodes
50 53
degACF = pd.DataFrame()  # rows=Time-Lags, columns = nodes
......
74 77
os.chdir("plots"+nick)
75 78
# Plotting
76 79

  
77

  
78 80
# ACF boxplots
79
bcACF.T.boxplot(column=[1]+range(5,lags,5))
81
bcACF.T.boxplot(column=[1]+range(5, lags, 5))
80 82
plt.ylabel("ACF of BC for all nodes")
81 83
plt.xlabel("Time-lag")
82 84
plt.xticks(rotation="vertical")
......
88 90
plt.savefig(nick+"autoCorrBOXPLOT-BC.pdf", format='pdf')
89 91
plt.clf()
90 92

  
91
degACF.T.boxplot(column=[1]+range(5,lags,5))
93
degACF.T.boxplot(column=[1]+range(5, lags, 5))
92 94
plt.ylabel("ACF of DEG for all nodes")
93 95
plt.xlabel("Time-lag")
94 96
plt.xticks(rotation="vertical")
......
100 102
plt.savefig(nick+"autoCorrBOXPLOT-DEG.pdf", format='pdf')
101 103
plt.clf()
102 104

  
103
kcoreACF.T.boxplot(column=[1]+range(5,lags,5))
105
kcoreACF.T.boxplot(column=[1]+range(5, lags, 5))
104 106
plt.ylabel("ACF of KCORE for all nodes")
105 107
plt.xlabel("Time-lag")
106 108
plt.xticks(rotation="vertical")
......
113 115
plt.clf()
114 116

  
115 117

  
116

  
117 118
# Mean AutoCorrelation and Rank-Correlation
118 119
# lags=20
119 120
firstRank = bcdf.iloc[0, :]
......
174 175
                         [b, c, 1]])
175 176

  
176 177
#code.interact(local=dict(globals(), **locals()))
177
import seaborn as sns
178 178
sns.set()
179 179

  
180 180

  

Also available in: Unified diff