forked from aepasto/triest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRunCountingLocal.cpp
151 lines (123 loc) · 4.15 KB
/
RunCountingLocal.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include "GraphScheduler.h"
#include "GraphSampler.h"
#include "TriangleCounter.h"
#include "UDynGraph.h"
#include "Stats.h"
#include <iostream>
#include <cassert>
#include <cstring>
#include <cmath>
using namespace std;
struct Result {
double pearson = 0.0;
double mean_eps_err = 0.0; //As defined in mascot
double top_triangle_exact = 0.0;
double top_triangle_est = 0.0;
};
Result local_err(TriangleCounter& gt_counter, GraphSampler& gt_sampler, TriangleCounter& est_counter, GraphSampler& est_sampler){
Result res;
res.top_triangle_exact = 0.0;
res.top_triangle_est = 0.0;
vector<int> nodes;
gt_counter.get_nodes(&nodes);
double gt_avg = 0.0;
double est_avg = 0.0;
for (const auto & n: nodes){
gt_avg+=gt_sampler.get_triangle_est_local(n);
est_avg+=est_sampler.get_triangle_est_local(n);
}
gt_avg/=nodes.size();
est_avg/=nodes.size();
double cov = 0.0;
double sum_dev_gt = 0.0;
double sum_dev_est = 0.0;
double mean_eps_err = 0.0;
for (const auto & n: nodes){
double gt_ =gt_sampler.get_triangle_est_local(n);
double est_ =est_sampler.get_triangle_est_local(n);
if (gt_>res.top_triangle_exact){
res.top_triangle_exact = gt_;
}
if (est_>res.top_triangle_est){
res.top_triangle_est = est_;
}
mean_eps_err+= abs(gt_-est_)/(gt_+1);
cov += (gt_-gt_avg)*(est_-est_avg);
sum_dev_gt += (gt_-gt_avg)*(gt_-gt_avg);
sum_dev_est += (est_-est_avg)*(est_-est_avg);
}
if(sum_dev_est==0){
res.pearson =0;//In this case it is not well defined
} else {
res.pearson = cov/(sqrt(sum_dev_gt)*sqrt(sum_dev_est));
}
res.mean_eps_err = mean_eps_err/nodes.size();
return res;
}
int main(int argc, char** argv) {
if (argc <= 6) {
cerr
<< "ERROR Requires FIRST 5 parameters. RunCounting only_add (1=yes,0=no); random_seed (int); Check_Error_every_number_steps (int); graph-udates.txt;\n" <<
"THEN: Type of Sampler (R for reservoir, F for fix-p, RH resevoir sample and hold, FH fix-p sample and hold)"<<
" THEN IF reservoir: size reservoir (int) "<<
" ELSE IF fixed-p: p (double) "<< endl;
exit(1);
}
bool only_add = atoi(argv[1])==1;
assert(atoi(argv[1])<=1 && atoi(argv[1])>=0);
int random_seed = atoi(argv[2]);
assert(random_seed>=0);
srand(random_seed);
int stats_freq = atoi(argv[3]);
assert(stats_freq >0);
string file_name(argv[4]);
bool is_reservoir = strcmp(argv[5], "R") == 0 || strcmp(argv[5], "RH") == 0;
bool is_fix_p = strcmp(argv[5], "F") == 0 || strcmp(argv[5], "FH") == 0;
bool use_sample_and_hold = strcmp(argv[5], "RH") == 0 || strcmp(argv[5], "FH") == 0;
assert(only_add || !use_sample_and_hold); //can't use sample and hold with deletion
double p = -1;
int size_reservoir = -1;
if (is_reservoir){
size_reservoir = atoi(argv[6]);
} else if (is_fix_p){
p = atof(argv[6]);
} else {
cerr<<argv[5]<<" not supported yet."<<endl;
assert(false);
}
GraphScheduler scheduler(file_name, false /* not storing time*/);
TriangleCounter counter(true /*use local count*/);
TriangleCounter counter_exact(true /*use local count*/);
FixedPSampler sampler_exact(1.0, false, &counter_exact);
GraphSampler* sampler;
if(is_reservoir && only_add) {
sampler = new ReservoirSampler(size_reservoir, use_sample_and_hold, &counter);
} else if(is_reservoir && !only_add) {
sampler = new ReservoirAddRemSampler(size_reservoir, &counter);
} else if(is_fix_p) {
sampler = new FixedPSampler(p, use_sample_and_hold, &counter);
} else{
assert(false);
}
// Statsstats(stats_freq);
unsigned long long count_op = 0;
while (scheduler.has_next()) {
EdgeUpdate update = scheduler.next_update();
if(only_add && !update.is_add){
break; // ENDS at the first remove
}
sampler->exec_operation(update);
sampler_exact.exec_operation(update);
unsigned long long triangles_exact = counter_exact.triangles();
if(++count_op%stats_freq==0){
if(triangles_exact == 0){
continue; // No error possibile !
}
Result res = local_err(counter_exact,sampler_exact,counter,*sampler);
cout << count_op << "\t"<<counter.size_sample()<<"\t"<<triangles_exact<< "\t" <<res.top_triangle_exact<<"\t"<<res.top_triangle_est<<"\t" <<res.pearson<<"\t"<<res.mean_eps_err<<endl;
}
}
//stats.end_op();
delete sampler;
return 0;
}