JODA  0.13.1 (59b41972)
JSON On-Demand Analysis
AttributeStatAggregator.h
Go to the documentation of this file.
1 //
2 // Created by Nico Schäfer on 10/31/17.
3 //
4 
5 #ifndef JODA_ATTRIBUTESTATAGGREGATOR_H
6 #define JODA_ATTRIBUTESTATAGGREGATOR_H
7 
8 #include <map>
9 
10 #include "IAggregator.h"
11 #include "joda/misc/RJFwd.h"
12 namespace joda::query {
14  public:
16  const std::string &toPointer,
17  std::vector<std::unique_ptr<IValueProvider>> &&params);
18  void merge(IAggregator *other) override;
19  RJValue terminate(RJMemoryPoolAlloc &alloc) override;
20  std::unique_ptr<IAggregator> duplicate() const override;
21  void accumulate(const RapidJsonDocument &json,
22  RJMemoryPoolAlloc &alloc) override;
23  const std::string getName() const override;
24  std::vector<std::string> getAttributes() const override;
25 
/**
 * @brief Compile-time accessor for this aggregator's identifier.
 * @return The string literal "ATTSTAT".
 */
static constexpr auto getName_() {
  return "ATTSTAT";
}
27 
28  protected:
30  public:
31  struct Node {
32  Node(const std::string &name, Node *parent) : name(name), parent(parent) {
33  if (this->parent != nullptr) {
34  DCHECK(!this->name.empty());
35  DCHECK(this->parent != this);
36  }
37  }
38 
39  Node(const Node &n) = delete;
40  Node &operator=(const Node &n) = delete;
41  Node(Node &&n) = default;
42  Node &operator=(Node &&n) = default;
43 
44  std::string name;
45  size_t count_obj = 0;
46  uint64_t max_member = 0;
47  uint64_t min_member = std::numeric_limits<uint64_t>::max();
48  size_t count_arr = 0;
49  uint64_t max_size = 0;
50  uint64_t min_size = std::numeric_limits<uint64_t>::max();
51  size_t count_null = 0;
52  size_t count_str = 0;
53  uint64_t max_strsize = 0;
54  uint64_t min_strsize = std::numeric_limits<uint64_t>::max();
55  size_t count_double = 0;
56  double max_double = std::numeric_limits<double>::lowest();
57  double min_double = std::numeric_limits<double>::max();
58  size_t count_int = 0;
59  int64_t max_int = std::numeric_limits<int64_t>::min();
60  int64_t min_int = std::numeric_limits<int64_t>::max();
61  size_t count_bool = 0;
62  size_t count_true = 0;
63  size_t count_false = 0;
64 
65  size_t getCount() const {
68  }
69 
70  Node *findChild(const std::string &name) {
71  DCHECK(count_obj > 0);
72  Node *c = nullptr;
73  for (auto &child : children) {
74  if (child.name == name) {
75  c = &child;
76  break;
77  }
78  }
79  if (c == nullptr) {
80  Node n(name, this);
81  children.emplace_back(std::move(n));
82  DCHECK(!children.empty());
83  c = &children.back();
84  }
85  DCHECK(c != nullptr);
86  c->parent = this; // Update, if pointer changed
87  return c;
88  }
89 
90  Node *findArrElt(size_t i) {
91  DCHECK(count_arr > 0);
92  Node *c = nullptr;
93  if (arrMembers.size() > i) {
94  c = &arrMembers[i];
95  } else {
96  Node n(std::to_string(i), this);
97  arrMembers.emplace_back(std::move(n));
98  DCHECK(arrMembers.size() == i + 1);
99  c = &arrMembers.back();
100  }
101  DCHECK(c != nullptr);
102  c->parent = this; // Update, if pointer changed
103  return c;
104  }
105 
106  void merge(const Node *o) {
107  if (o == nullptr) return;
108  DCHECK(o->name == name);
109  count_obj += o->count_obj;
110  count_arr += o->count_arr;
111  count_int += o->count_int;
113  count_str += o->count_str;
114  count_null += o->count_null;
115  count_bool += o->count_bool;
116  count_true += o->count_true;
117  count_false += o->count_false;
118  min_int = std::min(min_int, o->min_int);
119  max_int = std::max(max_int, o->max_int);
120  min_double = std::min(min_double, o->min_double);
121  max_double = std::max(max_double, o->max_double);
122  min_member = std::min(min_member, o->min_member);
123  max_member = std::max(max_member, o->max_member);
124  min_size = std::min(min_size, o->min_size);
125  max_size = std::max(max_size, o->max_size);
126  min_strsize = std::min(min_strsize, o->min_strsize);
127  max_strsize = std::max(max_strsize, o->max_strsize);
128  for (const auto &child : o->children) {
129  auto c = findChild(child.name);
130  c->merge(&child);
131  }
132  for (size_t i = 0; i < o->arrMembers.size(); ++i) {
133  auto c = findArrElt(i);
134  c->merge(&o->arrMembers[i]);
135  }
136  }
137 
139  RJValue r(rapidjson::kObjectType);
140  if (!name.empty()) {
141  r.AddMember("Key", name, alloc);
142  }
143  r.AddMember("Count_Total", getCount(), alloc);
144  r.AddMember("Count_Object", count_obj, alloc);
145  if (min_member < std::numeric_limits<uint64_t>::max()) {
146  r.AddMember("Min_Member", min_member, alloc);
147  r.AddMember("Max_Member", max_member, alloc);
148  }
149  r.AddMember("Count_Array", count_arr, alloc);
150  if (min_size < std::numeric_limits<uint64_t>::max()) {
151  r.AddMember("Min_Size", min_size, alloc);
152  r.AddMember("Max_Size", max_size, alloc);
153  }
154  r.AddMember("Count_Null", count_null, alloc);
155  r.AddMember("Count_Boolean", count_bool, alloc);
156  r.AddMember("Count_True", count_true, alloc);
157  r.AddMember("Count_False", count_false, alloc);
158  r.AddMember("Count_String", count_str, alloc);
159  if (min_strsize < std::numeric_limits<uint64_t>::max()) {
160  r.AddMember("Min_StrSize", min_strsize, alloc);
161  r.AddMember("Max_StrSize", max_strsize, alloc);
162  }
163  r.AddMember("Count_Int", count_int, alloc);
164  if (count_int > 0) {
165  r.AddMember("Min_Int", min_int, alloc);
166  r.AddMember("Max_Int", max_int, alloc);
167  }
168  r.AddMember("Count_Float", count_double, alloc);
169  if (count_double > 0) {
170  r.AddMember("Min_Float", min_double, alloc);
171  r.AddMember("Max_Float", max_double, alloc);
172  }
173  r.AddMember("Count_Number", count_int + count_double, alloc);
174  if (!children.empty()) {
175  RJValue ch(rapidjson::kArrayType);
176  for (const auto &child : children) {
177  auto child_val = child.toValue(alloc);
178  ch.PushBack(child_val, alloc);
179  }
180  r.AddMember("Children", ch, alloc);
181  }
182  if (!arrMembers.empty()) {
183  RJValue ch(rapidjson::kArrayType);
184  for (const auto &child : arrMembers) {
185  auto child_val = child.toValue(alloc);
186  ch.PushBack(child_val, alloc);
187  }
188  r.AddMember("Array_Items", ch, alloc);
189  }
190  return r;
191  }
192 
193  std::vector<Node> children{};
194  std::vector<Node> arrMembers{};
195  Node *parent = nullptr;
196  };
197 
198  AttStatHandler() { currnode = &root; }
199 
/** Character type used by the string-carrying handler events. */
using Ch = char;
201 
202  void merge(const Node &n) { root.merge(&n); }
203 
204  void finishValue() {
205  DCHECK(currnode != nullptr);
206  currnode = currnode->parent;
207  }
208 
209  bool Null() {
210  checkArray();
211  DCHECK(currnode != nullptr);
212  currnode->count_null++;
213  finishValue();
214  return true;
215  }
216 
217  bool Bool(bool b) {
218  checkArray();
219  DCHECK(currnode != nullptr);
220  currnode->count_bool++;
221  if (b) {
222  currnode->count_true++;
223  } else {
224  currnode->count_false++;
225  }
226  finishValue();
227  return true;
228  }
229 
230  bool Int(int i) {
231  checkArray();
232  DCHECK(currnode != nullptr);
233  currnode->count_int++;
234  currnode->min_int = std::min((int64_t)i, currnode->min_int);
235  currnode->max_int = std::max((int64_t)i, currnode->max_int);
236  finishValue();
237  return true;
238  }
239 
240  bool Uint(unsigned i) {
241  checkArray();
242  DCHECK(currnode != nullptr);
243  currnode->count_int++;
244  currnode->min_int = std::min((int64_t)i, currnode->min_int);
245  currnode->max_int = std::max((int64_t)i, currnode->max_int);
246  finishValue();
247  return true;
248  }
249 
250  bool Int64(int64_t i) {
251  checkArray();
252  DCHECK(currnode != nullptr);
253  currnode->count_int++;
254  currnode->min_int = std::min(i, currnode->min_int);
255  currnode->max_int = std::max(i, currnode->max_int);
256  finishValue();
257  return true;
258  }
259 
260  bool Uint64(uint64_t i) {
261  checkArray();
262  DCHECK(currnode != nullptr);
263  currnode->count_int++;
264  currnode->min_int = std::min((int64_t)i, currnode->min_int);
265  currnode->max_int = std::max((int64_t)i, currnode->max_int);
266  finishValue();
267  return true;
268  }
269 
270  bool Double(double d) {
271  checkArray();
272  DCHECK(currnode != nullptr);
273  currnode->count_double++;
274  currnode->min_double = std::min(d, currnode->min_double);
275  currnode->max_double = std::max(d, currnode->max_double);
276  finishValue();
277  return true;
278  }
279 
280  bool RawNumber(const Ch *str, rapidjson::SizeType length, bool copy) {
281  checkArray();
282  DCHECK(currnode != nullptr);
283  currnode->count_double++;
284  finishValue();
285  return true;
286  }
287 
288  bool String(const Ch *str, rapidjson::SizeType length, bool copy) {
289  checkArray();
290  DCHECK(currnode != nullptr);
291  currnode->count_str++;
292  currnode->min_strsize =
293  std::min(static_cast<u_int64_t>(length), currnode->min_strsize);
294  currnode->max_strsize =
295  std::max(static_cast<u_int64_t>(length), currnode->max_strsize);
296  finishValue();
297  return true;
298  }
299 
300  bool StartObject() {
301  checkArray();
302  DCHECK(currnode != nullptr);
303  currnode->count_obj++;
304  levels.emplace_back(false);
305  return true;
306  }
307 
308  bool Key(const Ch *str, rapidjson::SizeType length, bool copy) {
309  auto name = std::string(str, length);
310  DCHECK(!name.empty());
311  DCHECK(currnode != nullptr);
312  currnode = currnode->findChild(name);
313  return true;
314  }
315 
316  bool EndObject(rapidjson::SizeType memberCount) {
317  DCHECK(currnode != nullptr);
318  currnode->min_member =
319  std::min(static_cast<u_int64_t>(memberCount), currnode->min_member);
320  currnode->max_member =
321  std::max(static_cast<u_int64_t>(memberCount), currnode->max_member);
322  finishValue();
323  levels.pop_back();
324  return true;
325  }
326 
327  bool StartArray() {
328  checkArray();
329  DCHECK(currnode != nullptr);
330  currnode->count_arr++;
331  levels.emplace_back(true);
332  return true;
333  }
334 
335  bool EndArray(rapidjson::SizeType elementCount) {
336  DCHECK(currnode != nullptr);
337  currnode->min_size =
338  std::min(static_cast<u_int64_t>(elementCount), currnode->min_size);
339  currnode->max_size =
340  std::max(static_cast<u_int64_t>(elementCount), currnode->max_size);
341  finishValue();
342  levels.pop_back();
343  return true;
344  }
345 
346  void reset() {
347  root = {"", nullptr};
348  currnode = &root;
349  levels.clear();
350  }
351 
352  Node finish() { return std::move(root); }
353 
354  void finishDocument() {
355  currnode = &root;
356  levels.clear();
357  }
358 
359  private:
/**
 * @brief One open container on the handler's nesting stack.
 */
struct Level {
  /**
   * @param isArr true when the container is an array, false for an object.
   *
   * Marked explicit so that a bare bool is never silently converted into a
   * nesting level.
   */
  explicit Level(bool isArr) : isArr(isArr) {}

  /// Whether this level is an array (true) or an object (false).
  bool isArr;
  /// Number of array elements handed out so far on this level.
  size_t count = 0;
};
366 
367  void checkArray() {
368  if (levels.empty()) return;
369  if (levels.back().isArr) {
370  DCHECK(currnode != nullptr);
371  currnode = currnode->findArrElt(levels.back().count);
372  levels.back().count++;
373  }
374  }
375 
376  Node root{"", nullptr};
377  Node *currnode = nullptr;
378  std::vector<Level> levels{};
379  };
380 
382 };
383 } // namespace joda::query
384 
385 #endif // JODA_ATTRIBUTESTATAGGREGATOR_H
rapidjson::MemoryPoolAllocator< RJBaseAlloc > RJMemoryPoolAlloc
Definition: RJFwd.h:26
rapidjson::GenericValue< RJChar, RJMemoryPoolAlloc > RJValue
Definition: RJFwd.h:29
Definition: RapidJsonDocument.h:22
Definition: AttributeStatAggregator.h:29
void reset()
Definition: AttributeStatAggregator.h:346
void finishValue()
Definition: AttributeStatAggregator.h:204
bool Double(double d)
Definition: AttributeStatAggregator.h:270
bool Int64(int64_t i)
Definition: AttributeStatAggregator.h:250
bool StartObject()
Definition: AttributeStatAggregator.h:300
bool StartArray()
Definition: AttributeStatAggregator.h:327
bool Bool(bool b)
Definition: AttributeStatAggregator.h:217
bool String(const Ch *str, rapidjson::SizeType length, bool copy)
Definition: AttributeStatAggregator.h:288
Node finish()
Definition: AttributeStatAggregator.h:352
void merge(const Node &n)
Definition: AttributeStatAggregator.h:202
bool Uint(unsigned i)
Definition: AttributeStatAggregator.h:240
char Ch
Definition: AttributeStatAggregator.h:200
bool Null()
Definition: AttributeStatAggregator.h:209
bool Uint64(uint64_t i)
Definition: AttributeStatAggregator.h:260
bool EndArray(rapidjson::SizeType elementCount)
Definition: AttributeStatAggregator.h:335
bool Int(int i)
Definition: AttributeStatAggregator.h:230
bool Key(const Ch *str, rapidjson::SizeType length, bool copy)
Definition: AttributeStatAggregator.h:308
bool EndObject(rapidjson::SizeType memberCount)
Definition: AttributeStatAggregator.h:316
void finishDocument()
Definition: AttributeStatAggregator.h:354
AttStatHandler()
Definition: AttributeStatAggregator.h:198
bool RawNumber(const Ch *str, rapidjson::SizeType length, bool copy)
Definition: AttributeStatAggregator.h:280
Definition: AttributeStatAggregator.h:13
std::vector< std::string > getAttributes() const override
Definition: AttributeStatAggregator.cpp:46
RJValue terminate(RJMemoryPoolAlloc &alloc) override
Definition: AttributeStatAggregator.cpp:17
const std::string getName() const override
Definition: AttributeStatAggregator.cpp:42
void accumulate(const RapidJsonDocument &json, RJMemoryPoolAlloc &alloc) override
Definition: AttributeStatAggregator.cpp:36
AttStatHandler handler
Definition: AttributeStatAggregator.h:381
void merge(IAggregator *other) override
Definition: AttributeStatAggregator.cpp:9
static constexpr auto getName_()
Definition: AttributeStatAggregator.h:26
AttributeStatAggregator(const std::string &toPointer, std::vector< std::unique_ptr< IValueProvider >> &&params)
Definition: AttributeStatAggregator.cpp:28
std::unique_ptr< IAggregator > duplicate() const override
Definition: AttributeStatAggregator.cpp:23
Definition: IAggregator.h:25
std::vector< std::unique_ptr< IValueProvider > > params
Definition: IAggregator.h:124
std::string toPointer
Definition: IAggregator.h:120
Definition: AttributeStatAggregator.h:12
Definition: AttributeStatAggregator.h:31
size_t count_false
Definition: AttributeStatAggregator.h:63
uint64_t max_size
Definition: AttributeStatAggregator.h:49
uint64_t max_strsize
Definition: AttributeStatAggregator.h:53
size_t count_int
Definition: AttributeStatAggregator.h:58
size_t count_bool
Definition: AttributeStatAggregator.h:61
uint64_t min_size
Definition: AttributeStatAggregator.h:50
Node * findArrElt(size_t i)
Definition: AttributeStatAggregator.h:90
std::string name
Definition: AttributeStatAggregator.h:44
std::vector< Node > arrMembers
Definition: AttributeStatAggregator.h:194
void merge(const Node *o)
Definition: AttributeStatAggregator.h:106
Node(const std::string &name, Node *parent)
Definition: AttributeStatAggregator.h:32
size_t count_double
Definition: AttributeStatAggregator.h:55
size_t count_null
Definition: AttributeStatAggregator.h:51
size_t count_str
Definition: AttributeStatAggregator.h:52
int64_t min_int
Definition: AttributeStatAggregator.h:60
double min_double
Definition: AttributeStatAggregator.h:57
Node * findChild(const std::string &name)
Definition: AttributeStatAggregator.h:70
uint64_t min_strsize
Definition: AttributeStatAggregator.h:54
uint64_t max_member
Definition: AttributeStatAggregator.h:46
double max_double
Definition: AttributeStatAggregator.h:56
uint64_t min_member
Definition: AttributeStatAggregator.h:47
Node * parent
Definition: AttributeStatAggregator.h:195
int64_t max_int
Definition: AttributeStatAggregator.h:59
size_t count_true
Definition: AttributeStatAggregator.h:62
size_t getCount() const
Definition: AttributeStatAggregator.h:65
std::vector< Node > children
Definition: AttributeStatAggregator.h:193
size_t count_arr
Definition: AttributeStatAggregator.h:48
RJValue toValue(RJMemoryPoolAlloc &alloc) const
Definition: AttributeStatAggregator.h:138
size_t count_obj
Definition: AttributeStatAggregator.h:45