5 #ifndef JODA_SIMILARITYSCHEDULER_H
6 #define JODA_SIMILARITYSCHEDULER_H
12 #include <rapidjson/istreamwrapper.h>
// Fragment of the SimilarityScheduler class template declaration; SIM is the
// similarity-measure type.
// NOTE(review): this listing omits lines (the embedded original line numbers
// jump from 20 to 77), so the class head and most members are not visible.
20 template <
typename SIM>
// Tail of the scheduleDocument member declaration: the document and its
// origin are passed as rvalue references to unique_ptrs, followed by a size.
77 std::unique_ptr<RJDocument> &&doc,
78 std::unique_ptr<IOrigin> &&origin,
size_t size);
// First half of a pair typedef whose first element owns a JSONContainer.
// NOTE(review): the second element and the alias name are not visible here;
// presumably this is `sCont` (used just below) — confirm.
88 typedef std::pair<std::unique_ptr<JSONContainer>,
// Open containers; the definitions further down index this vector with the
// container identifier.
91 std::vector<sCont> container;
// Returns a newly created, uniquely owned JSONContainer for the given
// container size. Const: does not modify the scheduler.
94 std::unique_ptr<JSONContainer> createContainer(
size_t contSize)
const;
// Definition fragment of createContainer.
// Two construction paths are visible: a default-constructed JSONContainer and
// one constructed with contSize.
// NOTE(review): the line carrying the function name and the condition that
// selects between the two returns (original lines 98, 100, 102) are missing
// from this listing; presumably contSize == 0 selects the default — confirm.
97 template <
typename SIM>
99 size_t contSize)
const {
101 return std::make_unique<JSONContainer>();
103 return std::make_unique<JSONContainer>(contSize);
// Definition fragment of getNewDoc: builds a new RJDocument from the
// allocator (getAlloc()) of the container stored under `id`.
// NOTE(review): the signature line is missing from this listing; `id` is not
// range-checked in the visible code.
106 template <
typename SIM>
109 return std::make_unique<RJDocument>(container[
id].first->getAlloc());
// Definition fragment of scheduleDocument: inserts the document (with its
// origin) into container `id` and, once that container is full, finalizes it,
// sends it to the queue and replaces it with a fresh container.
// NOTE(review): the line carrying the function name and the `id`/`doc`
// parameters is missing from this listing, as are the closing braces.
112 template <
typename SIM>
115 std::unique_ptr<IOrigin> &&origin,
size_t size) {
// Both the document and its origin are moved into the container.
116 container[id].first->insertDoc(std::move(doc), std::move(origin));
// Flush only when the container reports no space left (hasSpace(0)) and is
// not empty.
// NOTE(review): `size` is not used in the visible lines and hasSpace is
// called with a constant 0 — confirm that is intended.
117 if (!(container[
id].first->hasSpace(0) || container[
id].first->size() == 0)) {
118 container[id].first->finalize();
119 DCHECK(container[
id].first !=
nullptr);
// The container is handed to the queue; the DCHECK that follows asserts the
// slot is null afterwards.
120 queue->send(std::move(container[
id].first));
121 DCHECK(container[
id].first ==
nullptr);
// Refill the slot so the same id can receive further documents.
122 container[id].first = createContainer(contSize);
123 DCHECK(container[
id].first !=
nullptr);
// Constructor fragment: stores the queue pointer and the container size in
// the members of the same names; the body is empty.
// NOTE(review): the signature line is missing from this listing.
127 template <
typename SIM>
130 : queue(queue), contSize(contSize) {}
// Definition fragment of a getContainerForDoc overload (original lines
// 132-156): measures the document's representation against the
// representation stored with each existing container.
// NOTE(review): the signature, the setup of `measure`/`docRep`, and the lines
// that act on `sim` (original lines 146-154) are missing from this listing.
132 template <
typename SIM>
// Tail of a streamed log/check message; its condition is not visible.
138 <<
"Function called with wrong Similarity measure";
141 if (!container.empty()) {
// Compare the document with every existing container's representation.
144 for (
size_t i = 0; i < container.size(); ++i) {
145 double sim = measure.measure(docRep, container[i].second);
// Appends a new container paired with this document's representation and
// returns its index; presumably reached only when no existing container was
// chosen above — confirm against the full source.
155 container.emplace_back(createContainer(contSize), std::move(docRep));
156 return container.size() - 1;
// Definition fragment of a second getContainerForDoc overload (original lines
// 159-183); the visible lines mirror the overload above.
// NOTE(review): the signature (and therefore the parameter type), the setup
// of `measure`/`docRep`, and the lines that act on `sim` (original lines
// 173-181) are missing from this listing.
159 template <
typename SIM>
// Tail of a streamed log/check message; its condition is not visible.
165 <<
"Function called with wrong Similarity measure";
168 if (!container.empty()) {
// Compare the document with every existing container's representation.
171 for (
size_t i = 0; i < container.size(); ++i) {
172 double sim = measure.measure(docRep, container[i].second);
// Appends a new container paired with this document's representation and
// returns its index; presumably reached only when no existing container was
// chosen above — confirm against the full source.
182 container.emplace_back(createContainer(contSize), std::move(docRep));
183 return container.size() - 1;
// Definition fragment of the getContainerForDoc overload that takes a
// rapidjson::IStreamWrapper (original lines 186-211); the visible lines
// mirror the other overloads.
// NOTE(review): the line with the function name, the setup of
// `measure`/`docRep`, and the lines that act on `sim` (original lines
// 201-209) are missing from this listing.
186 template <
typename SIM>
189 rapidjson::IStreamWrapper &stream) {
// Tail of a streamed log/check message; its condition is not visible.
193 <<
"Function called with wrong Similarity measure";
196 if (!container.empty()) {
// Compare the document with every existing container's representation.
// NOTE(review): `auto i = 0` deduces int, so `i < container.size()` mixes
// signed and unsigned; the sibling overloads declare the index as size_t.
199 for (
auto i = 0; i < container.size(); ++i) {
200 double sim = measure.measure(docRep, container[i].second);
// Appends a new container paired with this document's representation and
// returns its index; presumably reached only when no existing container was
// chosen above — confirm against the full source.
210 container.emplace_back(createContainer(contSize), std::move(docRep));
211 return container.size() - 1;
214 template <
typename SIM>
218 for (
auto &&currentSimContainer : container) {
219 auto &currentContainer = currentSimContainer.first;
// Flush step of finalize(): each non-empty container is finalized and handed
// to the queue; the DCHECK after send() asserts the slot is null afterwards.
// NOTE(review): the null DCHECK below runs after the pointer has already been
// dereferenced in the size() condition.
// NOTE(review): original lines 223, 226-227 and 229 (presumably the closing
// braces, among others) are missing from this listing.
220 if (currentContainer->size() > 0) {
221 DCHECK(currentContainer !=
nullptr);
222 currentContainer->finalize();
224 queue->send(std::move(currentContainer));
225 DCHECK(currentContainer ==
nullptr);
// Called once after all containers were processed; presumably tells the queue
// that this producer will send nothing further — confirm against the queue
// API.
228 queue->producerFinished();
rapidjson::GenericDocument< RJChar, RJMemoryPoolAlloc, RJBaseAlloc > RJDocument
Definition: RJFwd.h:28
Definition: SimilarityScheduler.h:21
void scheduleDocument(ContainerIdentifier id, std::unique_ptr< RJDocument > &&doc, std::unique_ptr< IOrigin > &&origin, size_t size)
Definition: SimilarityScheduler.h:113
virtual ~SimilarityScheduler()=default
ContainerIdentifier getContainerForDoc(std::string &raw)
Definition: SimilarityScheduler.h:134
std::unique_ptr< RJDocument > getNewDoc(ContainerIdentifier id)
Definition: SimilarityScheduler.h:107
void finalize()
Definition: SimilarityScheduler.h:215
size_t ContainerIdentifier
Definition: SimilarityScheduler.h:23
SimilarityScheduler(JsonContainerQueue::queue_t *queue, size_t contSize=0)
Definition: SimilarityScheduler.h:128
static bool storeJson
Definition: config.h:32
static double sim_min_similarity
Definition: config.h:67
Definition: IJSONSimilarityMeasure.h:36
Representation getRepresentation(const RJDocument &lhs)
Definition: IJSONSimilarityMeasure.h:46
void * Representation
Definition: IJSONSimilarityMeasure.h:38
bool is_implemented
Definition: IJSONSimilarityMeasure.h:56