forked from cmuparlay/pbbslib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge.h
80 lines (75 loc) · 2.19 KB
/
merge.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#pragma once
#include "utilities.h"
#include "seq.h"
#include "binary_search.h"
namespace pbbs {
// TODO: not yet optimized to use moves instead of copies.
// the following parameter can be tuned
constexpr const size_t _merge_base = PAR_GRANULARITY;
template <_copy_type ct, class SeqA, class SeqB, class F>
void seq_merge(SeqA const &A,
SeqB const &B,
range<typename SeqA::value_type*> R,
const F& f) {
size_t nA = A.size();
size_t nB = B.size();
size_t i = 0;
size_t j = 0;
while (true) {
if (i == nA) {
while (j < nB) {copy_val<ct>(R[i+j], B[j]); j++;}
break; }
if (j == nB) {
while (i < nA) {copy_val<ct>(R[i+j], A[i]); i++;}
break; }
if (f(B[j], A[i])) {
copy_val<ct>(R[i+j], B[j]);
j++;
} else {
copy_val<ct>(R[i+j], A[i]);
i++;
}
}
}
// this merge is stable
template <_copy_type ct, class SeqA, class SeqB, class F>
void merge_(const SeqA &A,
const SeqB &B,
range<typename SeqA::value_type*> R,
const F& f,
bool cons=false) {
size_t nA = A.size();
size_t nB = B.size();
size_t nR = nA + nB;
if (nR < _merge_base)
seq_merge<ct>(A, B, R, f);
else if (nA == 0)
parallel_for(0, nB, [&] (size_t i) {copy_val<ct>(R[i], B[i]);});
else if (nB == 0)
parallel_for(0, nA, [&] (size_t i) {copy_val<ct>(R[i], A[i]);});
else {
size_t mA = nA/2;
// important for stability that binary search identifies
// first element in B greater or equal to A[mA]
size_t mB = binary_search(B, A[mA], f);
if (mB == 0) mA++; // ensures at least one on each side
size_t mR = mA + mB;
auto left = [&] () {merge_<ct>(A.slice(0, mA), B.slice(0, mB),
R.slice(0, mR), f, cons);};
auto right = [&] () {merge_<ct>(A.slice(mA, nA), B.slice(mB, nB),
R.slice(mR, nR), f, cons);};
par_do(left, right, cons);
}
}
template <class SeqA, class SeqB, class F>
sequence<typename SeqA::value_type>
merge(const SeqA &A,
const SeqB &B,
const F& f,
bool cons=false) {
using T = typename SeqA::value_type;
auto R = sequence<T>::no_init(A.size() + B.size());
merge_<_assign>(A, B, R.slice(), f, cons);
return R;
}
}