Skip to content

Commit cecca02

Browse files
committed
reciprocal bench
1 parent 0306b49 commit cecca02

4 files changed

Lines changed: 57 additions & 13 deletions

File tree

circle.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ commands:
7474
command: |
7575
cmake --build ~/build --parallel
7676
cmake --build ~/build --target package
77+
- run:
78+
name: "Benchmark"
79+
working_directory: ~/build
80+
command: |
81+
test/intx-bench --benchmark_repetitions=11 --benchmark_filter=reciprocal
7782
- run:
7883
name: "Test"
7984
working_directory: ~/build

test/benchmarks/bench_div.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@ void reciprocal(benchmark::State& state)
7777
benchmark::DoNotOptimize(x);
7878
}
7979
BENCHMARK(reciprocal<uint64_t, neg>);
80-
BENCHMARK(reciprocal<uint64_t, reciprocal_naive>);
80+
BENCHMARK(reciprocal<uint64_t, reciprocal_native>);
81+
BENCHMARK(reciprocal<uint64_t, reciprocal_builtin_uint128>);
82+
BENCHMARK(reciprocal<uint64_t, reciprocal_gmp>);
8183
BENCHMARK(reciprocal<uint64_t, reciprocal_2by1>);
8284
BENCHMARK(reciprocal<uint64_t, reciprocal_2by1_noinline>);
8385
BENCHMARK(reciprocal<uint128, reciprocal_3by2>);

test/experimental/div.hpp

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,51 @@
44

55
namespace intx
66
{
7-
inline uint64_t reciprocal_naive(uint64_t d) noexcept
7+
inline uint64_t reciprocal_native(uint64_t d) noexcept
88
{
9-
const auto u = uint128{~uint64_t{0}, ~d};
10-
uint64_t v{};
9+
#ifdef __x86_64__
10+
uint64_t _; // NOLINT(*-init-variables)
11+
uint64_t v; // NOLINT(*-init-variables)
12+
asm("divq %4" // NOLINT(*-no-assembler)
13+
: "=d"(_), "=a"(v)
14+
: "d"(~d), "a"(~uint64_t{0}), "r"(d));
15+
return v;
16+
#else
17+
// Fallback implementation.
18+
return (uint128{~uint64_t{0}, ~d} / d)[0];
19+
#endif
20+
}
1121

12-
#if __x86_64__
13-
uint64_t _{};
14-
asm("divq %4" : "=d"(_), "=a"(v) : "d"(u[1]), "a"(u[0]), "g"(d)); // NOLINT(hicpp-no-assembler)
22+
inline uint64_t reciprocal_builtin_uint128(uint64_t d) noexcept
23+
{
24+
#ifdef INTX_HAS_BUILTIN_INT128
25+
const auto u = (builtin_uint128{~d} << 64) | ~uint64_t{0};
26+
return static_cast<uint64_t>(u / d);
1527
#else
16-
v = (u / d)[0];
28+
// Fallback implementation.
29+
return (uint128{~uint64_t{0}, ~d} / d)[0];
1730
#endif
31+
}
1832

19-
return v;
33+
/// A copy of the GMP algorithm from "Improved division by invariant integers".
///
/// Computes a 64-bit reciprocal value for the divisor d by a table lookup followed by
/// a fixed sequence of integer refinement steps (v0 -> v1 -> v2 -> v3 -> v4).
/// All uint64_t arithmetic below is unsigned and therefore wraps modulo 2^64.
///
/// d must have its most significant bit set; this is checked with INTX_REQUIRE.
/// The unit test for this function asserts that, for such d, the result equals
/// reciprocal_builtin_uint128(d).
34+
constexpr uint64_t reciprocal_gmp(uint64_t d) noexcept
35+
{
36+
INTX_REQUIRE(d & 0x8000000000000000); // Must be normalized, i.e. the top bit of d is set.
37+
38+
const uint64_t d9 = d >> 55;  // The top 9 bits of d; at least 256 because the top bit is set.
39+
const uint32_t v0 = internal::reciprocal_table[static_cast<size_t>(d9 - 256)];  // Table entry for d9 (index 0..255): the starting value.
40+
41+
const uint64_t d40 = (d >> 24) + 1;  // The top 40 bits of d, plus one.
42+
const uint64_t v1 = (v0 << 11) - uint32_t(uint32_t{v0 * v0} * d40 >> 40) - 1;  // v0 * v0 is a 32-bit product; multiplying by d40 widens it to 64 bits before the shift.
43+
44+
const uint64_t v2 = (v1 << 13) + (v1 * (0x1000000000000000 - v1 * d40) >> 47);  // The constant is 2^60.
45+
46+
const uint64_t d0 = d & 1;  // The lowest bit of d.
47+
const uint64_t d63 = (d >> 1) + d0; // ceil(d/2)
48+
const uint64_t e = ((v2 >> 1) & (0 - d0)) - (v2 * d63);  // (0 - d0) is an all-ones mask for odd d and zero for even d.
49+
const uint64_t v3 = (umul(v2, e)[1] >> 1) + (v2 << 31);  // NOTE(review): umul is defined elsewhere; [1] is presumably the high 64-bit word of the product - confirm.
50+
51+
const uint64_t v4 = v3 - (umul(v3, d) + d)[1] - d;  // Final step: uses the full divisor d rather than a truncated prefix.
52+
return v4;
2053
}
2154
} // namespace intx

test/unittests/test_div.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -442,15 +442,19 @@ TEST(div, reciprocal)
442442
constexpr auto d_start = uint64_t{1} << 63;
443443
for (uint64_t d = d_start; d < d_start + n; ++d)
444444
{
445-
auto v = reciprocal_2by1(d);
446-
ASSERT_EQ(v, reciprocal_naive(d)) << d;
445+
const auto expected = reciprocal_builtin_uint128(d);
446+
ASSERT_EQ(reciprocal_2by1(d), expected) << d;
447+
ASSERT_EQ(reciprocal_native(d), expected) << d;
448+
ASSERT_EQ(reciprocal_gmp(d), expected) << d;
447449
}
448450

449451
constexpr auto d_end = ~uint64_t{0};
450452
for (uint64_t d = d_end; d > d_end - n; --d)
451453
{
452-
auto v = reciprocal_2by1(d);
453-
ASSERT_EQ(v, reciprocal_naive(d)) << d;
454+
const auto expected = reciprocal_builtin_uint128(d);
455+
ASSERT_EQ(reciprocal_2by1(d), expected) << d;
456+
ASSERT_EQ(reciprocal_native(d), expected) << d;
457+
ASSERT_EQ(reciprocal_gmp(d), expected) << d;
454458
}
455459
}
456460

0 commit comments

Comments
 (0)