summaryrefslogtreecommitdiff
path: root/lib/math/div64.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-21 08:20:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-21 08:20:50 -0700
commit7856a565416e0cf091f825b0e25c7a1b7abb650e (patch)
tree0a04a0594167fc997b3b1299610b5ef95ab89f19 /lib/math/div64.c
parent617a814f14b8914271f7a70366d72c6196d17663 (diff)
parent5e06e08939df1cafef97a8e04f4b88c2806b538a (diff)
Merge tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton: "Many singleton patches - please see the various changelogs for details. Quite a lot of nilfs2 work this time around. Notable patch series in this pull request are: - "mul_u64_u64_div_u64: new implementation" by Nicolas Pitre, with assistance from Uwe Kleine-König. Reimplement mul_u64_u64_div_u64() to provide (much) more accurate results. The current implementation was causing Uwe some issues in the PWM drivers. - "xz: Updates to license, filters, and compression options" from Lasse Collin. Miscellaneous maintenance and kinor feature work to the xz decompressor. - "Fix some GDB command error and add some GDB commands" from Kuan-Ying Lee. Fixes and enhancements to the gdb scripts. - "treewide: add missing MODULE_DESCRIPTION() macros" from Jeff Johnson. Adds lots of MODULE_DESCRIPTIONs, thus fixing lots of warnings about this. - "nilfs2: add support for some common ioctls" from Ryusuke Konishi. Adds various commonly-available ioctls to nilfs2. - "This series fixes a number of formatting issues in kernel doc comments" from Ryusuke Konishi does that. - "nilfs2: prevent unexpected ENOENT propagation" from Ryusuke Konishi. Fix issues where -ENOENT was being unintentionally and inappropriately returned to userspace. - "nilfs2: assorted cleanups" from Huang Xiaojia. - "nilfs2: fix potential issues with empty b-tree nodes" from Ryusuke Konishi fixes some issues which can occur on corrupted nilfs2 filesystems. - "scripts/decode_stacktrace.sh: improve error reporting and usability" from Luca Ceresoli does those things" * tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (103 commits) list: test: increase coverage of list_test_list_replace*() list: test: fix tests for list_cut_position() proc: use __auto_type more treewide: correct the typo 'retun' ocfs2: cleanup return value and mlog in ocfs2_global_read_info() nilfs2: remove duplicate 'unlikely()' usage nilfs2: fix potential oob read in nilfs_btree_check_delete() nilfs2: determine empty node blocks as corrupted nilfs2: fix potential null-ptr-deref in nilfs_btree_insert() user_namespace: use kmemdup_array() instead of kmemdup() for multiple allocation tools/mm: rm thp_swap_allocator_test when make clean squashfs: fix percpu address space issues in decompressor_multi_percpu.c lib: glob.c: added null check for character class nilfs2: refactor nilfs_segctor_thread() nilfs2: use kthread_create and kthread_stop for the log writer thread nilfs2: remove sc_timer_task nilfs2: do not repair reserved inode bitmap in nilfs_new_inode() nilfs2: eliminate the shared counter and spinlock for i_generation nilfs2: separate inode type information from i_state field nilfs2: use the BITS_PER_LONG macro ...
Diffstat (limited to 'lib/math/div64.c')
-rw-r--r--lib/math/div64.c115
1 files changed, 72 insertions, 43 deletions
diff --git a/lib/math/div64.c b/lib/math/div64.c
index 191761b1b623..5faa29208bdb 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -186,55 +186,84 @@ EXPORT_SYMBOL(iter_div_u64_rem);
#ifndef mul_u64_u64_div_u64
u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
{
- u64 res = 0, div, rem;
- int shift;
+ if (ilog2(a) + ilog2(b) <= 62)
+ return div64_u64(a * b, c);
- /* can a * b overflow ? */
- if (ilog2(a) + ilog2(b) > 62) {
- /*
- * Note that the algorithm after the if block below might lose
- * some precision and the result is more exact for b > a. So
- * exchange a and b if a is bigger than b.
- *
- * For example with a = 43980465100800, b = 100000000, c = 1000000000
- * the below calculation doesn't modify b at all because div == 0
- * and then shift becomes 45 + 26 - 62 = 9 and so the result
- * becomes 4398035251080. However with a and b swapped the exact
- * result is calculated (i.e. 4398046510080).
- */
- if (a > b)
- swap(a, b);
+#if defined(__SIZEOF_INT128__)
+
+ /* native 64x64=128 bits multiplication */
+ u128 prod = (u128)a * b;
+ u64 n_lo = prod, n_hi = prod >> 64;
+
+#else
+
+ /* perform a 64x64=128 bits multiplication manually */
+ u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
+ u64 x, y, z;
+
+ x = (u64)a_lo * b_lo;
+ y = (u64)a_lo * b_hi + (u32)(x >> 32);
+ z = (u64)a_hi * b_hi + (u32)(y >> 32);
+ y = (u64)a_hi * b_lo + (u32)y;
+ z += (u32)(y >> 32);
+ x = (y << 32) + (u32)x;
+
+ u64 n_lo = x, n_hi = z;
+
+#endif
+
+ /* make sure c is not zero, trigger exception otherwise */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdiv-by-zero"
+ if (unlikely(c == 0))
+ return 1/0;
+#pragma GCC diagnostic pop
+
+ int shift = __builtin_ctzll(c);
+ /* try reducing the fraction in case the dividend becomes <= 64 bits */
+ if ((n_hi >> shift) == 0) {
+ u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo;
+
+ return div64_u64(n, c >> shift);
/*
- * (b * a) / c is equal to
- *
- * (b / c) * a +
- * (b % c) * a / c
- *
- * if nothing overflows. Can the 1st multiplication
- * overflow? Yes, but we do not care: this can only
- * happen if the end result can't fit in u64 anyway.
- *
- * So the code below does
- *
- * res = (b / c) * a;
- * b = b % c;
+ * The remainder value if needed would be:
+ * res = div64_u64_rem(n, c >> shift, &rem);
+ * rem = (rem << shift) + (n_lo - (n << shift));
*/
- div = div64_u64_rem(b, c, &rem);
- res = div * a;
- b = rem;
-
- shift = ilog2(a) + ilog2(b) - 62;
- if (shift > 0) {
- /* drop precision */
- b >>= shift;
- c >>= shift;
- if (!c)
- return res;
- }
}
- return res + div64_u64(a * b, c);
+ if (n_hi >= c) {
+ /* overflow: result is unrepresentable in a u64 */
+ return -1;
+ }
+
+ /* Do the full 128 by 64 bits division */
+
+ shift = __builtin_clzll(c);
+ c <<= shift;
+
+ int p = 64 + shift;
+ u64 res = 0;
+ bool carry;
+
+ do {
+ carry = n_hi >> 63;
+ shift = carry ? 1 : __builtin_clzll(n_hi);
+ if (p < shift)
+ break;
+ p -= shift;
+ n_hi <<= shift;
+ n_hi |= n_lo >> (64 - shift);
+ n_lo <<= shift;
+ if (carry || (n_hi >= c)) {
+ n_hi -= c;
+ res |= 1ULL << p;
+ }
+ } while (n_hi);
+ /* The remainder value if needed would be n_hi << p */
+
+ return res;
}
EXPORT_SYMBOL(mul_u64_u64_div_u64);
#endif