diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-21 08:20:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-21 08:20:50 -0700 |
commit | 7856a565416e0cf091f825b0e25c7a1b7abb650e (patch) | |
tree | 0a04a0594167fc997b3b1299610b5ef95ab89f19 /lib/math/div64.c | |
parent | 617a814f14b8914271f7a70366d72c6196d17663 (diff) | |
parent | 5e06e08939df1cafef97a8e04f4b88c2806b538a (diff) |
Merge tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull non-MM updates from Andrew Morton:
"Many singleton patches - please see the various changelogs for
details.
Quite a lot of nilfs2 work this time around.
Notable patch series in this pull request are:
- "mul_u64_u64_div_u64: new implementation" by Nicolas Pitre, with
assistance from Uwe Kleine-König. Reimplement mul_u64_u64_div_u64()
to provide (much) more accurate results. The current implementation
was causing Uwe some issues in the PWM drivers.
- "xz: Updates to license, filters, and compression options" from
Lasse Collin. Miscellaneous maintenance and kinor feature work to
the xz decompressor.
- "Fix some GDB command error and add some GDB commands" from
Kuan-Ying Lee. Fixes and enhancements to the gdb scripts.
- "treewide: add missing MODULE_DESCRIPTION() macros" from Jeff
Johnson. Adds lots of MODULE_DESCRIPTIONs, thus fixing lots of
warnings about this.
- "nilfs2: add support for some common ioctls" from Ryusuke Konishi.
Adds various commonly-available ioctls to nilfs2.
- "This series fixes a number of formatting issues in kernel doc
comments" from Ryusuke Konishi does that.
- "nilfs2: prevent unexpected ENOENT propagation" from Ryusuke
Konishi. Fix issues where -ENOENT was being unintentionally and
inappropriately returned to userspace.
- "nilfs2: assorted cleanups" from Huang Xiaojia.
- "nilfs2: fix potential issues with empty b-tree nodes" from Ryusuke
Konishi fixes some issues which can occur on corrupted nilfs2
filesystems.
- "scripts/decode_stacktrace.sh: improve error reporting and
usability" from Luca Ceresoli does those things"
* tag 'mm-nonmm-stable-2024-09-21-07-52' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (103 commits)
list: test: increase coverage of list_test_list_replace*()
list: test: fix tests for list_cut_position()
proc: use __auto_type more
treewide: correct the typo 'retun'
ocfs2: cleanup return value and mlog in ocfs2_global_read_info()
nilfs2: remove duplicate 'unlikely()' usage
nilfs2: fix potential oob read in nilfs_btree_check_delete()
nilfs2: determine empty node blocks as corrupted
nilfs2: fix potential null-ptr-deref in nilfs_btree_insert()
user_namespace: use kmemdup_array() instead of kmemdup() for multiple allocation
tools/mm: rm thp_swap_allocator_test when make clean
squashfs: fix percpu address space issues in decompressor_multi_percpu.c
lib: glob.c: added null check for character class
nilfs2: refactor nilfs_segctor_thread()
nilfs2: use kthread_create and kthread_stop for the log writer thread
nilfs2: remove sc_timer_task
nilfs2: do not repair reserved inode bitmap in nilfs_new_inode()
nilfs2: eliminate the shared counter and spinlock for i_generation
nilfs2: separate inode type information from i_state field
nilfs2: use the BITS_PER_LONG macro
...
Diffstat (limited to 'lib/math/div64.c')
-rw-r--r-- | lib/math/div64.c | 115 |
1 files changed, 72 insertions, 43 deletions
diff --git a/lib/math/div64.c b/lib/math/div64.c index 191761b1b623..5faa29208bdb 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -186,55 +186,84 @@ EXPORT_SYMBOL(iter_div_u64_rem); #ifndef mul_u64_u64_div_u64 u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) { - u64 res = 0, div, rem; - int shift; + if (ilog2(a) + ilog2(b) <= 62) + return div64_u64(a * b, c); - /* can a * b overflow ? */ - if (ilog2(a) + ilog2(b) > 62) { - /* - * Note that the algorithm after the if block below might lose - * some precision and the result is more exact for b > a. So - * exchange a and b if a is bigger than b. - * - * For example with a = 43980465100800, b = 100000000, c = 1000000000 - * the below calculation doesn't modify b at all because div == 0 - * and then shift becomes 45 + 26 - 62 = 9 and so the result - * becomes 4398035251080. However with a and b swapped the exact - * result is calculated (i.e. 4398046510080). - */ - if (a > b) - swap(a, b); +#if defined(__SIZEOF_INT128__) + + /* native 64x64=128 bits multiplication */ + u128 prod = (u128)a * b; + u64 n_lo = prod, n_hi = prod >> 64; + +#else + + /* perform a 64x64=128 bits multiplication manually */ + u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32; + u64 x, y, z; + + x = (u64)a_lo * b_lo; + y = (u64)a_lo * b_hi + (u32)(x >> 32); + z = (u64)a_hi * b_hi + (u32)(y >> 32); + y = (u64)a_hi * b_lo + (u32)y; + z += (u32)(y >> 32); + x = (y << 32) + (u32)x; + + u64 n_lo = x, n_hi = z; + +#endif + + /* make sure c is not zero, trigger exception otherwise */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdiv-by-zero" + if (unlikely(c == 0)) + return 1/0; +#pragma GCC diagnostic pop + + int shift = __builtin_ctzll(c); + /* try reducing the fraction in case the dividend becomes <= 64 bits */ + if ((n_hi >> shift) == 0) { + u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo; + + return div64_u64(n, c >> shift); /* - * (b * a) / c is equal to - * - * (b / c) * a + - * (b % c) * a / c - * - * if nothing overflows. Can the 1st multiplication - * overflow? Yes, but we do not care: this can only - * happen if the end result can't fit in u64 anyway. - * - * So the code below does - * - * res = (b / c) * a; - * b = b % c; + * The remainder value if needed would be: + * res = div64_u64_rem(n, c >> shift, &rem); + * rem = (rem << shift) + (n_lo - (n << shift)); */ - div = div64_u64_rem(b, c, &rem); - res = div * a; - b = rem; - - shift = ilog2(a) + ilog2(b) - 62; - if (shift > 0) { - /* drop precision */ - b >>= shift; - c >>= shift; - if (!c) - return res; - } } - return res + div64_u64(a * b, c); + if (n_hi >= c) { + /* overflow: result is unrepresentable in a u64 */ + return -1; + } + + /* Do the full 128 by 64 bits division */ + + shift = __builtin_clzll(c); + c <<= shift; + + int p = 64 + shift; + u64 res = 0; + bool carry; + + do { + carry = n_hi >> 63; + shift = carry ? 1 : __builtin_clzll(n_hi); + if (p < shift) + break; + p -= shift; + n_hi <<= shift; + n_hi |= n_lo >> (64 - shift); + n_lo <<= shift; + if (carry || (n_hi >= c)) { + n_hi -= c; + res |= 1ULL << p; + } + } while (n_hi); + /* The remainder value if needed would be n_hi << p */ + + return res; } EXPORT_SYMBOL(mul_u64_u64_div_u64); #endif |