Diskuze: MOD velkého unsigned integer čísla
V předchozím kvízu, Online test znalostí C++, jsme si ověřili nabyté zkušenosti z kurzu.


popravdě máš to nějaký divný
přešti si tohle:
https://stackoverflow.com/…arge-numbers
a knížku
Discrete Mathematics and Its Applications, Kenneth H. Rosen
už má 8. vydání.
a taky tady
https://pdfcoffee.com/…df-free.html
https://stackoverflow.com/…th-libraries
Caster:8. ledna 20:59
Jak použít proměnnou typu uint128_t ve Visual Studiu vím. Ke správnému výsledku mi to ale nepomohlo. Nyní radši používám svou externí funkci na 128bitové dělení v x64 asm.
// https://stackoverflow.com/questions/6759592/how-to-enable-int128-on-visual-studio
#define _CRT_SECURE_NO_WARNINGS
#include <__msvc_int128.hpp>
#include "AVX2_MOD_Test.h"
using namespace std;
using uint128_t = _Unsigned128;
Používám externí funkci v x64 asm 128÷128-bit Unsigned Integer Division (128-bit Quotient and Remainder) kterou volám z C++.
Aktuální verze MOD, která ale stále nefunguje.
#include <stdint.h> // For uint64_t, uint8_t
#include <stddef.h> // For size_t
#include <string.h> // For memcpy, memset
#include <stdio.h> // For printf
#include <intrin.h> // For _umul128, _addcarry_u64, _subborrow_u64, _BitScanReverse64
#include <windows.h> // For VirtualAlloc, VirtualFree
// Define uint128_t type using struct
typedef struct {
uint64_t lo;
uint64_t hi;
} uint128_t;
uint8_t CarryFlag = 0; // Crucial: Initialize CarryFlag
// Function to compare two uint128_t values
int compare_uint128(uint128_t a, uint128_t b) {
if (a.hi < b.hi) return -1;
if (a.hi > b.hi) return 1;
if (a.lo < b.lo) return -1;
if (a.lo > b.lo) return 1;
return 0;
}
// External udiv128 function (your implementation)
extern "C" void udiv128(uint128_t* q, uint128_t* dividend, uint128_t* divisor, uint128_t* r);
// Shift left by `shift` bits in place
void big_integer_shift_left_in_place(uint64_t* a, size_t len, unsigned long shift_bits) {
if (shift_bits == 0 || len == 0) return;
size_t word_shift = shift_bits / 64;
unsigned long bit_shift = shift_bits % 64;
if (word_shift >= len) {
// All bits are shifted out
memset(a, 0, len * sizeof(uint64_t));
return;
}
// Shift words
if (word_shift > 0) {
for (size_t i = len; i-- > word_shift;) {
a[i] = a[i - word_shift];
}
// Zero fill lower words
memset(a, 0, word_shift * sizeof(uint64_t));
}
// Shift bits
if (bit_shift > 0) {
uint64_t carry = 0;
for (size_t i = word_shift; i < len; ++i) {
uint64_t temp = a[i];
a[i] = (temp << bit_shift) | carry;
carry = temp >> (64 - bit_shift);
}
}
}
// Shift right by `shift` bits in place
void big_integer_shift_right_in_place(uint64_t* a, size_t len, unsigned long shift) {
if (shift == 0) return;
size_t word_shift = shift / 64;
shift %= 64;
if (word_shift >= len) {
memset(a, 0, len * sizeof(uint64_t));
return;
}
if (word_shift > 0) {
for (size_t i = 0; i + word_shift < len; ++i) {
a[i] = a[i + word_shift];
}
memset(a + len - word_shift, 0, word_shift * sizeof(uint64_t));
}
if (shift > 0) {
uint64_t carry = 0;
for (size_t i = len; i-- > 0;) {
uint64_t temp = a[i];
a[i] = (temp >> shift) | carry;
carry = temp << (64 - shift);
}
}
}
// Corrected mod function with detailed debug printouts
void mod(uint64_t* dst, uint64_t* src, uint64_t* tmp, size_t dst_len, size_t src_len) {
if (dst_len < src_len) {
// The modulus is dst itself
return;
}
size_t n = dst_len;
size_t k = src_len;
// Initialize u
size_t u_len = n + k + 1;
uint64_t* u = tmp; // Length u_len
memset(u, 0, u_len * sizeof(uint64_t));
memcpy(u, dst, n * sizeof(uint64_t)); // Copy dst into u[0..n-1]
// Initialize v
uint64_t* v = tmp + u_len; // Length k
memcpy(v, src, k * sizeof(uint64_t));
// Normalize
unsigned long shift;
if (_BitScanReverse64(&shift, v[0])) {
shift = 63 - shift;
}
else {
printf("Error: Divisor is zero.\n");
return;
}
if (shift > 0) {
big_integer_shift_left_in_place(v, k, shift);
big_integer_shift_left_in_place(u, u_len, shift); // u has length u_len
}
// Debug: Print normalized v
printf("Normalized v (k = %zu):\n", k);
for (size_t i = 0; i < k; ++i) {
printf("%016llx ", v[i]);
}
printf("\n");
// Debug: Print normalized u
printf("Normalized u (u_len = %zu):\n", u_len);
for (size_t i = 0; i < u_len; ++i) {
printf("%016llx ", u[i]);
}
printf("\n");
// Initialize q
uint64_t* q = tmp + u_len + k; // Length n - k + 1
memset(q, 0, (n - k + 1) * sizeof(uint64_t));
uint64_t v1 = v[0];
uint64_t v2 = (k > 1) ? v[1] : 0;
// Main loop
for (size_t j = 0; j <= n - k; ++j) {
// Construct the 128-bit dividend
uint128_t dividend;
dividend.hi = u[j];
dividend.lo = u[j + 1];
// Construct the 128-bit divisor
uint128_t divisor;
divisor.hi = 0;
divisor.lo = v1;
// Variables for quotient and remainder
uint128_t q_hat_struct;
uint128_t r_hat_struct;
// Use external udiv128 function
udiv128(&q_hat_struct, ÷nd, &divisor, &r_hat_struct);
uint64_t q_hat = q_hat_struct.lo;
uint64_t r_hat = r_hat_struct.lo;
// Next word in u
uint64_t uj2 = (j + 2 < u_len) ? u[j + 2] : 0;
// Debug printout
printf("\nStep %zu:\n", j);
printf(" dividend.hi=%016llx dividend.lo=%016llx\n", dividend.hi, dividend.lo);
printf(" divisor.hi=%016llx divisor.lo=%016llx\n", divisor.hi, divisor.lo);
printf(" Initial q_hat=%016llx r_hat=%016llx\n", q_hat, r_hat);
printf(" uj2=%016llx\n", uj2);
/// Adjust q_hat if necessary
if (k > 1) {
while (true) {
uint64_t lhs_hi, lhs_lo;
lhs_lo = _umul128(q_hat, v2, &lhs_hi);
// Corrected lhs and rhs
uint128_t lhs = { lhs_hi, lhs_lo };
uint128_t rhs = { r_hat, uj2 };
// Debug: Print comparison values
printf(" Adjusting q_hat:\n");
printf(" lhs=%016llx%016llx\n", lhs.hi, lhs.lo);
printf(" rhs=%016llx%016llx\n", rhs.hi, rhs.lo);
if (compare_uint128(lhs, rhs) <= 0) {
// If lhs <= rhs, stop adjusting
break;
}
q_hat--;
CarryFlag = _addcarry_u64(CarryFlag, r_hat, v1, &r_hat);
if (CarryFlag) {
// If overflow occurs, cannot adjust further
break;
}
// Debug: Print adjusted q_hat and r_hat
printf(" q_hat decremented to %016llx, r_hat adjusted to %016llx\n", q_hat, r_hat);
}
}
// Debug printout after adjustment
printf(" Adjusted q_hat=%016llx\n", q_hat);
// Multiply and subtract q_hat * v from u
uint8_t borrow = 0;
uint64_t carry = 0;
for (size_t i = k; i-- > 0;) {
uint64_t prod_hi, prod_lo;
prod_lo = _umul128(q_hat, v[i], &prod_hi);
// Add previous carry to prod_lo
uint64_t sum_lo;
uint8_t carry_in = _addcarry_u64(0, prod_lo, carry, &sum_lo);
// Update carry
uint64_t sum_hi = prod_hi + carry_in;
// Subtract sum_lo from u[j + i]
uint64_t temp;
borrow = _subborrow_u64(borrow, u[j + i], sum_lo, &temp);
u[j + i] = temp;
carry = sum_hi;
}
borrow = _subborrow_u64(borrow, u[j + k], carry, &u[j + k]);
// Debug printout after subtraction
printf(" After subtraction borrow=%u\n", borrow);
printf(" u[%zu..%zu] = ", j, j + k);
for (size_t idx = j; idx <= j + k; ++idx) {
printf("%016llx ", u[idx]);
}
printf("\n");
// Correction if borrow occurred
if (borrow) {
q_hat--;
uint8_t carry2 = 0;
for (size_t i = 0; i <= k; ++i) {
uint64_t sum;
uint64_t addend = (i < k) ? v[i] : 0;
carry2 = _addcarry_u64(carry2, u[j + i], addend, &sum);
u[j + i] = sum;
}
// Debug after correction
printf(" Correction applied, q_hat decremented to %016llx\n", q_hat);
printf(" u[%zu..%zu] after correction = ", j, j + k);
for (size_t idx = j; idx <= j + k; ++idx) {
printf("%016llx ", u[idx]);
}
printf("\n");
}
// Debug printout final q_hat
printf(" Final q_hat=%016llx\n", q_hat);
q[j] = q_hat;
}
// Denormalize
if (shift > 0) {
big_integer_shift_right_in_place(u, u_len, shift);
}
// Debug: Print final u
printf("\nFinal u after denormalization (u_len = %zu):\n", u_len);
for (size_t i = 0; i < u_len; ++i) {
printf("%016llx ", u[i]);
}
printf("\n");
// The modulus is in u[0..k-1]
memcpy(dst, u, k * sizeof(uint64_t));
memset(dst + k, 0, (dst_len - k) * sizeof(uint64_t));
}
int main() {
const size_t size = 1024 * 1024; // Adjust size as needed
uint64_t* POLE = (uint64_t*)VirtualAlloc(NULL, size * sizeof(uint64_t), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
if (!POLE) {
printf("Memory allocation failed.\n");
return -1;
}
size_t dst_len = 16;
size_t src_len = 9;
uint64_t* dst = POLE;
uint64_t* src = dst + dst_len;
uint64_t* expected_modulus = src + src_len;
uint64_t* tmp = expected_modulus + src_len;
uint64_t dst_init[16] = {
0xe180d8077d300002ULL, 0x7b625008a4fdb542ULL, 0xe9265e2d44f0d9cfULL, 0x86c153dde116ab13ULL,
0x1c432374cb51e3beULL, 0x37109f09c0bb25e3ULL, 0x4e4e0317cc2e7e96ULL, 0xe7fb642df0da0ec7ULL,
0xa073e694d0335847ULL, 0x6f531c9bc39a4535ULL, 0xaa72054f943acfd8ULL, 0x3ee329ad967df484ULL,
0x7b6c1bf56a1b4541ULL, 0x66329dc0cbc5bcf8ULL, 0x96780f8534f0ba1dULL, 0x0000000000001b8cULL
};
memcpy(dst, dst_init, dst_len * sizeof(uint64_t));
uint64_t src_init[9] = {
0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
0x00000000000001ffULL
};
memcpy(src, src_init, src_len * sizeof(uint64_t));
uint64_t expected_modulus_init[9] = {
0x6529f3aed3debafdULL, 0x55ac347acd93cac2ULL, 0xa0d6b8378fae5bd5ULL, 0x6b43594fb2429f10ULL,
0x8ff4c08cf314a0e6ULL, 0x0e2e2701d6abd260ULL, 0x4768fb7a28b4690bULL, 0xf16dd49eee538fc1ULL,
0x0000000000000103ULL
};
memcpy(expected_modulus, expected_modulus_init, src_len * sizeof(uint64_t));
mod(dst, src, tmp, dst_len, src_len);
if (memcmp(dst, expected_modulus, src_len * sizeof(uint64_t)) == 0) {
printf("MOD function works correctly.\n");
}
else {
printf("MOD function failed.\n");
printf("Expected:\n");
for (size_t i = 0; i < src_len; ++i) {
printf("%016llx ", expected_modulus[i]);
}
printf("\nActual:\n");
for (size_t i = 0; i < src_len; ++i) {
printf("%016llx ", dst[i]);
}
printf("\n");
}
VirtualFree(POLE, 0, MEM_RELEASE);
return 0;
}
A x64 asm, externí funkce udiv128.asm na 128 bitové dělení viz odkaz nahoře. Kód jsem jen lehce upravil o uložení pár registrů na stoh a jejich vrácení zpět (final_ret: je finální návrat z funkce). __udivmodti4() function for AMD64 processors, supporting the Microsoft calling convention, using the extended precision division algorithm:
; Copyright © 2004-2025, Stefan Kanthak <stefan.kanthak@nexgo.de>
; Microsoft calling convention for AMD64 platform:
; - first 4 integer or pointer arguments (from left to right) are passed
; in registers RCX/R1, RDX/R2, R8 and R9;
; - 16-byte arguments are passed by reference;
; - surplus arguments are pushed on stack in reverse order (from right
; to left), 8-byte aligned;
; - caller allocates memory for 16-byte return value and passes pointer
; to it as (hidden) first argument, thus shifting all other arguments;
; - caller always allocates "home space" for 4 arguments on stack,
; even when less than 4 arguments are passed, but does not need to push
; first 4 arguments;
; - callee can spill first 4 arguments from registers to "home space";
; - callee can clobber "home space";
; - stack is 16-byte aligned: callee must decrement RSP by 8+n*16
; bytes when it calls other functions (CALL instruction pushes 8 bytes);
; - 64-bit integer or pointer result is returned in register RAX/R0;
; - 32-bit integer result is returned in register EAX;
; - registers RAX/R0, RCX/R1, RDX/R2, R8, R9, R10 and R11 are volatile
; and can be clobbered;
; - registers RBX/R3, RSP/R4, RBP/R5, RSI/R6, RDI/R7, R12, R13, R14 and
; R15 must be preserved.
; NOTE: raises "division exception" when divisor is 0!
; rcx = oword ptr quotient
; rdx = oword ptr dividend
; r8 = oword ptr divisor
; r9 = oword ptr remainder
.code
udiv128 proc
push rbx ; Preserve nonvolatile registers
push r12
push r13
push r14
mov rax, [rdx] ; rax = low qword of dividend
mov rdx, [rdx+8]; rdx = high qword of dividend
mov r10, [r8] ; r10 = low qword of divisor
mov r11, [r8+8] ; r11 = high qword of divisor
mov r8, rcx ; r8 = address of quotient
cmp rax, r10
mov rcx, rdx
sbb rcx, r11
jb trivial ; dividend < divisor?
bsr rcx, r11 ; rcx = index of most significant '1' bit
; in high qword of divisor
jnz extended ; high qword of divisor <> 0?
; divisor < 2**64
cmp rdx, r10
jae long ; high qword of dividend >= (low qword of) divisor?
; dividend < divisor * 2**64: quotient < 2**64
; perform normal division
normal:
div r10 ; rax = (low qword of) quotient,
; rdx = (low qword of) remainder
mov [r8], rax
mov [r8+8], r11 ; high qword of quotient = 0
test r9, r9
jz @f ; address of remainder = 0?
mov [r9], rdx
mov [r9+8], r11 ; high qword of remainder = 0
@@:
mov rax, r8 ; rax = address of quotient
jmp final_ret
;ret
; dividend >= divisor * 2**64: quotient >= 2**64
; perform "long" alias "schoolbook" division
long:
mov rcx, rax ; rcx = low qword of dividend
mov rax, rdx ; rax = high qword of dividend
mov rdx, r11 ; rdx:rax = high qword of dividend
div r10 ; rax = high qword of quotient,
; rdx = high qword of remainder'
xchg rcx, rax ; rcx = high qword of quotient,
; rax = low qword of dividend
div r10 ; rax = low qword of quotient,
; rdx = (low qword of) remainder
mov [r8], rax
mov [r8+8], rcx ; quotient = rcx:rax
test r9, r9
jz @f ; address of remainder = 0?
mov [r9], rdx
mov [r9+8], r11 ; high qword of remainder = 0
@@:
mov rax, r8 ; rax = address of quotient
jmp final_ret
;ret
; dividend < divisor: quotient = 0, remainder = dividend
trivial:
xor ecx, ecx
mov [r8], rcx
mov [r8+8], rcx ; quotient = 0
test r9, r9
jz @f ; address of remainder = 0?
mov [r9], rax
mov [r9+8], rdx ; remainder = dividend
@@:
mov rax, r8 ; rax = address of quotient
jmp final_ret
;ret
; divisor >= 2**64: quotient < 2**64
extended:
xor ecx, 63 ; ecx = number of leading '0' bits
; in (high qword of) divisor
jz special ; divisor >= 2**127?
; perform "extended & adjusted" division
mov [rsp+8], rbx
mov [rsp+16], r12
mov [rsp+24], r13
mov [rsp+32], r14
mov r12, r11 ; r12 = high qword of divisor
shld r12, r10, cl ; r12 = divisor / 2**(index + 1)
; = divisor'
;; shl r10, cl
mov r13, rax
mov r14, rdx ; r14:r13 = high qword of dividend
ifndef JccLess
xor ebx, ebx ; rbx = high qword of quotient' = 0
cmp rdx, r12
jb @f ; high qword of dividend < divisor'?
; high qword of dividend >= divisor':
; subtract divisor' from high qword of dividend to prevent possible
; quotient overflow and set most significant bit of quotient"
sub rdx, r12 ; rdx = high qword of dividend - divisor'
; = high qword of dividend'
inc ebx ; rbx = high qword of quotient' = 1
@@:
elseif 0
sub rdx, r12 ; rdx = high qword of dividend - divisor'
sbb rbx, rbx ; rbx = (high qword of dividend < divisor') ? -1 : 0
and rbx, r12 ; rbx = (high qword of dividend < divisor') ? divisor' : 0
add rdx, rbx ; rdx = high qword of dividend
; - (high qword of dividend < divisor') ? 0 : divisor'
; = high qword of dividend'
neg rbx ; CF = (high qword of dividend < divisor')
sbb ebx, ebx ; ebx = (high qword of dividend < divisor') ? -1 : 0
inc ebx ; rbx = (high qword of dividend < divisor') ? 0 : 1
; = high qword of quotient'
elseif 0
sub rdx, r12 ; rdx = high qword of dividend - divisor'
cmovb rdx, r14 ; = high qword of dividend'
sbb ebx, ebx ; ebx = (high qword of dividend < divisor') ? -1 : 0
inc ebx ; rbx = (high qword of dividend < divisor') ? 0 : 1
; = high qword of quotient'
else ; JccLess
xor ebx, ebx ; rbx = high qword of quotient' = 0
sub rdx, r12 ; rdx = high qword of dividend - divisor'
cmovb rdx, r14 ; = high qword of dividend'
sbb ebx, -1 ; rbx = (high qword of dividend < divisor') ? 0 : 1
; = high qword of quotient'
endif ; JccLess
; high qword of dividend' < divisor'
div r12 ; rax = dividend' / divisor'
; = low qword of quotient',
; rdx = remainder'
shld rbx, rax, cl ; rbx = quotient' / 2**(index + 1)
; = dividend / divisor'
; = quotient"
;; shl rax, cl
ifndef JccLess
mov rax, r10 ; rax = low qword of divisor
mov r12, r11 ; r12 = high qword of divisor
imul r12, rbx ; r12 = high qword of divisor * quotient"
mul rbx ; rdx:rax = low qword of divisor * quotient"
add rdx, r12 ; rdx:rax = divisor * quotient"
jnc @f ; divisor * quotient" < 2**64?
; (with carry, it is off by divisor,
; and quotient" is off by 1)
if 0
sbb rbx, 0 ; rbx = quotient" - 1
else
dec rbx ; rbx = quotient" - 1
endif
sub rax, r10
sbb rdx, r11 ; rdx:rax = divisor * (quotient" - 1)
@@:
sub r13, rax
sbb r14, rdx ; r14:r13 = dividend - divisor * quotient"
; = remainder"
else ; JccLess
mov rax, r10 ; rax = low qword of divisor
mov r12, r11 ; r12 = high qword of divisor
imul r12, rbx ; r12 = high qword of divisor * quotient"
mul rbx ; rdx:rax = low qword of divisor * quotient"
sub r13, rax
sbb r14, rdx ; r14:r13 = dividend
; - low qword of divisor * quotient"
sub r14, r12 ; r14:r13 = dividend - divisor * quotient"
; = remainder"
endif ; JccLess
jnb @f ; remainder" >= 0?
; (with borrow, it is off by divisor,
; and quotient" is off by 1)
if 0
sbb rbx, 0 ; rbx = quotient" - 1
; = quotient
else
dec rbx ; rbx = quotient" - 1
; = quotient
endif
add r13, r10
adc r14, r11 ; r14:r13 = remainder" + divisor
; = remainder
@@:
xor eax, eax ; rax = high qword of quotient = 0
mov [r8], rbx
mov [r8+8], rax ; quotient = rax:rbx
test r9, r9
jz @f ; address of remainder = 0?
mov [r9], r13
mov [r9+8], r14 ; remainder = r14:r13
@@:
mov rbx, [rsp+8]
mov r12, [rsp+16]
mov r13, [rsp+24]
mov r14, [rsp+32]
mov rax, r8 ; rax = address of quotient
jmp final_ret
;ret
; dividend >= divisor >= 2**127:
; quotient = 1, remainder = dividend - divisor
special:
mov [r8+8], rcx
inc ecx
mov [r8], rcx ; quotient = 1
test r9, r9
jz @f ; address of remainder = 0?
sub rax, r10
sbb rdx, r11 ; rdx:rax = dividend - divisor
; = remainder
mov [r9], rax
mov [r9+8], rdx
@@:
mov rax, r8 ; rax = address of quotient
;ret
final_ret:
pop r14 ; Restore nonvolatile registers
pop r13
pop r12
pop rbx
ret
udiv128 endp
end
\---
no někdy se vyplatí ten kod zahodit a začít úplně znova a uplně od
začátku
https://stackoverflow.com/…-bit-integer
a nebo prostě používej Intel C++ compiler, tam to jde
https://stackoverflow.com/…l-c-compiler
https://www.quora.com/…bit-CPUs-GCC
https://www.ehu.eus/…c_ug_lnx.pdf
Caster:9. ledna 10:01
V těch 128 bitových operacích (dělení - ext. funkce, násobení, sčítání a odečítání - VS intrinsic) není žádný problém, to jsem si opakovaně zkontroloval. Problém je v ostatních částech kódu na MOD.
já ti rozumim, ale obávám se, že procházení tvého kodu krok za krokem
by mi zabralo asi tak století a tolik času nemám budeš muset použít debug
a výpis výsledků do textového okna krok za krokem - tzv. atomizace
kroků.
jinak Intel C/C++ opravdu MÁ 128bit aritmetiku ...
Caster:9. ledna 12:11
openai o1 už na tom pracuje. Musel jsem jí trochu postrčit. Přišla na to, že hned na začátku špatně provádí normalizaci, když jí vyjde posun vlevo 0 tj. uint64_t je 0xFFFFFFFFFFFFFFFF. Už to opravila s tím, že udělá posun vlevo alespoň o jeden bit.
Teď do kódu přidá debug výpisy a určitě to dám s AI do pořádku .
Caster:9. ledna 16:37
Už mi to pomohlo v dost věcech a to vč. programování ve Visual Basicu
pro Excel, Pythonu a programu v C++ (MPLAB X IDE) pro řízení el. ploténky
podle zadané teplotní křivky pro zapájení SMD součástek pro MCU Microchip
SAMD21J18 (32bit) vč. výpočtu parametrů PID řízení na základě
změřené teplotní charakteristiky el. vařiče .
Samozřejmě je nutné těm programům rozumět, umět říci co po AI chceš a v čem programuješ a pak si ten kód umět zkontrolovat a ověřit, že počítá správně. V naprosté většině případů je to správně na správně na 1. pokus. Jen v tomto konkrétním případě, kdy potřebuji spočítat MOD velkého čísla (ve finále má 40 mil. desítkových číslic) to jde trochu ztuha.
Někdy AI zapomene, že programuji ve Visual Studiu a ne v Linuxu, tak jí
Visual Studio musím stále připomínat. AI zadávám i výstupy mého programu
ideálně s ladícími výpisy, které tam na mou žádost sama přidá, a AI mi
pak vypíše, co udělala chybně, opraví části programu s vysvětlením a na
přání mi vypíše celý opravený program .
Pomocí speciální AI jsem dokonce prohnal snímky magnetické rezonance
mého kolene, kdy mi pak AI ještě dříve, než jsem se to dozvěděl od
lékaře napsala, že mám natržený vnitřní roh menisku pravého kolene a
doporučuje mi artroskopii. Už jsem ji absolvoval a jsem teď v pohodě .
S AI jsem to konečně úspěšně dořešil. Byly tam dva problémy.
- AI mi s několika chybami špatně převedla výpis z paměti děleného čísla na shluky uint64_t, abych mohl zadat velké číslo do programu. Musel jsem číslo zadat ručně.
- Přestože AI věděla, že vytváří program pro Visual Studio, napadlo mě, že pracuje nesprávně s čísly uloženými v paměti, což se potvrdilo. Pracovala s formátem big-endian, ale čísla byla uložena ve tvaru little-endia tj. nejméně významný byte nejdříve.
Místo mé externí funkce na dělení 128bitových čísel (zjistil jsem, že v prvních verzích programu někdy došlo k tomu, že byl výsledek dělení větší než 64 bitů, má externí funkce s tím počítala a počítala správně) ji AI nahradila funkcí _udiv128, která dělí 128bitové číslo 64bitovým.
Nyní program funguje tak jak má a správně spočítá výsledek zbytku po
dělení velkých čísel. Problem solved
P.S. Na intenzivní práci s openai o1 preview jsem musel použít placenou Pro verzi. Verze zdarma umožňuje zadat jen jeden dotaz denně a po vyčerpání kreditu za cca 14 dnů je nutné přejít na placenou verzi.
Správný výstup programu:
Shift amount: 55
Before shift left:
00000000000001ff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff
After shift left by 55 bits:
ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ff80000000000000
Before shift left:
0000000000000000 0000000000001b8c 96780fb363dff216 25a9f48824f07aa4 4e390b4886e2f705 ed0de76c166dcc5e f3e65e4c8be40b8c 93c8e4512c2aff07 52374ead5d75f703 f16dd49eee538fb3 811dbf724f027912 031b520792995a23 3dcda4074ed12f6a e84cd25bfc37682a 715cc508696869cf 8f625008a4fdb542 e180d8077d300002
After shift left by 55 bits:
000000000000000d c64b3c07d9b1eff9 0b12d4fa4412783d 52271c85a443717b 82f686f3b60b36e6 2f79f32f2645f205 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 d9c08edfb927813c 89018da903c94cad 119ee6d203a76897 b57426692dfe1bb4 1538ae628434b434 e7c7b12804527eda a170c06c03be9800 0100000000000000
Normalized v (k = 9):
ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff ff80000000000000
Normalized u (u_len = 17):
000000000000000d c64b3c07d9b1eff9 0b12d4fa4412783d 52271c85a443717b 82f686f3b60b36e6 2f79f32f2645f205 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 d9c08edfb927813c 89018da903c94cad 119ee6d203a76897 b57426692dfe1bb4 1538ae628434b434 e7c7b12804527eda a170c06c03be9800 0100000000000000
Iteration 7:
Dividend: hi = 000000000000000d, lo = c64b3c07d9b1eff9
q_hat: 000000000000000d, r_hat: c64b3c07d9b1f006
After subtraction, u: 0000000000000000 c64b3c07d9b1eff9 0b12d4fa4412783d 52271c85a443717b 82f686f3b60b36e6 2f79f32f2645f205 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 e0408edfb927813c
Stored q[7] = 000000000000000d
Iteration 6:
Dividend: hi = c64b3c07d9b1eff9, lo = 0b12d4fa4412783d
q_hat: c64b3c07d9b1eff9, r_hat: d15e11021dc46836
After subtraction, u: 0000000000000000 0b12d4fa4412783d 52271c85a443717b 82f686f3b60b36e6 2f79f32f2645f205 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 e0a3b47dbd145a34 85818da903c94cad
Stored q[6] = c64b3c07d9b1eff9
Iteration 5:
Dividend: hi = 0b12d4fa4412783d, lo = 52271c85a443717b
q_hat: 0b12d4fa4412783d, r_hat: 5d39f17fe855e9b8
After subtraction, u: 0000000000000000 52271c85a443717b 82f686f3b60b36e6 2f79f32f2645f205 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 e0a3b47dbd145a34 8587171380eb55e9 301ee6d203a76897
Stored q[5] = 0b12d4fa4412783d
Iteration 4:
Dividend: hi = 52271c85a443717b, lo = 82f686f3b60b36e6
q_hat: 52271c85a443717b, r_hat: d51da3795a4ea861
After subtraction, u: 0000000000000000 82f686f3b60b36e6 2f79f32f2645f205 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 e0a3b47dbd145a34 8587171380eb55e9 3047fa6046798a50 72f426692dfe1bb4
Stored q[4] = 52271c85a443717b
Iteration 3:
Dividend: hi = 82f686f3b60b36e6, lo = 2f79f32f2645f205
q_hat: 82f686f3b60b36e6, r_hat: b2707a22dc5128eb
After subtraction, u: 0000000000000000 2f79f32f2645f205 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 e0a3b47dbd145a34 8587171380eb55e9 3047fa6046798a50 7335a1aca7d9214f 8838ae628434b434
Stored q[3] = 82f686f3b60b36e6
Iteration 2:
Dividend: hi = 2f79f32f2645f205, lo = c649e4722896157f
q_hat: 2f79f32f2645f205, r_hat: f5c3d7a14edc0784
After subtraction, u: 0000000000000000 c649e4722896157f 83a91ba756aebafb 81f8b6ea4f7729c7 e0a3b47dbd145a34 8587171380eb55e9 3047fa6046798a50 7335a1aca7d9214f 88506b5c1bc7d72d ea47b12804527eda
Stored q[2] = 2f79f32f2645f205
Iteration 1:
Dividend: hi = c649e4722896157f, lo = 83a91ba756aebafb
q_hat: c649e4722896157f, r_hat: 49f300197f44d07a
After subtraction, u: 0000000000000000 83a91ba756aebafb 81f8b6ea4f7729c7 e0a3b47dbd145a34 8587171380eb55e9 3047fa6046798a50 7335a1aca7d9214f 88506b5c1bc7d72d eaaad61a3d66c9e5 60f0c06c03be9800
Stored q[1] = c649e4722896157f
Iteration 0:
Dividend: hi = 83a91ba756aebafb, lo = 81f8b6ea4f7729c7
q_hat: 83a91ba756aebafb, r_hat: 05a1d291a625e4c2
After subtraction, u: 0000000000000000 81f8b6ea4f7729c7 e0a3b47dbd145a34 8587171380eb55e9 3047fa6046798a50 7335a1aca7d9214f 88506b5c1bc7d72d eaaad61a3d66c9e5 613294f9d769ef5d 7e80000000000000
Stored q[0] = 83a91ba756aebafb
Denormalizing u by shifting right 55 bits
Before shift right:
81f8b6ea4f7729c7 e0a3b47dbd145a34 8587171380eb55e9 3047fa6046798a50 7335a1aca7d9214f 88506b5c1bc7d72d eaaad61a3d66c9e5 613294f9d769ef5d 7e80000000000000
After shift right by 55 bits:
0000000000000103 f16dd49eee538fc1 4768fb7a28b4690b 0e2e2701d6abd260 8ff4c08cf314a0e6 6b43594fb2429f10 a0d6b8378fae5bd5 55ac347acd93cac2 6529f3aed3debafd
Final u after denormalization (u_len = 17):
0000000000000103 f16dd49eee538fc1 4768fb7a28b4690b 0e2e2701d6abd260 8ff4c08cf314a0e6 6b43594fb2429f10 a0d6b8378fae5bd5 55ac347acd93cac2 6529f3aed3debafd
MOD function works correctly.
Plně funkční program:
#include <stdint.h> // For uint64_t, uint8_t, ptrdiff_t
#include <stddef.h> // For size_t, ptrdiff_t
#include <string.h> // For memcpy, memset
#include <stdio.h> // For printf
#include <intrin.h> // For _umul128, _addcarry_u64, _subborrow_u64, _BitScanReverse64
#include <windows.h> // For VirtualAlloc, VirtualFree
// Define uint128_t type using struct
typedef struct {
uint64_t lo;
uint64_t hi;
} uint128_t;
// Define ssize_t for Visual Studio
typedef ptrdiff_t ssize_t;
// Helper function to perform 128-bit division
void divide_128_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t* q, uint64_t* r) {
// u1:u0 divided by v, q = quotient, r = remainder
uint64_t qhat, rhat;
if (u1 >= v) {
// Overflow, quotient is maximum representable uint64_t value
*q = ~(uint64_t)0;
*r = u1 - v + 1;
return;
}
// Compute qhat = (u1 * 2^64 + u0) / v
// Note: __udiv128 divides (high:low) by v, returns quotient and remainder
qhat = _udiv128(u1, u0, v, &rhat);
*q = qhat;
*r = rhat;
}
// Function to compare two uint128_t values
int compare_uint128(uint128_t a, uint128_t b) {
if (a.hi != b.hi) {
return (a.hi < b.hi) ? -1 : 1;
}
else {
if (a.lo < b.lo) return -1;
if (a.lo > b.lo) return 1;
return 0;
}
}
// Function to compare two big integers represented as arrays (little-endian)
int compare_big_int(uint64_t* a, size_t a_len, uint64_t* b, size_t b_len) {
// Compare from most significant word to least significant
ssize_t i;
for (i = (ssize_t)(a_len > b_len ? a_len : b_len) - 1; i >= 0; --i) {
uint64_t ai = (i < a_len) ? a[i] : 0;
uint64_t bi = (i < b_len) ? b[i] : 0;
if (ai != bi)
return (ai > bi) ? 1 : -1;
}
return 0;
}
// Function to compare two big integers represented as arrays (little-endian)
// Handles different lengths
int compare_big_int_var(uint64_t* a, size_t a_len, uint64_t* b, size_t b_len) {
size_t max_len = (a_len > b_len) ? a_len : b_len;
for (ssize_t i = max_len - 1; i >= 0; --i) {
uint64_t ai = (i < a_len) ? a[i] : 0;
uint64_t bi = (i < b_len) ? b[i] : 0;
if (ai != bi) {
return (ai < bi) ? -1 : 1;
}
}
return 0;
}
// External udiv128 function (you need to implement or provide this)
//extern "C" void udiv128(uint128_t* q, uint128_t* dividend, uint128_t* divisor, uint128_t* r);
// Shift left by 'shift_bits' bits in place (little-endian)
void big_integer_shift_left_in_place(uint64_t* a, size_t len, unsigned long shift_bits) {
if (shift_bits == 0 || len == 0) return;
size_t word_shift = shift_bits / 64;
unsigned long bit_shift = shift_bits % 64;
printf("Before shift left:\n");
for (size_t i = len; i-- > 0;) {
printf("%016llx ", a[i]);
}
printf("\n");
if (word_shift >= len) {
memset(a, 0, len * sizeof(uint64_t));
return;
}
// Word shift
if (word_shift > 0) {
for (ssize_t i = len - 1; i >= 0; --i) {
if ((size_t)i >= word_shift) {
a[i] = a[i - word_shift];
}
else {
a[i] = 0;
}
}
}
// Bit shift
if (bit_shift > 0) {
uint64_t carry = 0;
for (size_t i = 0; i < len; ++i) {
uint64_t temp = a[i];
a[i] = (temp << bit_shift) | carry;
carry = temp >> (64 - bit_shift);
}
}
printf("After shift left by %lu bits:\n", shift_bits);
for (size_t i = len; i-- > 0;) {
printf("%016llx ", a[i]);
}
printf("\n");
}
// Corrected shift right by 'shift_bits' bits in place (little-endian)
void big_integer_shift_right_in_place(uint64_t* a, size_t len, unsigned long shift_bits) {
if (shift_bits == 0 || len == 0) return;
size_t word_shift = shift_bits / 64;
unsigned long bit_shift = shift_bits % 64;
printf("Before shift right:\n");
for (size_t i = len; i-- > 0;) { // From most significant word to least
printf("%016llx ", a[i]);
}
printf("\n");
if (word_shift >= len) {
// All bits are shifted out
memset(a, 0, len * sizeof(uint64_t));
return;
}
// Word shift
if (word_shift > 0) {
for (size_t i = 0; i < len - word_shift; ++i) {
a[i] = a[i + word_shift];
}
memset(a + len - word_shift, 0, word_shift * sizeof(uint64_t));
}
// Bit shift
if (bit_shift > 0) {
uint64_t carry = 0;
for (ssize_t i = len - 1; i >= 0; --i) { // From most significant word to least
uint64_t temp = a[i];
a[i] = (temp >> bit_shift) | (carry << (64 - bit_shift));
carry = temp & ((1ULL << bit_shift) - 1);
}
}
printf("After shift right by %lu bits:\n", shift_bits);
for (size_t i = len; i-- > 0;) {
printf("%016llx ", a[i]);
}
printf("\n");
}
// Corrected mod function with detailed debug printouts (little-endian)
void mod(uint64_t* dst, uint64_t* src, uint64_t* tmp, size_t dst_len, size_t src_len) {
if (dst_len < src_len) {
// The modulus is dst itself
return;
}
if (src_len == 0) {
printf("Error: Divisor has zero length.\n");
return;
}
size_t n = dst_len;
size_t k = src_len;
// Initialize u
size_t u_len = n + 1;
uint64_t* u = tmp; // tmp must have enough space for u_len + k + (n - k + 2)
memset(u, 0, u_len * sizeof(uint64_t));
memcpy(u, dst, n * sizeof(uint64_t)); // Copy dst into u[0..n-1]
// Initialize v
uint64_t* v = u + u_len; // v has length k
memcpy(v, src, k * sizeof(uint64_t));
// Compute the shift amount
unsigned long shift = 0;
if (_BitScanReverse64(&shift, v[k - 1])) {
shift = 63 - shift;
}
else {
printf("Error: Divisor is zero.\n");
return;
}
printf("Shift amount: %lu\n", shift);
// Normalize v and u
if (shift > 0) {
big_integer_shift_left_in_place(v, k, shift);
big_integer_shift_left_in_place(u, u_len, shift);
}
// Debug: Print normalized v
printf("Normalized v (k = %zu):\n", k);
for (size_t i = k; i-- > 0;) {
printf("%016llx ", v[i]);
}
printf("\n");
// Debug: Print normalized u
printf("Normalized u (u_len = %zu):\n", u_len);
for (size_t i = u_len; i-- > 0;) {
printf("%016llx ", u[i]);
}
printf("\n");
// Initialize q
uint64_t* q = v + k; // q has length n - k + 1
memset(q, 0, (n - k + 1) * sizeof(uint64_t));
uint64_t v1 = v[k - 1];
uint64_t v2 = (k > 1) ? v[k - 2] : 0;
// Main loop
for (ssize_t j = n - k; j >= 0; --j) {
// Estimate q_hat
uint64_t u_jk = u[j + k]; // u[j + k]
uint64_t u_jk_1 = u[j + k - 1]; // u[j + k - 1]
uint64_t u_jk_2 = (j + k - 2 >= 0) ? u[j + k - 2] : 0;
uint64_t q_hat = 0;
uint64_t r_hat = 0;
divide_128_64(u_jk, u_jk_1, v1, &q_hat, &r_hat);
// Adjust q_hat if necessary
uint128_t lhs;
uint128_t rhs;
while (true) {
// lhs = q_hat * (v1:v2)
uint64_t mul_hi, mul_lo;
mul_lo = _umul128(q_hat, v1, &mul_hi); // q_hat * v1
lhs.hi = mul_hi;
lhs.lo = mul_lo;
// Construct comparison numerator
uint128_t comp;
comp.hi = r_hat;
comp.lo = u_jk_2;
if (compare_uint128(lhs, comp) <= 0) {
break;
}
q_hat--;
uint64_t prev_r_hat = r_hat;
r_hat += v1;
if (r_hat < prev_r_hat) { // Check for overflow
break;
}
}
printf("\nIteration %zd:\n", j);
printf("Dividend: hi = %016llx, lo = %016llx\n", u_jk, u_jk_1);
printf("q_hat: %016llx, r_hat: %016llx\n", q_hat, r_hat);
// Multiply and subtract q_hat * v from u[j .. j + k]
uint8_t borrow = 0;
uint64_t carry = 0;
for (size_t i = 0; i < k; ++i) {
uint64_t prod_hi, prod_lo;
prod_lo = _umul128(q_hat, v[i], &prod_hi);
// Add previous carry to prod_lo
uint64_t sum_lo;
uint8_t carry_in = _addcarry_u64(0, prod_lo, carry, &sum_lo);
// Update carry
uint64_t sum_hi = prod_hi + carry_in;
// Subtract sum_lo from u[j + i]
uint64_t temp;
borrow = _subborrow_u64(borrow, u[j + i], sum_lo, &temp);
u[j + i] = temp;
carry = sum_hi;
}
// Subtract the final carry
uint64_t temp;
borrow = _subborrow_u64(borrow, u[j + k], carry, &temp);
u[j + k] = temp;
// After subtraction, u:
printf("After subtraction, u:");
for (ssize_t i = j + k; i >= j; --i) {
printf(" %016llx", u[i]);
}
printf("\n");
// Correction if borrow occurred
if (borrow) {
printf("Borrow occurred, correcting u and q_hat\n");
q_hat--;
uint8_t carry2 = 0;
for (size_t i = 0; i < k; ++i) {
uint64_t sum;
carry2 = _addcarry_u64(carry2, u[j + i], v[i], &sum);
u[j + i] = sum;
}
// Add carry to u[j + k]
uint64_t sum;
carry2 = _addcarry_u64(carry2, u[j + k], 0, &sum);
u[j + k] = sum;
// After correction, u:
printf("After correction, u:");
for (ssize_t i = j + k; i >= j; --i) {
printf(" %016llx", u[i]);
}
printf("\n");
}
q[j] = q_hat;
printf("Stored q[%zd] = %016llx\n", j, q_hat);
}
// Denormalize
if (shift > 0) {
printf("\nDenormalizing u by shifting right %lu bits\n", shift);
big_integer_shift_right_in_place(u, k, shift);
}
// Debug: Print final u
printf("\nFinal u after denormalization (u_len = %zu):\n", u_len);
for (size_t i = k; i-- > 0;) {
printf("%016llx ", u[i]);
}
printf("\n");
// The modulus is in u[0..k - 1]
memcpy(dst, u, k * sizeof(uint64_t));
memset(dst + k, 0, (dst_len - k) * sizeof(uint64_t));
// Ensure the modulus is less than the divisor
if (compare_big_int(dst, k, src, k) >= 0) {
printf("Final subtraction since remainder >= divisor\n");
uint8_t borrow = 0;
for (size_t i = 0; i < k; ++i) {
uint64_t temp;
borrow = _subborrow_u64(borrow, dst[i], src[i], &temp);
dst[i] = temp;
}
}
}
int main() {
const size_t size = 1024 * 1024; // Adjust size as needed
uint64_t* POLE = (uint64_t*)VirtualAlloc(NULL, size * sizeof(uint64_t), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
if (!POLE) {
printf("Memory allocation failed.\n");
return -1;
}
size_t dst_len = 16;
size_t src_len = 9;
uint64_t* dst = POLE;
uint64_t* src = dst + dst_len;
uint64_t* expected_modulus = src + src_len;
uint64_t* tmp = expected_modulus + src_len;
// Initialize dst (dividend) - little-endian
uint64_t dst_init[16] = {
0xe180d8077d300002ULL, 0x8f625008a4fdb542ULL, 0x715cc508696869cfULL, 0xe84cd25bfc37682aULL,
0x3dcda4074ed12f6aULL, 0x031b520792995a23ULL, 0x811dbf724f027912ULL, 0xf16dd49eee538fb3ULL,
0x52374ead5d75f703ULL, 0x93c8e4512c2aff07ULL, 0xf3e65e4c8be40b8cULL, 0xed0de76c166dcc5eULL,
0x4e390b4886e2f705ULL, 0x25a9f48824f07aa4ULL, 0x96780fb363dff216ULL, 0x0000000000001b8cULL
};
memcpy(dst, dst_init, dst_len * sizeof(uint64_t));
// Initialize src (divisor) - little-endian
uint64_t src_init[9] = {
0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
0x00000000000001ffULL
};
memcpy(src, src_init, src_len * sizeof(uint64_t));
// Initialize expected modulus - little-endian
uint64_t expected_modulus_init[9] = {
0x6529f3aed3debafdULL, 0x55ac347acd93cac2ULL, 0xa0d6b8378fae5bd5ULL, 0x6b43594fb2429f10ULL,
0x8ff4c08cf314a0e6ULL, 0x0e2e2701d6abd260ULL, 0x4768fb7a28b4690bULL, 0xf16dd49eee538fc1ULL,
0x0000000000000103ULL
};
memcpy(expected_modulus, expected_modulus_init, src_len * sizeof(uint64_t));
// Call mod function
mod(dst, src, tmp, dst_len, src_len);
// Verify result
if (memcmp(dst, expected_modulus, src_len * sizeof(uint64_t)) == 0) {
printf("MOD function works correctly.\n");
}
else {
printf("MOD function failed.\n");
printf("Expected:\n");
for (size_t i = src_len; i-- > 0;) {
printf("%016llx ", expected_modulus[i]);
}
printf("\nActual:\n");
for (size_t i = src_len; i-- > 0;) {
printf("%016llx ", dst[i]);
}
printf("\n");
}
VirtualFree(POLE, 0, MEM_RELEASE);
return 0;
}
+5 Zkušeností

Zobrazeno 13 zpráv z 13.