author:    bptato <nincsnevem662@gmail.com>  2025-05-04 17:00:49 +0200
committer: bptato <nincsnevem662@gmail.com>  2025-05-04 17:06:47 +0200
commit:    bee4f12b0348e8893d62a01ce027b1550bb6ef09
tree:      cccddd33c62ed0e8a0487944648331aa06a95a93
parent:    9b184b31eb916b013a4d501a5d1e9ff8460d3fee
download:  chawan-bee4f12b0348e8893d62a01ce027b1550bb6ef09.tar.gz
Update QuickJS-NG to 0.10.0
-rw-r--r--  lib/monoucha0/monoucha/libregexp.nim                     5
-rw-r--r--  lib/monoucha0/monoucha/qjs/builtin-array-fromasync.h   113
-rw-r--r--  lib/monoucha0/monoucha/qjs/cutils.c                    182
-rw-r--r--  lib/monoucha0/monoucha/qjs/cutils.h                     55
-rw-r--r--  lib/monoucha0/monoucha/qjs/libbf.c                    8422
-rw-r--r--  lib/monoucha0/monoucha/qjs/libbf.h                     545
-rw-r--r--  lib/monoucha0/monoucha/qjs/libregexp.c                  36
-rw-r--r--  lib/monoucha0/monoucha/qjs/libregexp.h                   5
-rw-r--r--  lib/monoucha0/monoucha/qjs/quickjs-atom.h                1
-rw-r--r--  lib/monoucha0/monoucha/qjs/quickjs-opcode.h              1
-rw-r--r--  lib/monoucha0/monoucha/qjs/quickjs.c                  3338
-rw-r--r--  lib/monoucha0/monoucha/qjs/quickjs.h                    57
-rw-r--r--  lib/monoucha0/monoucha/qjs/xsum.c                     1122
-rw-r--r--  lib/monoucha0/monoucha/qjs/xsum.h                      133
-rw-r--r--  lib/monoucha0/monoucha/quickjs.nim                       2
-rw-r--r--  res/license.md                                          28
16 files changed, 4245 insertions, 9800 deletions
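
The most visible API additions in this update are the cross-platform js_exepath() and js_thread_create()/js_thread_join() helpers in cutils.c/cutils.h (see the hunks below). The following is a minimal usage sketch based only on the declarations added in cutils.h; the worker function and example strings are hypothetical and not part of the diff:

    #include <stdio.h>
    #include "cutils.h"

    /* Hypothetical worker; js_thread_create() takes void (*)(void *). */
    static void worker(void *arg)
    {
        printf("worker got: %s\n", (const char *)arg);
    }

    int main(void)
    {
        char path[JS__PATH_MAX];
        size_t len = sizeof(path);
        js_thread_t t;

        /* js_exepath() fills 'path' with the running executable's path;
           'len' is in/out and excludes the terminating NUL on return. */
        if (js_exepath(path, &len) == 0)
            printf("running from: %s (%zu bytes)\n", path, len);

        /* flags = 0 creates a joinable thread with a 2 MB stack (the
           glibc default, per the comment in cutils.h). */
        if (js_thread_create(&t, worker, (void *)"hello", 0))
            return 1;
        return js_thread_join(t); /* waits for 'worker', then releases the handle */
    }

Note that passing any flag bit other than JS_THREAD_CREATE_DETACHED makes js_thread_create() fail with -1, and a detached thread must not be joined.
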
diff --git a/lib/monoucha0/monoucha/libregexp.nim b/lib/monoucha0/monoucha/libregexp.nim index a3d0506f..7810f418 100644 --- a/lib/monoucha0/monoucha/libregexp.nim +++ b/lib/monoucha0/monoucha/libregexp.nim @@ -28,6 +28,11 @@ proc lre_realloc(opaque, p: pointer; size: csize_t): pointer {.exportc.} = # compilation pass" (i.e. in C). {.emit: """ #ifndef NOT_LRE_ONLY +int *lre_check_timeout(void *opaque) +{ + return 0; +} + bool lre_check_stack_overflow(void *opaque, size_t alloca_size) { return 0; diff --git a/lib/monoucha0/monoucha/qjs/builtin-array-fromasync.h b/lib/monoucha0/monoucha/qjs/builtin-array-fromasync.h new file mode 100644 index 00000000..baaa8687 --- /dev/null +++ b/lib/monoucha0/monoucha/qjs/builtin-array-fromasync.h @@ -0,0 +1,113 @@ +/* File generated automatically by the QuickJS-ng compiler. */ + +#include <inttypes.h> + +const uint32_t qjsc_builtin_array_fromasync_size = 826; + +const uint8_t qjsc_builtin_array_fromasync[826] = { + 0x14, 0x0d, 0x01, 0x1a, 0x61, 0x73, 0x79, 0x6e, + 0x63, 0x49, 0x74, 0x65, 0x72, 0x61, 0x74, 0x6f, + 0x72, 0x01, 0x10, 0x69, 0x74, 0x65, 0x72, 0x61, + 0x74, 0x6f, 0x72, 0x01, 0x12, 0x61, 0x72, 0x72, + 0x61, 0x79, 0x4c, 0x69, 0x6b, 0x65, 0x01, 0x0a, + 0x6d, 0x61, 0x70, 0x46, 0x6e, 0x01, 0x0e, 0x74, + 0x68, 0x69, 0x73, 0x41, 0x72, 0x67, 0x01, 0x0c, + 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x01, 0x02, + 0x69, 0x01, 0x1a, 0x69, 0x73, 0x43, 0x6f, 0x6e, + 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x6f, 0x72, + 0x01, 0x08, 0x73, 0x79, 0x6e, 0x63, 0x01, 0x0c, + 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x01, 0x08, + 0x69, 0x74, 0x65, 0x72, 0x01, 0x1c, 0x6e, 0x6f, + 0x74, 0x20, 0x61, 0x20, 0x66, 0x75, 0x6e, 0x63, + 0x74, 0x69, 0x6f, 0x6e, 0x01, 0x08, 0x63, 0x61, + 0x6c, 0x6c, 0x0c, 0x00, 0x02, 0x00, 0xa2, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x04, 0x01, + 0xa4, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x43, 0x02, + 0x01, 0x00, 0x05, 0x00, 0x05, 0x01, 0x00, 0x01, + 0x03, 0x05, 0xaa, 0x02, 0x00, 0x01, 0x40, 0x9e, + 0x03, 0x00, 0x01, 0x40, 0xc2, 0x03, 0x00, 0x01, + 0x40, 0xcc, 0x01, 0x00, 0x01, 0x40, 0xc4, 0x03, + 0x00, 0x01, 0x40, 0x0c, 0x60, 0x02, 0x01, 0xf8, + 0x01, 0x03, 0x0e, 0x01, 0x06, 0x05, 0x00, 0x86, + 0x04, 0x11, 0xc6, 0x03, 0x00, 0x01, 0x00, 0xc8, + 0x03, 0x00, 0x01, 0x00, 0xca, 0x03, 0x00, 0x01, + 0x00, 0xc6, 0x03, 0x01, 0xff, 0xff, 0xff, 0xff, + 0x0f, 0x20, 0xc8, 0x03, 0x01, 0x01, 0x20, 0xca, + 0x03, 0x01, 0x02, 0x20, 0xcc, 0x03, 0x02, 0x00, + 0x20, 0xce, 0x03, 0x02, 0x04, 0x20, 0xd0, 0x03, + 0x02, 0x05, 0x20, 0xd2, 0x03, 0x02, 0x06, 0x20, + 0xd4, 0x03, 0x02, 0x07, 0x20, 0x64, 0x06, 0x08, + 0x20, 0x82, 0x01, 0x07, 0x09, 0x20, 0xd6, 0x03, + 0x0a, 0x08, 0x30, 0x82, 0x01, 0x0d, 0x0b, 0x20, + 0xd4, 0x01, 0x0d, 0x0c, 0x20, 0x10, 0x00, 0x01, + 0x00, 0x9e, 0x03, 0x01, 0x03, 0xc2, 0x03, 0x02, + 0x03, 0xc4, 0x03, 0x04, 0x03, 0xaa, 0x02, 0x00, + 0x03, 0xcc, 0x01, 0x03, 0x03, 0x08, 0xc4, 0x0d, + 0x62, 0x02, 0x00, 0x62, 0x01, 0x00, 0x62, 0x00, + 0x00, 0xd3, 0xcb, 0xd4, 0x11, 0xf4, 0xec, 0x08, + 0x0e, 0x39, 0x46, 0x00, 0x00, 0x00, 0xdc, 0xcc, + 0xd5, 0x11, 0xf4, 0xec, 0x08, 0x0e, 0x39, 0x46, + 0x00, 0x00, 0x00, 0xdd, 0xcd, 0x62, 0x07, 0x00, + 0x62, 0x06, 0x00, 0x62, 0x05, 0x00, 0x62, 0x04, + 0x00, 0x62, 0x03, 0x00, 0xd4, 0x39, 0x46, 0x00, + 0x00, 0x00, 0xb0, 0xec, 0x16, 0xd4, 0x98, 0x04, + 0x1b, 0x00, 0x00, 0x00, 0xb0, 0xec, 0x0c, 0xdf, + 0x11, 0x04, 0xec, 0x00, 0x00, 0x00, 0x21, 0x01, + 0x00, 0x30, 0x06, 0xce, 0xb6, 0xc4, 0x04, 0xc3, + 0x0d, 0xf7, 0xc4, 0x05, 0x09, 0xc4, 0x06, 0xd3, + 0xe0, 0x48, 0xc4, 0x07, 0x63, 0x07, 0x00, 0x07, + 0xad, 0xec, 0x0f, 0x0a, 0x11, 0x64, 
0x06, 0x00, + 0x0e, 0xd3, 0xe1, 0x48, 0x11, 0x64, 0x07, 0x00, + 0x0e, 0x63, 0x07, 0x00, 0x07, 0xad, 0x6a, 0xa6, + 0x00, 0x00, 0x00, 0x62, 0x08, 0x00, 0x06, 0x11, + 0xf4, 0xed, 0x0c, 0x71, 0x43, 0x32, 0x00, 0x00, + 0x00, 0xc4, 0x08, 0x0e, 0xee, 0x05, 0x0e, 0xd3, + 0xee, 0xf2, 0x63, 0x08, 0x00, 0x8e, 0x11, 0xed, + 0x03, 0x0e, 0xb6, 0x11, 0x64, 0x08, 0x00, 0x0e, + 0x63, 0x05, 0x00, 0xec, 0x0c, 0xc3, 0x0d, 0x11, + 0x63, 0x08, 0x00, 0x21, 0x01, 0x00, 0xee, 0x06, + 0xe2, 0x63, 0x08, 0x00, 0xf1, 0x11, 0x64, 0x03, + 0x00, 0x0e, 0x63, 0x04, 0x00, 0x63, 0x08, 0x00, + 0xa7, 0x6a, 0x2a, 0x01, 0x00, 0x00, 0x62, 0x09, + 0x00, 0xd3, 0x63, 0x04, 0x00, 0x48, 0xc4, 0x09, + 0x63, 0x06, 0x00, 0xec, 0x0a, 0x63, 0x09, 0x00, + 0x8c, 0x11, 0x64, 0x09, 0x00, 0x0e, 0xd4, 0xec, + 0x17, 0xd4, 0x43, 0xed, 0x00, 0x00, 0x00, 0xd5, + 0x63, 0x09, 0x00, 0x63, 0x04, 0x00, 0x24, 0x03, + 0x00, 0x8c, 0x11, 0x64, 0x09, 0x00, 0x0e, 0x5f, + 0x04, 0x00, 0x63, 0x03, 0x00, 0x63, 0x04, 0x00, + 0x92, 0x64, 0x04, 0x00, 0x0b, 0x63, 0x09, 0x00, + 0x4d, 0x41, 0x00, 0x00, 0x00, 0x0a, 0x4d, 0x3e, + 0x00, 0x00, 0x00, 0x0a, 0x4d, 0x3f, 0x00, 0x00, + 0x00, 0xf3, 0x0e, 0xee, 0x9e, 0x62, 0x0a, 0x00, + 0x63, 0x07, 0x00, 0x43, 0xed, 0x00, 0x00, 0x00, + 0xd3, 0x24, 0x01, 0x00, 0xc4, 0x0a, 0x63, 0x05, + 0x00, 0xec, 0x09, 0xc3, 0x0d, 0x11, 0x21, 0x00, + 0x00, 0xee, 0x03, 0xe2, 0xf0, 0x11, 0x64, 0x03, + 0x00, 0x0e, 0x6d, 0x8c, 0x00, 0x00, 0x00, 0x62, + 0x0c, 0x00, 0x62, 0x0b, 0x00, 0x06, 0x11, 0xf4, + 0xed, 0x13, 0x71, 0x43, 0x41, 0x00, 0x00, 0x00, + 0xc4, 0x0b, 0x43, 0x6a, 0x00, 0x00, 0x00, 0xc4, + 0x0c, 0x0e, 0xee, 0x10, 0x0e, 0x63, 0x0a, 0x00, + 0x43, 0x6b, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, + 0x8c, 0xee, 0xe0, 0x63, 0x0c, 0x00, 0xed, 0x4e, + 0x63, 0x06, 0x00, 0xec, 0x0a, 0x63, 0x0b, 0x00, + 0x8c, 0x11, 0x64, 0x0b, 0x00, 0x0e, 0xd4, 0xec, + 0x17, 0xd4, 0x43, 0xed, 0x00, 0x00, 0x00, 0xd5, + 0x63, 0x0b, 0x00, 0x63, 0x04, 0x00, 0x24, 0x03, + 0x00, 0x8c, 0x11, 0x64, 0x0b, 0x00, 0x0e, 0x5f, + 0x04, 0x00, 0x63, 0x03, 0x00, 0x63, 0x04, 0x00, + 0x92, 0x64, 0x04, 0x00, 0x0b, 0x63, 0x0b, 0x00, + 0x4d, 0x41, 0x00, 0x00, 0x00, 0x0a, 0x4d, 0x3e, + 0x00, 0x00, 0x00, 0x0a, 0x4d, 0x3f, 0x00, 0x00, + 0x00, 0xf3, 0x0e, 0xee, 0x83, 0x0e, 0x06, 0x6e, + 0x0d, 0x00, 0x00, 0x00, 0x0e, 0xee, 0x1e, 0x6e, + 0x05, 0x00, 0x00, 0x00, 0x30, 0x63, 0x0a, 0x00, + 0x42, 0x06, 0x00, 0x00, 0x00, 0xec, 0x0d, 0x63, + 0x0a, 0x00, 0x43, 0x06, 0x00, 0x00, 0x00, 0x24, + 0x00, 0x00, 0x0e, 0x6f, 0x63, 0x03, 0x00, 0x63, + 0x04, 0x00, 0x44, 0x32, 0x00, 0x00, 0x00, 0x63, + 0x03, 0x00, 0x2f, 0xc1, 0x00, 0x28, 0xc1, 0x00, + 0xcf, 0x28, +}; + diff --git a/lib/monoucha0/monoucha/qjs/cutils.c b/lib/monoucha0/monoucha/qjs/cutils.c index a7b80502..5ad14eba 100644 --- a/lib/monoucha0/monoucha/qjs/cutils.c +++ b/lib/monoucha0/monoucha/qjs/cutils.c @@ -31,6 +31,13 @@ #if !defined(_MSC_VER) #include <sys/time.h> #endif +#if defined(_WIN32) +#include <windows.h> +#include <process.h> // _beginthread +#endif +#if defined(__APPLE__) +#include <mach-o/dyld.h> +#endif #include "cutils.h" @@ -1197,10 +1204,112 @@ int64_t js__gettimeofday_us(void) { return ((int64_t)tv.tv_sec * 1000000) + tv.tv_usec; } -/*--- Cross-platform threading APIs. ----*/ +#if defined(_WIN32) +int js_exepath(char *buffer, size_t *size_ptr) { + int utf8_len, utf16_buffer_len, utf16_len; + WCHAR* utf16_buffer; + + if (buffer == NULL || size_ptr == NULL || *size_ptr == 0) + return -1; + + if (*size_ptr > 32768) { + /* Windows paths can never be longer than this. 
*/ + utf16_buffer_len = 32768; + } else { + utf16_buffer_len = (int)*size_ptr; + } + + utf16_buffer = malloc(sizeof(WCHAR) * utf16_buffer_len); + if (!utf16_buffer) + return -1; + + /* Get the path as UTF-16. */ + utf16_len = GetModuleFileNameW(NULL, utf16_buffer, utf16_buffer_len); + if (utf16_len <= 0) + goto error; + + /* Convert to UTF-8 */ + utf8_len = WideCharToMultiByte(CP_UTF8, + 0, + utf16_buffer, + -1, + buffer, + (int)*size_ptr, + NULL, + NULL); + if (utf8_len == 0) + goto error; + + free(utf16_buffer); + + /* utf8_len *does* include the terminating null at this point, but the + * returned size shouldn't. */ + *size_ptr = utf8_len - 1; + return 0; + +error: + free(utf16_buffer); + return -1; +} +#elif defined(__APPLE__) +int js_exepath(char *buffer, size_t *size) { + /* realpath(exepath) may be > PATH_MAX so double it to be on the safe side. */ + char abspath[PATH_MAX * 2 + 1]; + char exepath[PATH_MAX + 1]; + uint32_t exepath_size; + size_t abspath_size; + + if (buffer == NULL || size == NULL || *size == 0) + return -1; + + exepath_size = sizeof(exepath); + if (_NSGetExecutablePath(exepath, &exepath_size)) + return -1; + + if (realpath(exepath, abspath) != abspath) + return -1; + + abspath_size = strlen(abspath); + if (abspath_size == 0) + return -1; + + *size -= 1; + if (*size > abspath_size) + *size = abspath_size; + + memcpy(buffer, abspath, *size); + buffer[*size] = '\0'; + + return 0; +} +#elif defined(__linux__) +int js_exepath(char *buffer, size_t *size) { + ssize_t n; + + if (buffer == NULL || size == NULL || *size == 0) + return -1; + + n = *size - 1; + if (n > 0) + n = readlink("/proc/self/exe", buffer, n); -#if !defined(EMSCRIPTEN) && !defined(__wasi__) && !defined(MNC_NO_THREADS) + if (n == -1) + return n; + buffer[n] = '\0'; + *size = n; + + return 0; +} +#else +int js_exepath(char* buffer, size_t* size_ptr) { + return -1; +} +#endif + +/*--- Cross-platform threading APIs. ----*/ + +#if JS_HAVE_THREADS #if defined(_WIN32) typedef void (*js__once_cb)(void); @@ -1267,6 +1376,37 @@ int js_cond_timedwait(js_cond_t *cond, js_mutex_t *mutex, uint64_t timeout) { return -1; } +int js_thread_create(js_thread_t *thrd, void (*start)(void *), void *arg, + int flags) +{ + HANDLE h, cp; + + *thrd = INVALID_HANDLE_VALUE; + if (flags & ~JS_THREAD_CREATE_DETACHED) + return -1; + h = (HANDLE)_beginthread(start, /*stacksize*/2<<20, arg); + if (!h) + return -1; + if (flags & JS_THREAD_CREATE_DETACHED) + return 0; + // _endthread() automatically closes the handle but we want to wait on + // it so make a copy. Race-y for very short-lived threads. Can be solved + // by switching to _beginthreadex(CREATE_SUSPENDED) but means changing + // |start| from __cdecl to __stdcall. 
+ cp = GetCurrentProcess(); + if (DuplicateHandle(cp, h, cp, thrd, 0, FALSE, DUPLICATE_SAME_ACCESS)) + return 0; + return -1; +} + +int js_thread_join(js_thread_t thrd) +{ + if (WaitForSingleObject(thrd, INFINITE)) + return -1; + CloseHandle(thrd); + return 0; +} + #else /* !defined(_WIN32) */ void js_once(js_once_t *guard, void (*callback)(void)) { @@ -1407,9 +1547,43 @@ int js_cond_timedwait(js_cond_t *cond, js_mutex_t *mutex, uint64_t timeout) { return -1; } -#endif +int js_thread_create(js_thread_t *thrd, void (*start)(void *), void *arg, + int flags) +{ + union { + void (*x)(void *); + void *(*f)(void *); + } u = {start}; + pthread_attr_t attr; + int ret; + + if (flags & ~JS_THREAD_CREATE_DETACHED) + return -1; + if (pthread_attr_init(&attr)) + return -1; + ret = -1; + if (pthread_attr_setstacksize(&attr, 2<<20)) + goto fail; + if (flags & JS_THREAD_CREATE_DETACHED) + if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) + goto fail; + if (pthread_create(thrd, &attr, u.f, arg)) + goto fail; + ret = 0; +fail: + pthread_attr_destroy(&attr); + return ret; +} + +int js_thread_join(js_thread_t thrd) +{ + if (pthread_join(thrd, NULL)) + return -1; + return 0; +} -#endif /* !defined(EMSCRIPTEN) && !defined(__wasi__) */ +#endif /* !defined(_WIN32) */ +#endif /* JS_HAVE_THREADS */ #ifdef __GNUC__ #pragma GCC visibility pop diff --git a/lib/monoucha0/monoucha/qjs/cutils.h b/lib/monoucha0/monoucha/qjs/cutils.h index c0537e68..e6c7e7b8 100644 --- a/lib/monoucha0/monoucha/qjs/cutils.h +++ b/lib/monoucha0/monoucha/qjs/cutils.h @@ -54,6 +54,10 @@ extern "C" { #include <errno.h> #include <pthread.h> #endif +#if !defined(_WIN32) +#include <limits.h> +#include <unistd.h> +#endif #if defined(_MSC_VER) && !defined(__clang__) # define likely(x) (x) @@ -94,7 +98,7 @@ extern "C" { #define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member))) #endif -#if defined(_MSC_VER) +#if defined(_MSC_VER) || defined(__cplusplus) #define minimum_length(n) n #else #define minimum_length(n) static n @@ -118,6 +122,14 @@ extern "C" { #endif #endif +#if defined(PATH_MAX) +# define JS__PATH_MAX PATH_MAX +#elif defined(_WIN32) +# define JS__PATH_MAX 32767 +#else +# define JS__PATH_MAX 8192 +#endif + void js__pstrcpy(char *buf, int buf_size, const char *str); char *js__pstrcat(char *buf, int buf_size, const char *s); int js__strstart(const char *str, const char *val, const char **ptr); @@ -545,6 +557,26 @@ void rqsort(void *base, size_t nmemb, size_t size, int (*cmp)(const void *, const void *, void *), void *arg); +static inline uint64_t float64_as_uint64(double d) +{ + union { + double d; + uint64_t u64; + } u; + u.d = d; + return u.u64; +} + +static inline double uint64_as_float64(uint64_t u64) +{ + union { + double d; + uint64_t u64; + } u; + u.u64 = u64; + return u.d; +} + int64_t js__gettimeofday_us(void); uint64_t js__hrtime_ns(void); @@ -561,20 +593,30 @@ static inline size_t js__malloc_usable_size(const void *ptr) #endif } +int js_exepath(char* buffer, size_t* size); + /* Cross-platform threading APIs. 
*/ -#if !defined(EMSCRIPTEN) && !defined(__wasi__) && !defined(MNC_NO_THREADS) +#if defined(EMSCRIPTEN) || defined(__wasi__) || defined(MNC_NO_THREADS) + +#define JS_HAVE_THREADS 0 + +#else + +#define JS_HAVE_THREADS 1 #if defined(_WIN32) #define JS_ONCE_INIT INIT_ONCE_STATIC_INIT typedef INIT_ONCE js_once_t; typedef CRITICAL_SECTION js_mutex_t; typedef CONDITION_VARIABLE js_cond_t; +typedef HANDLE js_thread_t; #else #define JS_ONCE_INIT PTHREAD_ONCE_INIT typedef pthread_once_t js_once_t; typedef pthread_mutex_t js_mutex_t; typedef pthread_cond_t js_cond_t; +typedef pthread_t js_thread_t; #endif void js_once(js_once_t *guard, void (*callback)(void)); @@ -591,6 +633,15 @@ void js_cond_broadcast(js_cond_t *cond); void js_cond_wait(js_cond_t *cond, js_mutex_t *mutex); int js_cond_timedwait(js_cond_t *cond, js_mutex_t *mutex, uint64_t timeout); +enum { + JS_THREAD_CREATE_DETACHED = 1, +}; + +// creates threads with 2 MB stacks (glibc default) +int js_thread_create(js_thread_t *thrd, void (*start)(void *), void *arg, + int flags); +int js_thread_join(js_thread_t thrd); + #endif /* !defined(EMSCRIPTEN) && !defined(__wasi__) */ #ifdef __cplusplus diff --git a/lib/monoucha0/monoucha/qjs/libbf.c b/lib/monoucha0/monoucha/qjs/libbf.c deleted file mode 100644 index 26872c6d..00000000 --- a/lib/monoucha0/monoucha/qjs/libbf.c +++ /dev/null @@ -1,8422 +0,0 @@ -/* - * Tiny arbitrary precision floating point library - * - * Copyright (c) 2017-2021 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -#include <stdlib.h> -#include <stdio.h> -#include <inttypes.h> -#include <math.h> -#include <string.h> -#include <assert.h> - -#ifdef __AVX2__ -#include <immintrin.h> -#endif - -#include "cutils.h" -#include "libbf.h" - -/* enable it to check the multiplication result */ -//#define USE_MUL_CHECK -/* enable it to use FFT/NTT multiplication */ -#define USE_FFT_MUL -/* enable decimal floating point support */ -#define USE_BF_DEC - -//#define inline __attribute__((always_inline)) - -#ifdef __AVX2__ -#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */ -#else -#define FFT_MUL_THRESHOLD 100 /* in limbs of the smallest factor */ -#endif - -/* XXX: adjust */ -#define DIVNORM_LARGE_THRESHOLD 50 -#define UDIV1NORM_THRESHOLD 3 - -#if LIMB_BITS == 64 -#define FMT_LIMB1 "%" PRIx64 -#define FMT_LIMB "%016" PRIx64 -#define PRId_LIMB PRId64 -#define PRIu_LIMB PRIu64 - -#else - -#define FMT_LIMB1 "%x" -#define FMT_LIMB "%08x" -#define PRId_LIMB "d" -#define PRIu_LIMB "u" - -#endif - -typedef intptr_t mp_size_t; - -typedef int bf_op2_func_t(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags); - -#ifdef USE_FFT_MUL - -#define FFT_MUL_R_OVERLAP_A (1 << 0) -#define FFT_MUL_R_OVERLAP_B (1 << 1) -#define FFT_MUL_R_NORESIZE (1 << 2) - -static no_inline int fft_mul(bf_context_t *s, - bf_t *res, limb_t *a_tab, limb_t a_len, - limb_t *b_tab, limb_t b_len, int mul_flags); -static void fft_clear_cache(bf_context_t *s); -#endif -#ifdef USE_BF_DEC -static limb_t get_digit(const limb_t *tab, limb_t len, slimb_t pos); -#endif - - -/* could leading zeros */ -static inline int clz(limb_t a) -{ - if (a == 0) { - return LIMB_BITS; - } else { -#if LIMB_BITS == 64 - return clz64(a); -#else - return clz32(a); -#endif - } -} - -static inline int ctz(limb_t a) -{ - if (a == 0) { - return LIMB_BITS; - } else { -#if LIMB_BITS == 64 - return ctz64(a); -#else - return ctz32(a); -#endif - } -} - -static inline int ceil_log2(limb_t a) -{ - if (a <= 1) - return 0; - else - return LIMB_BITS - clz(a - 1); -} - -/* b must be >= 1 */ -static inline slimb_t ceil_div(slimb_t a, slimb_t b) -{ - if (a >= 0) - return (a + b - 1) / b; - else - return a / b; -} - -/* b must be >= 1 */ -static inline slimb_t floor_div(slimb_t a, slimb_t b) -{ - if (a >= 0) { - return a / b; - } else { - return (a - b + 1) / b; - } -} - -/* return r = a modulo b (0 <= r <= b - 1. b must be >= 1 */ -static inline limb_t smod(slimb_t a, slimb_t b) -{ - a = a % (slimb_t)b; - if (a < 0) - a += b; - return a; -} - -/* signed addition with saturation */ -static inline slimb_t sat_add(slimb_t a, slimb_t b) -{ - slimb_t r; - r = a + b; - /* overflow ? 
*/ - if (((a ^ r) & (b ^ r)) < 0) - r = (a >> (LIMB_BITS - 1)) ^ (((limb_t)1 << (LIMB_BITS - 1)) - 1); - return r; -} - -static inline __maybe_unused limb_t shrd(limb_t low, limb_t high, long shift) -{ - if (shift != 0) - low = (low >> shift) | (high << (LIMB_BITS - shift)); - return low; -} - -static inline __maybe_unused limb_t shld(limb_t a1, limb_t a0, long shift) -{ - if (shift != 0) - return (a1 << shift) | (a0 >> (LIMB_BITS - shift)); - else - return a1; -} - -#define malloc(s) malloc_is_forbidden(s) -#define free(p) free_is_forbidden(p) -#define realloc(p, s) realloc_is_forbidden(p, s) - -void bf_context_init(bf_context_t *s, bf_realloc_func_t *realloc_func, - void *realloc_opaque) -{ - memset(s, 0, sizeof(*s)); - s->realloc_func = realloc_func; - s->realloc_opaque = realloc_opaque; -} - -void bf_context_end(bf_context_t *s) -{ - bf_clear_cache(s); -} - -void bf_init(bf_context_t *s, bf_t *r) -{ - r->ctx = s; - r->sign = 0; - r->expn = BF_EXP_ZERO; - r->len = 0; - r->tab = NULL; -} - -/* return 0 if OK, -1 if alloc error */ -int bf_resize(bf_t *r, limb_t len) -{ - limb_t *tab; - - if (len != r->len) { - tab = bf_realloc(r->ctx, r->tab, len * sizeof(limb_t)); - if (!tab && len != 0) - return -1; - r->tab = tab; - r->len = len; - } - return 0; -} - -/* return 0 or BF_ST_MEM_ERROR */ -int bf_set_ui(bf_t *r, uint64_t a) -{ - r->sign = 0; - if (a == 0) { - r->expn = BF_EXP_ZERO; - bf_resize(r, 0); /* cannot fail */ - } -#if LIMB_BITS == 32 - else if (a <= 0xffffffff) -#else - else -#endif - { - int shift; - if (bf_resize(r, 1)) - goto fail; - shift = clz(a); - r->tab[0] = a << shift; - r->expn = LIMB_BITS - shift; - } -#if LIMB_BITS == 32 - else { - uint32_t a1, a0; - int shift; - if (bf_resize(r, 2)) - goto fail; - a0 = a; - a1 = a >> 32; - shift = clz(a1); - r->tab[0] = a0 << shift; - r->tab[1] = shld(a1, a0, shift); - r->expn = 2 * LIMB_BITS - shift; - } -#endif - return 0; - fail: - bf_set_nan(r); - return BF_ST_MEM_ERROR; -} - -/* return 0 or BF_ST_MEM_ERROR */ -int bf_set_si(bf_t *r, int64_t a) -{ - int ret; - - if (a < 0) { - ret = bf_set_ui(r, -a); - r->sign = 1; - } else { - ret = bf_set_ui(r, a); - } - return ret; -} - -void bf_set_nan(bf_t *r) -{ - bf_resize(r, 0); /* cannot fail */ - r->expn = BF_EXP_NAN; - r->sign = 0; -} - -void bf_set_zero(bf_t *r, int is_neg) -{ - bf_resize(r, 0); /* cannot fail */ - r->expn = BF_EXP_ZERO; - r->sign = is_neg; -} - -void bf_set_inf(bf_t *r, int is_neg) -{ - bf_resize(r, 0); /* cannot fail */ - r->expn = BF_EXP_INF; - r->sign = is_neg; -} - -/* return 0 or BF_ST_MEM_ERROR */ -int bf_set(bf_t *r, const bf_t *a) -{ - if (r == a) - return 0; - if (bf_resize(r, a->len)) { - bf_set_nan(r); - return BF_ST_MEM_ERROR; - } - r->sign = a->sign; - r->expn = a->expn; - if (a->len > 0) - memcpy(r->tab, a->tab, a->len * sizeof(limb_t)); - return 0; -} - -/* equivalent to bf_set(r, a); bf_delete(a) */ -void bf_move(bf_t *r, bf_t *a) -{ - bf_context_t *s = r->ctx; - if (r == a) - return; - bf_free(s, r->tab); - *r = *a; -} - -static limb_t get_limbz(const bf_t *a, limb_t idx) -{ - if (idx >= a->len) - return 0; - else - return a->tab[idx]; -} - -/* get LIMB_BITS at bit position 'pos' in tab */ -static inline limb_t get_bits(const limb_t *tab, limb_t len, slimb_t pos) -{ - limb_t i, a0, a1; - int p; - - i = pos >> LIMB_LOG2_BITS; - p = pos & (LIMB_BITS - 1); - if (i < len) - a0 = tab[i]; - else - a0 = 0; - if (p == 0) { - return a0; - } else { - i++; - if (i < len) - a1 = tab[i]; - else - a1 = 0; - return (a0 >> p) | (a1 << (LIMB_BITS - p)); - } -} - 
-static inline limb_t get_bit(const limb_t *tab, limb_t len, slimb_t pos) -{ - slimb_t i; - i = pos >> LIMB_LOG2_BITS; - if (i < 0 || i >= len) - return 0; - return (tab[i] >> (pos & (LIMB_BITS - 1))) & 1; -} - -static inline limb_t limb_mask(int start, int last) -{ - limb_t v; - int n; - n = last - start + 1; - if (n == LIMB_BITS) - v = -1; - else - v = (((limb_t)1 << n) - 1) << start; - return v; -} - -static limb_t mp_scan_nz(const limb_t *tab, mp_size_t n) -{ - mp_size_t i; - for(i = 0; i < n; i++) { - if (tab[i] != 0) - return 1; - } - return 0; -} - -/* return != 0 if one bit between 0 and bit_pos inclusive is not zero. */ -static inline limb_t scan_bit_nz(const bf_t *r, slimb_t bit_pos) -{ - slimb_t pos; - limb_t v; - - pos = bit_pos >> LIMB_LOG2_BITS; - if (pos < 0) - return 0; - v = r->tab[pos] & limb_mask(0, bit_pos & (LIMB_BITS - 1)); - if (v != 0) - return 1; - pos--; - while (pos >= 0) { - if (r->tab[pos] != 0) - return 1; - pos--; - } - return 0; -} - -/* return the addend for rounding. Note that prec can be <= 0 (for - BF_FLAG_RADPNT_PREC) */ -static int bf_get_rnd_add(int *pret, const bf_t *r, limb_t l, - slimb_t prec, int rnd_mode) -{ - int add_one, inexact; - limb_t bit1, bit0; - - if (rnd_mode == BF_RNDF) { - bit0 = 1; /* faithful rounding does not honor the INEXACT flag */ - } else { - /* starting limb for bit 'prec + 1' */ - bit0 = scan_bit_nz(r, l * LIMB_BITS - 1 - bf_max(0, prec + 1)); - } - - /* get the bit at 'prec' */ - bit1 = get_bit(r->tab, l, l * LIMB_BITS - 1 - prec); - inexact = (bit1 | bit0) != 0; - - add_one = 0; - switch(rnd_mode) { - case BF_RNDZ: - break; - case BF_RNDN: - if (bit1) { - if (bit0) { - add_one = 1; - } else { - /* round to even */ - add_one = - get_bit(r->tab, l, l * LIMB_BITS - 1 - (prec - 1)); - } - } - break; - case BF_RNDD: - case BF_RNDU: - if (r->sign == (rnd_mode == BF_RNDD)) - add_one = inexact; - break; - case BF_RNDA: - add_one = inexact; - break; - case BF_RNDNA: - case BF_RNDF: - add_one = bit1; - break; - default: - abort(); - } - - if (inexact) - *pret |= BF_ST_INEXACT; - return add_one; -} - -static int bf_set_overflow(bf_t *r, int sign, limb_t prec, bf_flags_t flags) -{ - slimb_t i, l, e_max; - int rnd_mode; - - rnd_mode = flags & BF_RND_MASK; - if (prec == BF_PREC_INF || - rnd_mode == BF_RNDN || - rnd_mode == BF_RNDNA || - rnd_mode == BF_RNDA || - (rnd_mode == BF_RNDD && sign == 1) || - (rnd_mode == BF_RNDU && sign == 0)) { - bf_set_inf(r, sign); - } else { - /* set to maximum finite number */ - l = (prec + LIMB_BITS - 1) / LIMB_BITS; - if (bf_resize(r, l)) { - bf_set_nan(r); - return BF_ST_MEM_ERROR; - } - r->tab[0] = limb_mask((-prec) & (LIMB_BITS - 1), - LIMB_BITS - 1); - for(i = 1; i < l; i++) - r->tab[i] = (limb_t)-1; - e_max = (limb_t)1 << (bf_get_exp_bits(flags) - 1); - r->expn = e_max; - r->sign = sign; - } - return BF_ST_OVERFLOW | BF_ST_INEXACT; -} - -/* round to prec1 bits assuming 'r' is non zero and finite. 'r' is - assumed to have length 'l' (1 <= l <= r->len). Note: 'prec1' can be - infinite (BF_PREC_INF). 'ret' is 0 or BF_ST_INEXACT if the result - is known to be inexact. Can fail with BF_ST_MEM_ERROR in case of - overflow not returning infinity. 
*/ -static int __bf_round(bf_t *r, limb_t prec1, bf_flags_t flags, limb_t l, - int ret) -{ - limb_t v, a; - int shift, add_one, rnd_mode; - slimb_t i, bit_pos, pos, e_min, e_max, e_range, prec; - - /* e_min and e_max are computed to match the IEEE 754 conventions */ - e_range = (limb_t)1 << (bf_get_exp_bits(flags) - 1); - e_min = -e_range + 3; - e_max = e_range; - - if (flags & BF_FLAG_RADPNT_PREC) { - /* 'prec' is the precision after the radix point */ - if (prec1 != BF_PREC_INF) - prec = r->expn + prec1; - else - prec = prec1; - } else if (unlikely(r->expn < e_min) && (flags & BF_FLAG_SUBNORMAL)) { - /* restrict the precision in case of potentially subnormal - result */ - assert(prec1 != BF_PREC_INF); - prec = prec1 - (e_min - r->expn); - } else { - prec = prec1; - } - - /* round to prec bits */ - rnd_mode = flags & BF_RND_MASK; - add_one = bf_get_rnd_add(&ret, r, l, prec, rnd_mode); - - if (prec <= 0) { - if (add_one) { - bf_resize(r, 1); /* cannot fail */ - r->tab[0] = (limb_t)1 << (LIMB_BITS - 1); - r->expn += 1 - prec; - ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT; - return ret; - } else { - goto underflow; - } - } else if (add_one) { - limb_t carry; - - /* add one starting at digit 'prec - 1' */ - bit_pos = l * LIMB_BITS - 1 - (prec - 1); - pos = bit_pos >> LIMB_LOG2_BITS; - carry = (limb_t)1 << (bit_pos & (LIMB_BITS - 1)); - - for(i = pos; i < l; i++) { - v = r->tab[i] + carry; - carry = (v < carry); - r->tab[i] = v; - if (carry == 0) - break; - } - if (carry) { - /* shift right by one digit */ - v = 1; - for(i = l - 1; i >= pos; i--) { - a = r->tab[i]; - r->tab[i] = (a >> 1) | (v << (LIMB_BITS - 1)); - v = a; - } - r->expn++; - } - } - - /* check underflow */ - if (unlikely(r->expn < e_min)) { - if (flags & BF_FLAG_SUBNORMAL) { - /* if inexact, also set the underflow flag */ - if (ret & BF_ST_INEXACT) - ret |= BF_ST_UNDERFLOW; - } else { - underflow: - ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT; - bf_set_zero(r, r->sign); - return ret; - } - } - - /* check overflow */ - if (unlikely(r->expn > e_max)) - return bf_set_overflow(r, r->sign, prec1, flags); - - /* keep the bits starting at 'prec - 1' */ - bit_pos = l * LIMB_BITS - 1 - (prec - 1); - i = bit_pos >> LIMB_LOG2_BITS; - if (i >= 0) { - shift = bit_pos & (LIMB_BITS - 1); - if (shift != 0) - r->tab[i] &= limb_mask(shift, LIMB_BITS - 1); - } else { - i = 0; - } - /* remove trailing zeros */ - while (r->tab[i] == 0) - i++; - if (i > 0) { - l -= i; - memmove(r->tab, r->tab + i, l * sizeof(limb_t)); - } - bf_resize(r, l); /* cannot fail */ - return ret; -} - -/* 'r' must be a finite number. */ -int bf_normalize_and_round(bf_t *r, limb_t prec1, bf_flags_t flags) -{ - limb_t l, v, a; - int shift, ret; - slimb_t i; - - // bf_print_str("bf_renorm", r); - l = r->len; - while (l > 0 && r->tab[l - 1] == 0) - l--; - if (l == 0) { - /* zero */ - r->expn = BF_EXP_ZERO; - bf_resize(r, 0); /* cannot fail */ - ret = 0; - } else { - r->expn -= (r->len - l) * LIMB_BITS; - /* shift to have the MSB set to '1' */ - v = r->tab[l - 1]; - shift = clz(v); - if (shift != 0) { - v = 0; - for(i = 0; i < l; i++) { - a = r->tab[i]; - r->tab[i] = (a << shift) | (v >> (LIMB_BITS - shift)); - v = a; - } - r->expn -= shift; - } - ret = __bf_round(r, prec1, flags, l, 0); - } - // bf_print_str("r_final", r); - return ret; -} - -/* return true if rounding can be done at precision 'prec' assuming - the exact result r is such that |r-a| <= 2^(EXP(a)-k). 
*/ -/* XXX: check the case where the exponent would be incremented by the - rounding */ -int bf_can_round(const bf_t *a, slimb_t prec, bf_rnd_t rnd_mode, slimb_t k) -{ - bool is_rndn; - slimb_t bit_pos, n; - limb_t bit; - - if (a->expn == BF_EXP_INF || a->expn == BF_EXP_NAN) - return false; - if (rnd_mode == BF_RNDF) { - return (k >= (prec + 1)); - } - if (a->expn == BF_EXP_ZERO) - return false; - is_rndn = (rnd_mode == BF_RNDN || rnd_mode == BF_RNDNA); - if (k < (prec + 2)) - return false; - bit_pos = a->len * LIMB_BITS - 1 - prec; - n = k - prec; - /* bit pattern for RNDN or RNDNA: 0111.. or 1000... - for other rounding modes: 000... or 111... - */ - bit = get_bit(a->tab, a->len, bit_pos); - bit_pos--; - n--; - bit ^= is_rndn; - /* XXX: slow, but a few iterations on average */ - while (n != 0) { - if (get_bit(a->tab, a->len, bit_pos) != bit) - return true; - bit_pos--; - n--; - } - return false; -} - -/* Cannot fail with BF_ST_MEM_ERROR. */ -int bf_round(bf_t *r, limb_t prec, bf_flags_t flags) -{ - if (r->len == 0) - return 0; - return __bf_round(r, prec, flags, r->len, 0); -} - -/* for debugging */ -static __maybe_unused void dump_limbs(const char *str, const limb_t *tab, limb_t n) -{ - limb_t i; - printf("%s: len=%" PRId_LIMB "\n", str, n); - for(i = 0; i < n; i++) { - printf("%" PRId_LIMB ": " FMT_LIMB "\n", - i, tab[i]); - } -} - -void mp_print_str(const char *str, const limb_t *tab, limb_t n) -{ - slimb_t i; - printf("%s= 0x", str); - for(i = n - 1; i >= 0; i--) { - if (i != (n - 1)) - printf("_"); - printf(FMT_LIMB, tab[i]); - } - printf("\n"); -} - -static __maybe_unused void mp_print_str_h(const char *str, - const limb_t *tab, limb_t n, - limb_t high) -{ - slimb_t i; - printf("%s= 0x", str); - printf(FMT_LIMB, high); - for(i = n - 1; i >= 0; i--) { - printf("_"); - printf(FMT_LIMB, tab[i]); - } - printf("\n"); -} - -/* for debugging */ -void bf_print_str(const char *str, const bf_t *a) -{ - slimb_t i; - printf("%s=", str); - - if (a->expn == BF_EXP_NAN) { - printf("NaN"); - } else { - if (a->sign) - putchar('-'); - if (a->expn == BF_EXP_ZERO) { - putchar('0'); - } else if (a->expn == BF_EXP_INF) { - printf("Inf"); - } else { - printf("0x0."); - for(i = a->len - 1; i >= 0; i--) - printf(FMT_LIMB, a->tab[i]); - printf("p%" PRId_LIMB, a->expn); - } - } - printf("\n"); -} - -/* compare the absolute value of 'a' and 'b'. Return < 0 if a < b, 0 - if a = b and > 0 otherwise. 
*/ -int bf_cmpu(const bf_t *a, const bf_t *b) -{ - slimb_t i; - limb_t len, v1, v2; - - if (a->expn != b->expn) { - if (a->expn < b->expn) - return -1; - else - return 1; - } - len = bf_max(a->len, b->len); - for(i = len - 1; i >= 0; i--) { - v1 = get_limbz(a, a->len - len + i); - v2 = get_limbz(b, b->len - len + i); - if (v1 != v2) { - if (v1 < v2) - return -1; - else - return 1; - } - } - return 0; -} - -/* Full order: -0 < 0, NaN == NaN and NaN is larger than all other numbers */ -int bf_cmp_full(const bf_t *a, const bf_t *b) -{ - int res; - - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - if (a->expn == b->expn) - res = 0; - else if (a->expn == BF_EXP_NAN) - res = 1; - else - res = -1; - } else if (a->sign != b->sign) { - res = 1 - 2 * a->sign; - } else { - res = bf_cmpu(a, b); - if (a->sign) - res = -res; - } - return res; -} - -/* Standard floating point comparison: return 2 if one of the operands - is NaN (unordered) or -1, 0, 1 depending on the ordering assuming - -0 == +0 */ -int bf_cmp(const bf_t *a, const bf_t *b) -{ - int res; - - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - res = 2; - } else if (a->sign != b->sign) { - if (a->expn == BF_EXP_ZERO && b->expn == BF_EXP_ZERO) - res = 0; - else - res = 1 - 2 * a->sign; - } else { - res = bf_cmpu(a, b); - if (a->sign) - res = -res; - } - return res; -} - -/* Compute the number of bits 'n' matching the pattern: - a= X1000..0 - b= X0111..1 - - When computing a-b, the result will have at least n leading zero - bits. - - Precondition: a > b and a.expn - b.expn = 0 or 1 -*/ -static limb_t count_cancelled_bits(const bf_t *a, const bf_t *b) -{ - slimb_t bit_offset, b_offset, n; - int p, p1; - limb_t v1, v2, mask; - - bit_offset = a->len * LIMB_BITS - 1; - b_offset = (b->len - a->len) * LIMB_BITS - (LIMB_BITS - 1) + - a->expn - b->expn; - n = 0; - - /* first search the equals bits */ - for(;;) { - v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS); - v2 = get_bits(b->tab, b->len, bit_offset + b_offset); - // printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2); - if (v1 != v2) - break; - n += LIMB_BITS; - bit_offset -= LIMB_BITS; - } - /* find the position of the first different bit */ - p = clz(v1 ^ v2) + 1; - n += p; - /* then search for '0' in a and '1' in b */ - p = LIMB_BITS - p; - if (p > 0) { - /* search in the trailing p bits of v1 and v2 */ - mask = limb_mask(0, p - 1); - p1 = bf_min(clz(v1 & mask), clz((~v2) & mask)) - (LIMB_BITS - p); - n += p1; - if (p1 != p) - goto done; - } - bit_offset -= LIMB_BITS; - for(;;) { - v1 = get_limbz(a, bit_offset >> LIMB_LOG2_BITS); - v2 = get_bits(b->tab, b->len, bit_offset + b_offset); - // printf("v1=" FMT_LIMB " v2=" FMT_LIMB "\n", v1, v2); - if (v1 != 0 || v2 != -1) { - /* different: count the matching bits */ - p1 = bf_min(clz(v1), clz(~v2)); - n += p1; - break; - } - n += LIMB_BITS; - bit_offset -= LIMB_BITS; - } - done: - return n; -} - -static int bf_add_internal(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags, int b_neg) -{ - const bf_t *tmp; - int is_sub, ret, cmp_res, a_sign, b_sign; - - a_sign = a->sign; - b_sign = b->sign ^ b_neg; - is_sub = a_sign ^ b_sign; - cmp_res = bf_cmpu(a, b); - if (cmp_res < 0) { - tmp = a; - a = b; - b = tmp; - a_sign = b_sign; /* b_sign is never used later */ - } - /* abs(a) >= abs(b) */ - if (cmp_res == 0 && is_sub && a->expn < BF_EXP_INF) { - /* zero result */ - bf_set_zero(r, (flags & BF_RND_MASK) == BF_RNDD); - ret = 0; - } else if (a->len == 0 || b->len == 0) { - ret = 0; - if (a->expn >= BF_EXP_INF) { - if 
(a->expn == BF_EXP_NAN) { - /* at least one operand is NaN */ - bf_set_nan(r); - } else if (b->expn == BF_EXP_INF && is_sub) { - /* infinities with different signs */ - bf_set_nan(r); - ret = BF_ST_INVALID_OP; - } else { - bf_set_inf(r, a_sign); - } - } else { - /* at least one zero and not subtract */ - bf_set(r, a); - r->sign = a_sign; - goto renorm; - } - } else { - slimb_t d, a_offset, b_bit_offset, i, cancelled_bits; - limb_t carry, v1, v2, u, r_len, carry1, precl, tot_len, z, sub_mask; - - r->sign = a_sign; - r->expn = a->expn; - d = a->expn - b->expn; - /* must add more precision for the leading cancelled bits in - subtraction */ - if (is_sub) { - if (d <= 1) - cancelled_bits = count_cancelled_bits(a, b); - else - cancelled_bits = 1; - } else { - cancelled_bits = 0; - } - - /* add two extra bits for rounding */ - precl = (cancelled_bits + prec + 2 + LIMB_BITS - 1) / LIMB_BITS; - tot_len = bf_max(a->len, b->len + (d + LIMB_BITS - 1) / LIMB_BITS); - r_len = bf_min(precl, tot_len); - if (bf_resize(r, r_len)) - goto fail; - a_offset = a->len - r_len; - b_bit_offset = (b->len - r_len) * LIMB_BITS + d; - - /* compute the bits before for the rounding */ - carry = is_sub; - z = 0; - sub_mask = -is_sub; - i = r_len - tot_len; - while (i < 0) { - slimb_t ap, bp; - bool inflag; - - ap = a_offset + i; - bp = b_bit_offset + i * LIMB_BITS; - inflag = false; - if (ap >= 0 && ap < a->len) { - v1 = a->tab[ap]; - inflag = true; - } else { - v1 = 0; - } - if (bp + LIMB_BITS > 0 && bp < (slimb_t)(b->len * LIMB_BITS)) { - v2 = get_bits(b->tab, b->len, bp); - inflag = true; - } else { - v2 = 0; - } - if (!inflag) { - /* outside 'a' and 'b': go directly to the next value - inside a or b so that the running time does not - depend on the exponent difference */ - i = 0; - if (ap < 0) - i = bf_min(i, -a_offset); - /* b_bit_offset + i * LIMB_BITS + LIMB_BITS >= 1 - equivalent to - i >= ceil(-b_bit_offset + 1 - LIMB_BITS) / LIMB_BITS) - */ - if (bp + LIMB_BITS <= 0) - i = bf_min(i, (-b_bit_offset) >> LIMB_LOG2_BITS); - } else { - i++; - } - v2 ^= sub_mask; - u = v1 + v2; - carry1 = u < v1; - u += carry; - carry = (u < carry) | carry1; - z |= u; - } - /* and the result */ - for(i = 0; i < r_len; i++) { - v1 = get_limbz(a, a_offset + i); - v2 = get_bits(b->tab, b->len, b_bit_offset + i * LIMB_BITS); - v2 ^= sub_mask; - u = v1 + v2; - carry1 = u < v1; - u += carry; - carry = (u < carry) | carry1; - r->tab[i] = u; - } - /* set the extra bits for the rounding */ - r->tab[0] |= (z != 0); - - /* carry is only possible in add case */ - if (!is_sub && carry) { - if (bf_resize(r, r_len + 1)) - goto fail; - r->tab[r_len] = 1; - r->expn += LIMB_BITS; - } - renorm: - ret = bf_normalize_and_round(r, prec, flags); - } - return ret; - fail: - bf_set_nan(r); - return BF_ST_MEM_ERROR; -} - -static int __bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_add_internal(r, a, b, prec, flags, 0); -} - -static int __bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_add_internal(r, a, b, prec, flags, 1); -} - -limb_t mp_add(limb_t *res, const limb_t *op1, const limb_t *op2, - limb_t n, limb_t carry) -{ - slimb_t i; - limb_t k, a, v, k1; - - k = carry; - for(i=0;i<n;i++) { - v = op1[i]; - a = v + op2[i]; - k1 = a < v; - a = a + k; - k = (a < k) | k1; - res[i] = a; - } - return k; -} - -limb_t mp_add_ui(limb_t *tab, limb_t b, size_t n) -{ - size_t i; - limb_t k, a; - - k=b; - for(i=0;i<n;i++) { - if (k == 0) - break; - a = tab[i] + k; - k = (a < k); - 
tab[i] = a; - } - return k; -} - -limb_t mp_sub(limb_t *res, const limb_t *op1, const limb_t *op2, - mp_size_t n, limb_t carry) -{ - int i; - limb_t k, a, v, k1; - - k = carry; - for(i=0;i<n;i++) { - v = op1[i]; - a = v - op2[i]; - k1 = a > v; - v = a - k; - k = (v > a) | k1; - res[i] = v; - } - return k; -} - -/* compute 0 - op2 */ -static limb_t mp_neg(limb_t *res, const limb_t *op2, mp_size_t n, limb_t carry) -{ - int i; - limb_t k, a, v, k1; - - k = carry; - for(i=0;i<n;i++) { - v = 0; - a = v - op2[i]; - k1 = a > v; - v = a - k; - k = (v > a) | k1; - res[i] = v; - } - return k; -} - -limb_t mp_sub_ui(limb_t *tab, limb_t b, mp_size_t n) -{ - mp_size_t i; - limb_t k, a, v; - - k=b; - for(i=0;i<n;i++) { - v = tab[i]; - a = v - k; - k = a > v; - tab[i] = a; - if (k == 0) - break; - } - return k; -} - -/* r = (a + high*B^n) >> shift. Return the remainder r (0 <= r < 2^shift). - 1 <= shift <= LIMB_BITS - 1 */ -static limb_t mp_shr(limb_t *tab_r, const limb_t *tab, mp_size_t n, - int shift, limb_t high) -{ - mp_size_t i; - limb_t l, a; - - assert(shift >= 1 && shift < LIMB_BITS); - l = high; - for(i = n - 1; i >= 0; i--) { - a = tab[i]; - tab_r[i] = (a >> shift) | (l << (LIMB_BITS - shift)); - l = a; - } - return l & (((limb_t)1 << shift) - 1); -} - -/* tabr[] = taba[] * b + l. Return the high carry */ -static limb_t mp_mul1(limb_t *tabr, const limb_t *taba, limb_t n, - limb_t b, limb_t l) -{ - limb_t i; - dlimb_t t; - - for(i = 0; i < n; i++) { - t = (dlimb_t)taba[i] * (dlimb_t)b + l; - tabr[i] = t; - l = t >> LIMB_BITS; - } - return l; -} - -/* tabr[] += taba[] * b, return the high word. */ -static limb_t mp_add_mul1(limb_t *tabr, const limb_t *taba, limb_t n, - limb_t b) -{ - limb_t i, l; - dlimb_t t; - - l = 0; - for(i = 0; i < n; i++) { - t = (dlimb_t)taba[i] * (dlimb_t)b + l + tabr[i]; - tabr[i] = t; - l = t >> LIMB_BITS; - } - return l; -} - -/* size of the result : op1_size + op2_size. */ -static void mp_mul_basecase(limb_t *result, - const limb_t *op1, limb_t op1_size, - const limb_t *op2, limb_t op2_size) -{ - limb_t i, r; - - result[op1_size] = mp_mul1(result, op1, op1_size, op2[0], 0); - for(i=1;i<op2_size;i++) { - r = mp_add_mul1(result + i, op1, op1_size, op2[i]); - result[i + op1_size] = r; - } -} - -/* return 0 if OK, -1 if memory error */ -/* XXX: change API so that result can be allocated */ -int mp_mul(bf_context_t *s, limb_t *result, - const limb_t *op1, limb_t op1_size, - const limb_t *op2, limb_t op2_size) -{ -#ifdef USE_FFT_MUL - if (unlikely(bf_min(op1_size, op2_size) >= FFT_MUL_THRESHOLD)) { - bf_t r_s, *r = &r_s; - r->tab = result; - /* XXX: optimize memory usage in API */ - if (fft_mul(s, r, (limb_t *)op1, op1_size, - (limb_t *)op2, op2_size, FFT_MUL_R_NORESIZE)) - return -1; - } else -#endif - { - mp_mul_basecase(result, op1, op1_size, op2, op2_size); - } - return 0; -} - -/* tabr[] -= taba[] * b. Return the value to substract to the high - word. */ -static limb_t mp_sub_mul1(limb_t *tabr, const limb_t *taba, limb_t n, - limb_t b) -{ - limb_t i, l; - dlimb_t t; - - l = 0; - for(i = 0; i < n; i++) { - t = tabr[i] - (dlimb_t)taba[i] * (dlimb_t)b - l; - tabr[i] = t; - l = -(t >> LIMB_BITS); - } - return l; -} - -/* WARNING: d must be >= 2^(LIMB_BITS-1) */ -static inline limb_t udiv1norm_init(limb_t d) -{ - limb_t a0, a1; - a1 = -d - 1; - a0 = -1; - return (((dlimb_t)a1 << LIMB_BITS) | a0) / d; -} - -/* return the quotient and the remainder in '*pr'of 'a1*2^LIMB_BITS+a0 - / d' with 0 <= a1 < d. 
*/ -static inline limb_t udiv1norm(limb_t *pr, limb_t a1, limb_t a0, - limb_t d, limb_t d_inv) -{ - limb_t n1m, n_adj, q, r, ah; - dlimb_t a; - n1m = ((slimb_t)a0 >> (LIMB_BITS - 1)); - n_adj = a0 + (n1m & d); - a = (dlimb_t)d_inv * (a1 - n1m) + n_adj; - q = (a >> LIMB_BITS) + a1; - /* compute a - q * r and update q so that the remainder is\ - between 0 and d - 1 */ - a = ((dlimb_t)a1 << LIMB_BITS) | a0; - a = a - (dlimb_t)q * d - d; - ah = a >> LIMB_BITS; - q += 1 + ah; - r = (limb_t)a + (ah & d); - *pr = r; - return q; -} - -/* b must be >= 1 << (LIMB_BITS - 1) */ -static limb_t mp_div1norm(limb_t *tabr, const limb_t *taba, limb_t n, - limb_t b, limb_t r) -{ - slimb_t i; - - if (n >= UDIV1NORM_THRESHOLD) { - limb_t b_inv; - b_inv = udiv1norm_init(b); - for(i = n - 1; i >= 0; i--) { - tabr[i] = udiv1norm(&r, r, taba[i], b, b_inv); - } - } else { - dlimb_t a1; - for(i = n - 1; i >= 0; i--) { - a1 = ((dlimb_t)r << LIMB_BITS) | taba[i]; - tabr[i] = a1 / b; - r = a1 % b; - } - } - return r; -} - -static int mp_divnorm_large(bf_context_t *s, - limb_t *tabq, limb_t *taba, limb_t na, - const limb_t *tabb, limb_t nb); - -/* base case division: divides taba[0..na-1] by tabb[0..nb-1]. tabb[nb - - 1] must be >= 1 << (LIMB_BITS - 1). na - nb must be >= 0. 'taba' - is modified and contains the remainder (nb limbs). tabq[0..na-nb] - contains the quotient with tabq[na - nb] <= 1. */ -static int mp_divnorm(bf_context_t *s, limb_t *tabq, limb_t *taba, limb_t na, - const limb_t *tabb, limb_t nb) -{ - limb_t r, a, c, q, v, b1, b1_inv, n, dummy_r; - slimb_t i, j; - - b1 = tabb[nb - 1]; - if (nb == 1) { - taba[0] = mp_div1norm(tabq, taba, na, b1, 0); - return 0; - } - n = na - nb; - if (bf_min(n, nb) >= DIVNORM_LARGE_THRESHOLD) { - return mp_divnorm_large(s, tabq, taba, na, tabb, nb); - } - - if (n >= UDIV1NORM_THRESHOLD) - b1_inv = udiv1norm_init(b1); - else - b1_inv = 0; - - /* first iteration: the quotient is only 0 or 1 */ - q = 1; - for(j = nb - 1; j >= 0; j--) { - if (taba[n + j] != tabb[j]) { - if (taba[n + j] < tabb[j]) - q = 0; - break; - } - } - tabq[n] = q; - if (q) { - mp_sub(taba + n, taba + n, tabb, nb, 0); - } - - for(i = n - 1; i >= 0; i--) { - if (unlikely(taba[i + nb] >= b1)) { - q = -1; - } else if (b1_inv) { - q = udiv1norm(&dummy_r, taba[i + nb], taba[i + nb - 1], b1, b1_inv); - } else { - dlimb_t al; - al = ((dlimb_t)taba[i + nb] << LIMB_BITS) | taba[i + nb - 1]; - q = al / b1; - r = al % b1; - } - r = mp_sub_mul1(taba + i, tabb, nb, q); - - v = taba[i + nb]; - a = v - r; - c = (a > v); - taba[i + nb] = a; - - if (c != 0) { - /* negative result */ - for(;;) { - q--; - c = mp_add(taba + i, taba + i, tabb, nb, 0); - /* propagate carry and test if positive result */ - if (c != 0) { - if (++taba[i + nb] == 0) { - break; - } - } - } - } - tabq[i] = q; - } - return 0; -} - -/* compute r=B^(2*n)/a such as a*r < B^(2*n) < a*r + 2 with n >= 1. 'a' - has n limbs with a[n-1] >= B/2 and 'r' has n+1 limbs with r[n] = 1. - - See Modern Computer Arithmetic by Richard P. 
Brent and Paul - Zimmermann, algorithm 3.5 */ -int mp_recip(bf_context_t *s, limb_t *tabr, const limb_t *taba, limb_t n) -{ - mp_size_t l, h, k, i; - limb_t *tabxh, *tabt, c, *tabu; - - if (n <= 2) { - /* return ceil(B^(2*n)/a) - 1 */ - /* XXX: could avoid allocation */ - tabu = bf_malloc(s, sizeof(limb_t) * (2 * n + 1)); - tabt = bf_malloc(s, sizeof(limb_t) * (n + 2)); - if (!tabt || !tabu) - goto fail; - for(i = 0; i < 2 * n; i++) - tabu[i] = 0; - tabu[2 * n] = 1; - if (mp_divnorm(s, tabt, tabu, 2 * n + 1, taba, n)) - goto fail; - for(i = 0; i < n + 1; i++) - tabr[i] = tabt[i]; - if (mp_scan_nz(tabu, n) == 0) { - /* only happens for a=B^n/2 */ - mp_sub_ui(tabr, 1, n + 1); - } - } else { - l = (n - 1) / 2; - h = n - l; - /* n=2p -> l=p-1, h = p + 1, k = p + 3 - n=2p+1-> l=p, h = p + 1; k = p + 2 - */ - tabt = bf_malloc(s, sizeof(limb_t) * (n + h + 1)); - tabu = bf_malloc(s, sizeof(limb_t) * (n + 2 * h - l + 2)); - if (!tabt || !tabu) - goto fail; - tabxh = tabr + l; - if (mp_recip(s, tabxh, taba + l, h)) - goto fail; - if (mp_mul(s, tabt, taba, n, tabxh, h + 1)) /* n + h + 1 limbs */ - goto fail; - while (tabt[n + h] != 0) { - mp_sub_ui(tabxh, 1, h + 1); - c = mp_sub(tabt, tabt, taba, n, 0); - mp_sub_ui(tabt + n, c, h + 1); - } - /* T = B^(n+h) - T */ - mp_neg(tabt, tabt, n + h + 1, 0); - tabt[n + h]++; - if (mp_mul(s, tabu, tabt + l, n + h + 1 - l, tabxh, h + 1)) - goto fail; - /* n + 2*h - l + 2 limbs */ - k = 2 * h - l; - for(i = 0; i < l; i++) - tabr[i] = tabu[i + k]; - mp_add(tabr + l, tabr + l, tabu + 2 * h, h, 0); - } - bf_free(s, tabt); - bf_free(s, tabu); - return 0; - fail: - bf_free(s, tabt); - bf_free(s, tabu); - return -1; -} - -/* return -1, 0 or 1 */ -static int mp_cmp(const limb_t *taba, const limb_t *tabb, mp_size_t n) -{ - mp_size_t i; - for(i = n - 1; i >= 0; i--) { - if (taba[i] != tabb[i]) { - if (taba[i] < tabb[i]) - return -1; - else - return 1; - } - } - return 0; -} - -//#define DEBUG_DIVNORM_LARGE -//#define DEBUG_DIVNORM_LARGE2 - -/* subquadratic divnorm */ -static int mp_divnorm_large(bf_context_t *s, - limb_t *tabq, limb_t *taba, limb_t na, - const limb_t *tabb, limb_t nb) -{ - limb_t *tabb_inv, nq, *tabt, i, n; - nq = na - nb; -#ifdef DEBUG_DIVNORM_LARGE - printf("na=%d nb=%d nq=%d\n", (int)na, (int)nb, (int)nq); - mp_print_str("a", taba, na); - mp_print_str("b", tabb, nb); -#endif - assert(nq >= 1); - n = nq; - if (nq < nb) - n++; - tabb_inv = bf_malloc(s, sizeof(limb_t) * (n + 1)); - tabt = bf_malloc(s, sizeof(limb_t) * 2 * (n + 1)); - if (!tabb_inv || !tabt) - goto fail; - - if (n >= nb) { - for(i = 0; i < n - nb; i++) - tabt[i] = 0; - for(i = 0; i < nb; i++) - tabt[i + n - nb] = tabb[i]; - } else { - /* truncate B: need to increment it so that the approximate - inverse is smaller that the exact inverse */ - for(i = 0; i < n; i++) - tabt[i] = tabb[i + nb - n]; - if (mp_add_ui(tabt, 1, n)) { - /* tabt = B^n : tabb_inv = B^n */ - memset(tabb_inv, 0, n * sizeof(limb_t)); - tabb_inv[n] = 1; - goto recip_done; - } - } - if (mp_recip(s, tabb_inv, tabt, n)) - goto fail; - recip_done: - /* Q=A*B^-1 */ - if (mp_mul(s, tabt, tabb_inv, n + 1, taba + na - (n + 1), n + 1)) - goto fail; - - for(i = 0; i < nq + 1; i++) - tabq[i] = tabt[i + 2 * (n + 1) - (nq + 1)]; -#ifdef DEBUG_DIVNORM_LARGE - mp_print_str("q", tabq, nq + 1); -#endif - - bf_free(s, tabt); - bf_free(s, tabb_inv); - tabb_inv = NULL; - - /* R=A-B*Q */ - tabt = bf_malloc(s, sizeof(limb_t) * (na + 1)); - if (!tabt) - goto fail; - if (mp_mul(s, tabt, tabq, nq + 1, tabb, nb)) - goto fail; - /* we add one more 
limb for the result */ - mp_sub(taba, taba, tabt, nb + 1, 0); - bf_free(s, tabt); - /* the approximated quotient is smaller than than the exact one, - hence we may have to increment it */ -#ifdef DEBUG_DIVNORM_LARGE2 - int cnt = 0; - static int cnt_max; -#endif - for(;;) { - if (taba[nb] == 0 && mp_cmp(taba, tabb, nb) < 0) - break; - taba[nb] -= mp_sub(taba, taba, tabb, nb, 0); - mp_add_ui(tabq, 1, nq + 1); -#ifdef DEBUG_DIVNORM_LARGE2 - cnt++; -#endif - } -#ifdef DEBUG_DIVNORM_LARGE2 - if (cnt > cnt_max) { - cnt_max = cnt; - printf("\ncnt=%d nq=%d nb=%d\n", cnt_max, (int)nq, (int)nb); - } -#endif - return 0; - fail: - bf_free(s, tabb_inv); - bf_free(s, tabt); - return -1; -} - -int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags) -{ - int ret, r_sign; - - if (a->len < b->len) { - const bf_t *tmp = a; - a = b; - b = tmp; - } - r_sign = a->sign ^ b->sign; - /* here b->len <= a->len */ - if (b->len == 0) { - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - bf_set_nan(r); - ret = 0; - } else if (a->expn == BF_EXP_INF || b->expn == BF_EXP_INF) { - if ((a->expn == BF_EXP_INF && b->expn == BF_EXP_ZERO) || - (a->expn == BF_EXP_ZERO && b->expn == BF_EXP_INF)) { - bf_set_nan(r); - ret = BF_ST_INVALID_OP; - } else { - bf_set_inf(r, r_sign); - ret = 0; - } - } else { - bf_set_zero(r, r_sign); - ret = 0; - } - } else { - bf_t tmp, *r1 = NULL; - limb_t a_len, b_len, precl; - limb_t *a_tab, *b_tab; - - a_len = a->len; - b_len = b->len; - - if ((flags & BF_RND_MASK) == BF_RNDF) { - /* faithful rounding does not require using the full inputs */ - precl = (prec + 2 + LIMB_BITS - 1) / LIMB_BITS; - a_len = bf_min(a_len, precl); - b_len = bf_min(b_len, precl); - } - a_tab = a->tab + a->len - a_len; - b_tab = b->tab + b->len - b_len; - -#ifdef USE_FFT_MUL - if (b_len >= FFT_MUL_THRESHOLD) { - int mul_flags = 0; - if (r == a) - mul_flags |= FFT_MUL_R_OVERLAP_A; - if (r == b) - mul_flags |= FFT_MUL_R_OVERLAP_B; - if (fft_mul(r->ctx, r, a_tab, a_len, b_tab, b_len, mul_flags)) - goto fail; - } else -#endif - { - if (r == a || r == b) { - bf_init(r->ctx, &tmp); - r1 = r; - r = &tmp; - } - if (bf_resize(r, a_len + b_len)) { - fail: - bf_set_nan(r); - ret = BF_ST_MEM_ERROR; - goto done; - } - mp_mul_basecase(r->tab, a_tab, a_len, b_tab, b_len); - } - r->sign = r_sign; - r->expn = a->expn + b->expn; - ret = bf_normalize_and_round(r, prec, flags); - done: - if (r == &tmp) - bf_move(r1, &tmp); - } - return ret; -} - -/* multiply 'r' by 2^e */ -int bf_mul_2exp(bf_t *r, slimb_t e, limb_t prec, bf_flags_t flags) -{ - slimb_t e_max; - if (r->len == 0) - return 0; - e_max = ((limb_t)1 << BF_EXT_EXP_BITS_MAX) - 1; - e = bf_max(e, -e_max); - e = bf_min(e, e_max); - r->expn += e; - return __bf_round(r, prec, flags, r->len, 0); -} - -/* Return e such as a=m*2^e with m odd integer. return 0 if a is zero, - Infinite or Nan. */ -slimb_t bf_get_exp_min(const bf_t *a) -{ - slimb_t i; - limb_t v; - int k; - - for(i = 0; i < a->len; i++) { - v = a->tab[i]; - if (v != 0) { - k = ctz(v); - return a->expn - (a->len - i) * LIMB_BITS + k; - } - } - return 0; -} - -/* a and b must be finite numbers with a >= 0 and b > 0. 'q' is the - integer defined as floor(a/b) and r = a - q * b. 
*/ -static void bf_tdivremu(bf_t *q, bf_t *r, - const bf_t *a, const bf_t *b) -{ - if (bf_cmpu(a, b) < 0) { - bf_set_ui(q, 0); - bf_set(r, a); - } else { - bf_div(q, a, b, bf_max(a->expn - b->expn + 1, 2), BF_RNDZ); - bf_rint(q, BF_RNDZ); - bf_mul(r, q, b, BF_PREC_INF, BF_RNDZ); - bf_sub(r, a, r, BF_PREC_INF, BF_RNDZ); - } -} - -static int __bf_div(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags) -{ - bf_context_t *s = r->ctx; - int ret, r_sign; - limb_t n, nb, precl; - - r_sign = a->sign ^ b->sign; - if (a->expn >= BF_EXP_INF || b->expn >= BF_EXP_INF) { - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF && b->expn == BF_EXP_INF) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else if (a->expn == BF_EXP_INF) { - bf_set_inf(r, r_sign); - return 0; - } else { - bf_set_zero(r, r_sign); - return 0; - } - } else if (a->expn == BF_EXP_ZERO) { - if (b->expn == BF_EXP_ZERO) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_set_zero(r, r_sign); - return 0; - } - } else if (b->expn == BF_EXP_ZERO) { - bf_set_inf(r, r_sign); - return BF_ST_DIVIDE_ZERO; - } - - /* number of limbs of the quotient (2 extra bits for rounding) */ - precl = (prec + 2 + LIMB_BITS - 1) / LIMB_BITS; - nb = b->len; - n = bf_max(a->len, precl); - - { - limb_t *taba, na; - slimb_t d; - - na = n + nb; - -#if LIMB_LOG2_BITS == 6 - if (na >= (SIZE_MAX / sizeof(limb_t)) - 1) { - return BF_ST_MEM_ERROR; /* Return memory error status */ - } -#endif - - taba = bf_malloc(s, (na + 1) * sizeof(limb_t)); - if (!taba) - goto fail; - d = na - a->len; - memset(taba, 0, d * sizeof(limb_t)); - memcpy(taba + d, a->tab, a->len * sizeof(limb_t)); - if (bf_resize(r, n + 1)) - goto fail1; - if (mp_divnorm(s, r->tab, taba, na, b->tab, nb)) { - fail1: - bf_free(s, taba); - goto fail; - } - /* see if non zero remainder */ - if (mp_scan_nz(taba, nb)) - r->tab[0] |= 1; - bf_free(r->ctx, taba); - r->expn = a->expn - b->expn + LIMB_BITS; - r->sign = r_sign; - ret = bf_normalize_and_round(r, prec, flags); - } - return ret; - fail: - bf_set_nan(r); - return BF_ST_MEM_ERROR; -} - -/* division and remainder. - - rnd_mode is the rounding mode for the quotient. The additional - rounding mode BF_RND_EUCLIDIAN is supported. - - 'q' is an integer. 'r' is rounded with prec and flags (prec can be - BF_PREC_INF). 
-*/ -int bf_divrem(bf_t *q, bf_t *r, const bf_t *a, const bf_t *b, - limb_t prec, bf_flags_t flags, int rnd_mode) -{ - bf_t a1_s, *a1 = &a1_s; - bf_t b1_s, *b1 = &b1_s; - int q_sign, ret; - bool is_ceil, is_rndn; - - assert(q != a && q != b); - assert(r != a && r != b); - assert(q != r); - - if (a->len == 0 || b->len == 0) { - bf_set_zero(q, 0); - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF || b->expn == BF_EXP_ZERO) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_set(r, a); - return bf_round(r, prec, flags); - } - } - - q_sign = a->sign ^ b->sign; - is_rndn = (rnd_mode == BF_RNDN || rnd_mode == BF_RNDNA); - switch(rnd_mode) { - default: - case BF_RNDZ: - case BF_RNDN: - case BF_RNDNA: - is_ceil = false; - break; - case BF_RNDD: - is_ceil = q_sign; - break; - case BF_RNDU: - is_ceil = q_sign ^ 1; - break; - case BF_RNDA: - is_ceil = true; - break; - case BF_DIVREM_EUCLIDIAN: - is_ceil = a->sign; - break; - } - - a1->expn = a->expn; - a1->tab = a->tab; - a1->len = a->len; - a1->sign = 0; - - b1->expn = b->expn; - b1->tab = b->tab; - b1->len = b->len; - b1->sign = 0; - - /* XXX: could improve to avoid having a large 'q' */ - bf_tdivremu(q, r, a1, b1); - if (bf_is_nan(q) || bf_is_nan(r)) - goto fail; - - if (r->len != 0) { - if (is_rndn) { - int res; - b1->expn--; - res = bf_cmpu(r, b1); - b1->expn++; - if (res > 0 || - (res == 0 && - (rnd_mode == BF_RNDNA || - get_bit(q->tab, q->len, q->len * LIMB_BITS - q->expn)))) { - goto do_sub_r; - } - } else if (is_ceil) { - do_sub_r: - ret = bf_add_si(q, q, 1, BF_PREC_INF, BF_RNDZ); - ret |= bf_sub(r, r, b1, BF_PREC_INF, BF_RNDZ); - if (ret & BF_ST_MEM_ERROR) - goto fail; - } - } - - r->sign ^= a->sign; - q->sign = q_sign; - return bf_round(r, prec, flags); - fail: - bf_set_nan(q); - bf_set_nan(r); - return BF_ST_MEM_ERROR; -} - -int bf_rem(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags, int rnd_mode) -{ - bf_t q_s, *q = &q_s; - int ret; - - bf_init(r->ctx, q); - ret = bf_divrem(q, r, a, b, prec, flags, rnd_mode); - bf_delete(q); - return ret; -} - -static inline int bf_get_limb(slimb_t *pres, const bf_t *a, int flags) -{ -#if LIMB_BITS == 32 - return bf_get_int32(pres, a, flags); -#else - return bf_get_int64(pres, a, flags); -#endif -} - -int bf_remquo(slimb_t *pq, bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags, int rnd_mode) -{ - bf_t q_s, *q = &q_s; - int ret; - - bf_init(r->ctx, q); - ret = bf_divrem(q, r, a, b, prec, flags, rnd_mode); - bf_get_limb(pq, q, BF_GET_INT_MOD); - bf_delete(q); - return ret; -} - -static __maybe_unused inline limb_t mul_mod(limb_t a, limb_t b, limb_t m) -{ - dlimb_t t; - t = (dlimb_t)a * (dlimb_t)b; - return t % m; -} - -#if defined(USE_MUL_CHECK) -static limb_t mp_mod1(const limb_t *tab, limb_t n, limb_t m, limb_t r) -{ - slimb_t i; - dlimb_t t; - - for(i = n - 1; i >= 0; i--) { - t = ((dlimb_t)r << LIMB_BITS) | tab[i]; - r = t % m; - } - return r; -} -#endif - -static const uint16_t sqrt_table[192] = { 
-128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255, -}; - -/* a >= 2^(LIMB_BITS - 2). Return (s, r) with s=floor(sqrt(a)) and - r=a-s^2. 0 <= r <= 2 * s */ -static limb_t mp_sqrtrem1(limb_t *pr, limb_t a) -{ - limb_t s1, r1, s, r, q, u, num; - - /* use a table for the 16 -> 8 bit sqrt */ - s1 = sqrt_table[(a >> (LIMB_BITS - 8)) - 64]; - r1 = (a >> (LIMB_BITS - 16)) - s1 * s1; - if (r1 > 2 * s1) { - r1 -= 2 * s1 + 1; - s1++; - } - - /* one iteration to get a 32 -> 16 bit sqrt */ - num = (r1 << 8) | ((a >> (LIMB_BITS - 32 + 8)) & 0xff); - q = num / (2 * s1); /* q <= 2^8 */ - u = num % (2 * s1); - s = (s1 << 8) + q; - r = (u << 8) | ((a >> (LIMB_BITS - 32)) & 0xff); - r -= q * q; - if ((slimb_t)r < 0) { - s--; - r += 2 * s + 1; - } - -#if LIMB_BITS == 64 - s1 = s; - r1 = r; - /* one more iteration for 64 -> 32 bit sqrt */ - num = (r1 << 16) | ((a >> (LIMB_BITS - 64 + 16)) & 0xffff); - q = num / (2 * s1); /* q <= 2^16 */ - u = num % (2 * s1); - s = (s1 << 16) + q; - r = (u << 16) | ((a >> (LIMB_BITS - 64)) & 0xffff); - r -= q * q; - if ((slimb_t)r < 0) { - s--; - r += 2 * s + 1; - } -#endif - *pr = r; - return s; -} - -/* return floor(sqrt(a)) */ -limb_t bf_isqrt(limb_t a) -{ - limb_t s, r; - int k; - - if (a == 0) - return 0; - k = clz(a) & ~1; - s = mp_sqrtrem1(&r, a << k); - s >>= (k >> 1); - return s; -} - -static limb_t mp_sqrtrem2(limb_t *tabs, limb_t *taba) -{ - limb_t s1, r1, s, q, u, a0, a1; - dlimb_t r, num; - int l; - - a0 = taba[0]; - a1 = taba[1]; - s1 = mp_sqrtrem1(&r1, a1); - l = LIMB_BITS / 2; - num = ((dlimb_t)r1 << l) | (a0 >> l); - q = num / (2 * s1); - u = num % (2 * s1); - s = (s1 << l) + q; - r = ((dlimb_t)u << l) | (a0 & (((limb_t)1 << l) - 1)); - if (unlikely((q >> l) != 0)) - r -= (dlimb_t)1 << LIMB_BITS; /* special case when q=2^l */ - else - r -= q * q; - if ((slimb_t)(r >> LIMB_BITS) < 0) { - s--; - r += 2 * (dlimb_t)s + 1; - } - tabs[0] = s; - taba[0] = r; - return r >> LIMB_BITS; -} - -//#define DEBUG_SQRTREM - -/* tmp_buf must contain (n / 2 + 1 limbs). *prh contains the highest - limb of the remainder. */ -static int mp_sqrtrem_rec(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n, - limb_t *tmp_buf, limb_t *prh) -{ - limb_t l, h, rh, ql, qh, c, i; - - if (n == 1) { - *prh = mp_sqrtrem2(tabs, taba); - return 0; - } -#ifdef DEBUG_SQRTREM - mp_print_str("a", taba, 2 * n); -#endif - l = n / 2; - h = n - l; - if (mp_sqrtrem_rec(s, tabs + l, taba + 2 * l, h, tmp_buf, &qh)) - return -1; -#ifdef DEBUG_SQRTREM - mp_print_str("s1", tabs + l, h); - mp_print_str_h("r1", taba + 2 * l, h, qh); - mp_print_str_h("r2", taba + l, n, qh); -#endif - - /* the remainder is in taba + 2 * l. 
Its high bit is in qh */ - if (qh) { - mp_sub(taba + 2 * l, taba + 2 * l, tabs + l, h, 0); - } - /* instead of dividing by 2*s, divide by s (which is normalized) - and update q and r */ - if (mp_divnorm(s, tmp_buf, taba + l, n, tabs + l, h)) - return -1; - qh += tmp_buf[l]; - for(i = 0; i < l; i++) - tabs[i] = tmp_buf[i]; - ql = mp_shr(tabs, tabs, l, 1, qh & 1); - qh = qh >> 1; /* 0 or 1 */ - if (ql) - rh = mp_add(taba + l, taba + l, tabs + l, h, 0); - else - rh = 0; -#ifdef DEBUG_SQRTREM - mp_print_str_h("q", tabs, l, qh); - mp_print_str_h("u", taba + l, h, rh); -#endif - - mp_add_ui(tabs + l, qh, h); -#ifdef DEBUG_SQRTREM - mp_print_str_h("s2", tabs, n, sh); -#endif - - /* q = qh, tabs[l - 1 ... 0], r = taba[n - 1 ... l] */ - /* subtract q^2. if qh = 1 then q = B^l, so we can take shortcuts */ - if (qh) { - c = qh; - } else { - if (mp_mul(s, taba + n, tabs, l, tabs, l)) - return -1; - c = mp_sub(taba, taba, taba + n, 2 * l, 0); - } - rh -= mp_sub_ui(taba + 2 * l, c, n - 2 * l); - if ((slimb_t)rh < 0) { - mp_sub_ui(tabs, 1, n); - rh += mp_add_mul1(taba, tabs, n, 2); - rh += mp_add_ui(taba, 1, n); - } - *prh = rh; - return 0; -} - -/* 'taba' has 2*n limbs with n >= 1 and taba[2*n-1] >= 2 ^ (LIMB_BITS - - 2). Return (s, r) with s=floor(sqrt(a)) and r=a-s^2. 0 <= r <= 2 - * s. tabs has n limbs. r is returned in the lower n limbs of - taba. Its r[n] is the returned value of the function. */ -/* Algorithm from the article "Karatsuba Square Root" by Paul Zimmermann and - inspirated from its GMP implementation */ -int mp_sqrtrem(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n) -{ - limb_t tmp_buf1[8]; - limb_t *tmp_buf; - mp_size_t n2; - int ret; - n2 = n / 2 + 1; - if (n2 <= countof(tmp_buf1)) { - tmp_buf = tmp_buf1; - } else { - tmp_buf = bf_malloc(s, sizeof(limb_t) * n2); - if (!tmp_buf) - return -1; - } - ret = mp_sqrtrem_rec(s, tabs, taba, n, tmp_buf, taba + n); - if (tmp_buf != tmp_buf1) - bf_free(s, tmp_buf); - return ret; -} - -/* Integer square root with remainder. 'a' must be an integer. r = - floor(sqrt(a)) and rem = a - r^2. BF_ST_INEXACT is set if the result - is inexact. 'rem' can be NULL if the remainder is not needed. 
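The contract is easy to check on machine words before trusting it on bignums: s = floor(sqrt(a)), rem = a - s*s, 0 <= rem <= 2*s. A single-limb Newton sketch in plain C (isqrt_u64 is an illustrative stand-in; the code above reaches the same result through the Karatsuba recursion):

#include <stdint.h>
#include <stdio.h>

static uint64_t isqrt_u64(uint64_t a, uint64_t *rem)
{
    uint64_t s, t;
    if (a == 0) { *rem = 0; return 0; }
    if (a < 4)  { *rem = a - 1; return 1; }
    s = a / 2 + 1;                 /* >= sqrt(a), cannot overflow */
    t = (s + a / s) / 2;
    while (t < s) {                /* Newton: decreases to floor(sqrt(a)) */
        s = t;
        t = (s + a / s) / 2;
    }
    *rem = a - s * s;              /* 0 <= rem <= 2*s */
    return s;
}

int main(void)
{
    uint64_t rem, s = isqrt_u64(1000, &rem);
    printf("s=%llu rem=%llu\n",
           (unsigned long long)s, (unsigned long long)rem); /* s=31 rem=39 */
    return 0;
}
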
*/ -int bf_sqrtrem(bf_t *r, bf_t *rem1, const bf_t *a) -{ - int ret; - - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - } else if (a->expn == BF_EXP_INF && a->sign) { - goto invalid_op; - } else { - bf_set(r, a); - } - if (rem1) - bf_set_ui(rem1, 0); - ret = 0; - } else if (a->sign) { - invalid_op: - bf_set_nan(r); - if (rem1) - bf_set_ui(rem1, 0); - ret = BF_ST_INVALID_OP; - } else { - bf_t rem_s, *rem; - - bf_sqrt(r, a, (a->expn + 1) / 2, BF_RNDZ); - bf_rint(r, BF_RNDZ); - /* see if the result is exact by computing the remainder */ - if (rem1) { - rem = rem1; - } else { - rem = &rem_s; - bf_init(r->ctx, rem); - } - /* XXX: could avoid recomputing the remainder */ - bf_mul(rem, r, r, BF_PREC_INF, BF_RNDZ); - bf_neg(rem); - bf_add(rem, rem, a, BF_PREC_INF, BF_RNDZ); - if (bf_is_nan(rem)) { - ret = BF_ST_MEM_ERROR; - goto done; - } - if (rem->len != 0) { - ret = BF_ST_INEXACT; - } else { - ret = 0; - } - done: - if (!rem1) - bf_delete(rem); - } - return ret; -} - -int bf_sqrt(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = a->ctx; - int ret; - - assert(r != a); - - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - } else if (a->expn == BF_EXP_INF && a->sign) { - goto invalid_op; - } else { - bf_set(r, a); - } - ret = 0; - } else if (a->sign) { - invalid_op: - bf_set_nan(r); - ret = BF_ST_INVALID_OP; - } else { - limb_t *a1; - slimb_t n, n1; - limb_t res; - - /* convert the mantissa to an integer with at least 2 * - prec + 4 bits */ - n = (2 * (prec + 2) + 2 * LIMB_BITS - 1) / (2 * LIMB_BITS); - if (bf_resize(r, n)) - goto fail; - a1 = bf_malloc(s, sizeof(limb_t) * 2 * n); - if (!a1) - goto fail; - n1 = bf_min(2 * n, a->len); - memset(a1, 0, (2 * n - n1) * sizeof(limb_t)); - memcpy(a1 + 2 * n - n1, a->tab + a->len - n1, n1 * sizeof(limb_t)); - if (a->expn & 1) { - res = mp_shr(a1, a1, 2 * n, 1, 0); - } else { - res = 0; - } - if (mp_sqrtrem(s, r->tab, a1, n)) { - bf_free(s, a1); - goto fail; - } - if (!res) { - res = mp_scan_nz(a1, n + 1); - } - bf_free(s, a1); - if (!res) { - res = mp_scan_nz(a->tab, a->len - n1); - } - if (res != 0) - r->tab[0] |= 1; - r->sign = 0; - r->expn = (a->expn + 1) >> 1; - ret = bf_round(r, prec, flags); - } - return ret; - fail: - bf_set_nan(r); - return BF_ST_MEM_ERROR; -} - -static no_inline int bf_op2(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags, bf_op2_func_t *func) -{ - bf_t tmp; - int ret; - - if (r == a || r == b) { - bf_init(r->ctx, &tmp); - ret = func(&tmp, a, b, prec, flags); - bf_move(r, &tmp); - } else { - ret = func(r, a, b, prec, flags); - } - return ret; -} - -int bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_op2(r, a, b, prec, flags, __bf_add); -} - -int bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_op2(r, a, b, prec, flags, __bf_sub); -} - -int bf_div(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_op2(r, a, b, prec, flags, __bf_div); -} - -int bf_mul_ui(bf_t *r, const bf_t *a, uint64_t b1, limb_t prec, - bf_flags_t flags) -{ - bf_t b; - int ret; - bf_init(r->ctx, &b); - ret = bf_set_ui(&b, b1); - ret |= bf_mul(r, a, &b, prec, flags); - bf_delete(&b); - return ret; -} - -int bf_mul_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec, - bf_flags_t flags) -{ - bf_t b; - int ret; - bf_init(r->ctx, &b); - ret = bf_set_si(&b, b1); - ret |= bf_mul(r, a, &b, prec, flags); - bf_delete(&b); - return ret; -} - -int bf_add_si(bf_t 
*r, const bf_t *a, int64_t b1, limb_t prec, - bf_flags_t flags) -{ - bf_t b; - int ret; - - bf_init(r->ctx, &b); - ret = bf_set_si(&b, b1); - ret |= bf_add(r, a, &b, prec, flags); - bf_delete(&b); - return ret; -} - -static int bf_pow_ui(bf_t *r, const bf_t *a, limb_t b, limb_t prec, - bf_flags_t flags) -{ - int ret, n_bits, i; - - assert(r != a); - if (b == 0) - return bf_set_ui(r, 1); - ret = bf_set(r, a); - n_bits = LIMB_BITS - clz(b); - for(i = n_bits - 2; i >= 0; i--) { - ret |= bf_mul(r, r, r, prec, flags); - if ((b >> i) & 1) - ret |= bf_mul(r, r, a, prec, flags); - } - return ret; -} - -static int bf_pow_ui_ui(bf_t *r, limb_t a1, limb_t b, - limb_t prec, bf_flags_t flags) -{ - bf_t a; - int ret; - - if (a1 == 10 && b <= LIMB_DIGITS) { - /* use precomputed powers. We do not round at this point - because we expect the caller to do it */ - ret = bf_set_ui(r, mp_pow_dec[b]); - } else { - bf_init(r->ctx, &a); - ret = bf_set_ui(&a, a1); - ret |= bf_pow_ui(r, &a, b, prec, flags); - bf_delete(&a); - } - return ret; -} - -/* convert to integer (infinite precision) */ -int bf_rint(bf_t *r, int rnd_mode) -{ - return bf_round(r, 0, rnd_mode | BF_FLAG_RADPNT_PREC); -} - -/* logical operations */ -#define BF_LOGIC_OR 0 -#define BF_LOGIC_XOR 1 -#define BF_LOGIC_AND 2 - -static inline limb_t bf_logic_op1(limb_t a, limb_t b, int op) -{ - switch(op) { - case BF_LOGIC_OR: - return a | b; - case BF_LOGIC_XOR: - return a ^ b; - default: - case BF_LOGIC_AND: - return a & b; - } -} - -static int bf_logic_op(bf_t *r, const bf_t *a1, const bf_t *b1, int op) -{ - bf_t b1_s, a1_s, *a, *b; - limb_t a_sign, b_sign, r_sign; - slimb_t l, i, a_bit_offset, b_bit_offset; - limb_t v1, v2, v1_mask, v2_mask, r_mask; - int ret; - - assert(r != a1 && r != b1); - - if (a1->expn <= 0) - a_sign = 0; /* minus zero is considered as positive */ - else - a_sign = a1->sign; - - if (b1->expn <= 0) - b_sign = 0; /* minus zero is considered as positive */ - else - b_sign = b1->sign; - - if (a_sign) { - a = &a1_s; - bf_init(r->ctx, a); - if (bf_add_si(a, a1, 1, BF_PREC_INF, BF_RNDZ)) { - b = NULL; - goto fail; - } - } else { - a = (bf_t *)a1; - } - - if (b_sign) { - b = &b1_s; - bf_init(r->ctx, b); - if (bf_add_si(b, b1, 1, BF_PREC_INF, BF_RNDZ)) - goto fail; - } else { - b = (bf_t *)b1; - } - - r_sign = bf_logic_op1(a_sign, b_sign, op); - if (op == BF_LOGIC_AND && r_sign == 0) { - /* no need to compute extra zeros for and */ - if (a_sign == 0 && b_sign == 0) - l = bf_min(a->expn, b->expn); - else if (a_sign == 0) - l = a->expn; - else - l = b->expn; - } else { - l = bf_max(a->expn, b->expn); - } - /* Note: a or b can be zero */ - l = (bf_max(l, 1) + LIMB_BITS - 1) / LIMB_BITS; - if (bf_resize(r, l)) - goto fail; - a_bit_offset = a->len * LIMB_BITS - a->expn; - b_bit_offset = b->len * LIMB_BITS - b->expn; - v1_mask = -a_sign; - v2_mask = -b_sign; - r_mask = -r_sign; - for(i = 0; i < l; i++) { - v1 = get_bits(a->tab, a->len, a_bit_offset + i * LIMB_BITS) ^ v1_mask; - v2 = get_bits(b->tab, b->len, b_bit_offset + i * LIMB_BITS) ^ v2_mask; - r->tab[i] = bf_logic_op1(v1, v2, op) ^ r_mask; - } - r->expn = l * LIMB_BITS; - r->sign = r_sign; - bf_normalize_and_round(r, BF_PREC_INF, BF_RNDZ); /* cannot fail */ - if (r_sign) { - if (bf_add_si(r, r, -1, BF_PREC_INF, BF_RNDZ)) - goto fail; - } - ret = 0; - done: - if (a == &a1_s) - bf_delete(a); - if (b == &b1_s) - bf_delete(b); - return ret; - fail: - bf_set_nan(r); - ret = BF_ST_MEM_ERROR; - goto done; -} - -/* 'a' and 'b' must be integers. Return 0 or BF_ST_MEM_ERROR. 
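The sign handling in bf_logic_op above is the identity -x = ~(x - 1): libbf stores sign plus magnitude, so it first adds 1 to a negative operand, and XOR-ing the magnitude bits with an all-ones mask (the v1_mask/v2_mask XORs) then yields the two's-complement pattern; the result is mapped back by subtracting 1 once the sign is restored. A word-sized check of the identity in plain C:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    int32_t a = -5;
    uint32_t mag = (uint32_t)(-(a + 1));   /* magnitude of a + 1: 4 */
    uint32_t bits = mag ^ UINT32_MAX;      /* ~4 = 0xfffffffb */
    /* both print the two's-complement pattern of -5 */
    printf("0x%08" PRIx32 " vs 0x%08" PRIx32 "\n", bits, (uint32_t)a);
    return 0;
}
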
*/ -int bf_logic_or(bf_t *r, const bf_t *a, const bf_t *b) -{ - return bf_logic_op(r, a, b, BF_LOGIC_OR); -} - -/* 'a' and 'b' must be integers. Return 0 or BF_ST_MEM_ERROR. */ -int bf_logic_xor(bf_t *r, const bf_t *a, const bf_t *b) -{ - return bf_logic_op(r, a, b, BF_LOGIC_XOR); -} - -/* 'a' and 'b' must be integers. Return 0 or BF_ST_MEM_ERROR. */ -int bf_logic_and(bf_t *r, const bf_t *a, const bf_t *b) -{ - return bf_logic_op(r, a, b, BF_LOGIC_AND); -} - -/* conversion between fixed size types */ - -typedef union { - double d; - uint64_t u; -} Float64Union; - -int bf_get_float64(const bf_t *a, double *pres, bf_rnd_t rnd_mode) -{ - Float64Union u; - int e, ret; - uint64_t m; - - ret = 0; - if (a->expn == BF_EXP_NAN) { - u.u = 0x7ff8000000000000; /* quiet nan */ - } else { - bf_t b_s, *b = &b_s; - - bf_init(a->ctx, b); - bf_set(b, a); - if (bf_is_finite(b)) { - ret = bf_round(b, 53, rnd_mode | BF_FLAG_SUBNORMAL | bf_set_exp_bits(11)); - } - if (b->expn == BF_EXP_INF) { - e = (1 << 11) - 1; - m = 0; - } else if (b->expn == BF_EXP_ZERO) { - e = 0; - m = 0; - } else { - e = b->expn + 1023 - 1; -#if LIMB_BITS == 32 - if (b->len == 2) { - m = ((uint64_t)b->tab[1] << 32) | b->tab[0]; - } else { - m = ((uint64_t)b->tab[0] << 32); - } -#else - m = b->tab[0]; -#endif - if (e <= 0) { - /* subnormal */ - m = m >> (12 - e); - e = 0; - } else { - m = (m << 1) >> 12; - } - } - u.u = m | ((uint64_t)e << 52) | ((uint64_t)b->sign << 63); - bf_delete(b); - } - *pres = u.d; - return ret; -} - -int bf_set_float64(bf_t *a, double d) -{ - Float64Union u; - uint64_t m; - int shift, e, sgn; - - u.d = d; - sgn = u.u >> 63; - e = (u.u >> 52) & ((1 << 11) - 1); - m = u.u & (((uint64_t)1 << 52) - 1); - if (e == ((1 << 11) - 1)) { - if (m != 0) { - bf_set_nan(a); - } else { - bf_set_inf(a, sgn); - } - } else if (e == 0) { - if (m == 0) { - bf_set_zero(a, sgn); - } else { - /* subnormal number */ - m <<= 12; - shift = clz64(m); - m <<= shift; - e = -shift; - goto norm; - } - } else { - m = (m << 11) | ((uint64_t)1 << 63); - norm: - a->expn = e - 1023 + 1; -#if LIMB_BITS == 32 - if (bf_resize(a, 2)) - goto fail; - a->tab[0] = m; - a->tab[1] = m >> 32; -#else - if (bf_resize(a, 1)) - goto fail; - a->tab[0] = m; -#endif - a->sign = sgn; - } - return 0; -fail: - bf_set_nan(a); - return BF_ST_MEM_ERROR; -} - -/* The rounding mode is always BF_RNDZ. Return BF_ST_INVALID_OP if there - is an overflow and 0 otherwise. */ -int bf_get_int32(int *pres, const bf_t *a, int flags) -{ - uint32_t v; - int ret; - if (a->expn >= BF_EXP_INF) { - ret = BF_ST_INVALID_OP; - if (flags & BF_GET_INT_MOD) { - v = 0; - } else if (a->expn == BF_EXP_INF) { - v = (uint32_t)INT32_MAX + a->sign; - } else { - v = INT32_MAX; - } - } else if (a->expn <= 0) { - v = 0; - ret = 0; - } else if (a->expn <= 31) { - v = a->tab[a->len - 1] >> (LIMB_BITS - a->expn); - if (a->sign) - v = -v; - ret = 0; - } else if (!(flags & BF_GET_INT_MOD)) { - ret = BF_ST_INVALID_OP; - if (a->sign) { - v = (uint32_t)INT32_MAX + 1; - if (a->expn == 32 && - (a->tab[a->len - 1] >> (LIMB_BITS - 32)) == v) { - ret = 0; - } - } else { - v = INT32_MAX; - } - } else { - v = get_bits(a->tab, a->len, a->len * LIMB_BITS - a->expn); - if (a->sign) - v = -v; - ret = 0; - } - *pres = v; - return ret; -} - -/* The rounding mode is always BF_RNDZ. Return BF_ST_INVALID_OP if there - is an overflow and 0 otherwise. 
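The Float64Union punning above is the standard IEEE-754 binary64 split: 1 sign bit, 11 exponent bits with bias 1023, 52 mantissa bits (the e = b->expn + 1023 - 1 in bf_get_float64 absorbs libbf's exponent convention, which is off by one from IEEE's). A standalone field dump in plain C:

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    double d = -6.25;              /* -1.1001b * 2^2 */
    uint64_t u;
    memcpy(&u, &d, sizeof(u));     /* same effect as the union pun */
    printf("sign=%u exp=%u mant=0x%013" PRIx64 "\n",
           (unsigned)(u >> 63),                 /* 1 */
           (unsigned)((u >> 52) & 0x7ff),       /* 1025 = 2 + 1023 */
           u & (((uint64_t)1 << 52) - 1));      /* 0x9000000000000 */
    return 0;
}
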
*/ -int bf_get_int64(int64_t *pres, const bf_t *a, int flags) -{ - uint64_t v; - int ret; - if (a->expn >= BF_EXP_INF) { - ret = BF_ST_INVALID_OP; - if (flags & BF_GET_INT_MOD) { - v = 0; - } else if (a->expn == BF_EXP_INF) { - v = (uint64_t)INT64_MAX + a->sign; - } else { - v = INT64_MAX; - } - } else if (a->expn <= 0) { - v = 0; - ret = 0; - } else if (a->expn <= 63) { -#if LIMB_BITS == 32 - if (a->expn <= 32) - v = a->tab[a->len - 1] >> (LIMB_BITS - a->expn); - else - v = (((uint64_t)a->tab[a->len - 1] << 32) | - get_limbz(a, a->len - 2)) >> (64 - a->expn); -#else - v = a->tab[a->len - 1] >> (LIMB_BITS - a->expn); -#endif - if (a->sign) - v = -v; - ret = 0; - } else if (!(flags & BF_GET_INT_MOD)) { - ret = BF_ST_INVALID_OP; - if (a->sign) { - uint64_t v1; - v = (uint64_t)INT64_MAX + 1; - if (a->expn == 64) { - v1 = a->tab[a->len - 1]; -#if LIMB_BITS == 32 - v1 = (v1 << 32) | get_limbz(a, a->len - 2); -#endif - if (v1 == v) - ret = 0; - } - } else { - v = INT64_MAX; - } - } else { - slimb_t bit_pos = a->len * LIMB_BITS - a->expn; - v = get_bits(a->tab, a->len, bit_pos); -#if LIMB_BITS == 32 - v |= (uint64_t)get_bits(a->tab, a->len, bit_pos + 32) << 32; -#endif - if (a->sign) - v = -v; - ret = 0; - } - *pres = v; - return ret; -} - -/* The rounding mode is always BF_RNDZ. Return BF_ST_INVALID_OP if there - is an overflow and 0 otherwise. */ -int bf_get_uint64(uint64_t *pres, const bf_t *a) -{ - uint64_t v; - int ret; - if (a->expn == BF_EXP_NAN) { - goto overflow; - } else if (a->expn <= 0) { - v = 0; - ret = 0; - } else if (a->sign) { - v = 0; - ret = BF_ST_INVALID_OP; - } else if (a->expn <= 64) { -#if LIMB_BITS == 32 - if (a->expn <= 32) - v = a->tab[a->len - 1] >> (LIMB_BITS - a->expn); - else - v = (((uint64_t)a->tab[a->len - 1] << 32) | - get_limbz(a, a->len - 2)) >> (64 - a->expn); -#else - v = a->tab[a->len - 1] >> (LIMB_BITS - a->expn); -#endif - ret = 0; - } else { - overflow: - v = UINT64_MAX; - ret = BF_ST_INVALID_OP; - } - *pres = v; - return ret; -} - -/* base conversion from radix */ - -static const uint8_t digits_per_limb_table[BF_RADIX_MAX - 1] = { -#if LIMB_BITS == 32 -32,20,16,13,12,11,10,10, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, -#else -64,40,32,27,24,22,21,20,19,18,17,17,16,16,16,15,15,15,14,14,14,14,13,13,13,13,13,13,13,12,12,12,12,12,12, -#endif -}; - -static limb_t get_limb_radix(int radix) -{ - int i, k; - limb_t radixl; - - k = digits_per_limb_table[radix - 2]; - radixl = radix; - for(i = 1; i < k; i++) - radixl *= radix; - return radixl; -} - -/* return != 0 if error */ -static int bf_integer_from_radix_rec(bf_t *r, const limb_t *tab, - limb_t n, int level, limb_t n0, - limb_t radix, bf_t *pow_tab) -{ - int ret; - if (n == 1) { - ret = bf_set_ui(r, tab[0]); - } else { - bf_t T_s, *T = &T_s, *B; - limb_t n1, n2; - - n2 = (((n0 * 2) >> (level + 1)) + 1) / 2; - n1 = n - n2; - // printf("level=%d n0=%ld n1=%ld n2=%ld\n", level, n0, n1, n2); - B = &pow_tab[level]; - if (B->len == 0) { - ret = bf_pow_ui_ui(B, radix, n2, BF_PREC_INF, BF_RNDZ); - if (ret) - return ret; - } - ret = bf_integer_from_radix_rec(r, tab + n2, n1, level + 1, n0, - radix, pow_tab); - if (ret) - return ret; - ret = bf_mul(r, r, B, BF_PREC_INF, BF_RNDZ); - if (ret) - return ret; - bf_init(r->ctx, T); - ret = bf_integer_from_radix_rec(T, tab, n2, level + 1, n0, - radix, pow_tab); - if (!ret) - ret = bf_add(r, r, T, BF_PREC_INF, BF_RNDZ); - bf_delete(T); - } - return ret; - // bf_print_str(" r=", r); -} - -/* return 0 if OK != 0 if memory error */ -static 
int bf_integer_from_radix(bf_t *r, const limb_t *tab, - limb_t n, limb_t radix) -{ - bf_context_t *s = r->ctx; - int pow_tab_len, i, ret; - limb_t radixl; - bf_t *pow_tab; - - radixl = get_limb_radix(radix); - pow_tab_len = ceil_log2(n) + 2; /* XXX: check */ - pow_tab = bf_malloc(s, sizeof(pow_tab[0]) * pow_tab_len); - if (!pow_tab) - return -1; - for(i = 0; i < pow_tab_len; i++) - bf_init(r->ctx, &pow_tab[i]); - ret = bf_integer_from_radix_rec(r, tab, n, 0, n, radixl, pow_tab); - for(i = 0; i < pow_tab_len; i++) { - bf_delete(&pow_tab[i]); - } - bf_free(s, pow_tab); - return ret; -} - -/* compute and round T * radix^expn. */ -int bf_mul_pow_radix(bf_t *r, const bf_t *T, limb_t radix, - slimb_t expn, limb_t prec, bf_flags_t flags) -{ - int ret, expn_sign, overflow; - slimb_t e, extra_bits, prec1, ziv_extra_bits; - bf_t B_s, *B = &B_s; - - if (T->len == 0) { - return bf_set(r, T); - } else if (expn == 0) { - ret = bf_set(r, T); - ret |= bf_round(r, prec, flags); - return ret; - } - - e = expn; - expn_sign = 0; - if (e < 0) { - e = -e; - expn_sign = 1; - } - bf_init(r->ctx, B); - if (prec == BF_PREC_INF) { - /* infinite precision: only used if the result is known to be exact */ - ret = bf_pow_ui_ui(B, radix, e, BF_PREC_INF, BF_RNDN); - if (expn_sign) { - ret |= bf_div(r, T, B, T->len * LIMB_BITS, BF_RNDN); - } else { - ret |= bf_mul(r, T, B, BF_PREC_INF, BF_RNDN); - } - } else { - ziv_extra_bits = 16; - for(;;) { - prec1 = prec + ziv_extra_bits; - /* XXX: correct overflow/underflow handling */ - /* XXX: rigorous error analysis needed */ - extra_bits = ceil_log2(e) * 2 + 1; - ret = bf_pow_ui_ui(B, radix, e, prec1 + extra_bits, BF_RNDN | BF_FLAG_EXT_EXP); - overflow = !bf_is_finite(B); - /* XXX: if bf_pow_ui_ui returns an exact result, can stop - after the next operation */ - if (expn_sign) - ret |= bf_div(r, T, B, prec1 + extra_bits, BF_RNDN | BF_FLAG_EXT_EXP); - else - ret |= bf_mul(r, T, B, prec1 + extra_bits, BF_RNDN | BF_FLAG_EXT_EXP); - if (ret & BF_ST_MEM_ERROR) - break; - if ((ret & BF_ST_INEXACT) && - !bf_can_round(r, prec, flags & BF_RND_MASK, prec1) && - !overflow) { - /* and more precision and retry */ - ziv_extra_bits = ziv_extra_bits + (ziv_extra_bits / 2); - } else { - /* XXX: need to use __bf_round() to pass the inexact - flag for the subnormal case */ - ret = bf_round(r, prec, flags) | (ret & BF_ST_INEXACT); - break; - } - } - } - bf_delete(B); - return ret; -} - -static inline int bf_to_digit(int c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - else if (c >= 'A' && c <= 'Z') - return c - 'A' + 10; - else if (c >= 'a' && c <= 'z') - return c - 'a' + 10; - else - return 36; -} - -/* add a limb at 'pos' and decrement pos. new space is created if - needed. 
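A quick way to see what get_limb_radix above returns: keep multiplying by the base while another digit still fits in the limb. For 64-bit limbs and base 10 this reproduces digits_per_limb_table[10 - 2] = 19 and radixl = 10^19 (plain C sketch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t radix = 10, radixl = radix;
    int digits = 1;
    while (radixl <= UINT64_MAX / radix) {  /* does one more digit fit? */
        radixl *= radix;
        digits++;
    }
    printf("digits_per_limb=%d radixl=%llu\n",
           digits, (unsigned long long)radixl);
    /* prints: digits_per_limb=19 radixl=10000000000000000000 */
    return 0;
}
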
Return 0 if OK, -1 if memory error */ -static int bf_add_limb(bf_t *a, slimb_t *ppos, limb_t v) -{ - slimb_t pos; - pos = *ppos; - if (unlikely(pos < 0)) { - limb_t new_size, d, *new_tab; - new_size = bf_max(a->len + 1, a->len * 3 / 2); - new_tab = bf_realloc(a->ctx, a->tab, sizeof(limb_t) * new_size); - if (!new_tab) - return -1; - a->tab = new_tab; - d = new_size - a->len; - memmove(a->tab + d, a->tab, a->len * sizeof(limb_t)); - a->len = new_size; - pos += d; - } - a->tab[pos--] = v; - *ppos = pos; - return 0; -} - -static int bf_tolower(int c) -{ - if (c >= 'A' && c <= 'Z') - c = c - 'A' + 'a'; - return c; -} - -static int strcasestart(const char *str, const char *val, const char **ptr) -{ - const char *p, *q; - p = str; - q = val; - while (*q != '\0') { - if (bf_tolower(*p) != *q) - return 0; - p++; - q++; - } - if (ptr) - *ptr = p; - return 1; -} - -static int bf_atof_internal(bf_t *r, slimb_t *pexponent, - const char *str, const char **pnext, int radix, - limb_t prec, bf_flags_t flags, bool is_dec) -{ - const char *p, *p_start; - int is_neg, radix_bits, exp_is_neg, ret, digits_per_limb, shift; - limb_t cur_limb; - slimb_t pos, expn, int_len, digit_count; - bool has_decpt, is_bin_exp; - bf_t a_s, *a; - - *pexponent = 0; - p = str; - if (!(flags & BF_ATOF_NO_NAN_INF) && radix <= 16 && - strcasestart(p, "nan", &p)) { - bf_set_nan(r); - ret = 0; - goto done; - } - is_neg = 0; - - if (p[0] == '+') { - p++; - p_start = p; - } else if (p[0] == '-') { - is_neg = 1; - p++; - p_start = p; - } else { - p_start = p; - } - if (p[0] == '0') { - if ((p[1] == 'x' || p[1] == 'X') && - (radix == 0 || radix == 16) && - !(flags & BF_ATOF_NO_HEX)) { - radix = 16; - p += 2; - } else if ((p[1] == 'o' || p[1] == 'O') && - radix == 0 && (flags & BF_ATOF_BIN_OCT)) { - p += 2; - radix = 8; - } else if ((p[1] == 'b' || p[1] == 'B') && - radix == 0 && (flags & BF_ATOF_BIN_OCT)) { - p += 2; - radix = 2; - } else { - goto no_prefix; - } - /* there must be a digit after the prefix */ - if (bf_to_digit((uint8_t)*p) >= radix) { - bf_set_nan(r); - ret = 0; - goto done; - } - no_prefix: ; - } else { - if (!(flags & BF_ATOF_NO_NAN_INF) && radix <= 16 && - strcasestart(p, "inf", &p)) { - bf_set_inf(r, is_neg); - ret = 0; - goto done; - } - } - - if (radix == 0) - radix = 10; - if (is_dec) { - assert(radix == 10); - radix_bits = 0; - a = r; - } else if ((radix & (radix - 1)) != 0) { - radix_bits = 0; /* base is not a power of two */ - a = &a_s; - bf_init(r->ctx, a); - } else { - radix_bits = ceil_log2(radix); - a = r; - } - - /* skip leading zeros */ - /* XXX: could also skip zeros after the decimal point */ - while (*p == '0') - p++; - - if (radix_bits) { - shift = digits_per_limb = LIMB_BITS; - } else { - radix_bits = 0; - shift = digits_per_limb = digits_per_limb_table[radix - 2]; - } - cur_limb = 0; - bf_resize(a, 1); - pos = 0; - has_decpt = false; - int_len = digit_count = 0; - for(;;) { - limb_t c; - if (*p == '.' 
&& (p > p_start || bf_to_digit(p[1]) < radix)) { - if (has_decpt) - break; - has_decpt = true; - int_len = digit_count; - p++; - } - c = bf_to_digit(*p); - if (c >= radix) - break; - digit_count++; - p++; - if (radix_bits) { - shift -= radix_bits; - if (shift <= 0) { - cur_limb |= c >> (-shift); - if (bf_add_limb(a, &pos, cur_limb)) - goto mem_error; - if (shift < 0) - cur_limb = c << (LIMB_BITS + shift); - else - cur_limb = 0; - shift += LIMB_BITS; - } else { - cur_limb |= c << shift; - } - } else { - cur_limb = cur_limb * radix + c; - shift--; - if (shift == 0) { - if (bf_add_limb(a, &pos, cur_limb)) - goto mem_error; - shift = digits_per_limb; - cur_limb = 0; - } - } - } - if (!has_decpt) - int_len = digit_count; - - /* add the last limb and pad with zeros */ - if (shift != digits_per_limb) { - if (radix_bits == 0) { - while (shift != 0) { - cur_limb *= radix; - shift--; - } - } - if (bf_add_limb(a, &pos, cur_limb)) { - mem_error: - ret = BF_ST_MEM_ERROR; - if (!radix_bits) - bf_delete(a); - bf_set_nan(r); - goto done; - } - } - - /* reset the next limbs to zero (we prefer to reallocate in the - renormalization) */ - memset(a->tab, 0, (pos + 1) * sizeof(limb_t)); - - if (p == p_start) { - ret = 0; - if (!radix_bits) - bf_delete(a); - bf_set_nan(r); - goto done; - } - - /* parse the exponent, if any */ - expn = 0; - is_bin_exp = false; - if (((radix == 10 && (*p == 'e' || *p == 'E')) || - (radix != 10 && (*p == '@' || - (radix_bits && (*p == 'p' || *p == 'P'))))) && - p > p_start) { - is_bin_exp = (*p == 'p' || *p == 'P'); - p++; - exp_is_neg = 0; - if (*p == '+') { - p++; - } else if (*p == '-') { - exp_is_neg = 1; - p++; - } - for(;;) { - int c; - c = bf_to_digit(*p); - if (c >= 10) - break; - if (unlikely(expn > ((BF_RAW_EXP_MAX - 2 - 9) / 10))) { - /* exponent overflow */ - if (exp_is_neg) { - bf_set_zero(r, is_neg); - ret = BF_ST_UNDERFLOW | BF_ST_INEXACT; - } else { - bf_set_inf(r, is_neg); - ret = BF_ST_OVERFLOW | BF_ST_INEXACT; - } - goto done; - } - p++; - expn = expn * 10 + c; - } - if (exp_is_neg) - expn = -expn; - } - if (is_dec) { - a->expn = expn + int_len; - a->sign = is_neg; - ret = bfdec_normalize_and_round((bfdec_t *)a, prec, flags); - } else if (radix_bits) { - /* XXX: may overflow */ - if (!is_bin_exp) - expn *= radix_bits; - a->expn = expn + (int_len * radix_bits); - a->sign = is_neg; - ret = bf_normalize_and_round(a, prec, flags); - } else { - limb_t l; - pos++; - l = a->len - pos; /* number of limbs */ - if (l == 0) { - bf_set_zero(r, is_neg); - ret = 0; - } else { - bf_t T_s, *T = &T_s; - - expn -= l * digits_per_limb - int_len; - bf_init(r->ctx, T); - if (bf_integer_from_radix(T, a->tab + pos, l, radix)) { - bf_set_nan(r); - ret = BF_ST_MEM_ERROR; - } else { - T->sign = is_neg; - if (flags & BF_ATOF_EXPONENT) { - /* return the exponent */ - *pexponent = expn; - ret = bf_set(r, T); - } else { - ret = bf_mul_pow_radix(r, T, radix, expn, prec, flags); - } - } - bf_delete(T); - } - bf_delete(a); - } - done: - if (pnext) - *pnext = p; - return ret; -} - -/* - Return (status, n, exp). 'status' is the floating point status. 'n' - is the parsed number. - - If (flags & BF_ATOF_EXPONENT) and if the radix is not a power of - two, the parsed number is equal to r * - (*pexponent)^radix. Otherwise *pexponent = 0. 
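The exponent loop above guards against overflow by rejecting before the update rather than after: once expn > (BF_RAW_EXP_MAX - 2 - 9) / 10, another expn * 10 + 9 could wrap. The same pattern sketched on a plain long, with LONG_MAX standing in for the libbf limit:

#include <limits.h>
#include <stdio.h>

int main(void)
{
    const long MAX = LONG_MAX;   /* stand-in for BF_RAW_EXP_MAX - 2 */
    const char *p = "9999999999999999999999999";
    long expn = 0;
    for (; *p >= '0' && *p <= '9'; p++) {
        if (expn > (MAX - 9) / 10) {       /* reject before the update */
            puts("exponent overflow -> inf/zero with inexact status");
            return 0;
        }
        expn = expn * 10 + (*p - '0');
    }
    printf("expn=%ld\n", expn);
    return 0;
}
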
-*/ -int bf_atof2(bf_t *r, slimb_t *pexponent, - const char *str, const char **pnext, int radix, - limb_t prec, bf_flags_t flags) -{ - return bf_atof_internal(r, pexponent, str, pnext, radix, prec, flags, - false); -} - -int bf_atof(bf_t *r, const char *str, const char **pnext, int radix, - limb_t prec, bf_flags_t flags) -{ - slimb_t dummy_exp; - return bf_atof_internal(r, &dummy_exp, str, pnext, radix, prec, flags, false); -} - -/* base conversion to radix */ - -#if LIMB_BITS == 64 -#define RADIXL_10 UINT64_C(10000000000000000000) -#else -#define RADIXL_10 UINT64_C(1000000000) -#endif - -static const uint32_t inv_log2_radix[BF_RADIX_MAX - 1][LIMB_BITS / 32 + 1] = { -#if LIMB_BITS == 32 -{ 0x80000000, 0x00000000,}, -{ 0x50c24e60, 0xd4d4f4a7,}, -{ 0x40000000, 0x00000000,}, -{ 0x372068d2, 0x0a1ee5ca,}, -{ 0x3184648d, 0xb8153e7a,}, -{ 0x2d983275, 0x9d5369c4,}, -{ 0x2aaaaaaa, 0xaaaaaaab,}, -{ 0x28612730, 0x6a6a7a54,}, -{ 0x268826a1, 0x3ef3fde6,}, -{ 0x25001383, 0xbac8a744,}, -{ 0x23b46706, 0x82c0c709,}, -{ 0x229729f1, 0xb2c83ded,}, -{ 0x219e7ffd, 0xa5ad572b,}, -{ 0x20c33b88, 0xda7c29ab,}, -{ 0x20000000, 0x00000000,}, -{ 0x1f50b57e, 0xac5884b3,}, -{ 0x1eb22cc6, 0x8aa6e26f,}, -{ 0x1e21e118, 0x0c5daab2,}, -{ 0x1d9dcd21, 0x439834e4,}, -{ 0x1d244c78, 0x367a0d65,}, -{ 0x1cb40589, 0xac173e0c,}, -{ 0x1c4bd95b, 0xa8d72b0d,}, -{ 0x1bead768, 0x98f8ce4c,}, -{ 0x1b903469, 0x050f72e5,}, -{ 0x1b3b433f, 0x2eb06f15,}, -{ 0x1aeb6f75, 0x9c46fc38,}, -{ 0x1aa038eb, 0x0e3bfd17,}, -{ 0x1a593062, 0xb38d8c56,}, -{ 0x1a15f4c3, 0x2b95a2e6,}, -{ 0x19d630dc, 0xcc7ddef9,}, -{ 0x19999999, 0x9999999a,}, -{ 0x195fec80, 0x8a609431,}, -{ 0x1928ee7b, 0x0b4f22f9,}, -{ 0x18f46acf, 0x8c06e318,}, -{ 0x18c23246, 0xdc0a9f3d,}, -#else -{ 0x80000000, 0x00000000, 0x00000000,}, -{ 0x50c24e60, 0xd4d4f4a7, 0x021f57bc,}, -{ 0x40000000, 0x00000000, 0x00000000,}, -{ 0x372068d2, 0x0a1ee5ca, 0x19ea911b,}, -{ 0x3184648d, 0xb8153e7a, 0x7fc2d2e1,}, -{ 0x2d983275, 0x9d5369c4, 0x4dec1661,}, -{ 0x2aaaaaaa, 0xaaaaaaaa, 0xaaaaaaab,}, -{ 0x28612730, 0x6a6a7a53, 0x810fabde,}, -{ 0x268826a1, 0x3ef3fde6, 0x23e2566b,}, -{ 0x25001383, 0xbac8a744, 0x385a3349,}, -{ 0x23b46706, 0x82c0c709, 0x3f891718,}, -{ 0x229729f1, 0xb2c83ded, 0x15fba800,}, -{ 0x219e7ffd, 0xa5ad572a, 0xe169744b,}, -{ 0x20c33b88, 0xda7c29aa, 0x9bddee52,}, -{ 0x20000000, 0x00000000, 0x00000000,}, -{ 0x1f50b57e, 0xac5884b3, 0x70e28eee,}, -{ 0x1eb22cc6, 0x8aa6e26f, 0x06d1a2a2,}, -{ 0x1e21e118, 0x0c5daab1, 0x81b4f4bf,}, -{ 0x1d9dcd21, 0x439834e3, 0x81667575,}, -{ 0x1d244c78, 0x367a0d64, 0xc8204d6d,}, -{ 0x1cb40589, 0xac173e0c, 0x3b7b16ba,}, -{ 0x1c4bd95b, 0xa8d72b0d, 0x5879f25a,}, -{ 0x1bead768, 0x98f8ce4c, 0x66cc2858,}, -{ 0x1b903469, 0x050f72e5, 0x0cf5488e,}, -{ 0x1b3b433f, 0x2eb06f14, 0x8c89719c,}, -{ 0x1aeb6f75, 0x9c46fc37, 0xab5fc7e9,}, -{ 0x1aa038eb, 0x0e3bfd17, 0x1bd62080,}, -{ 0x1a593062, 0xb38d8c56, 0x7998ab45,}, -{ 0x1a15f4c3, 0x2b95a2e6, 0x46aed6a0,}, -{ 0x19d630dc, 0xcc7ddef9, 0x5aadd61b,}, -{ 0x19999999, 0x99999999, 0x9999999a,}, -{ 0x195fec80, 0x8a609430, 0xe1106014,}, -{ 0x1928ee7b, 0x0b4f22f9, 0x5f69791d,}, -{ 0x18f46acf, 0x8c06e318, 0x4d2aeb2c,}, -{ 0x18c23246, 0xdc0a9f3d, 0x3fe16970,}, -#endif -}; - -static const limb_t log2_radix[BF_RADIX_MAX - 1] = { -#if LIMB_BITS == 32 -0x20000000, -0x32b80347, -0x40000000, -0x4a4d3c26, -0x52b80347, -0x59d5d9fd, -0x60000000, -0x6570068e, -0x6a4d3c26, -0x6eb3a9f0, -0x72b80347, -0x766a008e, -0x79d5d9fd, -0x7d053f6d, -0x80000000, -0x82cc7edf, -0x8570068e, -0x87ef05ae, -0x8a4d3c26, -0x8c8ddd45, -0x8eb3a9f0, -0x90c10501, -0x92b80347, -0x949a784c, 
-0x966a008e, -0x982809d6, -0x99d5d9fd, -0x9b74948f, -0x9d053f6d, -0x9e88c6b3, -0xa0000000, -0xa16bad37, -0xa2cc7edf, -0xa4231623, -0xa570068e, -#else -0x2000000000000000, -0x32b803473f7ad0f4, -0x4000000000000000, -0x4a4d3c25e68dc57f, -0x52b803473f7ad0f4, -0x59d5d9fd5010b366, -0x6000000000000000, -0x6570068e7ef5a1e8, -0x6a4d3c25e68dc57f, -0x6eb3a9f01975077f, -0x72b803473f7ad0f4, -0x766a008e4788cbcd, -0x79d5d9fd5010b366, -0x7d053f6d26089673, -0x8000000000000000, -0x82cc7edf592262d0, -0x8570068e7ef5a1e8, -0x87ef05ae409a0289, -0x8a4d3c25e68dc57f, -0x8c8ddd448f8b845a, -0x8eb3a9f01975077f, -0x90c10500d63aa659, -0x92b803473f7ad0f4, -0x949a784bcd1b8afe, -0x966a008e4788cbcd, -0x982809d5be7072dc, -0x99d5d9fd5010b366, -0x9b74948f5532da4b, -0x9d053f6d26089673, -0x9e88c6b3626a72aa, -0xa000000000000000, -0xa16bad3758efd873, -0xa2cc7edf592262d0, -0xa4231623369e78e6, -0xa570068e7ef5a1e8, -#endif -}; - -/* compute floor(a*b) or ceil(a*b) with b = log2(radix) or - b=1/log2(radix). For is_inv = 0, strict accuracy is not guaranteed - when radix is not a power of two. */ -slimb_t bf_mul_log2_radix(slimb_t a1, unsigned int radix, int is_inv, - int is_ceil1) -{ - int is_neg; - limb_t a; - bool is_ceil; - - is_ceil = is_ceil1; - a = a1; - if (a1 < 0) { - a = -a; - is_neg = 1; - } else { - is_neg = 0; - } - is_ceil ^= is_neg; - if ((radix & (radix - 1)) == 0) { - int radix_bits; - /* radix is a power of two */ - radix_bits = ceil_log2(radix); - if (is_inv) { - if (is_ceil) - a += radix_bits - 1; - a = a / radix_bits; - } else { - a = a * radix_bits; - } - } else { - const uint32_t *tab; - limb_t b0, b1; - dlimb_t t; - - if (is_inv) { - tab = inv_log2_radix[radix - 2]; -#if LIMB_BITS == 32 - b1 = tab[0]; - b0 = tab[1]; -#else - b1 = ((limb_t)tab[0] << 32) | tab[1]; - b0 = (limb_t)tab[2] << 32; -#endif - t = (dlimb_t)b0 * (dlimb_t)a; - t = (dlimb_t)b1 * (dlimb_t)a + (t >> LIMB_BITS); - a = t >> (LIMB_BITS - 1); - } else { - b0 = log2_radix[radix - 2]; - t = (dlimb_t)b0 * (dlimb_t)a; - a = t >> (LIMB_BITS - 3); - } - /* a = floor(result) and 'result' cannot be an integer */ - a += is_ceil; - } - if (is_neg) - a = -a; - return a; -} - -/* 'n' is the number of output limbs */ -static int bf_integer_to_radix_rec(bf_t *pow_tab, - limb_t *out, const bf_t *a, limb_t n, - int level, limb_t n0, limb_t radixl, - unsigned int radixl_bits) -{ - limb_t n1, n2, q_prec; - int ret; - - assert(n >= 1); - if (n == 1) { - out[0] = get_bits(a->tab, a->len, a->len * LIMB_BITS - a->expn); - } else if (n == 2) { - dlimb_t t; - slimb_t pos; - pos = a->len * LIMB_BITS - a->expn; - t = ((dlimb_t)get_bits(a->tab, a->len, pos + LIMB_BITS) << LIMB_BITS) | - get_bits(a->tab, a->len, pos); - if (likely(radixl == RADIXL_10)) { - /* use division by a constant when possible */ - out[0] = t % RADIXL_10; - out[1] = t / RADIXL_10; - } else { - out[0] = t % radixl; - out[1] = t / radixl; - } - } else { - bf_t Q, R, *B, *B_inv; - int q_add; - bf_init(a->ctx, &Q); - bf_init(a->ctx, &R); - n2 = (((n0 * 2) >> (level + 1)) + 1) / 2; - n1 = n - n2; - B = &pow_tab[2 * level]; - B_inv = &pow_tab[2 * level + 1]; - ret = 0; - if (B->len == 0) { - /* compute BASE^n2 */ - ret |= bf_pow_ui_ui(B, radixl, n2, BF_PREC_INF, BF_RNDZ); - /* we use enough bits for the maximum possible 'n1' value, - i.e. 
n2 + 1 */ - ret |= bf_set_ui(&R, 1); - ret |= bf_div(B_inv, &R, B, (n2 + 1) * radixl_bits + 2, BF_RNDN); - } - // printf("%d: n1=% " PRId64 " n2=%" PRId64 "\n", level, n1, n2); - q_prec = n1 * radixl_bits; - ret |= bf_mul(&Q, a, B_inv, q_prec, BF_RNDN); - ret |= bf_rint(&Q, BF_RNDZ); - - ret |= bf_mul(&R, &Q, B, BF_PREC_INF, BF_RNDZ); - ret |= bf_sub(&R, a, &R, BF_PREC_INF, BF_RNDZ); - - if (ret & BF_ST_MEM_ERROR) - goto fail; - /* adjust if necessary */ - q_add = 0; - while (R.sign && R.len != 0) { - if (bf_add(&R, &R, B, BF_PREC_INF, BF_RNDZ)) - goto fail; - q_add--; - } - while (bf_cmpu(&R, B) >= 0) { - if (bf_sub(&R, &R, B, BF_PREC_INF, BF_RNDZ)) - goto fail; - q_add++; - } - if (q_add != 0) { - if (bf_add_si(&Q, &Q, q_add, BF_PREC_INF, BF_RNDZ)) - goto fail; - } - if (bf_integer_to_radix_rec(pow_tab, out + n2, &Q, n1, level + 1, n0, - radixl, radixl_bits)) - goto fail; - if (bf_integer_to_radix_rec(pow_tab, out, &R, n2, level + 1, n0, - radixl, radixl_bits)) { - fail: - bf_delete(&Q); - bf_delete(&R); - return -1; - } - bf_delete(&Q); - bf_delete(&R); - } - return 0; -} - -/* return 0 if OK != 0 if memory error */ -static int bf_integer_to_radix(bf_t *r, const bf_t *a, limb_t radixl) -{ - bf_context_t *s = r->ctx; - limb_t r_len; - bf_t *pow_tab; - int i, pow_tab_len, ret; - - r_len = r->len; - pow_tab_len = (ceil_log2(r_len) + 2) * 2; /* XXX: check */ - pow_tab = bf_malloc(s, sizeof(pow_tab[0]) * pow_tab_len); - if (!pow_tab) - return -1; - for(i = 0; i < pow_tab_len; i++) - bf_init(r->ctx, &pow_tab[i]); - - ret = bf_integer_to_radix_rec(pow_tab, r->tab, a, r_len, 0, r_len, radixl, - ceil_log2(radixl)); - - for(i = 0; i < pow_tab_len; i++) { - bf_delete(&pow_tab[i]); - } - bf_free(s, pow_tab); - return ret; -} - -/* a must be >= 0. 'P' is the wanted number of digits in radix - 'radix'. 'r' is the mantissa represented as an integer. *pE - contains the exponent. Return != 0 if memory error. 
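bf_convert_to_radix below sizes its output with bf_mul_log2_radix above, which is just a fixed-point multiply by a precomputed 1/log2(radix); for radix 10 the first table word 0x268826a1 encodes 1/log2(10) in 1.31 fixed point. A digit-count estimate in plain C:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    const uint64_t inv_log2_10 = 0x268826a1;  /* ~0.30103 in 1.31 form */
    uint64_t bits = 64;
    uint64_t digits = ((bits * inv_log2_10) >> 31) + 1;
    printf("a %" PRIu64 "-bit integer needs at most %" PRIu64
           " decimal digits\n", bits, digits);   /* prints 20 */
    return 0;
}
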
*/ -static int bf_convert_to_radix(bf_t *r, slimb_t *pE, - const bf_t *a, int radix, - limb_t P, bf_rnd_t rnd_mode, - bool is_fixed_exponent) -{ - slimb_t E, e, prec, extra_bits, ziv_extra_bits, prec0; - bf_t B_s, *B = &B_s; - int e_sign, ret, res; - - if (a->len == 0) { - /* zero case */ - *pE = 0; - return bf_set(r, a); - } - - if (is_fixed_exponent) { - E = *pE; - } else { - /* compute the new exponent */ - E = 1 + bf_mul_log2_radix(a->expn - 1, radix, true, false); - } - // bf_print_str("a", a); - // printf("E=%ld P=%ld radix=%d\n", E, P, radix); - - for(;;) { - e = P - E; - e_sign = 0; - if (e < 0) { - e = -e; - e_sign = 1; - } - /* Note: precision for log2(radix) is not critical here */ - prec0 = bf_mul_log2_radix(P, radix, false, true); - ziv_extra_bits = 16; - for(;;) { - prec = prec0 + ziv_extra_bits; - /* XXX: rigorous error analysis needed */ - extra_bits = ceil_log2(e) * 2 + 1; - ret = bf_pow_ui_ui(r, radix, e, prec + extra_bits, - BF_RNDN | BF_FLAG_EXT_EXP); - if (!e_sign) - ret |= bf_mul(r, r, a, prec + extra_bits, - BF_RNDN | BF_FLAG_EXT_EXP); - else - ret |= bf_div(r, a, r, prec + extra_bits, - BF_RNDN | BF_FLAG_EXT_EXP); - if (ret & BF_ST_MEM_ERROR) - return BF_ST_MEM_ERROR; - /* if the result is not exact, check that it can be safely - rounded to an integer */ - if ((ret & BF_ST_INEXACT) && - !bf_can_round(r, r->expn, rnd_mode, prec)) { - /* and more precision and retry */ - ziv_extra_bits = ziv_extra_bits + (ziv_extra_bits / 2); - continue; - } else { - ret = bf_rint(r, rnd_mode); - if (ret & BF_ST_MEM_ERROR) - return BF_ST_MEM_ERROR; - break; - } - } - if (is_fixed_exponent) - break; - /* check that the result is < B^P */ - /* XXX: do a fast approximate test first ? */ - bf_init(r->ctx, B); - ret = bf_pow_ui_ui(B, radix, P, BF_PREC_INF, BF_RNDZ); - if (ret) { - bf_delete(B); - return ret; - } - res = bf_cmpu(r, B); - bf_delete(B); - if (res < 0) - break; - /* try a larger exponent */ - E++; - } - *pE = E; - return 0; -} - -static void limb_to_a(char *buf, limb_t n, unsigned int radix, int len) -{ - int digit, i; - - if (radix == 10) { - /* specific case with constant divisor */ - for(i = len - 1; i >= 0; i--) { - digit = (limb_t)n % 10; - n = (limb_t)n / 10; - buf[i] = digit + '0'; - } - } else { - for(i = len - 1; i >= 0; i--) { - digit = (limb_t)n % radix; - n = (limb_t)n / radix; - if (digit < 10) - digit += '0'; - else - digit += 'a' - 10; - buf[i] = digit; - } - } -} - -/* for power of 2 radixes */ -static void limb_to_a2(char *buf, limb_t n, unsigned int radix_bits, int len) -{ - int digit, i; - unsigned int mask; - - mask = (1 << radix_bits) - 1; - for(i = len - 1; i >= 0; i--) { - digit = n & mask; - n >>= radix_bits; - if (digit < 10) - digit += '0'; - else - digit += 'a' - 10; - buf[i] = digit; - } -} - -/* 'a' must be an integer if the is_dec = false or if the radix is not - a power of two. A dot is added before the 'dot_pos' digit. dot_pos - = n_digits does not display the dot. 0 <= dot_pos <= - n_digits. n_digits >= 1. 
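limb_to_a above emits one limb as a fixed, zero-padded group of digits, filling its buffer from the end; the padding matters because inner limbs must print their leading zeros. The same loop on an unsigned long long (u64_to_digits is an illustrative stand-in):

#include <stdio.h>

static void u64_to_digits(char *buf, unsigned long long n, int len)
{
    int i;
    for (i = len - 1; i >= 0; i--) {   /* fill from the end */
        buf[i] = (char)('0' + n % 10);
        n /= 10;
    }
}

int main(void)
{
    char buf[20];
    u64_to_digits(buf, 42, 19);        /* one full 64-bit limb, base 10 */
    buf[19] = '\0';
    printf("%s\n", buf);               /* 0000000000000000042 */
    return 0;
}
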
*/ -static void output_digits(DynBuf *s, const bf_t *a1, int radix, limb_t n_digits, - limb_t dot_pos, bool is_dec) -{ - limb_t i, v, l; - slimb_t pos, pos_incr; - int digits_per_limb, buf_pos, radix_bits, first_buf_pos; - char buf[65]; - bf_t a_s, *a; - - if (is_dec) { - digits_per_limb = LIMB_DIGITS; - a = (bf_t *)a1; - radix_bits = 0; - pos = a->len; - pos_incr = 1; - first_buf_pos = 0; - } else if ((radix & (radix - 1)) == 0) { - a = (bf_t *)a1; - radix_bits = ceil_log2(radix); - digits_per_limb = LIMB_BITS / radix_bits; - pos_incr = digits_per_limb * radix_bits; - /* digits are aligned relative to the radix point */ - pos = a->len * LIMB_BITS + smod(-a->expn, radix_bits); - first_buf_pos = 0; - } else { - limb_t n, radixl; - - digits_per_limb = digits_per_limb_table[radix - 2]; - radixl = get_limb_radix(radix); - a = &a_s; - bf_init(a1->ctx, a); - n = (n_digits + digits_per_limb - 1) / digits_per_limb; - if (bf_resize(a, n)) { - dbuf_set_error(s); - goto done; - } - if (bf_integer_to_radix(a, a1, radixl)) { - dbuf_set_error(s); - goto done; - } - radix_bits = 0; - pos = n; - pos_incr = 1; - first_buf_pos = pos * digits_per_limb - n_digits; - } - buf_pos = digits_per_limb; - i = 0; - while (i < n_digits) { - if (buf_pos == digits_per_limb) { - pos -= pos_incr; - if (radix_bits == 0) { - v = get_limbz(a, pos); - limb_to_a(buf, v, radix, digits_per_limb); - } else { - v = get_bits(a->tab, a->len, pos); - limb_to_a2(buf, v, radix_bits, digits_per_limb); - } - buf_pos = first_buf_pos; - first_buf_pos = 0; - } - if (i < dot_pos) { - l = dot_pos; - } else { - if (i == dot_pos) - dbuf_putc(s, '.'); - l = n_digits; - } - l = bf_min(digits_per_limb - buf_pos, l - i); - dbuf_put(s, (uint8_t *)(buf + buf_pos), l); - buf_pos += l; - i += l; - } - done: - if (a != a1) - bf_delete(a); -} - -static void *bf_dbuf_realloc(void *opaque, void *ptr, size_t size) -{ - bf_context_t *s = opaque; - return bf_realloc(s, ptr, size); -} - -/* return the length in bytes. 
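For context, a typical call into this formatter from the public entry point; a hedged usage sketch assuming the libbf API declared in libbf.h (bf_context_init, bf_context_end and my_realloc's role are assumptions not shown in this hunk), not code from the QuickJS sources:

#include <stdio.h>
#include <stdlib.h>
#include "libbf.h"

static void *my_realloc(void *opaque, void *ptr, size_t size)
{
    (void)opaque;
    return realloc(ptr, size);
}

int main(void)
{
    bf_context_t ctx;
    bf_t a;
    size_t len;
    char *str;

    bf_context_init(&ctx, my_realloc, NULL);
    bf_init(&ctx, &a);
    bf_set_float64(&a, 0.1);
    /* shortest decimal string that reads back identically at 53 bits */
    str = bf_ftoa(&len, &a, 10, 53, BF_RNDN | BF_FTOA_FORMAT_FREE_MIN);
    if (str) {
        printf("%s\n", str);           /* expected: 0.1 */
        bf_free(&ctx, str);
    }
    bf_delete(&a);
    bf_context_end(&ctx);
    return 0;
}
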
A trailing '\0' is added */ -static char *bf_ftoa_internal(size_t *plen, const bf_t *a2, int radix, - limb_t prec, bf_flags_t flags, bool is_dec) -{ - bf_context_t *ctx = a2->ctx; - DynBuf s_s, *s = &s_s; - int radix_bits; - - // bf_print_str("ftoa", a2); - // printf("radix=%d\n", radix); - dbuf_init2(s, ctx, bf_dbuf_realloc); - if (a2->expn == BF_EXP_NAN) { - dbuf_putstr(s, "NaN"); - } else { - if (a2->sign) - dbuf_putc(s, '-'); - if (a2->expn == BF_EXP_INF) { - if (flags & BF_FTOA_JS_QUIRKS) - dbuf_putstr(s, "Infinity"); - else - dbuf_putstr(s, "Inf"); - } else { - int fmt, ret; - slimb_t n_digits, n, i, n_max, n1; - bf_t a1_s, *a1 = &a1_s; - - if ((radix & (radix - 1)) != 0) - radix_bits = 0; - else - radix_bits = ceil_log2(radix); - - fmt = flags & BF_FTOA_FORMAT_MASK; - bf_init(ctx, a1); - if (fmt == BF_FTOA_FORMAT_FRAC) { - if (is_dec || radix_bits != 0) { - if (bf_set(a1, a2)) - goto fail1; -#ifdef USE_BF_DEC - if (is_dec) { - if (bfdec_round((bfdec_t *)a1, prec, (flags & BF_RND_MASK) | BF_FLAG_RADPNT_PREC) & BF_ST_MEM_ERROR) - goto fail1; - n = a1->expn; - } else -#endif - { - if (bf_round(a1, prec * radix_bits, (flags & BF_RND_MASK) | BF_FLAG_RADPNT_PREC) & BF_ST_MEM_ERROR) - goto fail1; - n = ceil_div(a1->expn, radix_bits); - } - if (flags & BF_FTOA_ADD_PREFIX) { - if (radix == 16) - dbuf_putstr(s, "0x"); - else if (radix == 8) - dbuf_putstr(s, "0o"); - else if (radix == 2) - dbuf_putstr(s, "0b"); - } - if (a1->expn == BF_EXP_ZERO) { - dbuf_putstr(s, "0"); - if (prec > 0) { - dbuf_putstr(s, "."); - for(i = 0; i < prec; i++) { - dbuf_putc(s, '0'); - } - } - } else { - n_digits = prec + n; - if (n <= 0) { - /* 0.x */ - dbuf_putstr(s, "0."); - for(i = 0; i < -n; i++) { - dbuf_putc(s, '0'); - } - if (n_digits > 0) { - output_digits(s, a1, radix, n_digits, n_digits, is_dec); - } - } else { - output_digits(s, a1, radix, n_digits, n, is_dec); - } - } - } else { - size_t pos, start; - bf_t a_s, *a = &a_s; - - /* make a positive number */ - a->tab = a2->tab; - a->len = a2->len; - a->expn = a2->expn; - a->sign = 0; - - /* one more digit for the rounding */ - n = 1 + bf_mul_log2_radix(bf_max(a->expn, 0), radix, true, true); - n_digits = n + prec; - n1 = n; - if (bf_convert_to_radix(a1, &n1, a, radix, n_digits, - flags & BF_RND_MASK, true)) - goto fail1; - start = s->size; - output_digits(s, a1, radix, n_digits, n, is_dec); - /* remove leading zeros because we allocated one more digit */ - pos = start; - while ((pos + 1) < s->size && s->buf[pos] == '0' && - s->buf[pos + 1] != '.') - pos++; - if (pos > start) { - memmove(s->buf + start, s->buf + pos, s->size - pos); - s->size -= (pos - start); - } - } - } else { -#ifdef USE_BF_DEC - if (is_dec) { - if (bf_set(a1, a2)) - goto fail1; - if (fmt == BF_FTOA_FORMAT_FIXED) { - n_digits = prec; - n_max = n_digits; - if (bfdec_round((bfdec_t *)a1, prec, (flags & BF_RND_MASK)) & BF_ST_MEM_ERROR) - goto fail1; - } else { - /* prec is ignored */ - prec = n_digits = a1->len * LIMB_DIGITS; - /* remove the trailing zero digits */ - while (n_digits > 1 && - get_digit(a1->tab, a1->len, prec - n_digits) == 0) { - n_digits--; - } - n_max = n_digits + 4; - } - n = a1->expn; - } else -#endif - if (radix_bits != 0) { - if (bf_set(a1, a2)) - goto fail1; - if (fmt == BF_FTOA_FORMAT_FIXED) { - slimb_t prec_bits; - n_digits = prec; - n_max = n_digits; - /* align to the radix point */ - prec_bits = prec * radix_bits - - smod(-a1->expn, radix_bits); - if (bf_round(a1, prec_bits, - (flags & BF_RND_MASK)) & BF_ST_MEM_ERROR) - goto fail1; - } else { - limb_t digit_mask; - 
slimb_t pos; - /* position of the digit before the most - significant digit in bits */ - pos = a1->len * LIMB_BITS + - smod(-a1->expn, radix_bits); - n_digits = ceil_div(pos, radix_bits); - /* remove the trailing zero digits */ - digit_mask = ((limb_t)1 << radix_bits) - 1; - while (n_digits > 1 && - (get_bits(a1->tab, a1->len, pos - n_digits * radix_bits) & digit_mask) == 0) { - n_digits--; - } - n_max = n_digits + 4; - } - n = ceil_div(a1->expn, radix_bits); - } else { - bf_t a_s, *a = &a_s; - - /* make a positive number */ - a->tab = a2->tab; - a->len = a2->len; - a->expn = a2->expn; - a->sign = 0; - - if (fmt == BF_FTOA_FORMAT_FIXED) { - n_digits = prec; - n_max = n_digits; - } else { - slimb_t n_digits_max, n_digits_min; - - assert(prec != BF_PREC_INF); - n_digits = 1 + bf_mul_log2_radix(prec, radix, true, true); - /* max number of digits for non exponential - notation. The rational is to have the same rule - as JS i.e. n_max = 21 for 64 bit float in base 10. */ - n_max = n_digits + 4; - if (fmt == BF_FTOA_FORMAT_FREE_MIN) { - bf_t b_s, *b = &b_s; - - /* find the minimum number of digits by - dichotomy. */ - /* XXX: inefficient */ - n_digits_max = n_digits; - n_digits_min = 1; - bf_init(ctx, b); - while (n_digits_min < n_digits_max) { - n_digits = (n_digits_min + n_digits_max) / 2; - if (bf_convert_to_radix(a1, &n, a, radix, n_digits, - flags & BF_RND_MASK, false)) { - bf_delete(b); - goto fail1; - } - /* convert back to a number and compare */ - ret = bf_mul_pow_radix(b, a1, radix, n - n_digits, - prec, - (flags & ~BF_RND_MASK) | - BF_RNDN); - if (ret & BF_ST_MEM_ERROR) { - bf_delete(b); - goto fail1; - } - if (bf_cmpu(b, a) == 0) { - n_digits_max = n_digits; - } else { - n_digits_min = n_digits + 1; - } - } - bf_delete(b); - n_digits = n_digits_max; - } - } - if (bf_convert_to_radix(a1, &n, a, radix, n_digits, - flags & BF_RND_MASK, false)) { - fail1: - bf_delete(a1); - goto fail; - } - } - if (a1->expn == BF_EXP_ZERO && - fmt != BF_FTOA_FORMAT_FIXED && - !(flags & BF_FTOA_FORCE_EXP)) { - /* just output zero */ - dbuf_putstr(s, "0"); - } else { - if (flags & BF_FTOA_ADD_PREFIX) { - if (radix == 16) - dbuf_putstr(s, "0x"); - else if (radix == 8) - dbuf_putstr(s, "0o"); - else if (radix == 2) - dbuf_putstr(s, "0b"); - } - if (a1->expn == BF_EXP_ZERO) - n = 1; - if ((flags & BF_FTOA_FORCE_EXP) || - n <= -6 || n > n_max) { - /* exponential notation */ - output_digits(s, a1, radix, n_digits, 1, is_dec); - if (radix_bits != 0 && radix <= 16) { - slimb_t exp_n = (n - 1) * radix_bits; - if (flags & BF_FTOA_JS_QUIRKS) - dbuf_printf(s, "p%+" PRId_LIMB, exp_n); - else - dbuf_printf(s, "p%" PRId_LIMB, exp_n); - } else { - const char c = radix <= 10 ? 
'e' : '@'; - if (flags & BF_FTOA_JS_QUIRKS) - dbuf_printf(s, "%c%+" PRId_LIMB, c, n - 1); - else - dbuf_printf(s, "%c%" PRId_LIMB, c, n - 1); - } - } else if (n <= 0) { - /* 0.x */ - dbuf_putstr(s, "0."); - for(i = 0; i < -n; i++) { - dbuf_putc(s, '0'); - } - output_digits(s, a1, radix, n_digits, n_digits, is_dec); - } else { - if (n_digits <= n) { - /* no dot */ - output_digits(s, a1, radix, n_digits, n_digits, is_dec); - for(i = 0; i < (n - n_digits); i++) - dbuf_putc(s, '0'); - } else { - output_digits(s, a1, radix, n_digits, n, is_dec); - } - } - } - } - bf_delete(a1); - } - } - dbuf_putc(s, '\0'); - if (dbuf_error(s)) - goto fail; - if (plen) - *plen = s->size - 1; - return (char *)s->buf; - fail: - bf_free(ctx, s->buf); - if (plen) - *plen = 0; - return NULL; -} - -char *bf_ftoa(size_t *plen, const bf_t *a, int radix, limb_t prec, - bf_flags_t flags) -{ - return bf_ftoa_internal(plen, a, radix, prec, flags, false); -} - -/***************************************************************/ -/* transcendental functions */ - -/* Note: the algorithm is from MPFR */ -static void bf_const_log2_rec(bf_t *T, bf_t *P, bf_t *Q, limb_t n1, - limb_t n2, bool need_P) -{ - bf_context_t *s = T->ctx; - if ((n2 - n1) == 1) { - if (n1 == 0) { - bf_set_ui(P, 3); - } else { - bf_set_ui(P, n1); - P->sign = 1; - } - bf_set_ui(Q, 2 * n1 + 1); - Q->expn += 2; - bf_set(T, P); - } else { - limb_t m; - bf_t T1_s, *T1 = &T1_s; - bf_t P1_s, *P1 = &P1_s; - bf_t Q1_s, *Q1 = &Q1_s; - - m = n1 + ((n2 - n1) >> 1); - bf_const_log2_rec(T, P, Q, n1, m, true); - bf_init(s, T1); - bf_init(s, P1); - bf_init(s, Q1); - bf_const_log2_rec(T1, P1, Q1, m, n2, need_P); - bf_mul(T, T, Q1, BF_PREC_INF, BF_RNDZ); - bf_mul(T1, T1, P, BF_PREC_INF, BF_RNDZ); - bf_add(T, T, T1, BF_PREC_INF, BF_RNDZ); - if (need_P) - bf_mul(P, P, P1, BF_PREC_INF, BF_RNDZ); - bf_mul(Q, Q, Q1, BF_PREC_INF, BF_RNDZ); - bf_delete(T1); - bf_delete(P1); - bf_delete(Q1); - } -} - -/* compute log(2) with faithful rounding at precision 'prec' */ -static void bf_const_log2_internal(bf_t *T, limb_t prec) -{ - limb_t w, N; - bf_t P_s, *P = &P_s; - bf_t Q_s, *Q = &Q_s; - - w = prec + 15; - N = w / 3 + 1; - bf_init(T->ctx, P); - bf_init(T->ctx, Q); - bf_const_log2_rec(T, P, Q, 0, N, false); - bf_div(T, T, Q, prec, BF_RNDN); - bf_delete(P); - bf_delete(Q); -} - -/* PI constant */ - -#define CHUD_A 13591409 -#define CHUD_B 545140134 -#define CHUD_C 640320 -#define CHUD_BITS_PER_TERM 47 - -static void chud_bs(bf_t *P, bf_t *Q, bf_t *G, int64_t a, int64_t b, int need_g, - limb_t prec) -{ - bf_context_t *s = P->ctx; - int64_t c; - - if (a == (b - 1)) { - bf_t T0, T1; - - bf_init(s, &T0); - bf_init(s, &T1); - bf_set_ui(G, 2 * b - 1); - bf_mul_ui(G, G, 6 * b - 1, prec, BF_RNDN); - bf_mul_ui(G, G, 6 * b - 5, prec, BF_RNDN); - bf_set_ui(&T0, CHUD_B); - bf_mul_ui(&T0, &T0, b, prec, BF_RNDN); - bf_set_ui(&T1, CHUD_A); - bf_add(&T0, &T0, &T1, prec, BF_RNDN); - bf_mul(P, G, &T0, prec, BF_RNDN); - P->sign = b & 1; - - bf_set_ui(Q, b); - bf_mul_ui(Q, Q, b, prec, BF_RNDN); - bf_mul_ui(Q, Q, b, prec, BF_RNDN); - bf_mul_ui(Q, Q, (uint64_t)CHUD_C * CHUD_C * CHUD_C / 24, prec, BF_RNDN); - bf_delete(&T0); - bf_delete(&T1); - } else { - bf_t P2, Q2, G2; - - bf_init(s, &P2); - bf_init(s, &Q2); - bf_init(s, &G2); - - c = (a + b) / 2; - chud_bs(P, Q, G, a, c, 1, prec); - chud_bs(&P2, &Q2, &G2, c, b, need_g, prec); - - /* Q = Q1 * Q2 */ - /* G = G1 * G2 */ - /* P = P1 * Q2 + P2 * G1 */ - bf_mul(&P2, &P2, G, prec, BF_RNDN); - if (!need_g) - bf_set_ui(G, 0); - bf_mul(P, P, &Q2, prec, BF_RNDN); - 
bf_add(P, P, &P2, prec, BF_RNDN); - bf_delete(&P2); - - bf_mul(Q, Q, &Q2, prec, BF_RNDN); - bf_delete(&Q2); - if (need_g) - bf_mul(G, G, &G2, prec, BF_RNDN); - bf_delete(&G2); - } -} - -/* compute Pi with faithful rounding at precision 'prec' using the - Chudnovsky formula */ -static void bf_const_pi_internal(bf_t *Q, limb_t prec) -{ - bf_context_t *s = Q->ctx; - int64_t n, prec1; - bf_t P, G; - - /* number of serie terms */ - n = prec / CHUD_BITS_PER_TERM + 1; - /* XXX: precision analysis */ - prec1 = prec + 32; - - bf_init(s, &P); - bf_init(s, &G); - - chud_bs(&P, Q, &G, 0, n, 0, BF_PREC_INF); - - bf_mul_ui(&G, Q, CHUD_A, prec1, BF_RNDN); - bf_add(&P, &G, &P, prec1, BF_RNDN); - bf_div(Q, Q, &P, prec1, BF_RNDF); - - bf_set_ui(&P, CHUD_C); - bf_sqrt(&G, &P, prec1, BF_RNDF); - bf_mul_ui(&G, &G, (uint64_t)CHUD_C / 12, prec1, BF_RNDF); - bf_mul(Q, Q, &G, prec, BF_RNDN); - bf_delete(&P); - bf_delete(&G); -} - -static int bf_const_get(bf_t *T, limb_t prec, bf_flags_t flags, - BFConstCache *c, - void (*func)(bf_t *res, limb_t prec), int sign) -{ - limb_t ziv_extra_bits, prec1; - - ziv_extra_bits = 32; - for(;;) { - prec1 = prec + ziv_extra_bits; - if (c->prec < prec1) { - if (c->val.len == 0) - bf_init(T->ctx, &c->val); - func(&c->val, prec1); - c->prec = prec1; - } else { - prec1 = c->prec; - } - bf_set(T, &c->val); - T->sign = sign; - if (!bf_can_round(T, prec, flags & BF_RND_MASK, prec1)) { - /* and more precision and retry */ - ziv_extra_bits = ziv_extra_bits + (ziv_extra_bits / 2); - } else { - break; - } - } - return bf_round(T, prec, flags); -} - -static void bf_const_free(BFConstCache *c) -{ - bf_delete(&c->val); - memset(c, 0, sizeof(*c)); -} - -int bf_const_log2(bf_t *T, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = T->ctx; - return bf_const_get(T, prec, flags, &s->log2_cache, bf_const_log2_internal, 0); -} - -/* return rounded pi * (1 - 2 * sign) */ -static int bf_const_pi_signed(bf_t *T, int sign, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = T->ctx; - return bf_const_get(T, prec, flags, &s->pi_cache, bf_const_pi_internal, - sign); -} - -int bf_const_pi(bf_t *T, limb_t prec, bf_flags_t flags) -{ - return bf_const_pi_signed(T, 0, prec, flags); -} - -void bf_clear_cache(bf_context_t *s) -{ -#ifdef USE_FFT_MUL - fft_clear_cache(s); -#endif - bf_const_free(&s->log2_cache); - bf_const_free(&s->pi_cache); -} - -/* ZivFunc should compute the result 'r' with faithful rounding at - precision 'prec'. For efficiency purposes, the final bf_round() - does not need to be done in the function. 
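Where CHUD_BITS_PER_TERM = 47 above comes from: the binomial part of a Chudnovsky term grows like 1728^n (1728 = 12^3) while the denominator grows like 640320^(3n), so each term contributes about log2(640320^3 / 1728) ~ 47.11 bits. A one-line check in plain C:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double c3 = 640320.0 * 640320.0 * 640320.0;
    double bits_per_term = log2(c3 / 1728.0);     /* ~47.11 */
    long prec = 237;                              /* example target */
    printf("bits/term=%.2f, terms for %ld bits: %ld\n",
           bits_per_term, prec, prec / 47 + 1);   /* 6, as in the code above */
    return 0;
}
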
*/ -typedef int ZivFunc(bf_t *r, const bf_t *a, limb_t prec, void *opaque); - -static int bf_ziv_rounding(bf_t *r, const bf_t *a, - limb_t prec, bf_flags_t flags, - ZivFunc *f, void *opaque) -{ - int rnd_mode, ret; - slimb_t prec1, ziv_extra_bits; - - rnd_mode = flags & BF_RND_MASK; - if (rnd_mode == BF_RNDF) { - /* no need to iterate */ - f(r, a, prec, opaque); - ret = 0; - } else { - ziv_extra_bits = 32; - for(;;) { - prec1 = prec + ziv_extra_bits; - ret = f(r, a, prec1, opaque); - if (ret & (BF_ST_OVERFLOW | BF_ST_UNDERFLOW | BF_ST_MEM_ERROR)) { - /* overflow or underflow should never happen because - it indicates the rounding cannot be done correctly, - but we do not catch all the cases */ - return ret; - } - /* if the result is exact, we can stop */ - if (!(ret & BF_ST_INEXACT)) { - ret = 0; - break; - } - if (bf_can_round(r, prec, rnd_mode, prec1)) { - ret = BF_ST_INEXACT; - break; - } - ziv_extra_bits = ziv_extra_bits * 2; - // printf("ziv_extra_bits=%" PRId64 "\n", (int64_t)ziv_extra_bits); - } - } - if (r->len == 0) - return ret; - else - return __bf_round(r, prec, flags, r->len, ret); -} - -/* add (1 - 2*e_sign) * 2^e */ -static int bf_add_epsilon(bf_t *r, const bf_t *a, slimb_t e, int e_sign, - limb_t prec, int flags) -{ - bf_t T_s, *T = &T_s; - int ret; - /* small argument case: result = 1 + epsilon * sign(x) */ - bf_init(a->ctx, T); - bf_set_ui(T, 1); - T->sign = e_sign; - T->expn += e; - ret = bf_add(r, r, T, prec, flags); - bf_delete(T); - return ret; -} - -/* Compute the exponential using faithful rounding at precision 'prec'. - Note: the algorithm is from MPFR */ -static int bf_exp_internal(bf_t *r, const bf_t *a, limb_t prec, void *opaque) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - slimb_t n, K, l, i, prec1; - - assert(r != a); - - /* argument reduction: - T = a - n*log(2) with 0 <= T < log(2) and n integer. - */ - bf_init(s, T); - if (a->expn <= -1) { - /* 0 <= abs(a) <= 0.5 */ - if (a->sign) - n = -1; - else - n = 0; - } else { - bf_const_log2(T, LIMB_BITS, BF_RNDZ); - bf_div(T, a, T, LIMB_BITS, BF_RNDD); - bf_get_limb(&n, T, 0); - } - - K = bf_isqrt((prec + 1) / 2); - l = (prec - 1) / K + 1; - /* XXX: precision analysis ? */ - prec1 = prec + (K + 2 * l + 18) + K + 8; - if (a->expn > 0) - prec1 += a->expn; - // printf("n=%ld K=%ld prec1=%ld\n", n, K, prec1); - - bf_const_log2(T, prec1, BF_RNDF); - bf_mul_si(T, T, n, prec1, BF_RNDN); - bf_sub(T, a, T, prec1, BF_RNDN); - - /* reduce the range of T */ - bf_mul_2exp(T, -K, BF_PREC_INF, BF_RNDZ); - - /* Taylor expansion around zero : - 1 + x + x^2/2 + ... + x^n/n! - = (1 + x * (1 + x/2 * (1 + ... (x/n)))) - */ - { - bf_t U_s, *U = &U_s; - - bf_init(s, U); - bf_set_ui(r, 1); - for(i = l ; i >= 1; i--) { - bf_set_ui(U, i); - bf_div(U, T, U, prec1, BF_RNDN); - bf_mul(r, r, U, prec1, BF_RNDN); - bf_add_si(r, r, 1, prec1, BF_RNDN); - } - bf_delete(U); - } - bf_delete(T); - - /* undo the range reduction */ - for(i = 0; i < K; i++) { - bf_mul(r, r, r, prec1, BF_RNDN | BF_FLAG_EXT_EXP); - } - - /* undo the argument reduction */ - bf_mul_2exp(r, n, BF_PREC_INF, BF_RNDZ | BF_FLAG_EXT_EXP); - - return BF_ST_INEXACT; -} - -/* crude overflow and underflow tests for exp(a). 
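The two reductions in bf_exp_internal above can be replayed in double precision to see that they compose back exactly: exp(a) = exp(a - n*log(2)) * 2^n, and the reduced argument is halved K times, then squared K times afterwards. A plain C sketch:

#include <math.h>
#include <stdio.h>

int main(void)
{
    double a = 10.3, ln2 = log(2.0);
    int n = (int)floor(a / ln2);       /* argument reduction: n = 14 */
    double t = a - n * ln2;            /* 0 <= t < log(2) */
    int K = 4, i;
    double r = exp(t / (1 << K));      /* Taylor-friendly small argument */
    for (i = 0; i < K; i++)
        r *= r;                        /* undo range reduction: K squarings */
    r = ldexp(r, n);                   /* undo argument reduction: * 2^n */
    printf("%.12g vs %.12g\n", r, exp(a));   /* both ~29732.6 */
    return 0;
}
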
a_low <= a <= a_high */ -static int check_exp_underflow_overflow(bf_context_t *s, bf_t *r, - const bf_t *a_low, const bf_t *a_high, - limb_t prec, bf_flags_t flags) -{ - bf_t T_s, *T = &T_s; - bf_t log2_s, *log2 = &log2_s; - slimb_t e_min, e_max; - - if (a_high->expn <= 0) - return 0; - - e_max = (limb_t)1 << (bf_get_exp_bits(flags) - 1); - e_min = -e_max + 3; - if (flags & BF_FLAG_SUBNORMAL) - e_min -= (prec - 1); - - bf_init(s, T); - bf_init(s, log2); - bf_const_log2(log2, LIMB_BITS, BF_RNDU); - bf_mul_ui(T, log2, e_max, LIMB_BITS, BF_RNDU); - /* a_low > e_max * log(2) implies exp(a) > e_max */ - if (bf_cmp_lt(T, a_low) > 0) { - /* overflow */ - bf_delete(T); - bf_delete(log2); - return bf_set_overflow(r, 0, prec, flags); - } - /* a_high < (e_min - 2) * log(2) implies exp(a) < (e_min - 2) */ - bf_const_log2(log2, LIMB_BITS, BF_RNDD); - bf_mul_si(T, log2, e_min - 2, LIMB_BITS, BF_RNDD); - if (bf_cmp_lt(a_high, T)) { - int rnd_mode = flags & BF_RND_MASK; - - /* underflow */ - bf_delete(T); - bf_delete(log2); - if (rnd_mode == BF_RNDU) { - /* set the smallest value */ - bf_set_ui(r, 1); - r->expn = e_min; - } else { - bf_set_zero(r, 0); - } - return BF_ST_UNDERFLOW | BF_ST_INEXACT; - } - bf_delete(log2); - bf_delete(T); - return 0; -} - -int bf_exp(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = r->ctx; - int ret; - assert(r != a); - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - } else if (a->expn == BF_EXP_INF) { - if (a->sign) - bf_set_zero(r, 0); - else - bf_set_inf(r, 0); - } else { - bf_set_ui(r, 1); - } - return 0; - } - - ret = check_exp_underflow_overflow(s, r, a, a, prec, flags); - if (ret) - return ret; - if (a->expn < 0 && (-a->expn) >= (prec + 2)) { - /* small argument case: result = 1 + epsilon * sign(x) */ - bf_set_ui(r, 1); - return bf_add_epsilon(r, r, -(prec + 2), a->sign, prec, flags); - } - - return bf_ziv_rounding(r, a, prec, flags, bf_exp_internal, NULL); -} - -static int bf_log_internal(bf_t *r, const bf_t *a, limb_t prec, void *opaque) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - bf_t U_s, *U = &U_s; - bf_t V_s, *V = &V_s; - slimb_t n, prec1, l, i, K; - - assert(r != a); - - bf_init(s, T); - /* argument reduction 1 */ - /* T=a*2^n with 2/3 <= T <= 4/3 */ - { - bf_t U_s, *U = &U_s; - bf_set(T, a); - n = T->expn; - T->expn = 0; - /* U= ~ 2/3 */ - bf_init(s, U); - bf_set_ui(U, 0xaaaaaaaa); - U->expn = 0; - if (bf_cmp_lt(T, U)) { - T->expn++; - n--; - } - bf_delete(U); - } - // printf("n=%ld\n", n); - // bf_print_str("T", T); - - /* XXX: precision analysis */ - /* number of iterations for argument reduction 2 */ - K = bf_isqrt((prec + 1) / 2); - /* order of Taylor expansion */ - l = prec / (2 * K) + 1; - /* precision of the intermediate computations */ - prec1 = prec + K + 2 * l + 32; - - bf_init(s, U); - bf_init(s, V); - - /* Note: cancellation occurs here, so we use more precision (XXX: - reduce the precision by computing the exact cancellation) */ - bf_add_si(T, T, -1, BF_PREC_INF, BF_RNDN); - - /* argument reduction 2 */ - for(i = 0; i < K; i++) { - /* T = T / (1 + sqrt(1 + T)) */ - bf_add_si(U, T, 1, prec1, BF_RNDN); - bf_sqrt(V, U, prec1, BF_RNDF); - bf_add_si(U, V, 1, prec1, BF_RNDN); - bf_div(T, T, U, prec1, BF_RNDN); - } - - { - bf_t Y_s, *Y = &Y_s; - bf_t Y2_s, *Y2 = &Y2_s; - bf_init(s, Y); - bf_init(s, Y2); - - /* compute ln(1+x) = ln((1+y)/(1-y)) with y=x/(2+x) - = y + y^3/3 + ... + y^(2*l + 1) / (2*l+1) - with Y=Y^2 - = y*(1+Y/3+Y^2/5+...) 
= y*(1+Y*(1/3+Y*(1/5 + ...))) - */ - bf_add_si(Y, T, 2, prec1, BF_RNDN); - bf_div(Y, T, Y, prec1, BF_RNDN); - - bf_mul(Y2, Y, Y, prec1, BF_RNDN); - bf_set_ui(r, 0); - for(i = l; i >= 1; i--) { - bf_set_ui(U, 1); - bf_set_ui(V, 2 * i + 1); - bf_div(U, U, V, prec1, BF_RNDN); - bf_add(r, r, U, prec1, BF_RNDN); - bf_mul(r, r, Y2, prec1, BF_RNDN); - } - bf_add_si(r, r, 1, prec1, BF_RNDN); - bf_mul(r, r, Y, prec1, BF_RNDN); - bf_delete(Y); - bf_delete(Y2); - } - bf_delete(V); - bf_delete(U); - - /* multiplication by 2 for the Taylor expansion and undo the - argument reduction 2*/ - bf_mul_2exp(r, K + 1, BF_PREC_INF, BF_RNDZ); - - /* undo the argument reduction 1 */ - bf_const_log2(T, prec1, BF_RNDF); - bf_mul_si(T, T, n, prec1, BF_RNDN); - bf_add(r, r, T, prec1, BF_RNDN); - - bf_delete(T); - return BF_ST_INEXACT; -} - -int bf_log(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - - assert(r != a); - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF) { - if (a->sign) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_set_inf(r, 0); - return 0; - } - } else { - bf_set_inf(r, 1); - return 0; - } - } - if (a->sign) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } - bf_init(s, T); - bf_set_ui(T, 1); - if (bf_cmp_eq(a, T)) { - bf_set_zero(r, 0); - bf_delete(T); - return 0; - } - bf_delete(T); - - return bf_ziv_rounding(r, a, prec, flags, bf_log_internal, NULL); -} - -/* x and y finite and x > 0 */ -static int bf_pow_generic(bf_t *r, const bf_t *x, limb_t prec, void *opaque) -{ - bf_context_t *s = r->ctx; - const bf_t *y = opaque; - bf_t T_s, *T = &T_s; - limb_t prec1; - - bf_init(s, T); - /* XXX: proof for the added precision */ - prec1 = prec + 32; - bf_log(T, x, prec1, BF_RNDF | BF_FLAG_EXT_EXP); - bf_mul(T, T, y, prec1, BF_RNDF | BF_FLAG_EXT_EXP); - if (bf_is_nan(T)) - bf_set_nan(r); - else - bf_exp_internal(r, T, prec1, NULL); /* no overflow/underlow test needed */ - bf_delete(T); - return BF_ST_INEXACT; -} - -/* x and y finite, x > 0, y integer and y fits on one limb */ -static int bf_pow_int(bf_t *r, const bf_t *x, limb_t prec, void *opaque) -{ - bf_context_t *s = r->ctx; - const bf_t *y = opaque; - bf_t T_s, *T = &T_s; - limb_t prec1; - int ret; - slimb_t y1; - - bf_get_limb(&y1, y, 0); - if (y1 < 0) - y1 = -y1; - /* XXX: proof for the added precision */ - prec1 = prec + ceil_log2(y1) * 2 + 8; - ret = bf_pow_ui(r, x, y1 < 0 ? -y1 : y1, prec1, BF_RNDN | BF_FLAG_EXT_EXP); - if (y->sign) { - bf_init(s, T); - bf_set_ui(T, 1); - ret |= bf_div(r, T, r, prec1, BF_RNDN | BF_FLAG_EXT_EXP); - bf_delete(T); - } - return ret; -} - -/* x must be a finite non zero float. Return true if there is a - floating point number r such as x=r^(2^n) and return this floating - point number 'r'. Otherwise return false and r is undefined. 
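/* [editor's note] the shape of the series bf_log_internal sums above,
   in double precision: after the two argument reductions it evaluates
   log(1+x) = log((1+y)/(1-y)) = 2*atanh(y) with y = x/(2+x), as a
   Horner loop over y^2 (fixed series length in this sketch): */

double log1p_sketch(double x)             /* |x| small after reduction */
{
    double y = x / (2.0 + x);
    double y2 = y * y;
    double r = 0.0;
    for (int i = 12; i >= 1; i--)         /* 1/3, 1/5, ... inner first */
        r = (r + 1.0 / (2 * i + 1)) * y2;
    return 2.0 * y * (1.0 + r);           /* the real code folds this 2
                                             into the final bf_mul_2exp
                                             by K + 1 */
}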
*/ -static bool check_exact_power2n(bf_t *r, const bf_t *x, slimb_t n) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - slimb_t e, i, er; - limb_t v; - - /* x = m*2^e with m odd integer */ - e = bf_get_exp_min(x); - /* fast check on the exponent */ - if (n > (LIMB_BITS - 1)) { - if (e != 0) - return false; - er = 0; - } else { - if ((e & (((limb_t)1 << n) - 1)) != 0) - return false; - er = e >> n; - } - /* every perfect odd square = 1 modulo 8 */ - v = get_bits(x->tab, x->len, x->len * LIMB_BITS - x->expn + e); - if ((v & 7) != 1) - return false; - - bf_init(s, T); - bf_set(T, x); - T->expn -= e; - for(i = 0; i < n; i++) { - if (i != 0) - bf_set(T, r); - if (bf_sqrtrem(r, NULL, T) != 0) - return false; - } - r->expn += er; - return true; -} - -/* prec = BF_PREC_INF is accepted for x and y integers and y >= 0 */ -int bf_pow(bf_t *r, const bf_t *x, const bf_t *y, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - bf_t ytmp_s; - bool y_is_int, y_is_odd; - int r_sign, ret, rnd_mode; - slimb_t y_emin; - - if (x->len == 0 || y->len == 0) { - if (y->expn == BF_EXP_ZERO) { - /* pow(x, 0) = 1 */ - bf_set_ui(r, 1); - } else if (x->expn == BF_EXP_NAN) { - bf_set_nan(r); - } else { - int cmp_x_abs_1; - bf_set_ui(r, 1); - cmp_x_abs_1 = bf_cmpu(x, r); - if (cmp_x_abs_1 == 0 && (flags & BF_POW_JS_QUIRKS) && - (y->expn >= BF_EXP_INF)) { - bf_set_nan(r); - } else if (cmp_x_abs_1 == 0 && - (!x->sign || y->expn != BF_EXP_NAN)) { - /* pow(1, y) = 1 even if y = NaN */ - /* pow(-1, +/-inf) = 1 */ - } else if (y->expn == BF_EXP_NAN) { - bf_set_nan(r); - } else if (y->expn == BF_EXP_INF) { - if (y->sign == (cmp_x_abs_1 > 0)) { - bf_set_zero(r, 0); - } else { - bf_set_inf(r, 0); - } - } else { - y_emin = bf_get_exp_min(y); - y_is_odd = (y_emin == 0); - if (y->sign == (x->expn == BF_EXP_ZERO)) { - bf_set_inf(r, y_is_odd & x->sign); - if (y->sign) { - /* pow(0, y) with y < 0 */ - return BF_ST_DIVIDE_ZERO; - } - } else { - bf_set_zero(r, y_is_odd & x->sign); - } - } - } - return 0; - } - bf_init(s, T); - bf_set(T, x); - y_emin = bf_get_exp_min(y); - y_is_int = (y_emin >= 0); - rnd_mode = flags & BF_RND_MASK; - if (x->sign) { - if (!y_is_int) { - bf_set_nan(r); - bf_delete(T); - return BF_ST_INVALID_OP; - } - y_is_odd = (y_emin == 0); - r_sign = y_is_odd; - /* change the directed rounding mode if the sign of the result - is changed */ - if (r_sign && (rnd_mode == BF_RNDD || rnd_mode == BF_RNDU)) - flags ^= 1; - bf_neg(T); - } else { - r_sign = 0; - } - - bf_set_ui(r, 1); - if (bf_cmp_eq(T, r)) { - /* abs(x) = 1: nothing more to do */ - ret = 0; - } else { - /* check the overflow/underflow cases */ - { - bf_t al_s, *al = &al_s; - bf_t ah_s, *ah = &ah_s; - limb_t precl = LIMB_BITS; - - bf_init(s, al); - bf_init(s, ah); - /* compute bounds of log(abs(x)) * y with a low precision */ - /* XXX: compute bf_log() once */ - /* XXX: add a fast test before this slow test */ - bf_log(al, T, precl, BF_RNDD); - bf_log(ah, T, precl, BF_RNDU); - bf_mul(al, al, y, precl, BF_RNDD ^ y->sign); - bf_mul(ah, ah, y, precl, BF_RNDU ^ y->sign); - ret = check_exp_underflow_overflow(s, r, al, ah, prec, flags); - bf_delete(al); - bf_delete(ah); - if (ret) - goto done; - } - - if (y_is_int) { - slimb_t T_bits, e; - int_pow: - T_bits = T->expn - bf_get_exp_min(T); - if (T_bits == 1) { - /* pow(2^b, y) = 2^(b*y) */ - bf_mul_si(T, y, T->expn - 1, LIMB_BITS, BF_RNDZ); - bf_get_limb(&e, T, 0); - bf_set_ui(r, 1); - ret = bf_mul_2exp(r, e, prec, flags); - } else if (prec == BF_PREC_INF) { - slimb_t y1; - /* 
specific case for infinite precision (integer case) */ - bf_get_limb(&y1, y, 0); - assert(!y->sign); - /* x must be an integer, so abs(x) >= 2 */ - if (y1 >= ((slimb_t)1 << BF_EXP_BITS_MAX)) { - bf_delete(T); - return bf_set_overflow(r, 0, BF_PREC_INF, flags); - } - ret = bf_pow_ui(r, T, y1, BF_PREC_INF, BF_RNDZ); - } else { - if (y->expn <= 31) { - /* small enough power: use exponentiation in all cases */ - } else if (y->sign) { - /* cannot be exact */ - goto general_case; - } else { - if (rnd_mode == BF_RNDF) - goto general_case; /* no need to track exact results */ - /* see if the result has a chance to be exact: - if x=a*2^b (a odd), x^y=a^y*2^(b*y) - x^y needs a precision of at least floor_log2(a)*y bits - */ - bf_mul_si(r, y, T_bits - 1, LIMB_BITS, BF_RNDZ); - bf_get_limb(&e, r, 0); - if (prec < e) - goto general_case; - } - ret = bf_ziv_rounding(r, T, prec, flags, bf_pow_int, (void *)y); - } - } else { - if (rnd_mode != BF_RNDF) { - bf_t *y1; - if (y_emin < 0 && check_exact_power2n(r, T, -y_emin)) { - /* the problem is reduced to a power to an integer */ - bf_set(T, r); - y1 = &ytmp_s; - y1->tab = y->tab; - y1->len = y->len; - y1->sign = y->sign; - y1->expn = y->expn - y_emin; - y = y1; - goto int_pow; - } - } - general_case: - ret = bf_ziv_rounding(r, T, prec, flags, bf_pow_generic, (void *)y); - } - } - done: - bf_delete(T); - r->sign = r_sign; - return ret; -} - -/* compute sqrt(-2*x-x^2) to get |sin(x)| from cos(x) - 1. */ -static void bf_sqrt_sin(bf_t *r, const bf_t *x, limb_t prec1) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - bf_init(s, T); - bf_set(T, x); - bf_mul(r, T, T, prec1, BF_RNDN); - bf_mul_2exp(T, 1, BF_PREC_INF, BF_RNDZ); - bf_add(T, T, r, prec1, BF_RNDN); - bf_neg(T); - bf_sqrt(r, T, prec1, BF_RNDF); - bf_delete(T); -} - -static int bf_sincos(bf_t *s, bf_t *c, const bf_t *a, limb_t prec) -{ - bf_context_t *s1 = a->ctx; - bf_t T_s, *T = &T_s; - bf_t U_s, *U = &U_s; - bf_t r_s, *r = &r_s; - slimb_t K, prec1, i, l, mod, prec2; - int is_neg; - - assert(c != a && s != a); - - bf_init(s1, T); - bf_init(s1, U); - bf_init(s1, r); - - /* XXX: precision analysis */ - K = bf_isqrt(prec / 2); - l = prec / (2 * K) + 1; - prec1 = prec + 2 * K + l + 8; - - /* after the modulo reduction, -pi/4 <= T <= pi/4 */ - if (a->expn <= -1) { - /* abs(a) <= 0.25: no modulo reduction needed */ - bf_set(T, a); - mod = 0; - } else { - slimb_t cancel; - cancel = 0; - for(;;) { - prec2 = prec1 + a->expn + cancel; - bf_const_pi(U, prec2, BF_RNDF); - bf_mul_2exp(U, -1, BF_PREC_INF, BF_RNDZ); - bf_remquo(&mod, T, a, U, prec2, BF_RNDN, BF_RNDN); - // printf("T.expn=%ld prec2=%ld\n", T->expn, prec2); - if (mod == 0 || (T->expn != BF_EXP_ZERO && - (T->expn + prec2) >= (prec1 - 1))) - break; - /* increase the number of bits until the precision is good enough */ - cancel = bf_max(-T->expn, (cancel + 1) * 3 / 2); - } - mod &= 3; - } - - is_neg = T->sign; - - /* compute cosm1(x) = cos(x) - 1 */ - bf_mul(T, T, T, prec1, BF_RNDN); - bf_mul_2exp(T, -2 * K, BF_PREC_INF, BF_RNDZ); - - /* Taylor expansion: - -x^2/2 + x^4/4! - x^6/6! + ... 
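/* [editor's note] a double-precision sketch of the cosm1 evaluation
   this comment describes (the series loop follows just below, the
   double-angle undo after it): Taylor series in t^2 on the argument
   halved K times, then cosm1(2x) = 2*(2*cosm1(x) + cosm1(x)^2) applied
   K times.  Fixed K and series length here: */

#include <math.h>

double cosm1_sketch(double x)             /* returns cos(x) - 1 */
{
    const int K = 8, l = 10;
    double t = ldexp(x, -K);
    double t2 = t * t;
    double r = 1.0;                       /* -t2/2 + t2^2/24 - ... */
    for (int i = l; i >= 1; i--) {
        r = -r * t2 / ((2 * i - 1) * (2 * i));
        if (i != 1)
            r += 1.0;
    }
    for (int i = 0; i < K; i++)           /* undo the K halvings */
        r = 2.0 * (2.0 * r + r * r);
    return r;
}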
- */ - bf_set_ui(r, 1); - for(i = l ; i >= 1; i--) { - bf_set_ui(U, 2 * i - 1); - bf_mul_ui(U, U, 2 * i, BF_PREC_INF, BF_RNDZ); - bf_div(U, T, U, prec1, BF_RNDN); - bf_mul(r, r, U, prec1, BF_RNDN); - bf_neg(r); - if (i != 1) - bf_add_si(r, r, 1, prec1, BF_RNDN); - } - bf_delete(U); - - /* undo argument reduction: - cosm1(2*x)= 2*(2*cosm1(x)+cosm1(x)^2) - */ - for(i = 0; i < K; i++) { - bf_mul(T, r, r, prec1, BF_RNDN); - bf_mul_2exp(r, 1, BF_PREC_INF, BF_RNDZ); - bf_add(r, r, T, prec1, BF_RNDN); - bf_mul_2exp(r, 1, BF_PREC_INF, BF_RNDZ); - } - bf_delete(T); - - if (c) { - if ((mod & 1) == 0) { - bf_add_si(c, r, 1, prec1, BF_RNDN); - } else { - bf_sqrt_sin(c, r, prec1); - c->sign = is_neg ^ 1; - } - c->sign ^= mod >> 1; - } - if (s) { - if ((mod & 1) == 0) { - bf_sqrt_sin(s, r, prec1); - s->sign = is_neg; - } else { - bf_add_si(s, r, 1, prec1, BF_RNDN); - } - s->sign ^= mod >> 1; - } - bf_delete(r); - return BF_ST_INEXACT; -} - -static int bf_cos_internal(bf_t *r, const bf_t *a, limb_t prec, void *opaque) -{ - return bf_sincos(NULL, r, a, prec); -} - -int bf_cos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_set_ui(r, 1); - return 0; - } - } - - /* small argument case: result = 1+r(x) with r(x) = -x^2/2 + - O(X^4). We assume r(x) < 2^(2*EXP(x) - 1). */ - if (a->expn < 0) { - slimb_t e; - e = 2 * a->expn - 1; - if (e < -(prec + 2)) { - bf_set_ui(r, 1); - return bf_add_epsilon(r, r, e, 1, prec, flags); - } - } - - return bf_ziv_rounding(r, a, prec, flags, bf_cos_internal, NULL); -} - -static int bf_sin_internal(bf_t *r, const bf_t *a, limb_t prec, void *opaque) -{ - return bf_sincos(r, NULL, a, prec); -} - -int bf_sin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_set_zero(r, a->sign); - return 0; - } - } - - /* small argument case: result = x+r(x) with r(x) = -x^3/6 + - O(X^5). We assume r(x) < 2^(3*EXP(x) - 2). */ - if (a->expn < 0) { - slimb_t e; - e = sat_add(2 * a->expn, a->expn - 2); - if (e < a->expn - bf_max(prec + 2, a->len * LIMB_BITS + 2)) { - bf_set(r, a); - return bf_add_epsilon(r, r, e, 1 - a->sign, prec, flags); - } - } - - return bf_ziv_rounding(r, a, prec, flags, bf_sin_internal, NULL); -} - -static int bf_tan_internal(bf_t *r, const bf_t *a, limb_t prec, void *opaque) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - limb_t prec1; - - /* XXX: precision analysis */ - prec1 = prec + 8; - bf_init(s, T); - bf_sincos(r, T, a, prec1); - bf_div(r, r, T, prec1, BF_RNDF); - bf_delete(T); - return BF_ST_INEXACT; -} - -int bf_tan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - assert(r != a); - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_set_zero(r, a->sign); - return 0; - } - } - - /* small argument case: result = x+r(x) with r(x) = x^3/3 + - O(X^5). We assume r(x) < 2^(3*EXP(x) - 1). 
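/* [editor's note] on the recurring "small argument" shortcuts (sin and
   cos above, tan here, atan and asin below): since |x| < 2^EXP(x), the
   first neglected term for tan is x^3/3 < 2^(3*EXP(x))/3 <
   2^(3*EXP(x) - 1), which is where the bound in this comment comes
   from; the sin/asin variants start at x^3/6 < 2^(3*EXP(x) - 2).  Once
   that bound drops below the rounding precision, the code skips the
   full algorithm and returns x plus a signed epsilon 2^e via
   bf_add_epsilon(), which is enough for the final rounding to move in
   the right direction and to raise the inexact flag. */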
*/ - if (a->expn < 0) { - slimb_t e; - e = sat_add(2 * a->expn, a->expn - 1); - if (e < a->expn - bf_max(prec + 2, a->len * LIMB_BITS + 2)) { - bf_set(r, a); - return bf_add_epsilon(r, r, e, a->sign, prec, flags); - } - } - - return bf_ziv_rounding(r, a, prec, flags, bf_tan_internal, NULL); -} - -/* if add_pi2 is true, add pi/2 to the result (used for acos(x) to - avoid cancellation) */ -static int bf_atan_internal(bf_t *r, const bf_t *a, limb_t prec, - void *opaque) -{ - bf_context_t *s = r->ctx; - bool add_pi2 = (bool)(intptr_t)opaque; - bf_t T_s, *T = &T_s; - bf_t U_s, *U = &U_s; - bf_t V_s, *V = &V_s; - bf_t X2_s, *X2 = &X2_s; - int cmp_1; - slimb_t prec1, i, K, l; - - /* XXX: precision analysis */ - K = bf_isqrt((prec + 1) / 2); - l = prec / (2 * K) + 1; - prec1 = prec + K + 2 * l + 32; - // printf("prec=%d K=%d l=%d prec1=%d\n", (int)prec, (int)K, (int)l, (int)prec1); - - bf_init(s, T); - cmp_1 = (a->expn >= 1); /* a >= 1 */ - if (cmp_1) { - bf_set_ui(T, 1); - bf_div(T, T, a, prec1, BF_RNDN); - } else { - bf_set(T, a); - } - - /* abs(T) <= 1 */ - - /* argument reduction */ - - bf_init(s, U); - bf_init(s, V); - bf_init(s, X2); - for(i = 0; i < K; i++) { - /* T = T / (1 + sqrt(1 + T^2)) */ - bf_mul(U, T, T, prec1, BF_RNDN); - bf_add_si(U, U, 1, prec1, BF_RNDN); - bf_sqrt(V, U, prec1, BF_RNDN); - bf_add_si(V, V, 1, prec1, BF_RNDN); - bf_div(T, T, V, prec1, BF_RNDN); - } - - /* Taylor series: - x - x^3/3 + ... + (-1)^ l * y^(2*l + 1) / (2*l+1) - */ - bf_mul(X2, T, T, prec1, BF_RNDN); - bf_set_ui(r, 0); - for(i = l; i >= 1; i--) { - bf_set_si(U, 1); - bf_set_ui(V, 2 * i + 1); - bf_div(U, U, V, prec1, BF_RNDN); - bf_neg(r); - bf_add(r, r, U, prec1, BF_RNDN); - bf_mul(r, r, X2, prec1, BF_RNDN); - } - bf_neg(r); - bf_add_si(r, r, 1, prec1, BF_RNDN); - bf_mul(r, r, T, prec1, BF_RNDN); - - /* undo the argument reduction */ - bf_mul_2exp(r, K, BF_PREC_INF, BF_RNDZ); - - bf_delete(U); - bf_delete(V); - bf_delete(X2); - - i = add_pi2; - if (cmp_1 > 0) { - /* undo the inversion : r = sign(a)*PI/2 - r */ - bf_neg(r); - i += 1 - 2 * a->sign; - } - /* add i*(pi/2) with -1 <= i <= 2 */ - if (i != 0) { - bf_const_pi(T, prec1, BF_RNDF); - if (i != 2) - bf_mul_2exp(T, -1, BF_PREC_INF, BF_RNDZ); - T->sign = (i < 0); - bf_add(r, T, r, prec1, BF_RNDN); - } - - bf_delete(T); - return BF_ST_INEXACT; -} - -int bf_atan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - int res; - - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF) { - /* -PI/2 or PI/2 */ - bf_const_pi_signed(r, a->sign, prec, flags); - bf_mul_2exp(r, -1, BF_PREC_INF, BF_RNDZ); - return BF_ST_INEXACT; - } else { - bf_set_zero(r, a->sign); - return 0; - } - } - - bf_init(s, T); - bf_set_ui(T, 1); - res = bf_cmpu(a, T); - bf_delete(T); - if (res == 0) { - /* short cut: abs(a) == 1 -> +/-pi/4 */ - bf_const_pi_signed(r, a->sign, prec, flags); - bf_mul_2exp(r, -2, BF_PREC_INF, BF_RNDZ); - return BF_ST_INEXACT; - } - - /* small argument case: result = x+r(x) with r(x) = -x^3/3 + - O(X^5). We assume r(x) < 2^(3*EXP(x) - 1). 
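/* [editor's note] double-precision sketch of bf_atan_internal above:
   for |a| >= 1 it first inverts (atan(a) = sign(a)*pi/2 - atan(1/a)),
   then applies the half-angle map t <- t/(1 + sqrt(1 + t^2)) K times,
   sums the odd Taylor series, and undoes the halvings by scaling: */

#include <math.h>

double atan_sketch(double t)              /* assumes |t| <= 1 */
{
    const int K = 8, l = 12;
    /* atan(t) = 2 * atan(t / (1 + sqrt(1 + t^2))) */
    for (int i = 0; i < K; i++)
        t = t / (1.0 + sqrt(1.0 + t * t));
    double t2 = t * t;
    double r = 0.0;
    for (int i = l; i >= 1; i--)          /* t - t^3/3 + t^5/5 - ... */
        r = (1.0 / (2 * i + 1) - r) * t2;
    return ldexp((1.0 - r) * t, K);       /* undo the halvings */
}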
*/ - if (a->expn < 0) { - slimb_t e; - e = sat_add(2 * a->expn, a->expn - 1); - if (e < a->expn - bf_max(prec + 2, a->len * LIMB_BITS + 2)) { - bf_set(r, a); - return bf_add_epsilon(r, r, e, 1 - a->sign, prec, flags); - } - } - - return bf_ziv_rounding(r, a, prec, flags, bf_atan_internal, (void *)false); -} - -static int bf_atan2_internal(bf_t *r, const bf_t *y, limb_t prec, void *opaque) -{ - bf_context_t *s = r->ctx; - const bf_t *x = opaque; - bf_t T_s, *T = &T_s; - limb_t prec1; - int ret; - - if (y->expn == BF_EXP_NAN || x->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } - - /* compute atan(y/x) assumming inf/inf = 1 and 0/0 = 0 */ - bf_init(s, T); - prec1 = prec + 32; - if (y->expn == BF_EXP_INF && x->expn == BF_EXP_INF) { - bf_set_ui(T, 1); - T->sign = y->sign ^ x->sign; - } else if (y->expn == BF_EXP_ZERO && x->expn == BF_EXP_ZERO) { - bf_set_zero(T, y->sign ^ x->sign); - } else { - bf_div(T, y, x, prec1, BF_RNDF); - } - ret = bf_atan(r, T, prec1, BF_RNDF); - - if (x->sign) { - /* if x < 0 (it includes -0), return sign(y)*pi + atan(y/x) */ - bf_const_pi(T, prec1, BF_RNDF); - T->sign = y->sign; - bf_add(r, r, T, prec1, BF_RNDN); - ret |= BF_ST_INEXACT; - } - - bf_delete(T); - return ret; -} - -int bf_atan2(bf_t *r, const bf_t *y, const bf_t *x, - limb_t prec, bf_flags_t flags) -{ - return bf_ziv_rounding(r, y, prec, flags, bf_atan2_internal, (void *)x); -} - -static int bf_asin_internal(bf_t *r, const bf_t *a, limb_t prec, void *opaque) -{ - bf_context_t *s = r->ctx; - bool is_acos = (bool)(intptr_t)opaque; - bf_t T_s, *T = &T_s; - limb_t prec1, prec2; - - /* asin(x) = atan(x/sqrt(1-x^2)) - acos(x) = pi/2 - asin(x) */ - prec1 = prec + 8; - /* increase the precision in x^2 to compensate the cancellation in - (1-x^2) if x is close to 1 */ - /* XXX: use less precision when possible */ - if (a->expn >= 0) - prec2 = BF_PREC_INF; - else - prec2 = prec1; - bf_init(s, T); - bf_mul(T, a, a, prec2, BF_RNDN); - bf_neg(T); - bf_add_si(T, T, 1, prec2, BF_RNDN); - - bf_sqrt(r, T, prec1, BF_RNDN); - bf_div(T, a, r, prec1, BF_RNDN); - if (is_acos) - bf_neg(T); - bf_atan_internal(r, T, prec1, (void *)(intptr_t)is_acos); - bf_delete(T); - return BF_ST_INEXACT; -} - -int bf_asin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - int res; - - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_set_zero(r, a->sign); - return 0; - } - } - bf_init(s, T); - bf_set_ui(T, 1); - res = bf_cmpu(a, T); - bf_delete(T); - if (res > 0) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } - - /* small argument case: result = x+r(x) with r(x) = x^3/6 + - O(X^5). We assume r(x) < 2^(3*EXP(x) - 2). 
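/* [editor's note] sketch of the quadrant correction in
   bf_atan2_internal above: atan(y/x) lands in (-pi/2, pi/2), and for
   x < 0 (including -0, hence a sign test rather than a compare) the
   result is shifted by sign(y)*pi.  Assumes M_PI from math.h: */

#include <math.h>

double atan2_sketch(double y, double x)
{
    double r = atan(y / x);
    if (signbit(x))                       /* x < 0, or x == -0.0 */
        r += signbit(y) ? -M_PI : M_PI;
    return r;
}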
*/ - if (a->expn < 0) { - slimb_t e; - e = sat_add(2 * a->expn, a->expn - 2); - if (e < a->expn - bf_max(prec + 2, a->len * LIMB_BITS + 2)) { - bf_set(r, a); - return bf_add_epsilon(r, r, e, a->sign, prec, flags); - } - } - - return bf_ziv_rounding(r, a, prec, flags, bf_asin_internal, (void *)false); -} - -int bf_acos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = r->ctx; - bf_t T_s, *T = &T_s; - int res; - - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bf_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bf_const_pi(r, prec, flags); - bf_mul_2exp(r, -1, BF_PREC_INF, BF_RNDZ); - return BF_ST_INEXACT; - } - } - bf_init(s, T); - bf_set_ui(T, 1); - res = bf_cmpu(a, T); - bf_delete(T); - if (res > 0) { - bf_set_nan(r); - return BF_ST_INVALID_OP; - } else if (res == 0 && a->sign == 0) { - bf_set_zero(r, 0); - return 0; - } - - return bf_ziv_rounding(r, a, prec, flags, bf_asin_internal, (void *)true); -} - -/***************************************************************/ -/* decimal floating point numbers */ - -#ifdef USE_BF_DEC - -#define adddq(r1, r0, a1, a0) \ - do { \ - limb_t __t = r0; \ - r0 += (a0); \ - r1 += (a1) + (r0 < __t); \ - } while (0) - -#define subdq(r1, r0, a1, a0) \ - do { \ - limb_t __t = r0; \ - r0 -= (a0); \ - r1 -= (a1) + (r0 > __t); \ - } while (0) - -#if LIMB_BITS == 64 - -/* Note: we assume __int128 is available */ -/* uint128_t defined in libbf.h */ -#define muldq(r1, r0, a, b) \ - do { \ - uint128_t __t; \ - __t = (uint128_t)(a) * (uint128_t)(b); \ - r0 = __t; \ - r1 = __t >> 64; \ - } while (0) - -#define divdq(q, r, a1, a0, b) \ - do { \ - uint128_t __t; \ - limb_t __b = (b); \ - __t = ((uint128_t)(a1) << 64) | (a0); \ - q = __t / __b; \ - r = __t % __b; \ - } while (0) - -#else - -#define muldq(r1, r0, a, b) \ - do { \ - uint64_t __t; \ - __t = (uint64_t)(a) * (uint64_t)(b); \ - r0 = __t; \ - r1 = __t >> 32; \ - } while (0) - -#define divdq(q, r, a1, a0, b) \ - do { \ - uint64_t __t; \ - limb_t __b = (b); \ - __t = ((uint64_t)(a1) << 32) | (a0); \ - q = __t / __b; \ - r = __t % __b; \ - } while (0) - -#endif /* LIMB_BITS != 64 */ - -#if LIMB_DIGITS == 19 - -/* WARNING: hardcoded for b = 1e19. It is assumed that: - 0 <= a1 < 2^63 */ -#define divdq_base(q, r, a1, a0)\ -do {\ - uint64_t __a0, __a1, __t0, __t1, __b = BF_DEC_BASE; \ - __a0 = a0;\ - __a1 = a1;\ - __t0 = __a1;\ - __t0 = shld(__t0, __a0, 1);\ - muldq(q, __t1, __t0, UINT64_C(17014118346046923173)); \ - muldq(__t1, __t0, q, __b);\ - subdq(__a1, __a0, __t1, __t0);\ - subdq(__a1, __a0, 1, __b * 2); \ - __t0 = (slimb_t)__a1 >> 1; \ - q += 2 + __t0;\ - adddq(__a1, __a0, 0, __b & __t0);\ - q += __a1; \ - __a0 += __b & __a1; \ - r = __a0;\ -} while(0) - -#elif LIMB_DIGITS == 9 - -/* WARNING: hardcoded for b = 1e9. It is assumed that: - 0 <= a1 < 2^29 */ -#define divdq_base(q, r, a1, a0)\ -do {\ - uint32_t __t0, __t1, __b = BF_DEC_BASE; \ - __t0 = a1;\ - __t1 = a0;\ - __t0 = (__t0 << 3) | (__t1 >> (32 - 3)); \ - muldq(q, __t1, __t0, 2305843009U);\ - r = a0 - q * __b;\ - __t1 = (r >= __b);\ - q += __t1;\ - if (__t1)\ - r -= __b;\ -} while(0) - -#endif - -/* fast integer division by a fixed constant */ - -typedef struct FastDivData { - limb_t m1; /* multiplier */ - int8_t shift1; - int8_t shift2; -} FastDivData; - -/* From "Division by Invariant Integers using Multiplication" by - Torborn Granlund and Peter L. 
Montgomery */ -/* d must be != 0 */ -static inline __maybe_unused void fast_udiv_init(FastDivData *s, limb_t d) -{ - int l; - limb_t q, r, m1; - if (d == 1) - l = 0; - else - l = 64 - clz64(d - 1); - divdq(q, r, ((limb_t)1 << l) - d, 0, d); - (void)r; - m1 = q + 1; - // printf("d=%lu l=%d m1=0x%016lx\n", d, l, m1); - s->m1 = m1; - s->shift1 = l; - if (s->shift1 > 1) - s->shift1 = 1; - s->shift2 = l - 1; - if (s->shift2 < 0) - s->shift2 = 0; -} - -static inline limb_t fast_udiv(limb_t a, const FastDivData *s) -{ - limb_t t0, t1; - muldq(t1, t0, s->m1, a); - t0 = (a - t1) >> s->shift1; - return (t1 + t0) >> s->shift2; -} - -/* contains 10^i */ -const limb_t mp_pow_dec[LIMB_DIGITS + 1] = { - 1U, - 10U, - 100U, - 1000U, - 10000U, - 100000U, - 1000000U, - 10000000U, - 100000000U, - 1000000000U, -#if LIMB_BITS == 64 - 10000000000U, - 100000000000U, - 1000000000000U, - 10000000000000U, - 100000000000000U, - 1000000000000000U, - 10000000000000000U, - 100000000000000000U, - 1000000000000000000U, - 10000000000000000000U, -#endif -}; - -/* precomputed from fast_udiv_init(10^i) */ -static const FastDivData mp_pow_div[LIMB_DIGITS + 1] = { -#if LIMB_BITS == 32 - { 0x00000001, 0, 0 }, - { 0x9999999a, 1, 3 }, - { 0x47ae147b, 1, 6 }, - { 0x0624dd30, 1, 9 }, - { 0xa36e2eb2, 1, 13 }, - { 0x4f8b588f, 1, 16 }, - { 0x0c6f7a0c, 1, 19 }, - { 0xad7f29ac, 1, 23 }, - { 0x5798ee24, 1, 26 }, - { 0x12e0be83, 1, 29 }, -#else - { 0x0000000000000001, 0, 0 }, - { 0x999999999999999a, 1, 3 }, - { 0x47ae147ae147ae15, 1, 6 }, - { 0x0624dd2f1a9fbe77, 1, 9 }, - { 0xa36e2eb1c432ca58, 1, 13 }, - { 0x4f8b588e368f0847, 1, 16 }, - { 0x0c6f7a0b5ed8d36c, 1, 19 }, - { 0xad7f29abcaf48579, 1, 23 }, - { 0x5798ee2308c39dfa, 1, 26 }, - { 0x12e0be826d694b2f, 1, 29 }, - { 0xb7cdfd9d7bdbab7e, 1, 33 }, - { 0x5fd7fe17964955fe, 1, 36 }, - { 0x19799812dea11198, 1, 39 }, - { 0xc25c268497681c27, 1, 43 }, - { 0x6849b86a12b9b01f, 1, 46 }, - { 0x203af9ee756159b3, 1, 49 }, - { 0xcd2b297d889bc2b7, 1, 53 }, - { 0x70ef54646d496893, 1, 56 }, - { 0x2725dd1d243aba0f, 1, 59 }, - { 0xd83c94fb6d2ac34d, 1, 63 }, -#endif -}; - -/* divide by 10^shift with 0 <= shift <= LIMB_DIGITS */ -static inline limb_t fast_shr_dec(limb_t a, int shift) -{ - return fast_udiv(a, &mp_pow_div[shift]); -} - -/* division and remainder by 10^shift */ -#define fast_shr_rem_dec(q, r, a, shift) q = fast_shr_dec(a, shift), r = a - q * mp_pow_dec[shift] - -limb_t mp_add_dec(limb_t *res, const limb_t *op1, const limb_t *op2, - mp_size_t n, limb_t carry) -{ - limb_t base = BF_DEC_BASE; - mp_size_t i; - limb_t k, a, v; - - k=carry; - for(i=0;i<n;i++) { - /* XXX: reuse the trick in add_mod */ - v = op1[i]; - a = v + op2[i] + k - base; - k = a <= v; - if (!k) - a += base; - res[i]=a; - } - return k; -} - -limb_t mp_add_ui_dec(limb_t *tab, limb_t b, mp_size_t n) -{ - limb_t base = BF_DEC_BASE; - mp_size_t i; - limb_t k, a, v; - - k=b; - for(i=0;i<n;i++) { - v = tab[i]; - a = v + k - base; - k = a <= v; - if (!k) - a += base; - tab[i] = a; - if (k == 0) - break; - } - return k; -} - -limb_t mp_sub_dec(limb_t *res, const limb_t *op1, const limb_t *op2, - mp_size_t n, limb_t carry) -{ - limb_t base = BF_DEC_BASE; - mp_size_t i; - limb_t k, v, a; - - k=carry; - for(i=0;i<n;i++) { - v = op1[i]; - a = v - op2[i] - k; - k = a > v; - if (k) - a += base; - res[i] = a; - } - return k; -} - -limb_t mp_sub_ui_dec(limb_t *tab, limb_t b, mp_size_t n) -{ - limb_t base = BF_DEC_BASE; - mp_size_t i; - limb_t k, v, a; - - k=b; - for(i=0;i<n;i++) { - v = tab[i]; - a = v - k; - k = a > v; - if (k) - a += base; - 
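/* [editor's note] a runnable check of the invariant-divisor scheme
   above (Granlund-Montgomery), using the d = 10 row of the 64-bit
   mp_pow_div table and GCC/Clang __int128 for the high multiply, plus
   a small demo of the branchless carry detection used by mp_add_dec: */

#include <stdint.h>
#include <assert.h>

static uint64_t mulhi(uint64_t a, uint64_t b)
{
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

static uint64_t fast_udiv10(uint64_t a)
{
    /* { m1, shift1, shift2 } = { 0x999999999999999a, 1, 3 } above */
    uint64_t t1 = mulhi(0x999999999999999aULL, a);
    uint64_t t0 = (a - t1) >> 1;
    return (t1 + t0) >> 3;
}

static void carry_demo(void)
{
    /* mp_add_dec computes a = v + b + k - base in wrapping unsigned
       arithmetic; "a <= v" is then exactly the carry-out bit */
    const uint64_t base = 10;             /* stand-in for 10^19 */
    uint64_t v = 7, b = 5, k = 0;
    uint64_t a = v + b + k - base;        /* 2; would wrap if no carry */
    k = (a <= v);
    if (!k)
        a += base;
    assert(k == 1 && a == 2);             /* 7 + 5 = 1*10 + 2 */
}

int main(void)
{
    for (uint64_t a = 0; a < 1000000; a += 7)
        assert(fast_udiv10(a) == a / 10);
    carry_demo();
    return 0;
}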
tab[i]=a; - if (k == 0) - break; - } - return k; -} - -/* taba[] = taba[] * b + l. 0 <= b, l <= base - 1. Return the high carry */ -limb_t mp_mul1_dec(limb_t *tabr, const limb_t *taba, mp_size_t n, - limb_t b, limb_t l) -{ - mp_size_t i; - limb_t t0, t1, r; - - for(i = 0; i < n; i++) { - muldq(t1, t0, taba[i], b); - adddq(t1, t0, 0, l); - divdq_base(l, r, t1, t0); - tabr[i] = r; - } - return l; -} - -/* tabr[] += taba[] * b. 0 <= b <= base - 1. Return the value to add - to the high word */ -limb_t mp_add_mul1_dec(limb_t *tabr, const limb_t *taba, mp_size_t n, - limb_t b) -{ - mp_size_t i; - limb_t l, t0, t1, r; - - l = 0; - for(i = 0; i < n; i++) { - muldq(t1, t0, taba[i], b); - adddq(t1, t0, 0, l); - adddq(t1, t0, 0, tabr[i]); - divdq_base(l, r, t1, t0); - tabr[i] = r; - } - return l; -} - -/* tabr[] -= taba[] * b. 0 <= b <= base - 1. Return the value to - substract to the high word. */ -limb_t mp_sub_mul1_dec(limb_t *tabr, const limb_t *taba, mp_size_t n, - limb_t b) -{ - limb_t base = BF_DEC_BASE; - mp_size_t i; - limb_t l, t0, t1, r, a, v, c; - - /* XXX: optimize */ - l = 0; - for(i = 0; i < n; i++) { - muldq(t1, t0, taba[i], b); - adddq(t1, t0, 0, l); - divdq_base(l, r, t1, t0); - v = tabr[i]; - a = v - r; - c = a > v; - if (c) - a += base; - /* never bigger than base because r = 0 when l = base - 1 */ - l += c; - tabr[i] = a; - } - return l; -} - -/* size of the result : op1_size + op2_size. */ -void mp_mul_basecase_dec(limb_t *result, - const limb_t *op1, mp_size_t op1_size, - const limb_t *op2, mp_size_t op2_size) -{ - mp_size_t i; - limb_t r; - - result[op1_size] = mp_mul1_dec(result, op1, op1_size, op2[0], 0); - - for(i=1;i<op2_size;i++) { - r = mp_add_mul1_dec(result + i, op1, op1_size, op2[i]); - result[i + op1_size] = r; - } -} - -/* taba[] = (taba[] + r*base^na) / b. 0 <= b < base. 0 <= r < - b. Return the remainder. 
*/ -limb_t mp_div1_dec(limb_t *tabr, const limb_t *taba, mp_size_t na, - limb_t b, limb_t r) -{ - limb_t base = BF_DEC_BASE; - mp_size_t i; - limb_t t0, t1, q; - int shift; - -#if (BF_DEC_BASE % 2) == 0 - if (b == 2) { - limb_t base_div2; - /* Note: only works if base is even */ - base_div2 = base >> 1; - if (r) - r = base_div2; - for(i = na - 1; i >= 0; i--) { - t0 = taba[i]; - tabr[i] = (t0 >> 1) + r; - r = 0; - if (t0 & 1) - r = base_div2; - } - if (r) - r = 1; - } else -#endif - if (na >= UDIV1NORM_THRESHOLD) { - shift = clz(b); - if (shift == 0) { - /* normalized case: b >= 2^(LIMB_BITS-1) */ - limb_t b_inv; - b_inv = udiv1norm_init(b); - for(i = na - 1; i >= 0; i--) { - muldq(t1, t0, r, base); - adddq(t1, t0, 0, taba[i]); - q = udiv1norm(&r, t1, t0, b, b_inv); - tabr[i] = q; - } - } else { - limb_t b_inv; - b <<= shift; - b_inv = udiv1norm_init(b); - for(i = na - 1; i >= 0; i--) { - muldq(t1, t0, r, base); - adddq(t1, t0, 0, taba[i]); - t1 = (t1 << shift) | (t0 >> (LIMB_BITS - shift)); - t0 <<= shift; - q = udiv1norm(&r, t1, t0, b, b_inv); - r >>= shift; - tabr[i] = q; - } - } - } else { - for(i = na - 1; i >= 0; i--) { - muldq(t1, t0, r, base); - adddq(t1, t0, 0, taba[i]); - divdq(q, r, t1, t0, b); - tabr[i] = q; - } - } - return r; -} - -static __maybe_unused void mp_print_str_dec(const char *str, - const limb_t *tab, slimb_t n) -{ - slimb_t i; - printf("%s=", str); - for(i = n - 1; i >= 0; i--) { - if (i != n - 1) - printf("_"); - printf("%0*" PRIu_LIMB, LIMB_DIGITS, tab[i]); - } - printf("\n"); -} - -static __maybe_unused void mp_print_str_h_dec(const char *str, - const limb_t *tab, slimb_t n, - limb_t high) -{ - slimb_t i; - printf("%s=", str); - printf("%0*" PRIu_LIMB, LIMB_DIGITS, high); - for(i = n - 1; i >= 0; i--) { - printf("_"); - printf("%0*" PRIu_LIMB, LIMB_DIGITS, tab[i]); - } - printf("\n"); -} - -//#define DEBUG_DIV_SLOW - -#define DIV_STATIC_ALLOC_LEN 16 - -/* return q = a / b and r = a % b. - - taba[na] must be allocated if tabb1[nb - 1] < B / 2. tabb1[nb - 1] - must be != zero. na must be >= nb. 's' can be NULL if tabb1[nb - 1] - >= B / 2. - - The remainder is is returned in taba and contains nb libms. tabq - contains na - nb + 1 limbs. No overlap is permitted. 
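/* [editor's note] the generic loop of mp_div1_dec above, stripped of
   its b == 2 and normalized-reciprocal fast paths: top-down
   single-limb division, folding the running remainder with
   t = r*base + limb at each step.  Shown with 10^9 limbs in 64-bit
   arithmetic so 't' always fits: */

#include <stdint.h>

uint32_t div1_dec_sketch(uint32_t *tabr, const uint32_t *taba,
                         int na, uint32_t b)
{
    const uint64_t base = 1000000000u;    /* 10^9 */
    uint64_t r = 0;                       /* running remainder, r < b */
    for (int i = na - 1; i >= 0; i--) {
        uint64_t t = r * base + taba[i];  /* < 10^18 + 10^9, fits */
        tabr[i] = (uint32_t)(t / b);
        r = t % b;
    }
    return (uint32_t)r;
}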
- - Running time of the standard method: (na - nb + 1) * nb - Return 0 if OK, -1 if memory alloc error -*/ -/* XXX: optimize */ -static int mp_div_dec(bf_context_t *s, limb_t *tabq, - limb_t *taba, mp_size_t na, - const limb_t *tabb1, mp_size_t nb) -{ - limb_t base = BF_DEC_BASE; - limb_t r, mult, t0, t1, a, c, q, v, *tabb; - mp_size_t i, j; - limb_t static_tabb[DIV_STATIC_ALLOC_LEN]; - -#ifdef DEBUG_DIV_SLOW - mp_print_str_dec("a", taba, na); - mp_print_str_dec("b", tabb1, nb); -#endif - - /* normalize tabb */ - r = tabb1[nb - 1]; - assert(r != 0); - i = na - nb; - if (r >= BF_DEC_BASE / 2) { - mult = 1; - tabb = (limb_t *)tabb1; - q = 1; - for(j = nb - 1; j >= 0; j--) { - if (taba[i + j] != tabb[j]) { - if (taba[i + j] < tabb[j]) - q = 0; - break; - } - } - tabq[i] = q; - if (q) { - mp_sub_dec(taba + i, taba + i, tabb, nb, 0); - } - i--; - } else { - mult = base / (r + 1); - if (likely(nb <= DIV_STATIC_ALLOC_LEN)) { - tabb = static_tabb; - } else { - tabb = bf_malloc(s, sizeof(limb_t) * nb); - if (!tabb) - return -1; - } - mp_mul1_dec(tabb, tabb1, nb, mult, 0); - taba[na] = mp_mul1_dec(taba, taba, na, mult, 0); - } - -#ifdef DEBUG_DIV_SLOW - printf("mult=" FMT_LIMB "\n", mult); - mp_print_str_dec("a_norm", taba, na + 1); - mp_print_str_dec("b_norm", tabb, nb); -#endif - - for(; i >= 0; i--) { - if (unlikely(taba[i + nb] >= tabb[nb - 1])) { - /* XXX: check if it is really possible */ - q = base - 1; - } else { - muldq(t1, t0, taba[i + nb], base); - adddq(t1, t0, 0, taba[i + nb - 1]); - divdq(q, r, t1, t0, tabb[nb - 1]); - } - // printf("i=%d q1=%ld\n", i, q); - - r = mp_sub_mul1_dec(taba + i, tabb, nb, q); - // mp_dump("r1", taba + i, nb, bd); - // printf("r2=%ld\n", r); - - v = taba[i + nb]; - a = v - r; - c = a > v; - if (c) - a += base; - taba[i + nb] = a; - - if (c != 0) { - /* negative result */ - for(;;) { - q--; - c = mp_add_dec(taba + i, taba + i, tabb, nb, 0); - /* propagate carry and test if positive result */ - if (c != 0) { - if (++taba[i + nb] == base) { - break; - } - } - } - } - tabq[i] = q; - } - -#ifdef DEBUG_DIV_SLOW - mp_print_str_dec("q", tabq, na - nb + 1); - mp_print_str_dec("r", taba, nb); -#endif - - /* remove the normalization */ - if (mult != 1) { - mp_div1_dec(taba, taba, nb, mult, 0); - if (unlikely(tabb != static_tabb)) - bf_free(s, tabb); - } - return 0; -} - -/* divide by 10^shift */ -static limb_t mp_shr_dec(limb_t *tab_r, const limb_t *tab, mp_size_t n, - limb_t shift, limb_t high) -{ - mp_size_t i; - limb_t l, a, q, r; - - assert(shift >= 1 && shift < LIMB_DIGITS); - l = high; - for(i = n - 1; i >= 0; i--) { - a = tab[i]; - fast_shr_rem_dec(q, r, a, shift); - tab_r[i] = q + l * mp_pow_dec[LIMB_DIGITS - shift]; - l = r; - } - return l; -} - -/* multiply by 10^shift */ -static limb_t mp_shl_dec(limb_t *tab_r, const limb_t *tab, mp_size_t n, - limb_t shift, limb_t low) -{ - mp_size_t i; - limb_t l, a, q, r; - - assert(shift >= 1 && shift < LIMB_DIGITS); - l = low; - for(i = 0; i < n; i++) { - a = tab[i]; - fast_shr_rem_dec(q, r, a, LIMB_DIGITS - shift); - tab_r[i] = r * mp_pow_dec[shift] + l; - l = q; - } - return l; -} - -static limb_t mp_sqrtrem2_dec(limb_t *tabs, limb_t *taba) -{ - int k; - dlimb_t a, b, r; - limb_t taba1[2], s, r0, r1; - - /* convert to binary and normalize */ - a = (dlimb_t)taba[1] * BF_DEC_BASE + taba[0]; - k = clz(a >> LIMB_BITS) & ~1; - b = a << k; - taba1[0] = b; - taba1[1] = b >> LIMB_BITS; - mp_sqrtrem2(&s, taba1); - s >>= (k >> 1); - /* convert the remainder back to decimal */ - r = a - (dlimb_t)s * (dlimb_t)s; - divdq_base(r1, 
r0, r >> LIMB_BITS, r); - taba[0] = r0; - tabs[0] = s; - return r1; -} - -//#define DEBUG_SQRTREM_DEC - -/* tmp_buf must contain (n / 2 + 1 limbs) */ -static limb_t mp_sqrtrem_rec_dec(limb_t *tabs, limb_t *taba, limb_t n, - limb_t *tmp_buf) -{ - limb_t l, h, rh, ql, qh, c, i; - - if (n == 1) - return mp_sqrtrem2_dec(tabs, taba); -#ifdef DEBUG_SQRTREM_DEC - mp_print_str_dec("a", taba, 2 * n); -#endif - l = n / 2; - h = n - l; - qh = mp_sqrtrem_rec_dec(tabs + l, taba + 2 * l, h, tmp_buf); -#ifdef DEBUG_SQRTREM_DEC - mp_print_str_dec("s1", tabs + l, h); - mp_print_str_h_dec("r1", taba + 2 * l, h, qh); - mp_print_str_h_dec("r2", taba + l, n, qh); -#endif - - /* the remainder is in taba + 2 * l. Its high bit is in qh */ - if (qh) { - mp_sub_dec(taba + 2 * l, taba + 2 * l, tabs + l, h, 0); - } - /* instead of dividing by 2*s, divide by s (which is normalized) - and update q and r */ - mp_div_dec(NULL, tmp_buf, taba + l, n, tabs + l, h); - qh += tmp_buf[l]; - for(i = 0; i < l; i++) - tabs[i] = tmp_buf[i]; - ql = mp_div1_dec(tabs, tabs, l, 2, qh & 1); - qh = qh >> 1; /* 0 or 1 */ - if (ql) - rh = mp_add_dec(taba + l, taba + l, tabs + l, h, 0); - else - rh = 0; -#ifdef DEBUG_SQRTREM_DEC - mp_print_str_h_dec("q", tabs, l, qh); - mp_print_str_h_dec("u", taba + l, h, rh); -#endif - - mp_add_ui_dec(tabs + l, qh, h); -#ifdef DEBUG_SQRTREM_DEC - mp_print_str_dec("s2", tabs, n); -#endif - - /* q = qh, tabs[l - 1 ... 0], r = taba[n - 1 ... l] */ - /* subtract q^2. if qh = 1 then q = B^l, so we can take shortcuts */ - if (qh) { - c = qh; - } else { - mp_mul_basecase_dec(taba + n, tabs, l, tabs, l); - c = mp_sub_dec(taba, taba, taba + n, 2 * l, 0); - } - rh -= mp_sub_ui_dec(taba + 2 * l, c, n - 2 * l); - if ((slimb_t)rh < 0) { - mp_sub_ui_dec(tabs, 1, n); - rh += mp_add_mul1_dec(taba, tabs, n, 2); - rh += mp_add_ui_dec(taba, 1, n); - } - return rh; -} - -/* 'taba' has 2*n limbs with n >= 1 and taba[2*n-1] >= B/4. Return (s, - r) with s=floor(sqrt(a)) and r=a-s^2. 0 <= r <= 2 * s. tabs has n - limbs. r is returned in the lower n limbs of taba. Its r[n] is the - returned value of the function. 
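/* [editor's note] the recursion above is the "Karatsuba square root"
   (Zimmermann): recurse on the high half, derive the low half of the
   root from one division, then fix up a possibly negative remainder.
   For contrast, a simple bit-by-bit base case computing
   s = floor(sqrt(a)) and r = a - s^2 (so 0 <= r <= 2s) on one 64-bit
   word: */

#include <stdint.h>

uint64_t isqrtrem64(uint64_t a, uint64_t *rem)
{
    uint64_t s = 0;
    for (int k = 31; k >= 0; k--) {       /* root has at most 32 bits */
        uint64_t t = s | ((uint64_t)1 << k);
        if (t * t <= a)                   /* no overflow: t < 2^32 */
            s = t;
    }
    *rem = a - s * s;
    return s;
}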
*/ -int mp_sqrtrem_dec(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n) -{ - limb_t tmp_buf1[8]; - limb_t *tmp_buf; - mp_size_t n2; - n2 = n / 2 + 1; - if (n2 <= countof(tmp_buf1)) { - tmp_buf = tmp_buf1; - } else { - tmp_buf = bf_malloc(s, sizeof(limb_t) * n2); - if (!tmp_buf) - return -1; - } - taba[n] = mp_sqrtrem_rec_dec(tabs, taba, n, tmp_buf); - if (tmp_buf != tmp_buf1) - bf_free(s, tmp_buf); - return 0; -} - -/* return the number of leading zero digits, from 0 to LIMB_DIGITS */ -static int clz_dec(limb_t a) -{ - if (a == 0) - return LIMB_DIGITS; - switch(LIMB_BITS - 1 - clz(a)) { - case 0: /* 1-1 */ - return LIMB_DIGITS - 1; - case 1: /* 2-3 */ - return LIMB_DIGITS - 1; - case 2: /* 4-7 */ - return LIMB_DIGITS - 1; - case 3: /* 8-15 */ - if (a < 10) - return LIMB_DIGITS - 1; - else - return LIMB_DIGITS - 2; - case 4: /* 16-31 */ - return LIMB_DIGITS - 2; - case 5: /* 32-63 */ - return LIMB_DIGITS - 2; - case 6: /* 64-127 */ - if (a < 100) - return LIMB_DIGITS - 2; - else - return LIMB_DIGITS - 3; - case 7: /* 128-255 */ - return LIMB_DIGITS - 3; - case 8: /* 256-511 */ - return LIMB_DIGITS - 3; - case 9: /* 512-1023 */ - if (a < 1000) - return LIMB_DIGITS - 3; - else - return LIMB_DIGITS - 4; - case 10: /* 1024-2047 */ - return LIMB_DIGITS - 4; - case 11: /* 2048-4095 */ - return LIMB_DIGITS - 4; - case 12: /* 4096-8191 */ - return LIMB_DIGITS - 4; - case 13: /* 8192-16383 */ - if (a < 10000) - return LIMB_DIGITS - 4; - else - return LIMB_DIGITS - 5; - case 14: /* 16384-32767 */ - return LIMB_DIGITS - 5; - case 15: /* 32768-65535 */ - return LIMB_DIGITS - 5; - case 16: /* 65536-131071 */ - if (a < 100000) - return LIMB_DIGITS - 5; - else - return LIMB_DIGITS - 6; - case 17: /* 131072-262143 */ - return LIMB_DIGITS - 6; - case 18: /* 262144-524287 */ - return LIMB_DIGITS - 6; - case 19: /* 524288-1048575 */ - if (a < 1000000) - return LIMB_DIGITS - 6; - else - return LIMB_DIGITS - 7; - case 20: /* 1048576-2097151 */ - return LIMB_DIGITS - 7; - case 21: /* 2097152-4194303 */ - return LIMB_DIGITS - 7; - case 22: /* 4194304-8388607 */ - return LIMB_DIGITS - 7; - case 23: /* 8388608-16777215 */ - if (a < 10000000) - return LIMB_DIGITS - 7; - else - return LIMB_DIGITS - 8; - case 24: /* 16777216-33554431 */ - return LIMB_DIGITS - 8; - case 25: /* 33554432-67108863 */ - return LIMB_DIGITS - 8; - case 26: /* 67108864-134217727 */ - if (a < 100000000) - return LIMB_DIGITS - 8; - else - return LIMB_DIGITS - 9; -#if LIMB_BITS == 64 - case 27: /* 134217728-268435455 */ - return LIMB_DIGITS - 9; - case 28: /* 268435456-536870911 */ - return LIMB_DIGITS - 9; - case 29: /* 536870912-1073741823 */ - if (a < 1000000000) - return LIMB_DIGITS - 9; - else - return LIMB_DIGITS - 10; - case 30: /* 1073741824-2147483647 */ - return LIMB_DIGITS - 10; - case 31: /* 2147483648-4294967295 */ - return LIMB_DIGITS - 10; - case 32: /* 4294967296-8589934591 */ - return LIMB_DIGITS - 10; - case 33: /* 8589934592-17179869183 */ - if (a < 10000000000) - return LIMB_DIGITS - 10; - else - return LIMB_DIGITS - 11; - case 34: /* 17179869184-34359738367 */ - return LIMB_DIGITS - 11; - case 35: /* 34359738368-68719476735 */ - return LIMB_DIGITS - 11; - case 36: /* 68719476736-137438953471 */ - if (a < 100000000000) - return LIMB_DIGITS - 11; - else - return LIMB_DIGITS - 12; - case 37: /* 137438953472-274877906943 */ - return LIMB_DIGITS - 12; - case 38: /* 274877906944-549755813887 */ - return LIMB_DIGITS - 12; - case 39: /* 549755813888-1099511627775 */ - if (a < 1000000000000) - return LIMB_DIGITS - 12; - else - 
return LIMB_DIGITS - 13; - case 40: /* 1099511627776-2199023255551 */ - return LIMB_DIGITS - 13; - case 41: /* 2199023255552-4398046511103 */ - return LIMB_DIGITS - 13; - case 42: /* 4398046511104-8796093022207 */ - return LIMB_DIGITS - 13; - case 43: /* 8796093022208-17592186044415 */ - if (a < 10000000000000) - return LIMB_DIGITS - 13; - else - return LIMB_DIGITS - 14; - case 44: /* 17592186044416-35184372088831 */ - return LIMB_DIGITS - 14; - case 45: /* 35184372088832-70368744177663 */ - return LIMB_DIGITS - 14; - case 46: /* 70368744177664-140737488355327 */ - if (a < 100000000000000) - return LIMB_DIGITS - 14; - else - return LIMB_DIGITS - 15; - case 47: /* 140737488355328-281474976710655 */ - return LIMB_DIGITS - 15; - case 48: /* 281474976710656-562949953421311 */ - return LIMB_DIGITS - 15; - case 49: /* 562949953421312-1125899906842623 */ - if (a < 1000000000000000) - return LIMB_DIGITS - 15; - else - return LIMB_DIGITS - 16; - case 50: /* 1125899906842624-2251799813685247 */ - return LIMB_DIGITS - 16; - case 51: /* 2251799813685248-4503599627370495 */ - return LIMB_DIGITS - 16; - case 52: /* 4503599627370496-9007199254740991 */ - return LIMB_DIGITS - 16; - case 53: /* 9007199254740992-18014398509481983 */ - if (a < 10000000000000000) - return LIMB_DIGITS - 16; - else - return LIMB_DIGITS - 17; - case 54: /* 18014398509481984-36028797018963967 */ - return LIMB_DIGITS - 17; - case 55: /* 36028797018963968-72057594037927935 */ - return LIMB_DIGITS - 17; - case 56: /* 72057594037927936-144115188075855871 */ - if (a < 100000000000000000) - return LIMB_DIGITS - 17; - else - return LIMB_DIGITS - 18; - case 57: /* 144115188075855872-288230376151711743 */ - return LIMB_DIGITS - 18; - case 58: /* 288230376151711744-576460752303423487 */ - return LIMB_DIGITS - 18; - case 59: /* 576460752303423488-1152921504606846975 */ - if (a < 1000000000000000000) - return LIMB_DIGITS - 18; - else - return LIMB_DIGITS - 19; -#endif - default: - return 0; - } -} - -/* for debugging */ -void bfdec_print_str(const char *str, const bfdec_t *a) -{ - slimb_t i; - printf("%s=", str); - - if (a->expn == BF_EXP_NAN) { - printf("NaN"); - } else { - if (a->sign) - putchar('-'); - if (a->expn == BF_EXP_ZERO) { - putchar('0'); - } else if (a->expn == BF_EXP_INF) { - printf("Inf"); - } else { - printf("0."); - for(i = a->len - 1; i >= 0; i--) - printf("%0*" PRIu_LIMB, LIMB_DIGITS, a->tab[i]); - printf("e%" PRId_LIMB, a->expn); - } - } - printf("\n"); -} - -/* return != 0 if one digit between 0 and bit_pos inclusive is not zero. */ -static inline limb_t scan_digit_nz(const bfdec_t *r, slimb_t bit_pos) -{ - slimb_t pos; - limb_t v, q; - int shift; - - if (bit_pos < 0) - return 0; - pos = (limb_t)bit_pos / LIMB_DIGITS; - shift = (limb_t)bit_pos % LIMB_DIGITS; - fast_shr_rem_dec(q, v, r->tab[pos], shift + 1); - (void)q; - if (v != 0) - return 1; - pos--; - while (pos >= 0) { - if (r->tab[pos] != 0) - return 1; - pos--; - } - return 0; -} - -static limb_t get_digit(const limb_t *tab, limb_t len, slimb_t pos) -{ - slimb_t i; - int shift; - i = floor_div(pos, LIMB_DIGITS); - if (i < 0 || i >= len) - return 0; - shift = pos - i * LIMB_DIGITS; - return fast_shr_dec(tab[i], shift) % 10; -} - -/* return the addend for rounding. 
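/* [editor's note] the decision table implemented by bfdec_get_rnd_add
   below, reduced to the round-to-nearest case: the digit at the
   rounding position decides, a nonzero sticky part resolves the
   exact-half case upward, and a true tie goes to an even kept digit: */

int rnd_add_one_rndn(int digit1, int sticky, int last_kept_digit)
{
    if (digit1 != 5)
        return digit1 > 5;
    if (sticky)                           /* strictly above half way */
        return 1;
    return last_kept_digit & 1;           /* tie: round to even */
}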
Note that prec can be <= 0 for bf_rint() */ -static int bfdec_get_rnd_add(int *pret, const bfdec_t *r, limb_t l, - slimb_t prec, int rnd_mode) -{ - int add_one, inexact; - limb_t digit1, digit0; - - // bfdec_print_str("get_rnd_add", r); - if (rnd_mode == BF_RNDF) { - digit0 = 1; /* faithful rounding does not honor the INEXACT flag */ - } else { - /* starting limb for bit 'prec + 1' */ - digit0 = scan_digit_nz(r, l * LIMB_DIGITS - 1 - bf_max(0, prec + 1)); - } - - /* get the digit at 'prec' */ - digit1 = get_digit(r->tab, l, l * LIMB_DIGITS - 1 - prec); - inexact = (digit1 | digit0) != 0; - - add_one = 0; - switch(rnd_mode) { - case BF_RNDZ: - break; - case BF_RNDN: - if (digit1 == 5) { - if (digit0) { - add_one = 1; - } else { - /* round to even */ - add_one = - get_digit(r->tab, l, l * LIMB_DIGITS - 1 - (prec - 1)) & 1; - } - } else if (digit1 > 5) { - add_one = 1; - } - break; - case BF_RNDD: - case BF_RNDU: - if (r->sign == (rnd_mode == BF_RNDD)) - add_one = inexact; - break; - case BF_RNDNA: - case BF_RNDF: - add_one = (digit1 >= 5); - break; - case BF_RNDA: - add_one = inexact; - break; - default: - abort(); - } - - if (inexact) - *pret |= BF_ST_INEXACT; - return add_one; -} - -/* round to prec1 bits assuming 'r' is non zero and finite. 'r' is - assumed to have length 'l' (1 <= l <= r->len). prec1 can be - BF_PREC_INF. BF_FLAG_SUBNORMAL is not supported. Cannot fail with - BF_ST_MEM_ERROR. - */ -static int __bfdec_round(bfdec_t *r, limb_t prec1, bf_flags_t flags, limb_t l) -{ - int shift, add_one, rnd_mode, ret; - slimb_t i, bit_pos, pos, e_min, e_max, e_range, prec; - - /* XXX: align to IEEE 754 2008 for decimal numbers ? */ - e_range = (limb_t)1 << (bf_get_exp_bits(flags) - 1); - e_min = -e_range + 3; - e_max = e_range; - - if (flags & BF_FLAG_RADPNT_PREC) { - /* 'prec' is the precision after the decimal point */ - if (prec1 != BF_PREC_INF) - prec = r->expn + prec1; - else - prec = prec1; - } else if (unlikely(r->expn < e_min) && (flags & BF_FLAG_SUBNORMAL)) { - /* restrict the precision in case of potentially subnormal - result */ - assert(prec1 != BF_PREC_INF); - prec = prec1 - (e_min - r->expn); - } else { - prec = prec1; - } - - /* round to prec bits */ - rnd_mode = flags & BF_RND_MASK; - ret = 0; - add_one = bfdec_get_rnd_add(&ret, r, l, prec, rnd_mode); - - if (prec <= 0) { - if (add_one) { - bfdec_resize(r, 1); /* cannot fail because r is non zero */ - r->tab[0] = BF_DEC_BASE / 10; - r->expn += 1 - prec; - ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT; - return ret; - } else { - goto underflow; - } - } else if (add_one) { - limb_t carry; - - /* add one starting at digit 'prec - 1' */ - bit_pos = l * LIMB_DIGITS - 1 - (prec - 1); - pos = bit_pos / LIMB_DIGITS; - carry = mp_pow_dec[bit_pos % LIMB_DIGITS]; - carry = mp_add_ui_dec(r->tab + pos, carry, l - pos); - if (carry) { - /* shift right by one digit */ - mp_shr_dec(r->tab + pos, r->tab + pos, l - pos, 1, 1); - r->expn++; - } - } - - /* check underflow */ - if (unlikely(r->expn < e_min)) { - if (flags & BF_FLAG_SUBNORMAL) { - /* if inexact, also set the underflow flag */ - if (ret & BF_ST_INEXACT) - ret |= BF_ST_UNDERFLOW; - } else { - underflow: - bfdec_set_zero(r, r->sign); - ret |= BF_ST_UNDERFLOW | BF_ST_INEXACT; - return ret; - } - } - - /* check overflow */ - if (unlikely(r->expn > e_max)) { - bfdec_set_inf(r, r->sign); - ret |= BF_ST_OVERFLOW | BF_ST_INEXACT; - return ret; - } - - /* keep the bits starting at 'prec - 1' */ - bit_pos = l * LIMB_DIGITS - 1 - (prec - 1); - i = floor_div(bit_pos, LIMB_DIGITS); - if (i >= 0) { - 
shift = smod(bit_pos, LIMB_DIGITS); - if (shift != 0) { - r->tab[i] = fast_shr_dec(r->tab[i], shift) * - mp_pow_dec[shift]; - } - } else { - i = 0; - } - /* remove trailing zeros */ - while (r->tab[i] == 0) - i++; - if (i > 0) { - l -= i; - memmove(r->tab, r->tab + i, l * sizeof(limb_t)); - } - bfdec_resize(r, l); /* cannot fail */ - return ret; -} - -/* Cannot fail with BF_ST_MEM_ERROR. */ -int bfdec_round(bfdec_t *r, limb_t prec, bf_flags_t flags) -{ - if (r->len == 0) - return 0; - return __bfdec_round(r, prec, flags, r->len); -} - -/* 'r' must be a finite number. Cannot fail with BF_ST_MEM_ERROR. */ -int bfdec_normalize_and_round(bfdec_t *r, limb_t prec1, bf_flags_t flags) -{ - limb_t l, v; - int shift, ret; - - // bfdec_print_str("bf_renorm", r); - l = r->len; - while (l > 0 && r->tab[l - 1] == 0) - l--; - if (l == 0) { - /* zero */ - r->expn = BF_EXP_ZERO; - bfdec_resize(r, 0); /* cannot fail */ - ret = 0; - } else { - r->expn -= (r->len - l) * LIMB_DIGITS; - /* shift to have the MSB set to '1' */ - v = r->tab[l - 1]; - shift = clz_dec(v); - if (shift != 0) { - mp_shl_dec(r->tab, r->tab, l, shift, 0); - r->expn -= shift; - } - ret = __bfdec_round(r, prec1, flags, l); - } - // bf_print_str("r_final", r); - return ret; -} - -int bfdec_set_ui(bfdec_t *r, uint64_t v) -{ -#if LIMB_BITS == 32 - if (v >= BF_DEC_BASE * BF_DEC_BASE) { - if (bfdec_resize(r, 3)) - goto fail; - r->tab[0] = v % BF_DEC_BASE; - v /= BF_DEC_BASE; - r->tab[1] = v % BF_DEC_BASE; - r->tab[2] = v / BF_DEC_BASE; - r->expn = 3 * LIMB_DIGITS; - } else -#endif - if (v >= BF_DEC_BASE) { - if (bfdec_resize(r, 2)) - goto fail; - r->tab[0] = v % BF_DEC_BASE; - r->tab[1] = v / BF_DEC_BASE; - r->expn = 2 * LIMB_DIGITS; - } else { - if (bfdec_resize(r, 1)) - goto fail; - r->tab[0] = v; - r->expn = LIMB_DIGITS; - } - r->sign = 0; - return bfdec_normalize_and_round(r, BF_PREC_INF, 0); - fail: - bfdec_set_nan(r); - return BF_ST_MEM_ERROR; -} - -int bfdec_set_si(bfdec_t *r, int64_t v) -{ - int ret; - if (v < 0) { - ret = bfdec_set_ui(r, -v); - r->sign = 1; - } else { - ret = bfdec_set_ui(r, v); - } - return ret; -} - -static int bfdec_add_internal(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, bf_flags_t flags, int b_neg) -{ - bf_context_t *s = r->ctx; - int is_sub, cmp_res, a_sign, b_sign, ret; - - a_sign = a->sign; - b_sign = b->sign ^ b_neg; - is_sub = a_sign ^ b_sign; - cmp_res = bfdec_cmpu(a, b); - if (cmp_res < 0) { - const bfdec_t *tmp; - tmp = a; - a = b; - b = tmp; - a_sign = b_sign; /* b_sign is never used later */ - } - /* abs(a) >= abs(b) */ - if (cmp_res == 0 && is_sub && a->expn < BF_EXP_INF) { - /* zero result */ - bfdec_set_zero(r, (flags & BF_RND_MASK) == BF_RNDD); - ret = 0; - } else if (a->len == 0 || b->len == 0) { - ret = 0; - if (a->expn >= BF_EXP_INF) { - if (a->expn == BF_EXP_NAN) { - /* at least one operand is NaN */ - bfdec_set_nan(r); - ret = 0; - } else if (b->expn == BF_EXP_INF && is_sub) { - /* infinities with different signs */ - bfdec_set_nan(r); - ret = BF_ST_INVALID_OP; - } else { - bfdec_set_inf(r, a_sign); - } - } else { - /* at least one zero and not subtract */ - if (bfdec_set(r, a)) - return BF_ST_MEM_ERROR; - r->sign = a_sign; - goto renorm; - } - } else { - slimb_t d, a_offset, b_offset, i, r_len; - limb_t carry; - limb_t *b1_tab; - int b_shift; - mp_size_t b1_len; - - d = a->expn - b->expn; - - /* XXX: not efficient in time and memory if the precision is - not infinite */ - r_len = bf_max(a->len, b->len + (d + LIMB_DIGITS - 1) / LIMB_DIGITS); - if (bfdec_resize(r, r_len)) - goto 
fail; - r->sign = a_sign; - r->expn = a->expn; - - a_offset = r_len - a->len; - for(i = 0; i < a_offset; i++) - r->tab[i] = 0; - for(i = 0; i < a->len; i++) - r->tab[a_offset + i] = a->tab[i]; - - b_shift = d % LIMB_DIGITS; - if (b_shift == 0) { - b1_len = b->len; - b1_tab = (limb_t *)b->tab; - } else { - b1_len = b->len + 1; - b1_tab = bf_malloc(s, sizeof(limb_t) * b1_len); - if (!b1_tab) - goto fail; - b1_tab[0] = mp_shr_dec(b1_tab + 1, b->tab, b->len, b_shift, 0) * - mp_pow_dec[LIMB_DIGITS - b_shift]; - } - b_offset = r_len - (b->len + (d + LIMB_DIGITS - 1) / LIMB_DIGITS); - - if (is_sub) { - carry = mp_sub_dec(r->tab + b_offset, r->tab + b_offset, - b1_tab, b1_len, 0); - if (carry != 0) { - carry = mp_sub_ui_dec(r->tab + b_offset + b1_len, carry, - r_len - (b_offset + b1_len)); - assert(carry == 0); - } - } else { - carry = mp_add_dec(r->tab + b_offset, r->tab + b_offset, - b1_tab, b1_len, 0); - if (carry != 0) { - carry = mp_add_ui_dec(r->tab + b_offset + b1_len, carry, - r_len - (b_offset + b1_len)); - } - if (carry != 0) { - if (bfdec_resize(r, r_len + 1)) { - if (b_shift != 0) - bf_free(s, b1_tab); - goto fail; - } - r->tab[r_len] = 1; - r->expn += LIMB_DIGITS; - } - } - if (b_shift != 0) - bf_free(s, b1_tab); - renorm: - ret = bfdec_normalize_and_round(r, prec, flags); - } - return ret; - fail: - bfdec_set_nan(r); - return BF_ST_MEM_ERROR; -} - -static int __bfdec_add(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags) -{ - return bfdec_add_internal(r, a, b, prec, flags, 0); -} - -static int __bfdec_sub(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags) -{ - return bfdec_add_internal(r, a, b, prec, flags, 1); -} - -int bfdec_add(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_op2((bf_t *)r, (bf_t *)a, (bf_t *)b, prec, flags, - (bf_op2_func_t *)__bfdec_add); -} - -int bfdec_sub(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_op2((bf_t *)r, (bf_t *)a, (bf_t *)b, prec, flags, - (bf_op2_func_t *)__bfdec_sub); -} - -int bfdec_mul(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags) -{ - int ret, r_sign; - - if (a->len < b->len) { - const bfdec_t *tmp = a; - a = b; - b = tmp; - } - r_sign = a->sign ^ b->sign; - /* here b->len <= a->len */ - if (b->len == 0) { - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - bfdec_set_nan(r); - ret = 0; - } else if (a->expn == BF_EXP_INF || b->expn == BF_EXP_INF) { - if ((a->expn == BF_EXP_INF && b->expn == BF_EXP_ZERO) || - (a->expn == BF_EXP_ZERO && b->expn == BF_EXP_INF)) { - bfdec_set_nan(r); - ret = BF_ST_INVALID_OP; - } else { - bfdec_set_inf(r, r_sign); - ret = 0; - } - } else { - bfdec_set_zero(r, r_sign); - ret = 0; - } - } else { - bfdec_t tmp, *r1 = NULL; - limb_t a_len, b_len; - limb_t *a_tab, *b_tab; - - a_len = a->len; - b_len = b->len; - a_tab = a->tab; - b_tab = b->tab; - - if (r == a || r == b) { - bfdec_init(r->ctx, &tmp); - r1 = r; - r = &tmp; - } - if (bfdec_resize(r, a_len + b_len)) { - bfdec_set_nan(r); - ret = BF_ST_MEM_ERROR; - goto done; - } - mp_mul_basecase_dec(r->tab, a_tab, a_len, b_tab, b_len); - r->sign = r_sign; - r->expn = a->expn + b->expn; - ret = bfdec_normalize_and_round(r, prec, flags); - done: - if (r == &tmp) - bfdec_move(r1, &tmp); - } - return ret; -} - -int bfdec_mul_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec, - bf_flags_t flags) -{ - bfdec_t b; - int ret; - bfdec_init(r->ctx, &b); - ret = bfdec_set_si(&b, 
b1); - ret |= bfdec_mul(r, a, &b, prec, flags); - bfdec_delete(&b); - return ret; -} - -int bfdec_add_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec, - bf_flags_t flags) -{ - bfdec_t b; - int ret; - - bfdec_init(r->ctx, &b); - ret = bfdec_set_si(&b, b1); - ret |= bfdec_add(r, a, &b, prec, flags); - bfdec_delete(&b); - return ret; -} - -static int __bfdec_div(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, - limb_t prec, bf_flags_t flags) -{ - int ret, r_sign; - limb_t n, nb, precl; - - r_sign = a->sign ^ b->sign; - if (a->expn >= BF_EXP_INF || b->expn >= BF_EXP_INF) { - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - bfdec_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF && b->expn == BF_EXP_INF) { - bfdec_set_nan(r); - return BF_ST_INVALID_OP; - } else if (a->expn == BF_EXP_INF) { - bfdec_set_inf(r, r_sign); - return 0; - } else { - bfdec_set_zero(r, r_sign); - return 0; - } - } else if (a->expn == BF_EXP_ZERO) { - if (b->expn == BF_EXP_ZERO) { - bfdec_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bfdec_set_zero(r, r_sign); - return 0; - } - } else if (b->expn == BF_EXP_ZERO) { - bfdec_set_inf(r, r_sign); - return BF_ST_DIVIDE_ZERO; - } - - nb = b->len; - if (prec == BF_PREC_INF) { - /* infinite precision: return BF_ST_INVALID_OP if not an exact - result */ - /* XXX: check */ - precl = nb + 1; - } else if (flags & BF_FLAG_RADPNT_PREC) { - /* number of digits after the decimal point */ - /* XXX: check (2 extra digits for rounding + 2 digits) */ - precl = (bf_max(a->expn - b->expn, 0) + 2 + - prec + 2 + LIMB_DIGITS - 1) / LIMB_DIGITS; - } else { - /* number of limbs of the quotient (2 extra digits for rounding) */ - precl = (prec + 2 + LIMB_DIGITS - 1) / LIMB_DIGITS; - } - n = bf_max(a->len, precl); - - { - limb_t *taba, na, i; - slimb_t d; - - na = n + nb; - taba = bf_malloc(r->ctx, (na + 1) * sizeof(limb_t)); - if (!taba) - goto fail; - d = na - a->len; - memset(taba, 0, d * sizeof(limb_t)); - memcpy(taba + d, a->tab, a->len * sizeof(limb_t)); - if (bfdec_resize(r, n + 1)) - goto fail1; - if (mp_div_dec(r->ctx, r->tab, taba, na, b->tab, nb)) { - fail1: - bf_free(r->ctx, taba); - goto fail; - } - /* see if non zero remainder */ - for(i = 0; i < nb; i++) { - if (taba[i] != 0) - break; - } - bf_free(r->ctx, taba); - if (i != nb) { - if (prec == BF_PREC_INF) { - bfdec_set_nan(r); - return BF_ST_INVALID_OP; - } else { - r->tab[0] |= 1; - } - } - r->expn = a->expn - b->expn + LIMB_DIGITS; - r->sign = r_sign; - ret = bfdec_normalize_and_round(r, prec, flags); - } - return ret; - fail: - bfdec_set_nan(r); - return BF_ST_MEM_ERROR; -} - -int bfdec_div(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags) -{ - return bf_op2((bf_t *)r, (bf_t *)a, (bf_t *)b, prec, flags, - (bf_op2_func_t *)__bfdec_div); -} - -/* a and b must be finite numbers with a >= 0 and b > 0. 'q' is the - integer defined as floor(a/b) and r = a - q * b. */ -static void bfdec_tdivremu(bf_context_t *s, bfdec_t *q, bfdec_t *r, - const bfdec_t *a, const bfdec_t *b) -{ - if (bfdec_cmpu(a, b) < 0) { - bfdec_set_ui(q, 0); - bfdec_set(r, a); - } else { - bfdec_div(q, a, b, 0, BF_RNDZ | BF_FLAG_RADPNT_PREC); - bfdec_mul(r, q, b, BF_PREC_INF, BF_RNDZ); - bfdec_sub(r, a, r, BF_PREC_INF, BF_RNDZ); - } -} - -/* division and remainder. - - rnd_mode is the rounding mode for the quotient. The additional - rounding mode BF_RND_EUCLIDIAN is supported. - - 'q' is an integer. 'r' is rounded with prec and flags (prec can be - BF_PREC_INF). 
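/* [editor's note] integer sketch of the quotient adjustment at the
   heart of bfdec_divrem below: compute the truncated (q, r) on
   magnitudes, then bump q by one and pull b out of r when the rounding
   mode asks for it (here the Euclidean mode, which wants r >= 0): */

#include <stdint.h>

void divrem_euclid(int64_t a, int64_t b, int64_t *q, int64_t *r)
{
    *q = a / b;                           /* C division truncates */
    *r = a % b;
    if (*r < 0) {                         /* adjust exactly once */
        *q -= (b > 0) ? 1 : -1;
        *r += (b > 0) ? b : -b;           /* now 0 <= r < |b| */
    }
}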
-*/ -int bfdec_divrem(bfdec_t *q, bfdec_t *r, const bfdec_t *a, const bfdec_t *b, - limb_t prec, bf_flags_t flags, int rnd_mode) -{ - bf_context_t *s = q->ctx; - bfdec_t a1_s, *a1 = &a1_s; - bfdec_t b1_s, *b1 = &b1_s; - bfdec_t r1_s, *r1 = &r1_s; - int q_sign, res; - bool is_ceil, is_rndn; - - assert(q != a && q != b); - assert(r != a && r != b); - assert(q != r); - - if (a->len == 0 || b->len == 0) { - bfdec_set_zero(q, 0); - if (a->expn == BF_EXP_NAN || b->expn == BF_EXP_NAN) { - bfdec_set_nan(r); - return 0; - } else if (a->expn == BF_EXP_INF || b->expn == BF_EXP_ZERO) { - bfdec_set_nan(r); - return BF_ST_INVALID_OP; - } else { - bfdec_set(r, a); - return bfdec_round(r, prec, flags); - } - } - - q_sign = a->sign ^ b->sign; - is_rndn = (rnd_mode == BF_RNDN || rnd_mode == BF_RNDNA); - switch(rnd_mode) { - default: - case BF_RNDZ: - case BF_RNDN: - case BF_RNDNA: - is_ceil = false; - break; - case BF_RNDD: - is_ceil = q_sign; - break; - case BF_RNDU: - is_ceil = q_sign ^ 1; - break; - case BF_RNDA: - is_ceil = true; - break; - case BF_DIVREM_EUCLIDIAN: - is_ceil = a->sign; - break; - } - - a1->expn = a->expn; - a1->tab = a->tab; - a1->len = a->len; - a1->sign = 0; - - b1->expn = b->expn; - b1->tab = b->tab; - b1->len = b->len; - b1->sign = 0; - - // bfdec_print_str("a1", a1); - // bfdec_print_str("b1", b1); - /* XXX: could improve to avoid having a large 'q' */ - bfdec_tdivremu(s, q, r, a1, b1); - if (bfdec_is_nan(q) || bfdec_is_nan(r)) - goto fail; - // bfdec_print_str("q", q); - // bfdec_print_str("r", r); - - if (r->len != 0) { - if (is_rndn) { - bfdec_init(s, r1); - if (bfdec_set(r1, r)) - goto fail; - if (bfdec_mul_si(r1, r1, 2, BF_PREC_INF, BF_RNDZ)) { - bfdec_delete(r1); - goto fail; - } - res = bfdec_cmpu(r1, b); - bfdec_delete(r1); - if (res > 0 || - (res == 0 && - (rnd_mode == BF_RNDNA || - (get_digit(q->tab, q->len, q->len * LIMB_DIGITS - q->expn) & 1) != 0))) { - goto do_sub_r; - } - } else if (is_ceil) { - do_sub_r: - res = bfdec_add_si(q, q, 1, BF_PREC_INF, BF_RNDZ); - res |= bfdec_sub(r, r, b1, BF_PREC_INF, BF_RNDZ); - if (res & BF_ST_MEM_ERROR) - goto fail; - } - } - - r->sign ^= a->sign; - q->sign = q_sign; - return bfdec_round(r, prec, flags); - fail: - bfdec_set_nan(q); - bfdec_set_nan(r); - return BF_ST_MEM_ERROR; -} - -int bfdec_rem(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags, int rnd_mode) -{ - bfdec_t q_s, *q = &q_s; - int ret; - - bfdec_init(r->ctx, q); - ret = bfdec_divrem(q, r, a, b, prec, flags, rnd_mode); - bfdec_delete(q); - return ret; -} - -/* convert to integer (infinite precision) */ -int bfdec_rint(bfdec_t *r, int rnd_mode) -{ - return bfdec_round(r, 0, rnd_mode | BF_FLAG_RADPNT_PREC); -} - -int bfdec_sqrt(bfdec_t *r, const bfdec_t *a, limb_t prec, bf_flags_t flags) -{ - bf_context_t *s = a->ctx; - int ret, k; - limb_t *a1, v; - slimb_t n, n1, prec1; - limb_t res; - - assert(r != a); - - if (a->len == 0) { - if (a->expn == BF_EXP_NAN) { - bfdec_set_nan(r); - } else if (a->expn == BF_EXP_INF && a->sign) { - goto invalid_op; - } else { - bfdec_set(r, a); - } - ret = 0; - } else if (a->sign || prec == BF_PREC_INF) { - invalid_op: - bfdec_set_nan(r); - ret = BF_ST_INVALID_OP; - } else { - if (flags & BF_FLAG_RADPNT_PREC) { - prec1 = bf_max(floor_div(a->expn + 1, 2) + prec, 1); - } else { - prec1 = prec; - } - /* convert the mantissa to an integer with at least 2 * - prec + 4 digits */ - n = (2 * (prec1 + 2) + 2 * LIMB_DIGITS - 1) / (2 * LIMB_DIGITS); - if (bfdec_resize(r, n)) - goto fail; - a1 = bf_malloc(s, sizeof(limb_t) * 
2 * n); - if (!a1) - goto fail; - n1 = bf_min(2 * n, a->len); - memset(a1, 0, (2 * n - n1) * sizeof(limb_t)); - memcpy(a1 + 2 * n - n1, a->tab + a->len - n1, n1 * sizeof(limb_t)); - if (a->expn & 1) { - res = mp_shr_dec(a1, a1, 2 * n, 1, 0); - } else { - res = 0; - } - /* normalize so that a1 >= B^(2*n)/4. No need for n = 1 - because mp_sqrtrem2_dec already does it */ - k = 0; - if (n > 1) { - v = a1[2 * n - 1]; - while (v < BF_DEC_BASE / 4) { - k++; - v *= 4; - } - if (k != 0) - mp_mul1_dec(a1, a1, 2 * n, 1 << (2 * k), 0); - } - if (mp_sqrtrem_dec(s, r->tab, a1, n)) { - bf_free(s, a1); - goto fail; - } - if (k != 0) - mp_div1_dec(r->tab, r->tab, n, 1 << k, 0); - if (!res) { - res = mp_scan_nz(a1, n + 1); - } - bf_free(s, a1); - if (!res) { - res = mp_scan_nz(a->tab, a->len - n1); - } - if (res != 0) - r->tab[0] |= 1; - r->sign = 0; - r->expn = (a->expn + 1) >> 1; - ret = bfdec_round(r, prec, flags); - } - return ret; - fail: - bfdec_set_nan(r); - return BF_ST_MEM_ERROR; -} - -/* The rounding mode is always BF_RNDZ. Return BF_ST_OVERFLOW if there - is an overflow and 0 otherwise. No memory error is possible. */ -int bfdec_get_int32(int *pres, const bfdec_t *a) -{ - uint32_t v; - int ret; - if (a->expn >= BF_EXP_INF) { - ret = 0; - if (a->expn == BF_EXP_INF) { - v = (uint32_t)INT32_MAX + a->sign; - /* XXX: return overflow ? */ - } else { - v = INT32_MAX; - } - } else if (a->expn <= 0) { - v = 0; - ret = 0; - } else if (a->expn <= 9) { - v = fast_shr_dec(a->tab[a->len - 1], LIMB_DIGITS - a->expn); - if (a->sign) - v = -v; - ret = 0; - } else if (a->expn == 10) { - uint64_t v1; - uint32_t v_max; -#if LIMB_BITS == 64 - v1 = fast_shr_dec(a->tab[a->len - 1], LIMB_DIGITS - a->expn); -#else - v1 = (uint64_t)a->tab[a->len - 1] * 10 + - get_digit(a->tab, a->len, (a->len - 1) * LIMB_DIGITS - 1); -#endif - v_max = (uint32_t)INT32_MAX + a->sign; - if (v1 > v_max) { - v = v_max; - ret = BF_ST_OVERFLOW; - } else { - v = v1; - if (a->sign) - v = -v; - ret = 0; - } - } else { - v = (uint32_t)INT32_MAX + a->sign; - ret = BF_ST_OVERFLOW; - } - *pres = v; - return ret; -} - -/* raise to an integer power with infinite precision */ -int bfdec_pow_ui(bfdec_t *r, const bfdec_t *a, limb_t b) -{ - int ret, n_bits, i; - - assert(r != a); - if (b == 0) - return bfdec_set_ui(r, 1); - ret = bfdec_set(r, a); - n_bits = LIMB_BITS - clz(b); - for(i = n_bits - 2; i >= 0; i--) { - ret |= bfdec_mul(r, r, r, BF_PREC_INF, BF_RNDZ); - if ((b >> i) & 1) - ret |= bfdec_mul(r, r, a, BF_PREC_INF, BF_RNDZ); - } - return ret; -} - -char *bfdec_ftoa(size_t *plen, const bfdec_t *a, limb_t prec, bf_flags_t flags) -{ - return bf_ftoa_internal(plen, (const bf_t *)a, 10, prec, flags, true); -} - -int bfdec_atof(bfdec_t *r, const char *str, const char **pnext, - limb_t prec, bf_flags_t flags) -{ - slimb_t dummy_exp; - return bf_atof_internal((bf_t *)r, &dummy_exp, str, pnext, 10, prec, - flags, true); -} - -#endif /* USE_BF_DEC */ - -#ifdef USE_FFT_MUL -/***************************************************************/ -/* Integer multiplication with FFT */ - -/* OR 'val' (at most LIMB_BITS bits) into 'tab' at bit position 'pos' */ -static inline void put_bits(limb_t *tab, limb_t len, slimb_t pos, limb_t val) -{ - limb_t i; - int p; - - i = pos >> LIMB_LOG2_BITS; - p = pos & (LIMB_BITS - 1); - if (i < len) - tab[i] |= val << p; - if (p != 0) { - i++; - if (i < len) { - tab[i] |= val >> (LIMB_BITS - p); - } - } -} - -#if defined(__AVX2__) - -typedef double NTTLimb; - -/* we must have: modulo >= 1 << NTT_MOD_LOG2_MIN */ -#define NTT_MOD_LOG2_MIN 50 -#define 
NTT_MOD_LOG2_MAX 51 -#define NB_MODS 5 -#define NTT_PROOT_2EXP 39 -static const int ntt_int_bits[NB_MODS] = { 254, 203, 152, 101, 50, }; - -static const limb_t ntt_mods[NB_MODS] = { 0x00073a8000000001, 0x0007858000000001, 0x0007a38000000001, 0x0007a68000000001, 0x0007fd8000000001, -}; - -static const limb_t ntt_proot[2][NB_MODS] = { - { 0x00056198d44332c8, 0x0002eb5d640aad39, 0x00047e31eaa35fd0, 0x0005271ac118a150, 0x00075e0ce8442bd5, }, - { 0x000461169761bcc5, 0x0002dac3cb2da688, 0x0004abc97751e3bf, 0x000656778fc8c485, 0x0000dc6469c269fa, }, -}; - -static const limb_t ntt_mods_cr[NB_MODS * (NB_MODS - 1) / 2] = { - 0x00020e4da740da8e, 0x0004c3dc09c09c1d, 0x000063bd097b4271, 0x000799d8f18f18fd, - 0x0005384222222264, 0x000572b07c1f07fe, 0x00035cd08888889a, - 0x00066015555557e3, 0x000725960b60b623, - 0x0002fc1fa1d6ce12, -}; - -#else - -typedef limb_t NTTLimb; - -#if LIMB_BITS == 64 - -#define NTT_MOD_LOG2_MIN 61 -#define NTT_MOD_LOG2_MAX 62 -#define NB_MODS 5 -#define NTT_PROOT_2EXP 51 -static const int ntt_int_bits[NB_MODS] = { 307, 246, 185, 123, 61, }; - -static const limb_t ntt_mods[NB_MODS] = { 0x28d8000000000001, 0x2a88000000000001, 0x2ed8000000000001, 0x3508000000000001, 0x3aa8000000000001, -}; - -static const limb_t ntt_proot[2][NB_MODS] = { - { 0x1b8ea61034a2bea7, 0x21a9762de58206fb, 0x02ca782f0756a8ea, 0x278384537a3e50a1, 0x106e13fee74ce0ab, }, - { 0x233513af133e13b8, 0x1d13140d1c6f75f1, 0x12cde57f97e3eeda, 0x0d6149e23cbe654f, 0x36cd204f522a1379, }, -}; - -static const limb_t ntt_mods_cr[NB_MODS * (NB_MODS - 1) / 2] = { - 0x08a9ed097b425eea, 0x18a44aaaaaaaaab3, 0x2493f57f57f57f5d, 0x126b8d0649a7f8d4, - 0x09d80ed7303b5ccc, 0x25b8bcf3cf3cf3d5, 0x2ce6ce63398ce638, - 0x0e31fad40a57eb59, 0x02a3529fd4a7f52f, - 0x3a5493e93e93e94a, -}; - -#elif LIMB_BITS == 32 - -/* we must have: modulo >= 1 << NTT_MOD_LOG2_MIN */ -#define NTT_MOD_LOG2_MIN 29 -#define NTT_MOD_LOG2_MAX 30 -#define NB_MODS 5 -#define NTT_PROOT_2EXP 20 -static const int ntt_int_bits[NB_MODS] = { 148, 119, 89, 59, 29, }; - -static const limb_t ntt_mods[NB_MODS] = { 0x0000000032b00001, 0x0000000033700001, 0x0000000036d00001, 0x0000000037300001, 0x000000003e500001, -}; - -static const limb_t ntt_proot[2][NB_MODS] = { - { 0x0000000032525f31, 0x0000000005eb3b37, 0x00000000246eda9f, 0x0000000035f25901, 0x00000000022f5768, }, - { 0x00000000051eba1a, 0x00000000107be10e, 0x000000001cd574e0, 0x00000000053806e6, 0x000000002cd6bf98, }, -}; - -static const limb_t ntt_mods_cr[NB_MODS * (NB_MODS - 1) / 2] = { - 0x000000000449559a, 0x000000001eba6ca9, 0x000000002ec18e46, 0x000000000860160b, - 0x000000000d321307, 0x000000000bf51120, 0x000000000f662938, - 0x000000000932ab3e, 0x000000002f40eef8, - 0x000000002e760905, -}; - -#endif /* LIMB_BITS */ - -#endif /* !AVX2 */ - -#if defined(__AVX2__) -#define NTT_TRIG_K_MAX 18 -#else -#define NTT_TRIG_K_MAX 19 -#endif - -typedef struct BFNTTState { - bf_context_t *ctx; - - /* used for mul_mod_fast() */ - limb_t ntt_mods_div[NB_MODS]; - - limb_t ntt_proot_pow[NB_MODS][2][NTT_PROOT_2EXP + 1]; - limb_t ntt_proot_pow_inv[NB_MODS][2][NTT_PROOT_2EXP + 1]; - NTTLimb *ntt_trig[NB_MODS][2][NTT_TRIG_K_MAX + 1]; - /* 1/2^n mod m */ - limb_t ntt_len_inv[NB_MODS][NTT_PROOT_2EXP + 1][2]; -#if defined(__AVX2__) - __m256d ntt_mods_cr_vec[NB_MODS * (NB_MODS - 1) / 2]; - __m256d ntt_mods_vec[NB_MODS]; - __m256d ntt_mods_inv_vec[NB_MODS]; -#else - limb_t ntt_mods_cr_inv[NB_MODS * (NB_MODS - 1) / 2]; -#endif -} BFNTTState; - -static NTTLimb *get_trig(BFNTTState *s, int k, int inverse, int m_idx); - -/* add modulo with up 
to (LIMB_BITS-1) bit modulo */ -static inline limb_t add_mod(limb_t a, limb_t b, limb_t m) -{ - limb_t r; - r = a + b; - if (r >= m) - r -= m; - return r; -} - -/* sub modulo with up to LIMB_BITS bit modulo */ -static inline limb_t sub_mod(limb_t a, limb_t b, limb_t m) -{ - limb_t r; - r = a - b; - if (r > a) - r += m; - return r; -} - -/* return (r0+r1*B) mod m - precondition: 0 <= r0+r1*B < 2^(64+NTT_MOD_LOG2_MIN) -*/ -static inline limb_t mod_fast(dlimb_t r, - limb_t m, limb_t m_inv) -{ - limb_t a1, q, t0, r1, r0; - - a1 = r >> NTT_MOD_LOG2_MIN; - - q = ((dlimb_t)a1 * m_inv) >> LIMB_BITS; - r = r - (dlimb_t)q * m - m * 2; - r1 = r >> LIMB_BITS; - t0 = (slimb_t)r1 >> 1; - r += m & t0; - r0 = r; - r1 = r >> LIMB_BITS; - r0 += m & r1; - return r0; -} - -/* faster version using precomputed modulo inverse. - precondition: 0 <= a * b < 2^(64+NTT_MOD_LOG2_MIN) */ -static inline limb_t mul_mod_fast(limb_t a, limb_t b, - limb_t m, limb_t m_inv) -{ - dlimb_t r; - r = (dlimb_t)a * (dlimb_t)b; - return mod_fast(r, m, m_inv); -} - -static inline limb_t init_mul_mod_fast(limb_t m) -{ - dlimb_t t; - assert(m < (limb_t)1 << NTT_MOD_LOG2_MAX); - assert(m >= (limb_t)1 << NTT_MOD_LOG2_MIN); - t = (dlimb_t)1 << (LIMB_BITS + NTT_MOD_LOG2_MIN); - return t / m; -} - -/* Faster version used when the multiplier is constant. 0 <= a < 2^64, - 0 <= b < m. */ -static inline limb_t mul_mod_fast2(limb_t a, limb_t b, - limb_t m, limb_t b_inv) -{ - limb_t r, q; - - q = ((dlimb_t)a * (dlimb_t)b_inv) >> LIMB_BITS; - r = a * b - q * m; - if (r >= m) - r -= m; - return r; -} - -/* Faster version used when the multiplier is constant. 0 <= a < 2^64, - 0 <= b < m. Let r = a * b mod m. The return value is 'r' or 'r + - m'. */ -static inline limb_t mul_mod_fast3(limb_t a, limb_t b, - limb_t m, limb_t b_inv) -{ - limb_t r, q; - - q = ((dlimb_t)a * (dlimb_t)b_inv) >> LIMB_BITS; - r = a * b - q * m; - return r; -} - -static inline limb_t init_mul_mod_fast2(limb_t b, limb_t m) -{ - return ((dlimb_t)b << LIMB_BITS) / m; -} - -#ifdef __AVX2__ - -static inline limb_t ntt_limb_to_int(NTTLimb a, limb_t m) -{ - slimb_t v; - v = a; - if (v < 0) - v += m; - if (v >= m) - v -= m; - return v; -} - -static inline NTTLimb int_to_ntt_limb(limb_t a, limb_t m) -{ - return (slimb_t)a; -} - -static inline NTTLimb int_to_ntt_limb2(limb_t a, limb_t m) -{ - if (a >= (m / 2)) - a -= m; - return (slimb_t)a; -} - -/* return r + m if r < 0 otherwise r. */ -static inline __m256d ntt_mod1(__m256d r, __m256d m) -{ - return _mm256_blendv_pd(r, r + m, r); -} - -/* input: abs(r) < 2 * m. 
Output: abs(r) < m */ -static inline __m256d ntt_mod(__m256d r, __m256d mf, __m256d m2f) -{ - return _mm256_blendv_pd(r, r + m2f, r) - mf; -} - -/* input: abs(a*b) < 2 * m^2, output: abs(r) < m */ -static inline __m256d ntt_mul_mod(__m256d a, __m256d b, __m256d mf, - __m256d m_inv) -{ - __m256d r, q, ab1, ab0, qm0, qm1; - ab1 = a * b; - q = _mm256_round_pd(ab1 * m_inv, 0); /* round to nearest */ - qm1 = q * mf; - qm0 = _mm256_fmsub_pd(q, mf, qm1); /* low part */ - ab0 = _mm256_fmsub_pd(a, b, ab1); /* low part */ - r = (ab1 - qm1) + (ab0 - qm0); - return r; -} - -static void *bf_aligned_malloc(bf_context_t *s, size_t size, size_t align) -{ - void *ptr; - void **ptr1; - ptr = bf_malloc(s, size + sizeof(void *) + align - 1); - if (!ptr) - return NULL; - ptr1 = (void **)(((uintptr_t)ptr + sizeof(void *) + align - 1) & - ~(align - 1)); - ptr1[-1] = ptr; - return ptr1; -} - -static void bf_aligned_free(bf_context_t *s, void *ptr) -{ - if (!ptr) - return; - bf_free(s, ((void **)ptr)[-1]); -} - -static void *ntt_malloc(BFNTTState *s, size_t size) -{ - return bf_aligned_malloc(s->ctx, size, 64); -} - -static void ntt_free(BFNTTState *s, void *ptr) -{ - bf_aligned_free(s->ctx, ptr); -} - -static no_inline int ntt_fft(BFNTTState *s, - NTTLimb *out_buf, NTTLimb *in_buf, - NTTLimb *tmp_buf, int fft_len_log2, - int inverse, int m_idx) -{ - limb_t nb_blocks, fft_per_block, p, k, n, stride_in, i, j; - NTTLimb *tab_in, *tab_out, *tmp, *trig; - __m256d m_inv, mf, m2f, c, a0, a1, b0, b1; - limb_t m; - int l; - - m = ntt_mods[m_idx]; - - m_inv = _mm256_set1_pd(1.0 / (double)m); - mf = _mm256_set1_pd(m); - m2f = _mm256_set1_pd(m * 2); - - n = (limb_t)1 << fft_len_log2; - assert(n >= 8); - stride_in = n / 2; - - tab_in = in_buf; - tab_out = tmp_buf; - trig = get_trig(s, fft_len_log2, inverse, m_idx); - if (!trig) - return -1; - p = 0; - for(k = 0; k < stride_in; k += 4) { - a0 = _mm256_load_pd(&tab_in[k]); - a1 = _mm256_load_pd(&tab_in[k + stride_in]); - c = _mm256_load_pd(trig); - trig += 4; - b0 = ntt_mod(a0 + a1, mf, m2f); - b1 = ntt_mul_mod(a0 - a1, c, mf, m_inv); - a0 = _mm256_permute2f128_pd(b0, b1, 0x20); - a1 = _mm256_permute2f128_pd(b0, b1, 0x31); - a0 = _mm256_permute4x64_pd(a0, 0xd8); - a1 = _mm256_permute4x64_pd(a1, 0xd8); - _mm256_store_pd(&tab_out[p], a0); - _mm256_store_pd(&tab_out[p + 4], a1); - p += 2 * 4; - } - tmp = tab_in; - tab_in = tab_out; - tab_out = tmp; - - trig = get_trig(s, fft_len_log2 - 1, inverse, m_idx); - if (!trig) - return -1; - p = 0; - for(k = 0; k < stride_in; k += 4) { - a0 = _mm256_load_pd(&tab_in[k]); - a1 = _mm256_load_pd(&tab_in[k + stride_in]); - c = _mm256_setr_pd(trig[0], trig[0], trig[1], trig[1]); - trig += 2; - b0 = ntt_mod(a0 + a1, mf, m2f); - b1 = ntt_mul_mod(a0 - a1, c, mf, m_inv); - a0 = _mm256_permute2f128_pd(b0, b1, 0x20); - a1 = _mm256_permute2f128_pd(b0, b1, 0x31); - _mm256_store_pd(&tab_out[p], a0); - _mm256_store_pd(&tab_out[p + 4], a1); - p += 2 * 4; - } - tmp = tab_in; - tab_in = tab_out; - tab_out = tmp; - - nb_blocks = n / 4; - fft_per_block = 4; - - l = fft_len_log2 - 2; - while (nb_blocks != 2) { - nb_blocks >>= 1; - p = 0; - k = 0; - trig = get_trig(s, l, inverse, m_idx); - if (!trig) - return -1; - for(i = 0; i < nb_blocks; i++) { - c = _mm256_set1_pd(trig[0]); - trig++; - for(j = 0; j < fft_per_block; j += 4) { - a0 = _mm256_load_pd(&tab_in[k + j]); - a1 = _mm256_load_pd(&tab_in[k + j + stride_in]); - b0 = ntt_mod(a0 + a1, mf, m2f); - b1 = ntt_mul_mod(a0 - a1, c, mf, m_inv); - _mm256_store_pd(&tab_out[p + j], b0); - _mm256_store_pd(&tab_out[p + 
j + fft_per_block], b1); - } - k += fft_per_block; - p += 2 * fft_per_block; - } - fft_per_block <<= 1; - l--; - tmp = tab_in; - tab_in = tab_out; - tab_out = tmp; - } - - tab_out = out_buf; - for(k = 0; k < stride_in; k += 4) { - a0 = _mm256_load_pd(&tab_in[k]); - a1 = _mm256_load_pd(&tab_in[k + stride_in]); - b0 = ntt_mod(a0 + a1, mf, m2f); - b1 = ntt_mod(a0 - a1, mf, m2f); - _mm256_store_pd(&tab_out[k], b0); - _mm256_store_pd(&tab_out[k + stride_in], b1); - } - return 0; -} - -static void ntt_vec_mul(BFNTTState *s, - NTTLimb *tab1, NTTLimb *tab2, limb_t fft_len_log2, - int k_tot, int m_idx) -{ - limb_t i, c_inv, n, m; - __m256d m_inv, mf, a, b, c; - - m = ntt_mods[m_idx]; - c_inv = s->ntt_len_inv[m_idx][k_tot][0]; - m_inv = _mm256_set1_pd(1.0 / (double)m); - mf = _mm256_set1_pd(m); - c = _mm256_set1_pd(int_to_ntt_limb(c_inv, m)); - n = (limb_t)1 << fft_len_log2; - for(i = 0; i < n; i += 4) { - a = _mm256_load_pd(&tab1[i]); - b = _mm256_load_pd(&tab2[i]); - a = ntt_mul_mod(a, b, mf, m_inv); - a = ntt_mul_mod(a, c, mf, m_inv); - _mm256_store_pd(&tab1[i], a); - } -} - -static no_inline void mul_trig(NTTLimb *buf, - limb_t n, limb_t c1, limb_t m, limb_t m_inv1) -{ - limb_t i, c2, c3, c4; - __m256d c, c_mul, a0, mf, m_inv; - assert(n >= 2); - - mf = _mm256_set1_pd(m); - m_inv = _mm256_set1_pd(1.0 / (double)m); - - c2 = mul_mod_fast(c1, c1, m, m_inv1); - c3 = mul_mod_fast(c2, c1, m, m_inv1); - c4 = mul_mod_fast(c2, c2, m, m_inv1); - c = _mm256_setr_pd(1, int_to_ntt_limb(c1, m), - int_to_ntt_limb(c2, m), int_to_ntt_limb(c3, m)); - c_mul = _mm256_set1_pd(int_to_ntt_limb(c4, m)); - for(i = 0; i < n; i += 4) { - a0 = _mm256_load_pd(&buf[i]); - a0 = ntt_mul_mod(a0, c, mf, m_inv); - _mm256_store_pd(&buf[i], a0); - c = ntt_mul_mod(c, c_mul, mf, m_inv); - } -} - -#else - -static void *ntt_malloc(BFNTTState *s, size_t size) -{ - return bf_malloc(s->ctx, size); -} - -static void ntt_free(BFNTTState *s, void *ptr) -{ - bf_free(s->ctx, ptr); -} - -static inline limb_t ntt_limb_to_int(NTTLimb a, limb_t m) -{ - if (a >= m) - a -= m; - return a; -} - -static inline NTTLimb int_to_ntt_limb(slimb_t a, limb_t m) -{ - return a; -} - -static no_inline int ntt_fft(BFNTTState *s, NTTLimb *out_buf, NTTLimb *in_buf, - NTTLimb *tmp_buf, int fft_len_log2, - int inverse, int m_idx) -{ - limb_t nb_blocks, fft_per_block, p, k, n, stride_in, i, j, m, m2; - NTTLimb *tab_in, *tab_out, *tmp, a0, a1, b0, b1, c, *trig, c_inv; - int l; - - m = ntt_mods[m_idx]; - m2 = 2 * m; - n = (limb_t)1 << fft_len_log2; - nb_blocks = n; - fft_per_block = 1; - stride_in = n / 2; - tab_in = in_buf; - tab_out = tmp_buf; - l = fft_len_log2; - while (nb_blocks != 2) { - nb_blocks >>= 1; - p = 0; - k = 0; - trig = get_trig(s, l, inverse, m_idx); - if (!trig) - return -1; - for(i = 0; i < nb_blocks; i++) { - c = trig[0]; - c_inv = trig[1]; - trig += 2; - for(j = 0; j < fft_per_block; j++) { - a0 = tab_in[k + j]; - a1 = tab_in[k + j + stride_in]; - b0 = add_mod(a0, a1, m2); - b1 = a0 - a1 + m2; - b1 = mul_mod_fast3(b1, c, m, c_inv); - tab_out[p + j] = b0; - tab_out[p + j + fft_per_block] = b1; - } - k += fft_per_block; - p += 2 * fft_per_block; - } - fft_per_block <<= 1; - l--; - tmp = tab_in; - tab_in = tab_out; - tab_out = tmp; - } - /* no twiddle in last step */ - tab_out = out_buf; - for(k = 0; k < stride_in; k++) { - a0 = tab_in[k]; - a1 = tab_in[k + stride_in]; - b0 = add_mod(a0, a1, m2); - b1 = sub_mod(a0, a1, m2); - tab_out[k] = b0; - tab_out[k + stride_in] = b1; - } - return 0; -} - -static void ntt_vec_mul(BFNTTState *s, - NTTLimb *tab1, 
NTTLimb *tab2, int fft_len_log2, - int k_tot, int m_idx) -{ - limb_t i, norm, norm_inv, a, n, m, m_inv; - - m = ntt_mods[m_idx]; - m_inv = s->ntt_mods_div[m_idx]; - norm = s->ntt_len_inv[m_idx][k_tot][0]; - norm_inv = s->ntt_len_inv[m_idx][k_tot][1]; - n = (limb_t)1 << fft_len_log2; - for(i = 0; i < n; i++) { - a = tab1[i]; - /* need to reduce the range so that the product is < - 2^(LIMB_BITS+NTT_MOD_LOG2_MIN) */ - if (a >= m) - a -= m; - a = mul_mod_fast(a, tab2[i], m, m_inv); - a = mul_mod_fast3(a, norm, m, norm_inv); - tab1[i] = a; - } -} - -static no_inline void mul_trig(NTTLimb *buf, - limb_t n, limb_t c_mul, limb_t m, limb_t m_inv) -{ - limb_t i, c0, c_mul_inv; - - c0 = 1; - c_mul_inv = init_mul_mod_fast2(c_mul, m); - for(i = 0; i < n; i++) { - buf[i] = mul_mod_fast(buf[i], c0, m, m_inv); - c0 = mul_mod_fast2(c0, c_mul, m, c_mul_inv); - } -} - -#endif /* !AVX2 */ - -static no_inline NTTLimb *get_trig(BFNTTState *s, - int k, int inverse, int m_idx) -{ - NTTLimb *tab; - limb_t i, n2, c, c_mul, m, c_mul_inv; - - if (k > NTT_TRIG_K_MAX) - return NULL; - - tab = s->ntt_trig[m_idx][inverse][k]; - if (tab) - return tab; - n2 = (limb_t)1 << (k - 1); - m = ntt_mods[m_idx]; -#ifdef __AVX2__ - tab = ntt_malloc(s, sizeof(NTTLimb) * n2); -#else - tab = ntt_malloc(s, sizeof(NTTLimb) * n2 * 2); -#endif - if (!tab) - return NULL; - c = 1; - c_mul = s->ntt_proot_pow[m_idx][inverse][k]; - c_mul_inv = s->ntt_proot_pow_inv[m_idx][inverse][k]; - for(i = 0; i < n2; i++) { -#ifdef __AVX2__ - tab[i] = int_to_ntt_limb2(c, m); -#else - tab[2 * i] = int_to_ntt_limb(c, m); - tab[2 * i + 1] = init_mul_mod_fast2(c, m); -#endif - c = mul_mod_fast2(c, c_mul, m, c_mul_inv); - } - s->ntt_trig[m_idx][inverse][k] = tab; - return tab; -} - -void fft_clear_cache(bf_context_t *s1) -{ - int m_idx, inverse, k; - BFNTTState *s = s1->ntt_state; - if (s) { - for(m_idx = 0; m_idx < NB_MODS; m_idx++) { - for(inverse = 0; inverse < 2; inverse++) { - for(k = 0; k < NTT_TRIG_K_MAX + 1; k++) { - if (s->ntt_trig[m_idx][inverse][k]) { - ntt_free(s, s->ntt_trig[m_idx][inverse][k]); - s->ntt_trig[m_idx][inverse][k] = NULL; - } - } - } - } -#if defined(__AVX2__) - bf_aligned_free(s1, s); -#else - bf_free(s1, s); -#endif - s1->ntt_state = NULL; - } -} - -#define STRIP_LEN 16 - -/* dst = buf1, src = buf2 */ -static int ntt_fft_partial(BFNTTState *s, NTTLimb *buf1, - int k1, int k2, limb_t n1, limb_t n2, int inverse, - limb_t m_idx) -{ - limb_t i, j, c_mul, c0, m, m_inv, strip_len, l; - NTTLimb *buf2, *buf3; - - buf2 = NULL; - buf3 = ntt_malloc(s, sizeof(NTTLimb) * n1); - if (!buf3) - goto fail; - if (k2 == 0) { - if (ntt_fft(s, buf1, buf1, buf3, k1, inverse, m_idx)) - goto fail; - } else { - strip_len = STRIP_LEN; - buf2 = ntt_malloc(s, sizeof(NTTLimb) * n1 * strip_len); - if (!buf2) - goto fail; - m = ntt_mods[m_idx]; - m_inv = s->ntt_mods_div[m_idx]; - c0 = s->ntt_proot_pow[m_idx][inverse][k1 + k2]; - c_mul = 1; - assert((n2 % strip_len) == 0); - for(j = 0; j < n2; j += strip_len) { - for(i = 0; i < n1; i++) { - for(l = 0; l < strip_len; l++) { - buf2[i + l * n1] = buf1[i * n2 + (j + l)]; - } - } - for(l = 0; l < strip_len; l++) { - if (inverse) - mul_trig(buf2 + l * n1, n1, c_mul, m, m_inv); - if (ntt_fft(s, buf2 + l * n1, buf2 + l * n1, buf3, k1, inverse, m_idx)) - goto fail; - if (!inverse) - mul_trig(buf2 + l * n1, n1, c_mul, m, m_inv); - c_mul = mul_mod_fast(c_mul, c0, m, m_inv); - } - - for(i = 0; i < n1; i++) { - for(l = 0; l < strip_len; l++) { - buf1[i * n2 + (j + l)] = buf2[i + l *n1]; - } - } - } - ntt_free(s, buf2); - } - 
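/* Aside: a self-contained sketch (not from libbf; the demo_* names are
   invented) of the precomputed-inverse reduction used by
   mul_mod_fast2()/mul_mod_fast3() above. With b_inv = floor(b * 2^64 / m)
   and q = floor(a * b_inv / 2^64), q underestimates floor(a * b / m) by at
   most 1, so a * b - q * m lies in [0, 2m) and a single conditional
   subtraction yields the exact remainder. This needs m < 2^62 (which the
   64-bit NTT moduli above satisfy) and a 128-bit intermediate type. */
#include <assert.h>
#include <stdint.h>

typedef unsigned __int128 demo_u128; /* GCC/Clang extension, as in libbf */

static uint64_t demo_init_mul_mod_fast2(uint64_t b, uint64_t m)
{
    return (uint64_t)(((demo_u128)b << 64) / m); /* b < m, so this fits */
}

static uint64_t demo_mul_mod_fast2(uint64_t a, uint64_t b, uint64_t m,
                                   uint64_t b_inv)
{
    uint64_t q = (uint64_t)(((demo_u128)a * b_inv) >> 64);
    uint64_t r = a * b - q * m; /* exact in the low 64 bits; r < 2 * m */
    if (r >= m)
        r -= m;
    return r;
}

int main(void)
{
    uint64_t m = 0x3aa8000000000001ull; /* one of the 64-bit NTT moduli above */
    uint64_t b = 123456789123456789ull % m;
    uint64_t b_inv = demo_init_mul_mod_fast2(b, m);
    uint64_t a = 0xfedcba9876543210ull;
    assert(demo_mul_mod_fast2(a, b, m, b_inv) ==
           (uint64_t)(((demo_u128)a * b) % m));
    return 0;
}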
ntt_free(s, buf3); - return 0; - fail: - ntt_free(s, buf2); - ntt_free(s, buf3); - return -1; -} - - -/* dst = buf1, src = buf2, tmp = buf3 */ -static int ntt_conv(BFNTTState *s, NTTLimb *buf1, NTTLimb *buf2, - int k, int k_tot, limb_t m_idx) -{ - limb_t n1, n2, i; - int k1, k2; - - if (k <= NTT_TRIG_K_MAX) { - k1 = k; - } else { - /* recursive split of the FFT */ - k1 = bf_min(k / 2, NTT_TRIG_K_MAX); - } - k2 = k - k1; - n1 = (limb_t)1 << k1; - n2 = (limb_t)1 << k2; - - if (ntt_fft_partial(s, buf1, k1, k2, n1, n2, 0, m_idx)) - return -1; - if (ntt_fft_partial(s, buf2, k1, k2, n1, n2, 0, m_idx)) - return -1; - if (k2 == 0) { - ntt_vec_mul(s, buf1, buf2, k, k_tot, m_idx); - } else { - for(i = 0; i < n1; i++) { - ntt_conv(s, buf1 + i * n2, buf2 + i * n2, k2, k_tot, m_idx); - } - } - if (ntt_fft_partial(s, buf1, k1, k2, n1, n2, 1, m_idx)) - return -1; - return 0; -} - - -static no_inline void limb_to_ntt(BFNTTState *s, - NTTLimb *tabr, limb_t fft_len, - const limb_t *taba, limb_t a_len, int dpl, - int first_m_idx, int nb_mods) -{ - slimb_t i, n; - dlimb_t a, b; - int j, shift; - limb_t base_mask1, a0, a1, a2, r, m, m_inv; - - memset(tabr, 0, sizeof(NTTLimb) * fft_len * nb_mods); - shift = dpl & (LIMB_BITS - 1); - if (shift == 0) - base_mask1 = -1; - else - base_mask1 = ((limb_t)1 << shift) - 1; - n = bf_min(fft_len, (a_len * LIMB_BITS + dpl - 1) / dpl); - for(i = 0; i < n; i++) { - a0 = get_bits(taba, a_len, i * dpl); - if (dpl <= LIMB_BITS) { - a0 &= base_mask1; - a = a0; - } else { - a1 = get_bits(taba, a_len, i * dpl + LIMB_BITS); - if (dpl <= (LIMB_BITS + NTT_MOD_LOG2_MIN)) { - a = a0 | ((dlimb_t)(a1 & base_mask1) << LIMB_BITS); - } else { - if (dpl > 2 * LIMB_BITS) { - a2 = get_bits(taba, a_len, i * dpl + LIMB_BITS * 2) & - base_mask1; - } else { - a1 &= base_mask1; - a2 = 0; - } - // printf("a=0x%016lx%016lx%016lx\n", a2, a1, a0); - a = (a0 >> (LIMB_BITS - NTT_MOD_LOG2_MAX + NTT_MOD_LOG2_MIN)) | - ((dlimb_t)a1 << (NTT_MOD_LOG2_MAX - NTT_MOD_LOG2_MIN)) | - ((dlimb_t)a2 << (LIMB_BITS + NTT_MOD_LOG2_MAX - NTT_MOD_LOG2_MIN)); - a0 &= ((limb_t)1 << (LIMB_BITS - NTT_MOD_LOG2_MAX + NTT_MOD_LOG2_MIN)) - 1; - } - } - for(j = 0; j < nb_mods; j++) { - m = ntt_mods[first_m_idx + j]; - m_inv = s->ntt_mods_div[first_m_idx + j]; - r = mod_fast(a, m, m_inv); - if (dpl > (LIMB_BITS + NTT_MOD_LOG2_MIN)) { - b = ((dlimb_t)r << (LIMB_BITS - NTT_MOD_LOG2_MAX + NTT_MOD_LOG2_MIN)) | a0; - r = mod_fast(b, m, m_inv); - } - tabr[i + j * fft_len] = int_to_ntt_limb(r, m); - } - } -} - -#if defined(__AVX2__) - -#define VEC_LEN 4 - -typedef union { - __m256d v; - double d[4]; -} VecUnion; - -static no_inline void ntt_to_limb(BFNTTState *s, limb_t *tabr, limb_t r_len, - const NTTLimb *buf, int fft_len_log2, int dpl, - int nb_mods) -{ - const limb_t *mods = ntt_mods + NB_MODS - nb_mods; - const __m256d *mods_cr_vec, *mf, *m_inv; - VecUnion y[NB_MODS]; - limb_t u[NB_MODS], carry[NB_MODS], fft_len, base_mask1, r; - slimb_t i, len, pos; - int j, k, l, shift, n_limb1, p; - dlimb_t t; - - j = NB_MODS * (NB_MODS - 1) / 2 - nb_mods * (nb_mods - 1) / 2; - mods_cr_vec = s->ntt_mods_cr_vec + j; - mf = s->ntt_mods_vec + NB_MODS - nb_mods; - m_inv = s->ntt_mods_inv_vec + NB_MODS - nb_mods; - - shift = dpl & (LIMB_BITS - 1); - if (shift == 0) - base_mask1 = -1; - else - base_mask1 = ((limb_t)1 << shift) - 1; - n_limb1 = ((unsigned)dpl - 1) / LIMB_BITS; - for(j = 0; j < NB_MODS; j++) - carry[j] = 0; - for(j = 0; j < NB_MODS; j++) - u[j] = 0; /* avoid warnings */ - memset(tabr, 0, sizeof(limb_t) * r_len); - fft_len = (limb_t)1 << 
fft_len_log2; - len = bf_min(fft_len, (r_len * LIMB_BITS + dpl - 1) / dpl); - len = (len + VEC_LEN - 1) & ~(VEC_LEN - 1); - i = 0; - while (i < len) { - for(j = 0; j < nb_mods; j++) - y[j].v = *(__m256d *)&buf[i + fft_len * j]; - - /* Chinese remainder to get mixed radix representation */ - l = 0; - for(j = 0; j < nb_mods - 1; j++) { - y[j].v = ntt_mod1(y[j].v, mf[j]); - for(k = j + 1; k < nb_mods; k++) { - y[k].v = ntt_mul_mod(y[k].v - y[j].v, - mods_cr_vec[l], mf[k], m_inv[k]); - l++; - } - } - y[j].v = ntt_mod1(y[j].v, mf[j]); - - for(p = 0; p < VEC_LEN; p++) { - /* back to normal representation */ - u[0] = (int64_t)y[nb_mods - 1].d[p]; - l = 1; - for(j = nb_mods - 2; j >= 1; j--) { - r = (int64_t)y[j].d[p]; - for(k = 0; k < l; k++) { - t = (dlimb_t)u[k] * mods[j] + r; - r = t >> LIMB_BITS; - u[k] = t; - } - u[l] = r; - l++; - } - /* XXX: for nb_mods = 5, l should be 4 */ - - /* last step adds the carry */ - r = (int64_t)y[0].d[p]; - for(k = 0; k < l; k++) { - t = (dlimb_t)u[k] * mods[j] + r + carry[k]; - r = t >> LIMB_BITS; - u[k] = t; - } - u[l] = r + carry[l]; - - /* write the digits */ - pos = i * dpl; - for(j = 0; j < n_limb1; j++) { - put_bits(tabr, r_len, pos, u[j]); - pos += LIMB_BITS; - } - put_bits(tabr, r_len, pos, u[n_limb1] & base_mask1); - /* shift by dpl digits and set the carry */ - if (shift == 0) { - for(j = n_limb1 + 1; j < nb_mods; j++) - carry[j - (n_limb1 + 1)] = u[j]; - } else { - for(j = n_limb1; j < nb_mods - 1; j++) { - carry[j - n_limb1] = (u[j] >> shift) | - (u[j + 1] << (LIMB_BITS - shift)); - } - carry[nb_mods - 1 - n_limb1] = u[nb_mods - 1] >> shift; - } - i++; - } - } -} -#else -static no_inline void ntt_to_limb(BFNTTState *s, limb_t *tabr, limb_t r_len, - const NTTLimb *buf, int fft_len_log2, int dpl, - int nb_mods) -{ - const limb_t *mods = ntt_mods + NB_MODS - nb_mods; - const limb_t *mods_cr, *mods_cr_inv; - limb_t y[NB_MODS], u[NB_MODS], carry[NB_MODS], fft_len, base_mask1, r; - slimb_t i, len, pos; - int j, k, l, shift, n_limb1; - dlimb_t t; - - j = NB_MODS * (NB_MODS - 1) / 2 - nb_mods * (nb_mods - 1) / 2; - mods_cr = ntt_mods_cr + j; - mods_cr_inv = s->ntt_mods_cr_inv + j; - - shift = dpl & (LIMB_BITS - 1); - if (shift == 0) - base_mask1 = -1; - else - base_mask1 = ((limb_t)1 << shift) - 1; - n_limb1 = ((unsigned)dpl - 1) / LIMB_BITS; - for(j = 0; j < NB_MODS; j++) - carry[j] = 0; - for(j = 0; j < NB_MODS; j++) - u[j] = 0; /* avoid warnings */ - memset(tabr, 0, sizeof(limb_t) * r_len); - fft_len = (limb_t)1 << fft_len_log2; - len = bf_min(fft_len, (r_len * LIMB_BITS + dpl - 1) / dpl); - for(i = 0; i < len; i++) { - for(j = 0; j < nb_mods; j++) { - y[j] = ntt_limb_to_int(buf[i + fft_len * j], mods[j]); - } - - /* Chinese remainder to get mixed radix representation */ - l = 0; - for(j = 0; j < nb_mods - 1; j++) { - for(k = j + 1; k < nb_mods; k++) { - limb_t m; - m = mods[k]; - /* Note: there is no overflow in the sub_mod() because - the modulos are sorted by increasing order */ - y[k] = mul_mod_fast2(y[k] - y[j] + m, - mods_cr[l], m, mods_cr_inv[l]); - l++; - } - } - - /* back to normal representation */ - u[0] = y[nb_mods - 1]; - l = 1; - for(j = nb_mods - 2; j >= 1; j--) { - r = y[j]; - for(k = 0; k < l; k++) { - t = (dlimb_t)u[k] * mods[j] + r; - r = t >> LIMB_BITS; - u[k] = t; - } - u[l] = r; - l++; - } - - /* last step adds the carry */ - r = y[0]; - for(k = 0; k < l; k++) { - t = (dlimb_t)u[k] * mods[j] + r + carry[k]; - r = t >> LIMB_BITS; - u[k] = t; - } - u[l] = r + carry[l]; - - /* write the digits */ - pos = i * dpl; - for(j = 0; j < 
n_limb1; j++) { - put_bits(tabr, r_len, pos, u[j]); - pos += LIMB_BITS; - } - put_bits(tabr, r_len, pos, u[n_limb1] & base_mask1); - /* shift by dpl digits and set the carry */ - if (shift == 0) { - for(j = n_limb1 + 1; j < nb_mods; j++) - carry[j - (n_limb1 + 1)] = u[j]; - } else { - for(j = n_limb1; j < nb_mods - 1; j++) { - carry[j - n_limb1] = (u[j] >> shift) | - (u[j + 1] << (LIMB_BITS - shift)); - } - carry[nb_mods - 1 - n_limb1] = u[nb_mods - 1] >> shift; - } - } -} -#endif - -static int ntt_static_init(bf_context_t *s1) -{ - BFNTTState *s; - int inverse, i, j, k, l; - limb_t c, c_inv, c_inv2, m, m_inv; - - if (s1->ntt_state) - return 0; -#if defined(__AVX2__) - s = bf_aligned_malloc(s1, sizeof(*s), 64); -#else - s = bf_malloc(s1, sizeof(*s)); -#endif - if (!s) - return -1; - memset(s, 0, sizeof(*s)); - s1->ntt_state = s; - s->ctx = s1; - - for(j = 0; j < NB_MODS; j++) { - m = ntt_mods[j]; - m_inv = init_mul_mod_fast(m); - s->ntt_mods_div[j] = m_inv; -#if defined(__AVX2__) - s->ntt_mods_vec[j] = _mm256_set1_pd(m); - s->ntt_mods_inv_vec[j] = _mm256_set1_pd(1.0 / (double)m); -#endif - c_inv2 = (m + 1) / 2; /* 1/2 */ - c_inv = 1; - for(i = 0; i <= NTT_PROOT_2EXP; i++) { - s->ntt_len_inv[j][i][0] = c_inv; - s->ntt_len_inv[j][i][1] = init_mul_mod_fast2(c_inv, m); - c_inv = mul_mod_fast(c_inv, c_inv2, m, m_inv); - } - - for(inverse = 0; inverse < 2; inverse++) { - c = ntt_proot[inverse][j]; - for(i = 0; i < NTT_PROOT_2EXP; i++) { - s->ntt_proot_pow[j][inverse][NTT_PROOT_2EXP - i] = c; - s->ntt_proot_pow_inv[j][inverse][NTT_PROOT_2EXP - i] = - init_mul_mod_fast2(c, m); - c = mul_mod_fast(c, c, m, m_inv); - } - } - } - - l = 0; - for(j = 0; j < NB_MODS - 1; j++) { - for(k = j + 1; k < NB_MODS; k++) { -#if defined(__AVX2__) - s->ntt_mods_cr_vec[l] = _mm256_set1_pd(int_to_ntt_limb2(ntt_mods_cr[l], - ntt_mods[k])); -#else - s->ntt_mods_cr_inv[l] = init_mul_mod_fast2(ntt_mods_cr[l], - ntt_mods[k]); -#endif - l++; - } - } - return 0; -} - -int bf_get_fft_size(int *pdpl, int *pnb_mods, limb_t len) -{ - int dpl, fft_len_log2, n_bits, nb_mods, dpl_found, fft_len_log2_found; - int int_bits, nb_mods_found; - limb_t cost, min_cost; - - min_cost = -1; - dpl_found = 0; - nb_mods_found = 4; - fft_len_log2_found = 0; - for(nb_mods = 3; nb_mods <= NB_MODS; nb_mods++) { - int_bits = ntt_int_bits[NB_MODS - nb_mods]; - dpl = bf_min((int_bits - 4) / 2, - 2 * LIMB_BITS + 2 * NTT_MOD_LOG2_MIN - NTT_MOD_LOG2_MAX); - for(;;) { - fft_len_log2 = ceil_log2((len * LIMB_BITS + dpl - 1) / dpl); - if (fft_len_log2 > NTT_PROOT_2EXP) - goto next; - n_bits = fft_len_log2 + 2 * dpl; - if (n_bits <= int_bits) { - cost = ((limb_t)(fft_len_log2 + 1) << fft_len_log2) * nb_mods; - // printf("n=%d dpl=%d: cost=%" PRId64 "\n", nb_mods, dpl, (int64_t)cost); - if (cost < min_cost) { - min_cost = cost; - dpl_found = dpl; - nb_mods_found = nb_mods; - fft_len_log2_found = fft_len_log2; - } - break; - } - dpl--; - if (dpl == 0) - break; - } - next: ; - } - if (!dpl_found) - abort(); - /* limit dpl if possible to reduce fixed cost of limb/NTT conversion */ - if (dpl_found > (LIMB_BITS + NTT_MOD_LOG2_MIN) && - ((limb_t)(LIMB_BITS + NTT_MOD_LOG2_MIN) << fft_len_log2_found) >= - len * LIMB_BITS) { - dpl_found = LIMB_BITS + NTT_MOD_LOG2_MIN; - } - *pnb_mods = nb_mods_found; - *pdpl = dpl_found; - return fft_len_log2_found; -} - -/* return 0 if OK, -1 if memory error */ -static no_inline int fft_mul(bf_context_t *s1, - bf_t *res, limb_t *a_tab, limb_t a_len, - limb_t *b_tab, limb_t b_len, int mul_flags) -{ - BFNTTState *s; - int dpl, 
fft_len_log2, j, nb_mods, reduced_mem; - slimb_t len, fft_len; - NTTLimb *buf1, *buf2, *ptr; -#if defined(USE_MUL_CHECK) - limb_t ha, hb, hr, h_ref; -#endif - - if (ntt_static_init(s1)) - return -1; - s = s1->ntt_state; - - /* find the optimal number of digits per limb (dpl) */ - len = a_len + b_len; - fft_len_log2 = bf_get_fft_size(&dpl, &nb_mods, len); - fft_len = (uint64_t)1 << fft_len_log2; - // printf("len=%" PRId64 " fft_len_log2=%d dpl=%d\n", len, fft_len_log2, dpl); -#if defined(USE_MUL_CHECK) - ha = mp_mod1(a_tab, a_len, BF_CHKSUM_MOD, 0); - hb = mp_mod1(b_tab, b_len, BF_CHKSUM_MOD, 0); -#endif - if ((mul_flags & (FFT_MUL_R_OVERLAP_A | FFT_MUL_R_OVERLAP_B)) == 0) { - if (!(mul_flags & FFT_MUL_R_NORESIZE)) - bf_resize(res, 0); - } else if (mul_flags & FFT_MUL_R_OVERLAP_B) { - limb_t *tmp_tab, tmp_len; - /* it is better to free 'b' first */ - tmp_tab = a_tab; - a_tab = b_tab; - b_tab = tmp_tab; - tmp_len = a_len; - a_len = b_len; - b_len = tmp_len; - } - buf1 = ntt_malloc(s, sizeof(NTTLimb) * fft_len * nb_mods); - if (!buf1) - return -1; - limb_to_ntt(s, buf1, fft_len, a_tab, a_len, dpl, - NB_MODS - nb_mods, nb_mods); - if ((mul_flags & (FFT_MUL_R_OVERLAP_A | FFT_MUL_R_OVERLAP_B)) == - FFT_MUL_R_OVERLAP_A) { - if (!(mul_flags & FFT_MUL_R_NORESIZE)) - bf_resize(res, 0); - } - reduced_mem = (fft_len_log2 >= 14); - if (!reduced_mem) { - buf2 = ntt_malloc(s, sizeof(NTTLimb) * fft_len * nb_mods); - if (!buf2) - goto fail; - limb_to_ntt(s, buf2, fft_len, b_tab, b_len, dpl, - NB_MODS - nb_mods, nb_mods); - if (!(mul_flags & FFT_MUL_R_NORESIZE)) - bf_resize(res, 0); /* in case res == b */ - } else { - buf2 = ntt_malloc(s, sizeof(NTTLimb) * fft_len); - if (!buf2) - goto fail; - } - for(j = 0; j < nb_mods; j++) { - if (reduced_mem) { - limb_to_ntt(s, buf2, fft_len, b_tab, b_len, dpl, - NB_MODS - nb_mods + j, 1); - ptr = buf2; - } else { - ptr = buf2 + fft_len * j; - } - if (ntt_conv(s, buf1 + fft_len * j, ptr, - fft_len_log2, fft_len_log2, j + NB_MODS - nb_mods)) - goto fail; - } - if (!(mul_flags & FFT_MUL_R_NORESIZE)) - bf_resize(res, 0); /* in case res == b and reduced mem */ - ntt_free(s, buf2); - buf2 = NULL; - if (!(mul_flags & FFT_MUL_R_NORESIZE)) { - if (bf_resize(res, len)) - goto fail; - } - ntt_to_limb(s, res->tab, len, buf1, fft_len_log2, dpl, nb_mods); - ntt_free(s, buf1); -#if defined(USE_MUL_CHECK) - hr = mp_mod1(res->tab, len, BF_CHKSUM_MOD, 0); - h_ref = mul_mod(ha, hb, BF_CHKSUM_MOD); - if (hr != h_ref) { - printf("ntt_mul_error: len=%" PRId_LIMB " fft_len_log2=%d dpl=%d nb_mods=%d\n", - len, fft_len_log2, dpl, nb_mods); - // printf("ha=0x" FMT_LIMB" hb=0x" FMT_LIMB " hr=0x" FMT_LIMB " expected=0x" FMT_LIMB "\n", ha, hb, hr, h_ref); - exit(1); - } -#endif - return 0; - fail: - ntt_free(s, buf1); - ntt_free(s, buf2); - return -1; -} - -#else /* USE_FFT_MUL */ - -int bf_get_fft_size(int *pdpl, int *pnb_mods, limb_t len) -{ - return 0; -} - -#endif /* !USE_FFT_MUL */ - -#undef malloc -#undef free -#undef realloc diff --git a/lib/monoucha0/monoucha/qjs/libbf.h b/lib/monoucha0/monoucha/qjs/libbf.h deleted file mode 100644 index 3586532e..00000000 --- a/lib/monoucha0/monoucha/qjs/libbf.h +++ /dev/null @@ -1,545 +0,0 @@ -/* - * Tiny arbitrary precision floating point library - * - * Copyright (c) 2017-2021 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#ifndef LIBBF_H -#define LIBBF_H - -#include <stddef.h> -#include <stdint.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#if INTPTR_MAX >= INT64_MAX && !defined(_WIN32) && !defined(__TINYC__) -#define LIMB_LOG2_BITS 6 -#else -#define LIMB_LOG2_BITS 5 -#endif - -#define LIMB_BITS (1 << LIMB_LOG2_BITS) - -#if LIMB_BITS == 64 -#ifndef INT128_MAX -__extension__ typedef __int128 int128_t; -__extension__ typedef unsigned __int128 uint128_t; -#endif -typedef int64_t slimb_t; -typedef uint64_t limb_t; -typedef uint128_t dlimb_t; -#define BF_RAW_EXP_MIN INT64_MIN -#define BF_RAW_EXP_MAX INT64_MAX - -#define LIMB_DIGITS 19 -#define BF_DEC_BASE UINT64_C(10000000000000000000) - -#else - -typedef int32_t slimb_t; -typedef uint32_t limb_t; -typedef uint64_t dlimb_t; -#define BF_RAW_EXP_MIN INT32_MIN -#define BF_RAW_EXP_MAX INT32_MAX - -#define LIMB_DIGITS 9 -#define BF_DEC_BASE 1000000000U - -#endif - -/* in bits */ -/* minimum number of bits for the exponent */ -#define BF_EXP_BITS_MIN 3 -/* maximum number of bits for the exponent */ -#define BF_EXP_BITS_MAX (LIMB_BITS - 3) -/* extended range for exponent, used internally */ -#define BF_EXT_EXP_BITS_MAX (BF_EXP_BITS_MAX + 1) -/* minimum possible precision */ -#define BF_PREC_MIN 2 -/* maximum possible precision */ -#define BF_PREC_MAX (((limb_t)1 << (LIMB_BITS - 2)) - 2) -/* some operations support infinite precision */ -#define BF_PREC_INF (BF_PREC_MAX + 1) /* infinite precision */ - -#if LIMB_BITS == 64 -#define BF_CHKSUM_MOD (UINT64_C(975620677) * UINT64_C(9795002197)) -#else -#define BF_CHKSUM_MOD 975620677U -#endif - -#define BF_EXP_ZERO BF_RAW_EXP_MIN -#define BF_EXP_INF (BF_RAW_EXP_MAX - 1) -#define BF_EXP_NAN BF_RAW_EXP_MAX - -/* +/-zero is represented with expn = BF_EXP_ZERO and len = 0, - +/-infinity is represented with expn = BF_EXP_INF and len = 0, - NaN is represented with expn = BF_EXP_NAN and len = 0 (sign is ignored) - */ -typedef struct { - struct bf_context_t *ctx; - int sign; - slimb_t expn; - limb_t len; - limb_t *tab; -} bf_t; - -typedef struct { - /* must be kept identical to bf_t */ - struct bf_context_t *ctx; - int sign; - slimb_t expn; - limb_t len; - limb_t *tab; -} bfdec_t; - -typedef enum { - BF_RNDN, /* round to nearest, ties to even */ - BF_RNDZ, /* round to zero */ - BF_RNDD, /* round to -inf (the code relies on (BF_RNDD xor BF_RNDU) = 1) */ - BF_RNDU, /* round to +inf */ - BF_RNDNA, /* round to nearest, ties away from zero */ - BF_RNDA, /* round away from zero */ - BF_RNDF, /* faithful rounding (nondeterministic, either RNDD or RNDU, - inexact flag is always set) */ -} bf_rnd_t; - -/* allow subnormal numbers. 
Only available if the number of exponent - bits is <= BF_EXP_BITS_USER_MAX and prec != BF_PREC_INF. */ -#define BF_FLAG_SUBNORMAL (1 << 3) -/* 'prec' is the precision after the radix point instead of the whole - mantissa. Can only be used with bf_round() and - bfdec_[add|sub|mul|div|sqrt|round](). */ -#define BF_FLAG_RADPNT_PREC (1 << 4) - -#define BF_RND_MASK 0x7 -#define BF_EXP_BITS_SHIFT 5 -#define BF_EXP_BITS_MASK 0x3f - -/* shortcut for bf_set_exp_bits(BF_EXT_EXP_BITS_MAX) */ -#define BF_FLAG_EXT_EXP (BF_EXP_BITS_MASK << BF_EXP_BITS_SHIFT) - -/* contains the rounding mode and the number of exponent bits */ -typedef uint32_t bf_flags_t; - -typedef void *bf_realloc_func_t(void *opaque, void *ptr, size_t size); - -typedef struct { - bf_t val; - limb_t prec; -} BFConstCache; - -typedef struct bf_context_t { - void *realloc_opaque; - bf_realloc_func_t *realloc_func; - BFConstCache log2_cache; - BFConstCache pi_cache; - struct BFNTTState *ntt_state; -} bf_context_t; - -static inline int bf_get_exp_bits(bf_flags_t flags) -{ - int e; - e = (flags >> BF_EXP_BITS_SHIFT) & BF_EXP_BITS_MASK; - if (e == BF_EXP_BITS_MASK) - return BF_EXP_BITS_MAX + 1; - else - return BF_EXP_BITS_MAX - e; -} - -static inline bf_flags_t bf_set_exp_bits(int n) -{ - return ((BF_EXP_BITS_MAX - n) & BF_EXP_BITS_MASK) << BF_EXP_BITS_SHIFT; -} - -/* returned status */ -#define BF_ST_INVALID_OP (1 << 0) -#define BF_ST_DIVIDE_ZERO (1 << 1) -#define BF_ST_OVERFLOW (1 << 2) -#define BF_ST_UNDERFLOW (1 << 3) -#define BF_ST_INEXACT (1 << 4) -/* indicates that a memory allocation error occurred. NaN is returned */ -#define BF_ST_MEM_ERROR (1 << 5) - -#define BF_RADIX_MAX 36 /* maximum radix for bf_atof() and bf_ftoa() */ - -static inline slimb_t bf_max(slimb_t a, slimb_t b) -{ - if (a > b) - return a; - else - return b; -} - -static inline slimb_t bf_min(slimb_t a, slimb_t b) -{ - if (a < b) - return a; - else - return b; -} - -void bf_context_init(bf_context_t *s, bf_realloc_func_t *realloc_func, - void *realloc_opaque); -void bf_context_end(bf_context_t *s); -/* free memory allocated for the bf cache data */ -void bf_clear_cache(bf_context_t *s); - -static inline void *bf_realloc(bf_context_t *s, void *ptr, size_t size) -{ - return s->realloc_func(s->realloc_opaque, ptr, size); -} - -/* 'size' must be != 0 */ -static inline void *bf_malloc(bf_context_t *s, size_t size) -{ - return bf_realloc(s, NULL, size); -} - -static inline void bf_free(bf_context_t *s, void *ptr) -{ - /* must test ptr otherwise equivalent to malloc(0) */ - if (ptr) - bf_realloc(s, ptr, 0); -} - -void bf_init(bf_context_t *s, bf_t *r); - -static inline void bf_delete(bf_t *r) -{ - bf_context_t *s = r->ctx; - /* deleting a zeroed bf_t structure is accepted */ - if (s && r->tab) { - bf_realloc(s, r->tab, 0); - } -} - -static inline void bf_neg(bf_t *r) -{ - r->sign ^= 1; -} - -static inline int bf_is_finite(const bf_t *a) -{ - return (a->expn < BF_EXP_INF); -} - -static inline int bf_is_nan(const bf_t *a) -{ - return (a->expn == BF_EXP_NAN); -} - -static inline int bf_is_zero(const bf_t *a) -{ - return (a->expn == BF_EXP_ZERO); -} - -static inline void bf_memcpy(bf_t *r, const bf_t *a) -{ - *r = *a; -} - -int bf_set_ui(bf_t *r, uint64_t a); -int bf_set_si(bf_t *r, int64_t a); -void bf_set_nan(bf_t *r); -void bf_set_zero(bf_t *r, int is_neg); -void bf_set_inf(bf_t *r, int is_neg); -int bf_set(bf_t *r, const bf_t *a); -void bf_move(bf_t *r, bf_t *a); -int bf_get_float64(const bf_t *a, double *pres, bf_rnd_t rnd_mode); -int bf_set_float64(bf_t *a, double d); - -int 
bf_cmpu(const bf_t *a, const bf_t *b); -int bf_cmp_full(const bf_t *a, const bf_t *b); -int bf_cmp(const bf_t *a, const bf_t *b); -static inline int bf_cmp_eq(const bf_t *a, const bf_t *b) -{ - return bf_cmp(a, b) == 0; -} - -static inline int bf_cmp_le(const bf_t *a, const bf_t *b) -{ - return bf_cmp(a, b) <= 0; -} - -static inline int bf_cmp_lt(const bf_t *a, const bf_t *b) -{ - return bf_cmp(a, b) < 0; -} - -int bf_add(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); -int bf_sub(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); -int bf_add_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec, bf_flags_t flags); -int bf_mul(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); -int bf_mul_ui(bf_t *r, const bf_t *a, uint64_t b1, limb_t prec, bf_flags_t flags); -int bf_mul_si(bf_t *r, const bf_t *a, int64_t b1, limb_t prec, - bf_flags_t flags); -int bf_mul_2exp(bf_t *r, slimb_t e, limb_t prec, bf_flags_t flags); -int bf_div(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, bf_flags_t flags); -#define BF_DIVREM_EUCLIDIAN BF_RNDF -int bf_divrem(bf_t *q, bf_t *r, const bf_t *a, const bf_t *b, - limb_t prec, bf_flags_t flags, int rnd_mode); -int bf_rem(bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags, int rnd_mode); -int bf_remquo(slimb_t *pq, bf_t *r, const bf_t *a, const bf_t *b, limb_t prec, - bf_flags_t flags, int rnd_mode); -/* round to integer with infinite precision */ -int bf_rint(bf_t *r, int rnd_mode); -int bf_round(bf_t *r, limb_t prec, bf_flags_t flags); -int bf_sqrtrem(bf_t *r, bf_t *rem1, const bf_t *a); -int bf_sqrt(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -slimb_t bf_get_exp_min(const bf_t *a); -int bf_logic_or(bf_t *r, const bf_t *a, const bf_t *b); -int bf_logic_xor(bf_t *r, const bf_t *a, const bf_t *b); -int bf_logic_and(bf_t *r, const bf_t *a, const bf_t *b); - -/* additional flags for bf_atof */ -/* do not accept hex radix prefix (0x or 0X) if radix = 0 or radix = 16 */ -#define BF_ATOF_NO_HEX (1 << 16) -/* accept binary (0b or 0B) or octal (0o or 0O) radix prefix if radix = 0 */ -#define BF_ATOF_BIN_OCT (1 << 17) -/* Do not parse NaN or Inf */ -#define BF_ATOF_NO_NAN_INF (1 << 18) -/* return the exponent separately */ -#define BF_ATOF_EXPONENT (1 << 19) - -int bf_atof(bf_t *a, const char *str, const char **pnext, int radix, - limb_t prec, bf_flags_t flags); -/* this version accepts prec = BF_PREC_INF and returns the radix - exponent */ -int bf_atof2(bf_t *r, slimb_t *pexponent, - const char *str, const char **pnext, int radix, - limb_t prec, bf_flags_t flags); -int bf_mul_pow_radix(bf_t *r, const bf_t *T, limb_t radix, - slimb_t expn, limb_t prec, bf_flags_t flags); - - -/* Conversion of floating point number to string. Return a null - terminated string or NULL if memory error. *plen contains its - length if plen != NULL. The exponent letter is "e" for base 10, - "p" for bases 2, 8, 16 with a binary exponent and "@" for the other - bases. */ - -#define BF_FTOA_FORMAT_MASK (3 << 16) - -/* fixed format: prec significant digits rounded with (flags & - BF_RND_MASK). Exponential notation is used if too many zeros are - needed.*/ -#define BF_FTOA_FORMAT_FIXED (0 << 16) -/* fractional format: prec digits after the decimal point rounded with - (flags & BF_RND_MASK) */ -#define BF_FTOA_FORMAT_FRAC (1 << 16) -/* free format: - - For binary radices with bf_ftoa() and for bfdec_ftoa(): use the minimum - number of digits to represent 'a'. The precision and the rounding - mode are ignored. 
- - For non-binary radices with bf_ftoa(): use as many digits as - necessary so that bf_atof() returns the same number when using - precision 'prec', rounding to nearest and the subnormal - configuration of 'flags'. The result is meaningful only if 'a' is - already rounded to 'prec' bits. If the subnormal flag is set, the - exponent in 'flags' must also be set to the desired exponent range. -*/ -#define BF_FTOA_FORMAT_FREE (2 << 16) -/* same as BF_FTOA_FORMAT_FREE but uses the minimum number of digits - (takes more computation time). Identical to BF_FTOA_FORMAT_FREE for - binary radices with bf_ftoa() and for bfdec_ftoa(). */ -#define BF_FTOA_FORMAT_FREE_MIN (3 << 16) - -/* force exponential notation for fixed or free format */ -#define BF_FTOA_FORCE_EXP (1 << 20) -/* add 0x prefix for base 16, 0o prefix for base 8 or 0b prefix for - base 2 if the value is non-zero */ -#define BF_FTOA_ADD_PREFIX (1 << 21) -/* return "Infinity" instead of "Inf" and add a "+" for positive - exponents */ -#define BF_FTOA_JS_QUIRKS (1 << 22) - -char *bf_ftoa(size_t *plen, const bf_t *a, int radix, limb_t prec, - bf_flags_t flags); - -/* modulo 2^n instead of saturation. NaN and infinity return 0 */ -#define BF_GET_INT_MOD (1 << 0) -int bf_get_int32(int *pres, const bf_t *a, int flags); -int bf_get_int64(int64_t *pres, const bf_t *a, int flags); -int bf_get_uint64(uint64_t *pres, const bf_t *a); - -/* the following functions are exported for testing only. */ -void mp_print_str(const char *str, const limb_t *tab, limb_t n); -void bf_print_str(const char *str, const bf_t *a); -int bf_resize(bf_t *r, limb_t len); -int bf_get_fft_size(int *pdpl, int *pnb_mods, limb_t len); -int bf_normalize_and_round(bf_t *r, limb_t prec1, bf_flags_t flags); -int bf_can_round(const bf_t *a, slimb_t prec, bf_rnd_t rnd_mode, slimb_t k); -slimb_t bf_mul_log2_radix(slimb_t a1, unsigned int radix, int is_inv, - int is_ceil1); -int mp_mul(bf_context_t *s, limb_t *result, - const limb_t *op1, limb_t op1_size, - const limb_t *op2, limb_t op2_size); -limb_t mp_add(limb_t *res, const limb_t *op1, const limb_t *op2, - limb_t n, limb_t carry); -limb_t mp_add_ui(limb_t *tab, limb_t b, size_t n); -int mp_sqrtrem(bf_context_t *s, limb_t *tabs, limb_t *taba, limb_t n); -int mp_recip(bf_context_t *s, limb_t *tabr, const limb_t *taba, limb_t n); -limb_t bf_isqrt(limb_t a); - -/* transcendental functions */ -int bf_const_log2(bf_t *T, limb_t prec, bf_flags_t flags); -int bf_const_pi(bf_t *T, limb_t prec, bf_flags_t flags); -int bf_exp(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -int bf_log(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -#define BF_POW_JS_QUIRKS (1 << 16) /* (+/-1)^(+/-Inf) = NaN, 1^NaN = NaN */ -int bf_pow(bf_t *r, const bf_t *x, const bf_t *y, limb_t prec, bf_flags_t flags); -int bf_cos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -int bf_sin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -int bf_tan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -int bf_atan(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -int bf_atan2(bf_t *r, const bf_t *y, const bf_t *x, - limb_t prec, bf_flags_t flags); -int bf_asin(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); -int bf_acos(bf_t *r, const bf_t *a, limb_t prec, bf_flags_t flags); - -/* decimal floating point */ - -static inline void bfdec_init(bf_context_t *s, bfdec_t *r) -{ - bf_init(s, (bf_t *)r); -} -static inline void bfdec_delete(bfdec_t *r) -{ - bf_delete((bf_t *)r); -} - -static inline void bfdec_neg(bfdec_t *r) -{ - r->sign 
^= 1; -} - -static inline int bfdec_is_finite(const bfdec_t *a) -{ - return (a->expn < BF_EXP_INF); -} - -static inline int bfdec_is_nan(const bfdec_t *a) -{ - return (a->expn == BF_EXP_NAN); -} - -static inline int bfdec_is_zero(const bfdec_t *a) -{ - return (a->expn == BF_EXP_ZERO); -} - -static inline void bfdec_memcpy(bfdec_t *r, const bfdec_t *a) -{ - bf_memcpy((bf_t *)r, (const bf_t *)a); -} - -int bfdec_set_ui(bfdec_t *r, uint64_t a); -int bfdec_set_si(bfdec_t *r, int64_t a); - -static inline void bfdec_set_nan(bfdec_t *r) -{ - bf_set_nan((bf_t *)r); -} -static inline void bfdec_set_zero(bfdec_t *r, int is_neg) -{ - bf_set_zero((bf_t *)r, is_neg); -} -static inline void bfdec_set_inf(bfdec_t *r, int is_neg) -{ - bf_set_inf((bf_t *)r, is_neg); -} -static inline int bfdec_set(bfdec_t *r, const bfdec_t *a) -{ - return bf_set((bf_t *)r, (bf_t *)a); -} -static inline void bfdec_move(bfdec_t *r, bfdec_t *a) -{ - bf_move((bf_t *)r, (bf_t *)a); -} -static inline int bfdec_cmpu(const bfdec_t *a, const bfdec_t *b) -{ - return bf_cmpu((const bf_t *)a, (const bf_t *)b); -} -static inline int bfdec_cmp_full(const bfdec_t *a, const bfdec_t *b) -{ - return bf_cmp_full((const bf_t *)a, (const bf_t *)b); -} -static inline int bfdec_cmp(const bfdec_t *a, const bfdec_t *b) -{ - return bf_cmp((const bf_t *)a, (const bf_t *)b); -} -static inline int bfdec_cmp_eq(const bfdec_t *a, const bfdec_t *b) -{ - return bfdec_cmp(a, b) == 0; -} -static inline int bfdec_cmp_le(const bfdec_t *a, const bfdec_t *b) -{ - return bfdec_cmp(a, b) <= 0; -} -static inline int bfdec_cmp_lt(const bfdec_t *a, const bfdec_t *b) -{ - return bfdec_cmp(a, b) < 0; -} - -int bfdec_add(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags); -int bfdec_sub(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags); -int bfdec_add_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec, - bf_flags_t flags); -int bfdec_mul(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags); -int bfdec_mul_si(bfdec_t *r, const bfdec_t *a, int64_t b1, limb_t prec, - bf_flags_t flags); -int bfdec_div(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags); -int bfdec_divrem(bfdec_t *q, bfdec_t *r, const bfdec_t *a, const bfdec_t *b, - limb_t prec, bf_flags_t flags, int rnd_mode); -int bfdec_rem(bfdec_t *r, const bfdec_t *a, const bfdec_t *b, limb_t prec, - bf_flags_t flags, int rnd_mode); -int bfdec_rint(bfdec_t *r, int rnd_mode); -int bfdec_sqrt(bfdec_t *r, const bfdec_t *a, limb_t prec, bf_flags_t flags); -int bfdec_round(bfdec_t *r, limb_t prec, bf_flags_t flags); -int bfdec_get_int32(int *pres, const bfdec_t *a); -int bfdec_pow_ui(bfdec_t *r, const bfdec_t *a, limb_t b); - -char *bfdec_ftoa(size_t *plen, const bfdec_t *a, limb_t prec, bf_flags_t flags); -int bfdec_atof(bfdec_t *r, const char *str, const char **pnext, - limb_t prec, bf_flags_t flags); - -/* the following functions are exported for testing only. 
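   (For orientation, a minimal sketch of how the bfdec API above was
   typically driven; my_realloc is a hypothetical allocator shim and the
   error handling is elided:

       #include <stdlib.h>

       static void *my_realloc(void *opaque, void *ptr, size_t size)
       {
           if (size == 0) { /* libbf passes size 0 to free a block */
               free(ptr);
               return NULL;
           }
           return realloc(ptr, size);
       }

       static void demo(void)
       {
           bf_context_t ctx;
           bfdec_t x, y, q;
           size_t len;
           char *str;

           bf_context_init(&ctx, my_realloc, NULL);
           bfdec_init(&ctx, &x);
           bfdec_init(&ctx, &y);
           bfdec_init(&ctx, &q);
           bfdec_set_si(&x, -7);
           bfdec_set_si(&y, 3);
           bfdec_div(&q, &x, &y, 20, BF_RNDN); /* 20 significant digits */
           str = bfdec_ftoa(&len, &q, 20, BF_RNDN | BF_FTOA_FORMAT_FIXED);
           /* ... use str ... */
           bf_free(&ctx, str);
           bfdec_delete(&x);
           bfdec_delete(&y);
           bfdec_delete(&q);
           bf_context_end(&ctx);
       }
   )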
*/ -extern const limb_t mp_pow_dec[LIMB_DIGITS + 1]; -void bfdec_print_str(const char *str, const bfdec_t *a); -static inline int bfdec_resize(bfdec_t *r, limb_t len) -{ - return bf_resize((bf_t *)r, len); -} -int bfdec_normalize_and_round(bfdec_t *r, limb_t prec1, bf_flags_t flags); - -#ifdef __cplusplus -} /* extern "C" { */ -#endif - -#endif /* LIBBF_H */ diff --git a/lib/monoucha0/monoucha/qjs/libregexp.c b/lib/monoucha0/monoucha/qjs/libregexp.c index 693acbbf..da79dedb 100644 --- a/lib/monoucha0/monoucha/qjs/libregexp.c +++ b/lib/monoucha0/monoucha/qjs/libregexp.c @@ -53,6 +53,9 @@ typedef enum { #define CAPTURE_COUNT_MAX 255 #define STACK_SIZE_MAX 255 +/* must be large enough to have a negligible runtime cost and small + enough to call the interrupt callback often. */ +#define INTERRUPT_COUNTER_INIT 10000 /* unicode code points */ #define CP_LS 0x2028 @@ -2012,6 +2015,7 @@ typedef struct { bool multi_line; bool ignore_case; bool is_unicode; + int interrupt_counter; void *opaque; /* used for stack overflow check */ size_t state_size; @@ -2058,7 +2062,17 @@ static int push_state(REExecContext *s, return 0; } -/* return 1 if match, 0 if not match or -1 if error. */ +static int lre_poll_timeout(REExecContext *s) +{ + if (unlikely(--s->interrupt_counter <= 0)) { + s->interrupt_counter = INTERRUPT_COUNTER_INIT; + if (lre_check_timeout(s->opaque)) + return LRE_RET_TIMEOUT; + } + return 0; +} + +/* return 1 if match, 0 if not match or < 0 if error. */ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, StackInt *stack, int stack_len, const uint8_t *pc, const uint8_t *cptr, @@ -2089,6 +2103,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, ret = 0; recurse: for(;;) { + if (lre_poll_timeout(s)) + return LRE_RET_TIMEOUT; if (s->state_stack_len == 0) return ret; rs = (REExecState *)(s->state_stack + @@ -2182,7 +2198,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, ret = push_state(s, capture, stack, stack_len, pc1, cptr, RE_EXEC_STATE_SPLIT, 0); if (ret < 0) - return -1; + return LRE_RET_MEMORY_ERROR; break; } case REOP_lookahead: @@ -2194,12 +2210,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, RE_EXEC_STATE_LOOKAHEAD + opcode - REOP_lookahead, 0); if (ret < 0) - return -1; + return LRE_RET_MEMORY_ERROR; break; case REOP_goto: val = get_u32(pc); pc += 4 + (int)val; + if (lre_poll_timeout(s)) + return LRE_RET_TIMEOUT; break; case REOP_line_start: if (cptr == s->cbuf) @@ -2264,6 +2282,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, pc += 4; if (--stack[stack_len - 1] != 0) { pc += (int)val; + if (lre_poll_timeout(s)) + return LRE_RET_TIMEOUT; } break; case REOP_push_char_pos: @@ -2438,9 +2458,12 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, q = 0; for(;;) { + if (lre_poll_timeout(s)) + return LRE_RET_TIMEOUT; res = lre_exec_backtrack(s, capture, stack, stack_len, pc1, cptr, true); - if (res == -1) + if (res == LRE_RET_MEMORY_ERROR || + res == LRE_RET_TIMEOUT) return res; if (!res) break; @@ -2458,7 +2481,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, RE_EXEC_STATE_GREEDY_QUANT, q - quant_min); if (ret < 0) - return -1; + return LRE_RET_MEMORY_ERROR; } } break; @@ -2468,7 +2491,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, } } -/* Return 1 if match, 0 if not match or -1 if error. cindex is the +/* Return 1 if match, 0 if not match or < 0 if error (see LRE_RET_x). 
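   (A note on the timeout plumbing added above: lre_poll_timeout()
   decrements a counter and only consults the user-supplied
   lre_check_timeout() once every INTERRUPT_COUNTER_INIT steps, so a match
   attempt fails with LRE_RET_TIMEOUT shortly after the callback first
   returns non-zero. A hypothetical host implementation could compare a
   monotonic clock against a deadline carried in 'opaque', e.g.:

       int lre_check_timeout(void *opaque)
       {
           struct my_exec_ctx *c = opaque;           /* hypothetical host context */
           return monotonic_ms() >= c->deadline_ms;  /* assumed clock helper */
       }
   )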
cindex is the starting position of the match and must be such that 0 <= cindex <= clen. */ int lre_exec(uint8_t **capture, @@ -2492,6 +2515,7 @@ int lre_exec(uint8_t **capture, s->cbuf_type = cbuf_type; if (s->cbuf_type == 1 && s->is_unicode) s->cbuf_type = 2; + s->interrupt_counter = INTERRUPT_COUNTER_INIT; s->opaque = opaque; s->state_size = sizeof(REExecState) + diff --git a/lib/monoucha0/monoucha/qjs/libregexp.h b/lib/monoucha0/monoucha/qjs/libregexp.h index 0b8fec52..898e9a7a 100644 --- a/lib/monoucha0/monoucha/qjs/libregexp.h +++ b/lib/monoucha0/monoucha/qjs/libregexp.h @@ -43,6 +43,9 @@ extern "C" { #define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */ #define LRE_FLAG_UNICODE_SETS (1 << 8) +#define LRE_RET_MEMORY_ERROR (-1) +#define LRE_RET_TIMEOUT (-2) + uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, const char *buf, size_t buf_len, int re_flags, void *opaque); @@ -60,6 +63,8 @@ void lre_byte_swap(uint8_t *buf, size_t len, bool is_byte_swapped); /* must be provided by the user */ bool lre_check_stack_overflow(void *opaque, size_t alloca_size); +/* must be provided by the user; return non-zero on timeout */ +int lre_check_timeout(void *opaque); void *lre_realloc(void *opaque, void *ptr, size_t size); /* JS identifier test */ diff --git a/lib/monoucha0/monoucha/qjs/quickjs-atom.h b/lib/monoucha0/monoucha/qjs/quickjs-atom.h index 358fe230..67e17e7e 100644 --- a/lib/monoucha0/monoucha/qjs/quickjs-atom.h +++ b/lib/monoucha0/monoucha/qjs/quickjs-atom.h @@ -154,6 +154,7 @@ DEF(brand, "<brand>") DEF(hash_constructor, "#constructor") DEF(as, "as") DEF(from, "from") +DEF(fromAsync, "fromAsync") DEF(meta, "meta") DEF(_default_, "*default*") DEF(_star_, "*") diff --git a/lib/monoucha0/monoucha/qjs/quickjs-opcode.h b/lib/monoucha0/monoucha/qjs/quickjs-opcode.h index 42c3faee..bd5be754 100644 --- a/lib/monoucha0/monoucha/qjs/quickjs-opcode.h +++ b/lib/monoucha0/monoucha/qjs/quickjs-opcode.h @@ -264,6 +264,7 @@ DEF( strict_eq, 1, 2, 1, none) DEF( strict_neq, 1, 2, 1, none) DEF(is_undefined_or_null, 1, 1, 1, none) DEF( private_in, 1, 2, 1, none) +DEF(push_bigint_i32, 5, 0, 1, i32) /* must be the last non short and non temporary opcode */ DEF( nop, 1, 0, 0, none) diff --git a/lib/monoucha0/monoucha/qjs/quickjs.c b/lib/monoucha0/monoucha/qjs/quickjs.c index 8909a934..80c492bb 100644 --- a/lib/monoucha0/monoucha/qjs/quickjs.c +++ b/lib/monoucha0/monoucha/qjs/quickjs.c @@ -1,8 +1,8 @@ /* * QuickJS Javascript Engine * - * Copyright (c) 2017-2024 Fabrice Bellard - * Copyright (c) 2017-2024 Charlie Gordon + * Copyright (c) 2017-2025 Fabrice Bellard + * Copyright (c) 2017-2025 Charlie Gordon * Copyright (c) 2023-2025 Ben Noordhuis * Copyright (c) 2023-2025 Saúl Ibarra Corretgé * @@ -47,7 +47,7 @@ #include "list.h" #include "quickjs.h" #include "libregexp.h" -#include "libbf.h" +#include "xsum.h" #if defined(EMSCRIPTEN) || defined(_MSC_VER) #define DIRECT_DISPATCH 0 @@ -237,6 +237,11 @@ typedef struct JSRuntimeFinalizerState { void *arg; } JSRuntimeFinalizerState; +typedef struct JSValueLink { + struct JSValueLink *next; + JSValueConst value; +} JSValueLink; + struct JSRuntime { JSMallocFunctions mf; JSMallocState malloc_state; @@ -286,6 +291,12 @@ struct JSRuntime { JSInterruptHandler *interrupt_handler; void *interrupt_opaque; + JSPromiseHook *promise_hook; + void *promise_hook_opaque; + // for smuggling the parent promise from js_promise_then + // to js_promise_constructor + JSValueLink *parent_promise; + JSHostPromiseRejectionTracker
*host_promise_rejection_tracker; void *host_promise_rejection_tracker_opaque; @@ -308,7 +319,6 @@ struct JSRuntime { int shape_hash_size; int shape_hash_count; /* number of hashed shapes */ JSShape **shape_hash; - bf_context_t bf_ctx; void *user_opaque; void *libc_opaque; JSRuntimeFinalizerState *finalizers; @@ -369,15 +379,7 @@ typedef struct JSVarRef { struct { int __gc_ref_count; /* corresponds to header.ref_count */ uint8_t __gc_mark; /* corresponds to header.mark/gc_obj_type */ - - /* 0 : the JSVarRef is on the stack. header.link is an element - of JSStackFrame.var_ref_list. - 1 : the JSVarRef is detached. header.link has the normal meanning - */ - uint8_t is_detached : 1; - uint8_t is_arg : 1; - uint16_t var_idx; /* index of the corresponding function variable on - the stack */ + bool is_detached; }; }; JSValue *pvalue; /* pointer to the value, either on the stack or @@ -389,19 +391,48 @@ typedef struct JSRefCountHeader { int ref_count; } JSRefCountHeader; -/* the same structure is used for big integers. - Big integers are never infinite or NaNs */ +/* bigint */ +typedef int32_t js_slimb_t; +typedef uint32_t js_limb_t; +typedef int64_t js_sdlimb_t; +typedef uint64_t js_dlimb_t; + +#define JS_LIMB_DIGITS 9 + +/* Must match the size of short_big_int in JSValueUnion */ +#define JS_LIMB_BITS 32 +#define JS_SHORT_BIG_INT_BITS JS_LIMB_BITS +#define JS_BIGINT_MAX_SIZE ((1024 * 1024) / JS_LIMB_BITS) /* in limbs */ +#define JS_SHORT_BIG_INT_MIN INT32_MIN +#define JS_SHORT_BIG_INT_MAX INT32_MAX + + typedef struct JSBigInt { JSRefCountHeader header; /* must come first, 32-bit */ - bf_t num; + uint32_t len; /* number of limbs, >= 1 */ + js_limb_t tab[]; /* two's complement representation, always + normalized so that 'len' is the minimum + possible length >= 1 */ } JSBigInt; +/* this bigint structure can hold a 64 bit integer */ +typedef struct { + js_limb_t big_int_buf[sizeof(JSBigInt) / sizeof(js_limb_t)]; /* for JSBigInt */ + /* must come just after */ + js_limb_t tab[(64 + JS_LIMB_BITS - 1) / JS_LIMB_BITS]; +} JSBigIntBuf; + typedef enum { JS_AUTOINIT_ID_PROTOTYPE, JS_AUTOINIT_ID_MODULE_NS, JS_AUTOINIT_ID_PROP, + JS_AUTOINIT_ID_BYTECODE, } JSAutoInitIDEnum; +enum { + JS_BUILTIN_ARRAY_FROMASYNC = 1, +}; + /* must be large enough to have a negligible runtime cost and small enough to call the interrupt callback often. 
*/ #define JS_INTERRUPT_COUNTER_INIT 10000 @@ -440,7 +471,7 @@ struct JSContext { double time_origin; uint64_t random_state; - bf_context_t *bf_ctx; /* points to rt->bf_ctx, shared by all contexts */ + /* when the counter reaches zero, JSRuntime.interrupt_handler is called */ int interrupt_counter; @@ -478,6 +509,26 @@ typedef struct JSWeakRefRecord { } u; } JSWeakRefRecord; +typedef struct JSMapRecord { + int ref_count; /* used during enumeration to avoid freeing the record */ + bool empty; /* true if the record is deleted */ + struct JSMapState *map; + struct list_head link; + struct list_head hash_link; + JSValue key; + JSValue value; +} JSMapRecord; + +typedef struct JSMapState { + bool is_weak; /* true if WeakSet/WeakMap */ + struct list_head records; /* list of JSMapRecord.link */ + uint32_t record_count; + struct list_head *hash_table; + uint32_t hash_size; /* must be a power of two */ + uint32_t record_count_threshold; /* count at which a hash table + resize is needed */ +} JSMapState; + enum { JS_ATOM_TYPE_STRING = 1, JS_ATOM_TYPE_GLOBAL_SYMBOL, @@ -1122,6 +1173,12 @@ static int JS_NewClass1(JSRuntime *rt, JSClassID class_id, const JSClassDef *class_def, JSAtom name); static JSValue js_array_push(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv, int unshift); +static JSValue js_array_constructor(JSContext *ctx, JSValueConst new_target, + int argc, JSValueConst *argv); +static JSValue js_error_constructor(JSContext *ctx, JSValueConst new_target, + int argc, JSValueConst *argv, int magic); +static JSValue js_object_defineProperty(JSContext *ctx, JSValueConst this_val, + int argc, JSValueConst *argv, int magic); typedef enum JSStrictEqModeEnum { JS_EQ_STRICT, @@ -1137,18 +1194,7 @@ static bool js_same_value_zero(JSContext *ctx, JSValueConst op1, JSValueConst op static JSValue JS_ToObjectFree(JSContext *ctx, JSValue val); static JSProperty *add_property(JSContext *ctx, JSObject *p, JSAtom prop, int prop_flags); -static JSValue JS_NewBigInt(JSContext *ctx); -static inline bf_t *JS_GetBigInt(JSValueConst val) -{ - JSBigInt *p = JS_VALUE_GET_PTR(val); - return &p->num; -} -static JSValue JS_CompactBigInt1(JSContext *ctx, JSValue val); -static JSValue JS_CompactBigInt(JSContext *ctx, JSValue val); static int JS_ToBigInt64Free(JSContext *ctx, int64_t *pres, JSValue val); -static bf_t *JS_ToBigInt(JSContext *ctx, bf_t *buf, JSValueConst val); -static bf_t *JS_ToBigInt1(JSContext *ctx, bf_t *buf, JSValueConst val); -static void JS_FreeBigInt(JSContext *ctx, bf_t *a, bf_t *buf); JSValue JS_ThrowOutOfMemory(JSContext *ctx); static JSValue JS_ThrowTypeErrorRevokedProxy(JSContext *ctx); static JSValue js_proxy_getPrototypeOf(JSContext *ctx, JSValueConst obj); @@ -1505,13 +1551,6 @@ void *js_mallocz_rt(JSRuntime *rt, size_t size) return js_calloc_rt(rt, 1, size); } -/* called by libbf */ -static void *js_bf_realloc(void *opaque, void *ptr, size_t size) -{ - JSRuntime *rt = opaque; - return js_realloc_rt(rt, ptr, size); -} - /* Throw out of memory in case of error */ void *js_calloc(JSContext *ctx, size_t count, size_t size) { @@ -1691,8 +1730,8 @@ static JSClassShortDef const js_std_class_def[] = { { JS_ATOM_BigInt, js_object_data_finalizer, js_object_data_mark }, /* JS_CLASS_BIG_INT */ { JS_ATOM_Map, js_map_finalizer, js_map_mark }, /* JS_CLASS_MAP */ { JS_ATOM_Set, js_map_finalizer, js_map_mark }, /* JS_CLASS_SET */ - { JS_ATOM_WeakMap, js_map_finalizer, js_map_mark }, /* JS_CLASS_WEAKMAP */ - { JS_ATOM_WeakSet, js_map_finalizer, js_map_mark }, /* JS_CLASS_WEAKSET */ + {
JS_ATOM_WeakMap, js_map_finalizer, NULL }, /* JS_CLASS_WEAKMAP */ + { JS_ATOM_WeakSet, js_map_finalizer, NULL }, /* JS_CLASS_WEAKSET */ { JS_ATOM_Iterator, NULL, NULL }, /* JS_CLASS_ITERATOR */ { JS_ATOM_IteratorHelper, js_iterator_helper_finalizer, js_iterator_helper_mark }, /* JS_CLASS_ITERATOR_HELPER */ { JS_ATOM_IteratorWrap, js_iterator_wrap_finalizer, js_iterator_wrap_mark }, /* JS_CLASS_ITERATOR_WRAP */ @@ -1770,8 +1809,6 @@ JSRuntime *JS_NewRuntime2(const JSMallocFunctions *mf, void *opaque) rt->malloc_state = ms; rt->malloc_gc_threshold = 256 * 1024; - bf_context_init(&rt->bf_ctx, js_bf_realloc, rt); - init_list_head(&rt->context_list); init_list_head(&rt->gc_obj_list); init_list_head(&rt->gc_zero_ref_count_list); @@ -2127,8 +2164,6 @@ void JS_FreeRuntime(JSRuntime *rt) } js_free_rt(rt, rt->class_array); - bf_context_end(&rt->bf_ctx); - #ifdef ENABLE_DUMPS // JS_DUMP_ATOM_LEAKS /* only the atoms defined in JS_InitAtoms() should be left */ if (check_dump_flag(rt, JS_DUMP_ATOM_LEAKS)) { @@ -2275,7 +2310,6 @@ JSContext *JS_NewContextRaw(JSRuntime *rt) } ctx->rt = rt; list_add_tail(&ctx->link, &rt->context_list); - ctx->bf_ctx = &rt->bf_ctx; for(i = 0; i < rt->class_count; i++) ctx->class_proto[i] = JS_NULL; ctx->array_ctor = JS_NULL; @@ -3445,6 +3479,8 @@ const char *JS_AtomToCString(JSContext *ctx, JSAtom atom) return cstr; } +#ifndef QJS_DISABLE_PARSER + /* return a string atom containing name concatenated with str1 */ /* `str1` may be pure ASCII or UTF-8 encoded */ // TODO(chqrlie): use string concatenation instead of UTF-8 conversion @@ -3487,6 +3523,8 @@ static JSAtom js_atom_concat_num(JSContext *ctx, JSAtom name, uint32_t n) return js_atom_concat_str(ctx, name, buf); } +#endif // QJS_DISABLE_PARSER + static inline bool JS_IsEmptyString(JSValueConst v) { return JS_VALUE_GET_TAG(v) == JS_TAG_STRING && JS_VALUE_GET_STRING(v)->len == 0; @@ -4041,6 +4079,19 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len) return JS_MKPTR(JS_TAG_STRING, str); } +JSValue JS_NewTwoByteString(JSContext *ctx, const uint16_t *buf, size_t len) +{ + JSString *str; + + if (!len) + return JS_AtomToString(ctx, JS_ATOM_empty_string); + str = js_alloc_string(ctx, len, 1); + if (!str) + return JS_EXCEPTION; + memcpy(str16(str), buf, len * sizeof(*buf)); + return JS_MKPTR(JS_TAG_STRING, str); +} + static JSValue JS_ConcatString3(JSContext *ctx, const char *str1, JSValue str2, const char *str3) { @@ -5778,9 +5829,8 @@ static void js_free_value_rt(JSRuntime *rt, JSValue v) break; case JS_TAG_BIG_INT: { - JSBigInt *bf = JS_VALUE_GET_PTR(v); - bf_delete(&bf->num); - js_free_rt(rt, bf); + JSBigInt *p = JS_VALUE_GET_PTR(v); + js_free_rt(rt, p); } break; case JS_TAG_SYMBOL: @@ -5839,6 +5889,22 @@ void JS_MarkValue(JSRuntime *rt, JSValueConst val, JS_MarkFunc *mark_func) } } +static void mark_weak_map_value(JSRuntime *rt, JSWeakRefRecord *first_weak_ref, JS_MarkFunc *mark_func) { + JSWeakRefRecord *wr; + JSMapRecord *mr; + JSMapState *s; + + for (wr = first_weak_ref; wr != NULL; wr = wr->next_weak_ref) { + if (wr->kind == JS_WEAK_REF_KIND_MAP) { + mr = wr->u.map_record; + s = mr->map; + assert(s->is_weak); + assert(!mr->empty); /* no iterator on WeakMap/WeakSet */ + JS_MarkValue(rt, mr->value, mark_func); + } + } +} + static void mark_children(JSRuntime *rt, JSGCObjectHeader *gp, JS_MarkFunc *mark_func) { @@ -5878,6 +5944,10 @@ static void mark_children(JSRuntime *rt, JSGCObjectHeader *gp, prs++; } + if (unlikely(p->first_weak_ref)) { + mark_weak_map_value(rt, p->first_weak_ref, mark_func); + } 
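The weak-reference hunk above moves WeakMap/WeakSet marking from the container to the key: js_std_class_def now registers both classes with a NULL gc_mark, and mark_weak_map_value instead walks an object's weak-ref records when the object itself is marked, marking the mapped values so that a value stays alive exactly as long as its key does. A toy model of that rule follows; it is a sketch with invented Obj/Entry names, not code from this commit:

#include <stdbool.h>
#include <stdio.h>

typedef struct Obj { bool marked; } Obj;
typedef struct Entry { Obj *key, *value; } Entry; /* one weak-map record */

/* marking a key propagates the mark to the value it maps to,
   mirroring the mark_weak_map_value() walk added above */
static void mark_obj(Obj *o, Entry *tab, int n)
{
    int i;
    if (o->marked)
        return;
    o->marked = true;
    for (i = 0; i < n; i++)
        if (tab[i].key == o)
            mark_obj(tab[i].value, tab, n);
}

int main(void)
{
    Obj key = { false }, val = { false }, unref = { false };
    Entry map[1] = { { &key, &val } };
    mark_obj(&key, map, 1);
    printf("val=%d unref=%d\n", val.marked, unref.marked); /* val=1 unref=0 */
    return 0;
}

A value whose key is never marked stays unmarked and can be collected, which is why the weak containers must no longer mark their entries wholesale.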
+ if (p->class_id != JS_CLASS_OBJECT) { JSClassGCMark *gc_mark; gc_mark = rt->class_array[p->class_id].gc_mark; @@ -7219,15 +7289,19 @@ static JSValue JS_ThrowTypeErrorInvalidClass(JSContext *ctx, int class_id) return JS_ThrowTypeErrorAtom(ctx, "%s object expected", name); } +static void JS_ThrowInterrupted(JSContext *ctx) +{ + JS_ThrowInternalError(ctx, "interrupted"); + JS_SetUncatchableError(ctx, ctx->rt->current_exception); +} + static no_inline __exception int __js_poll_interrupts(JSContext *ctx) { JSRuntime *rt = ctx->rt; ctx->interrupt_counter = JS_INTERRUPT_COUNTER_INIT; if (rt->interrupt_handler) { if (rt->interrupt_handler(rt, rt->interrupt_opaque)) { - /* XXX: should set a specific flag to avoid catching */ - JS_ThrowInternalError(ctx, "interrupted"); - js_set_uncatchable_error(ctx, ctx->rt->current_exception, true); + JS_ThrowInterrupted(ctx); return -1; } } @@ -7331,6 +7405,7 @@ static JSValueConst JS_GetPrototypePrimitive(JSContext *ctx, JSValueConst val) { JSValue ret; switch(JS_VALUE_GET_NORM_TAG(val)) { + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: ret = ctx->class_proto[JS_CLASS_BIG_INT]; break; @@ -7494,13 +7569,55 @@ int JS_IsInstanceOf(JSContext *ctx, JSValueConst val, JSValueConst obj) return JS_OrdinaryIsInstanceOf(ctx, val, obj); } +#include "builtin-array-fromasync.h" + +static JSValue js_bytecode_autoinit(JSContext *ctx, JSObject *p, JSAtom atom, + void *opaque) +{ + switch ((uintptr_t)opaque) { + default: + abort(); + case JS_BUILTIN_ARRAY_FROMASYNC: + { + JSValue obj = JS_ReadObject(ctx, qjsc_builtin_array_fromasync, + sizeof(qjsc_builtin_array_fromasync), + JS_READ_OBJ_BYTECODE); + if (JS_IsException(obj)) + return JS_EXCEPTION; + JSValue fun = JS_EvalFunction(ctx, obj); + if (JS_IsException(fun)) + return JS_EXCEPTION; + assert(JS_IsFunction(ctx, fun)); + JSValue args[] = { + JS_NewCFunction(ctx, js_array_constructor, "Array", 0), + JS_NewCFunctionMagic(ctx, js_error_constructor, "TypeError", 1, + JS_CFUNC_constructor_or_func_magic, + JS_TYPE_ERROR), + JS_AtomToValue(ctx, JS_ATOM_Symbol_asyncIterator), + JS_NewCFunctionMagic(ctx, js_object_defineProperty, + "Object.defineProperty", 3, + JS_CFUNC_generic_magic, 0), + JS_AtomToValue(ctx, JS_ATOM_Symbol_iterator), + }; + JSValue result = JS_Call(ctx, fun, JS_UNDEFINED, + countof(args), vc(args)); + for (size_t i = 0; i < countof(args); i++) + JS_FreeValue(ctx, args[i]); + JS_FreeValue(ctx, fun); + return result; + } + } + return JS_UNDEFINED; +} + /* return the value associated to the autoinit property or an exception */ typedef JSValue JSAutoInitFunc(JSContext *ctx, JSObject *p, JSAtom atom, void *opaque); -static JSAutoInitFunc *js_autoinit_func_table[] = { +static JSAutoInitFunc *const js_autoinit_func_table[] = { js_instantiate_prototype, /* JS_AUTOINIT_ID_PROTOTYPE */ js_module_ns_autoinit, /* JS_AUTOINIT_ID_MODULE_NS */ JS_InstantiateFunctionListItem2, /* JS_AUTOINIT_ID_PROP */ + js_bytecode_autoinit, /* JS_AUTOINIT_ID_BYTECODE */ }; /* warning: 'prs' is reallocated after it */ @@ -7534,9 +7651,8 @@ static JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj, JSObject *p; JSProperty *pr; JSShapeProperty *prs; - uint32_t tag, proto_depth; + uint32_t tag; - proto_depth = 0; tag = JS_VALUE_GET_TAG(obj); if (unlikely(tag != JS_TAG_OBJECT)) { switch(tag) { @@ -7658,7 +7774,6 @@ static JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj, } } } - proto_depth++; p = p->shape->proto; if (!p) break; @@ -8988,6 +9103,8 @@ retry: goto retry2; } else if (!(prs->flags & JS_PROP_WRITABLE)) { 
goto read_only_prop; + } else { + break; + } } } @@ -10442,11 +10559,24 @@ static int JS_ToBoolFree(JSContext *ctx, JSValue val) JS_FreeValue(ctx, val); return ret; } + case JS_TAG_SHORT_BIG_INT: + return JS_VALUE_GET_SHORT_BIG_INT(val) != 0; case JS_TAG_BIG_INT: { JSBigInt *p = JS_VALUE_GET_PTR(val); bool ret; - ret = p->num.expn != BF_EXP_ZERO && p->num.expn != BF_EXP_NAN; + int i; + + /* fail safe: we assume it is not necessarily + normalized. Beginning from the MSB ensures that the + test is fast. */ + ret = false; + for(i = p->len - 1; i >= 0; i--) { + if (p->tab[i] != 0) { + ret = true; + break; + } + } JS_FreeValue(ctx, val); return ret; } @@ -10509,6 +10639,1392 @@ static inline int to_digit(int c) return 36; } +/* bigint support */ + +#define ADDC(res, carry_out, op1, op2, carry_in) \ +do { \ + js_limb_t __v, __a, __k, __k1; \ + __v = (op1); \ + __a = __v + (op2); \ + __k1 = __a < __v; \ + __k = (carry_in); \ + __a = __a + __k; \ + carry_out = (__a < __k) | __k1; \ + res = __a; \ +} while (0) + +/* a != 0 */ +static inline js_limb_t js_limb_clz(js_limb_t a) +{ + return clz32(a); +} + +static js_limb_t mp_add(js_limb_t *res, const js_limb_t *op1, const js_limb_t *op2, + js_limb_t n, js_limb_t carry) +{ + int i; + for(i = 0;i < n; i++) { + ADDC(res[i], carry, op1[i], op2[i], carry); + } + return carry; +} + +static js_limb_t mp_sub(js_limb_t *res, const js_limb_t *op1, const js_limb_t *op2, + int n, js_limb_t carry) +{ + int i; + js_limb_t k, a, v, k1; + + k = carry; + for(i=0;i<n;i++) { + v = op1[i]; + a = v - op2[i]; + k1 = a > v; + v = a - k; + k = (v > a) | k1; + res[i] = v; + } + return k; +} + +/* compute 0 - op2. carry = 0 or 1. */ +static js_limb_t mp_neg(js_limb_t *res, const js_limb_t *op2, int n) +{ + int i; + js_limb_t v, carry; + + carry = 1; + for(i=0;i<n;i++) { + v = ~op2[i] + carry; + carry = v < carry; + res[i] = v; + } + return carry; +} + +/* tabr[] = taba[] * b + l. Return the high carry */ +static js_limb_t mp_mul1(js_limb_t *tabr, const js_limb_t *taba, js_limb_t n, + js_limb_t b, js_limb_t l) +{ + js_limb_t i; + js_dlimb_t t; + + for(i = 0; i < n; i++) { + t = (js_dlimb_t)taba[i] * (js_dlimb_t)b + l; + tabr[i] = t; + l = t >> JS_LIMB_BITS; + } + return l; +} + +static js_limb_t mp_div1(js_limb_t *tabr, const js_limb_t *taba, js_limb_t n, + js_limb_t b, js_limb_t r) +{ + js_slimb_t i; + js_dlimb_t a1; + for(i = n - 1; i >= 0; i--) { + a1 = ((js_dlimb_t)r << JS_LIMB_BITS) | taba[i]; + tabr[i] = a1 / b; + r = a1 % b; + } + return r; +} + +/* tabr[] += taba[] * b, return the high word. */ +static js_limb_t mp_add_mul1(js_limb_t *tabr, const js_limb_t *taba, js_limb_t n, + js_limb_t b) +{ + js_limb_t i, l; + js_dlimb_t t; + + l = 0; + for(i = 0; i < n; i++) { + t = (js_dlimb_t)taba[i] * (js_dlimb_t)b + l + tabr[i]; + tabr[i] = t; + l = t >> JS_LIMB_BITS; + } + return l; +} + +/* size of the result: op1_size + op2_size. */ +static void mp_mul_basecase(js_limb_t *result, + const js_limb_t *op1, js_limb_t op1_size, + const js_limb_t *op2, js_limb_t op2_size) +{ + int i; + js_limb_t r; + + result[op1_size] = mp_mul1(result, op1, op1_size, op2[0], 0); + for(i=1;i<op2_size;i++) { + r = mp_add_mul1(result + i, op1, op1_size, op2[i]); + result[i + op1_size] = r; + } +} + +/* tabr[] -= taba[] * b. Return the value to subtract from the high + word.
*/ +static js_limb_t mp_sub_mul1(js_limb_t *tabr, const js_limb_t *taba, js_limb_t n, + js_limb_t b) +{ + js_limb_t i, l; + js_dlimb_t t; + + l = 0; + for(i = 0; i < n; i++) { + t = tabr[i] - (js_dlimb_t)taba[i] * (js_dlimb_t)b - l; + tabr[i] = t; + l = -(t >> JS_LIMB_BITS); + } + return l; +} + +/* WARNING: d must be >= 2^(JS_LIMB_BITS-1) */ +static inline js_limb_t udiv1norm_init(js_limb_t d) +{ + js_limb_t a0, a1; + a1 = -d - 1; + a0 = -1; + return (((js_dlimb_t)a1 << JS_LIMB_BITS) | a0) / d; +} + +/* return the quotient and the remainder in '*pr' of 'a1*2^JS_LIMB_BITS+a0 + / d' with 0 <= a1 < d. */ +static inline js_limb_t udiv1norm(js_limb_t *pr, js_limb_t a1, js_limb_t a0, + js_limb_t d, js_limb_t d_inv) +{ + js_limb_t n1m, n_adj, q, r, ah; + js_dlimb_t a; + n1m = ((js_slimb_t)a0 >> (JS_LIMB_BITS - 1)); + n_adj = a0 + (n1m & d); + a = (js_dlimb_t)d_inv * (a1 - n1m) + n_adj; + q = (a >> JS_LIMB_BITS) + a1; + /* compute a - q * d and update q so that the remainder is + between 0 and d - 1 */ + a = ((js_dlimb_t)a1 << JS_LIMB_BITS) | a0; + a = a - (js_dlimb_t)q * d - d; + ah = a >> JS_LIMB_BITS; + q += 1 + ah; + r = (js_limb_t)a + (ah & d); + *pr = r; + return q; +} + +#define UDIV1NORM_THRESHOLD 3 + +/* b must be >= 1 << (JS_LIMB_BITS - 1) */ +static js_limb_t mp_div1norm(js_limb_t *tabr, const js_limb_t *taba, js_limb_t n, + js_limb_t b, js_limb_t r) +{ + js_slimb_t i; + + if (n >= UDIV1NORM_THRESHOLD) { + js_limb_t b_inv; + b_inv = udiv1norm_init(b); + for(i = n - 1; i >= 0; i--) { + tabr[i] = udiv1norm(&r, r, taba[i], b, b_inv); + } + } else { + js_dlimb_t a1; + for(i = n - 1; i >= 0; i--) { + a1 = ((js_dlimb_t)r << JS_LIMB_BITS) | taba[i]; + tabr[i] = a1 / b; + r = a1 % b; + } + } + return r; +} + +/* base case division: divides taba[0..na-1] by tabb[0..nb-1]. tabb[nb + - 1] must be >= 1 << (JS_LIMB_BITS - 1). na - nb must be >= 0. 'taba' + is modified and contains the remainder (nb limbs). tabq[0..na-nb] + contains the quotient with tabq[na - nb] <= 1.
*/ +static void mp_divnorm(js_limb_t *tabq, js_limb_t *taba, js_limb_t na, + const js_limb_t *tabb, js_limb_t nb) +{ + js_limb_t r, a, c, q, v, b1, b1_inv, n, dummy_r; + int i, j; + + b1 = tabb[nb - 1]; + if (nb == 1) { + taba[0] = mp_div1norm(tabq, taba, na, b1, 0); + return; + } + n = na - nb; + + if (n >= UDIV1NORM_THRESHOLD) + b1_inv = udiv1norm_init(b1); + else + b1_inv = 0; + + /* first iteration: the quotient is only 0 or 1 */ + q = 1; + for(j = nb - 1; j >= 0; j--) { + if (taba[n + j] != tabb[j]) { + if (taba[n + j] < tabb[j]) + q = 0; + break; + } + } + tabq[n] = q; + if (q) { + mp_sub(taba + n, taba + n, tabb, nb, 0); + } + + for(i = n - 1; i >= 0; i--) { + if (unlikely(taba[i + nb] >= b1)) { + q = -1; + } else if (b1_inv) { + q = udiv1norm(&dummy_r, taba[i + nb], taba[i + nb - 1], b1, b1_inv); + } else { + js_dlimb_t al; + al = ((js_dlimb_t)taba[i + nb] << JS_LIMB_BITS) | taba[i + nb - 1]; + q = al / b1; + r = al % b1; + } + r = mp_sub_mul1(taba + i, tabb, nb, q); + + v = taba[i + nb]; + a = v - r; + c = (a > v); + taba[i + nb] = a; + + if (c != 0) { + /* negative result */ + for(;;) { + q--; + c = mp_add(taba + i, taba + i, tabb, nb, 0); + /* propagate carry and test if positive result */ + if (c != 0) { + if (++taba[i + nb] == 0) { + break; + } + } + } + } + tabq[i] = q; + } +} + +/* 1 <= shift <= JS_LIMB_BITS - 1 */ +static js_limb_t mp_shl(js_limb_t *tabr, const js_limb_t *taba, int n, + int shift) +{ + int i; + js_limb_t l, v; + l = 0; + for(i = 0; i < n; i++) { + v = taba[i]; + tabr[i] = (v << shift) | l; + l = v >> (JS_LIMB_BITS - shift); + } + return l; +} + +/* r = (a + high*B^n) >> shift. Return the remainder r (0 <= r < 2^shift). + 1 <= shift <= LIMB_BITS - 1 */ +static js_limb_t mp_shr(js_limb_t *tab_r, const js_limb_t *tab, int n, + int shift, js_limb_t high) +{ + int i; + js_limb_t l, a; + + l = high; + for(i = n - 1; i >= 0; i--) { + a = tab[i]; + tab_r[i] = (a >> shift) | (l << (JS_LIMB_BITS - shift)); + l = a; + } + return l & (((js_limb_t)1 << shift) - 1); +} + +static JSBigInt *js_bigint_new(JSContext *ctx, int len) +{ + JSBigInt *r; + if (len > JS_BIGINT_MAX_SIZE) { + JS_ThrowRangeError(ctx, "BigInt is too large to allocate"); + return NULL; + } + r = js_malloc(ctx, sizeof(JSBigInt) + len * sizeof(js_limb_t)); + if (!r) + return NULL; + r->header.ref_count = 1; + r->len = len; + return r; +} + +static JSBigInt *js_bigint_set_si(JSBigIntBuf *buf, js_slimb_t a) +{ + JSBigInt *r = (JSBigInt *)buf->big_int_buf; + r->header.ref_count = 0; /* fail safe */ + r->len = 1; + r->tab[0] = a; + return r; +} + +static JSBigInt *js_bigint_set_si64(JSBigIntBuf *buf, int64_t a) +{ + JSBigInt *r = (JSBigInt *)buf->big_int_buf; + r->header.ref_count = 0; /* fail safe */ + if (a >= INT32_MIN && a <= INT32_MAX) { + r->len = 1; + r->tab[0] = a; + } else { + r->len = 2; + r->tab[0] = a; + r->tab[1] = a >> JS_LIMB_BITS; + } + return r; +} + +/* val must be a short big int */ +static JSBigInt *js_bigint_set_short(JSBigIntBuf *buf, JSValueConst val) +{ + return js_bigint_set_si(buf, JS_VALUE_GET_SHORT_BIG_INT(val)); +} + +static __maybe_unused void js_bigint_dump1(JSContext *ctx, const char *str, + const js_limb_t *tab, int len) +{ + int i; + printf("%s: ", str); + for(i = len - 1; i >= 0; i--) { + printf(" %08x", tab[i]); + } + printf("\n"); +} + +static __maybe_unused void js_bigint_dump(JSContext *ctx, const char *str, + const JSBigInt *p) +{ + js_bigint_dump1(ctx, str, p->tab, p->len); +} + +static JSBigInt *js_bigint_new_si(JSContext *ctx, js_slimb_t a) +{ + JSBigInt *r; + r = 
js_bigint_new(ctx, 1); + if (!r) + return NULL; + r->tab[0] = a; + return r; +} + +static JSBigInt *js_bigint_new_si64(JSContext *ctx, int64_t a) +{ + if (a >= INT32_MIN && a <= INT32_MAX) { + return js_bigint_new_si(ctx, a); + } else { + JSBigInt *r; + r = js_bigint_new(ctx, 2); + if (!r) + return NULL; + r->tab[0] = a; + r->tab[1] = a >> 32; + return r; + } +} + +static JSBigInt *js_bigint_new_ui64(JSContext *ctx, uint64_t a) +{ + if (a <= INT64_MAX) { + return js_bigint_new_si64(ctx, a); + } else { + JSBigInt *r; + r = js_bigint_new(ctx, (65 + JS_LIMB_BITS - 1) / JS_LIMB_BITS); + if (!r) + return NULL; + r->tab[0] = a; + r->tab[1] = a >> 32; + r->tab[2] = 0; + return r; + } +} + +static JSBigInt *js_bigint_new_di(JSContext *ctx, js_sdlimb_t a) +{ + JSBigInt *r; + if (a == (js_slimb_t)a) { + r = js_bigint_new(ctx, 1); + if (!r) + return NULL; + r->tab[0] = a; + } else { + r = js_bigint_new(ctx, 2); + if (!r) + return NULL; + r->tab[0] = a; + r->tab[1] = a >> JS_LIMB_BITS; + } + return r; +} + +/* Remove redundant high order limbs. Warning: 'a' may be + reallocated. Can never fail. +*/ +static JSBigInt *js_bigint_normalize1(JSContext *ctx, JSBigInt *a, int l) +{ + js_limb_t v; + + assert(a->header.ref_count == 1); + while (l > 1) { + v = a->tab[l - 1]; + if ((v != 0 && v != -1) || + (v & 1) != (a->tab[l - 2] >> (JS_LIMB_BITS - 1))) { + break; + } + l--; + } + if (l != a->len) { + JSBigInt *a1; + /* realloc to reduce the size */ + a->len = l; + a1 = js_realloc(ctx, a, sizeof(JSBigInt) + l * sizeof(js_limb_t)); + if (a1) + a = a1; + } + return a; +} + +static JSBigInt *js_bigint_normalize(JSContext *ctx, JSBigInt *a) +{ + return js_bigint_normalize1(ctx, a, a->len); +} + +/* return 0 or 1 depending on the sign */ +static inline int js_bigint_sign(const JSBigInt *a) +{ + return a->tab[a->len - 1] >> (JS_LIMB_BITS - 1); +} + +static js_slimb_t js_bigint_get_si_sat(const JSBigInt *a) +{ + if (a->len == 1) { + return a->tab[0]; + } else { + if (js_bigint_sign(a)) + return INT32_MIN; + else + return INT32_MAX; + } +} + +/* add the op1 limb */ +static JSBigInt *js_bigint_extend(JSContext *ctx, JSBigInt *r, + js_limb_t op1) +{ + int n2 = r->len; + if ((op1 != 0 && op1 != -1) || + (op1 & 1) != r->tab[n2 - 1] >> (JS_LIMB_BITS - 1)) { + JSBigInt *r1; + r1 = js_realloc(ctx, r, + sizeof(JSBigInt) + (n2 + 1) * sizeof(js_limb_t)); + if (!r1) { + js_free(ctx, r); + return NULL; + } + r = r1; + r->len = n2 + 1; + r->tab[n2] = op1; + } else { + /* otherwise still need to normalize the result */ + r = js_bigint_normalize(ctx, r); + } + return r; +} + +/* return NULL in case of error. 
Compute a + b (b_neg = 0) or a - b + (b_neg = 1) */ +/* XXX: optimize */ +static JSBigInt *js_bigint_add(JSContext *ctx, const JSBigInt *a, + const JSBigInt *b, int b_neg) +{ + JSBigInt *r; + int n1, n2, i; + js_limb_t carry, op1, op2, a_sign, b_sign; + + n2 = max_int(a->len, b->len); + n1 = min_int(a->len, b->len); + r = js_bigint_new(ctx, n2); + if (!r) + return NULL; + /* XXX: optimize */ + /* common part */ + carry = b_neg; + for(i = 0; i < n1; i++) { + op1 = a->tab[i]; + op2 = b->tab[i] ^ (-b_neg); + ADDC(r->tab[i], carry, op1, op2, carry); + } + a_sign = -js_bigint_sign(a); + b_sign = (-js_bigint_sign(b)) ^ (-b_neg); + /* part with sign extension of one operand */ + if (a->len > b->len) { + for(i = n1; i < n2; i++) { + op1 = a->tab[i]; + ADDC(r->tab[i], carry, op1, b_sign, carry); + } + } else if (a->len < b->len) { + for(i = n1; i < n2; i++) { + op2 = b->tab[i] ^ (-b_neg); + ADDC(r->tab[i], carry, a_sign, op2, carry); + } + } + + /* part with sign extension for both operands. Extend the result + if necessary */ + return js_bigint_extend(ctx, r, a_sign + b_sign + carry); +} + +/* XXX: optimize */ +static JSBigInt *js_bigint_neg(JSContext *ctx, const JSBigInt *a) +{ + JSBigIntBuf buf; + JSBigInt *b; + b = js_bigint_set_si(&buf, 0); + return js_bigint_add(ctx, b, a, 1); +} + +static JSBigInt *js_bigint_mul(JSContext *ctx, const JSBigInt *a, + const JSBigInt *b) +{ + JSBigInt *r; + + r = js_bigint_new(ctx, a->len + b->len); + if (!r) + return NULL; + mp_mul_basecase(r->tab, a->tab, a->len, b->tab, b->len); + /* correct the result if negative operands (no overflow is + possible) */ + if (js_bigint_sign(a)) + mp_sub(r->tab + a->len, r->tab + a->len, b->tab, b->len, 0); + if (js_bigint_sign(b)) + mp_sub(r->tab + b->len, r->tab + b->len, a->tab, a->len, 0); + return js_bigint_normalize(ctx, r); +} + +/* return the division or the remainder. 'b' must be != 0. 
return NULL + in case of exception (division by zero or memory error) */ +static JSBigInt *js_bigint_divrem(JSContext *ctx, const JSBigInt *a, + const JSBigInt *b, bool is_rem) +{ + JSBigInt *r, *q; + js_limb_t *tabb, h; + int na, nb, a_sign, b_sign, shift; + + if (b->len == 1 && b->tab[0] == 0) { + JS_ThrowRangeError(ctx, "BigInt division by zero"); + return NULL; + } + + a_sign = js_bigint_sign(a); + b_sign = js_bigint_sign(b); + na = a->len; + nb = b->len; + + r = js_bigint_new(ctx, na + 2); + if (!r) + return NULL; + if (a_sign) { + mp_neg(r->tab, a->tab, na); + } else { + memcpy(r->tab, a->tab, na * sizeof(a->tab[0])); + } + /* normalize */ + while (na > 1 && r->tab[na - 1] == 0) + na--; + + tabb = js_malloc(ctx, nb * sizeof(tabb[0])); + if (!tabb) { + js_free(ctx, r); + return NULL; + } + if (b_sign) { + mp_neg(tabb, b->tab, nb); + } else { + memcpy(tabb, b->tab, nb * sizeof(tabb[0])); + } + /* normalize */ + while (nb > 1 && tabb[nb - 1] == 0) + nb--; + + /* trivial case if 'a' is small */ + if (na < nb) { + js_free(ctx, r); + js_free(ctx, tabb); + if (is_rem) { + /* r = a */ + r = js_bigint_new(ctx, a->len); + if (!r) + return NULL; + memcpy(r->tab, a->tab, a->len * sizeof(a->tab[0])); + return r; + } else { + /* q = 0 */ + return js_bigint_new_si(ctx, 0); + } + } + + /* normalize 'b' */ + shift = js_limb_clz(tabb[nb - 1]); + if (shift != 0) { + mp_shl(tabb, tabb, nb, shift); + h = mp_shl(r->tab, r->tab, na, shift); + if (h != 0) + r->tab[na++] = h; + } + + q = js_bigint_new(ctx, na - nb + 2); /* one more limb for the sign */ + if (!q) { + js_free(ctx, r); + js_free(ctx, tabb); + return NULL; + } + + // js_bigint_dump1(ctx, "a", r->tab, na); + // js_bigint_dump1(ctx, "b", tabb, nb); + mp_divnorm(q->tab, r->tab, na, tabb, nb); + js_free(ctx, tabb); + + if (is_rem) { + js_free(ctx, q); + if (shift != 0) + mp_shr(r->tab, r->tab, nb, shift, 0); + r->tab[nb++] = 0; + if (a_sign) + mp_neg(r->tab, r->tab, nb); + r = js_bigint_normalize1(ctx, r, nb); + return r; + } else { + js_free(ctx, r); + q->tab[na - nb + 1] = 0; + if (a_sign ^ b_sign) { + mp_neg(q->tab, q->tab, q->len); + } + q = js_bigint_normalize(ctx, q); + return q; + } +} + +/* and, or, xor */ +static JSBigInt *js_bigint_logic(JSContext *ctx, const JSBigInt *a, + const JSBigInt *b, OPCodeEnum op) +{ + JSBigInt *r; + js_limb_t b_sign; + int a_len, b_len, i; + + if (a->len < b->len) { + const JSBigInt *tmp; + tmp = a; + a = b; + b = tmp; + } + /* a_len >= b_len */ + a_len = a->len; + b_len = b->len; + b_sign = -js_bigint_sign(b); + + r = js_bigint_new(ctx, a_len); + if (!r) + return NULL; + switch(op) { + case OP_or: + for(i = 0; i < b_len; i++) { + r->tab[i] = a->tab[i] | b->tab[i]; + } + for(i = b_len; i < a_len; i++) { + r->tab[i] = a->tab[i] | b_sign; + } + break; + case OP_and: + for(i = 0; i < b_len; i++) { + r->tab[i] = a->tab[i] & b->tab[i]; + } + for(i = b_len; i < a_len; i++) { + r->tab[i] = a->tab[i] & b_sign; + } + break; + case OP_xor: + for(i = 0; i < b_len; i++) { + r->tab[i] = a->tab[i] ^ b->tab[i]; + } + for(i = b_len; i < a_len; i++) { + r->tab[i] = a->tab[i] ^ b_sign; + } + break; + default: + abort(); + } + return js_bigint_normalize(ctx, r); +} + +static JSBigInt *js_bigint_not(JSContext *ctx, const JSBigInt *a) +{ + JSBigInt *r; + int i; + + r = js_bigint_new(ctx, a->len); + if (!r) + return NULL; + for(i = 0; i < a->len; i++) { + r->tab[i] = ~a->tab[i]; + } + /* no normalization is needed */ + return r; +} + +static JSBigInt *js_bigint_shl(JSContext *ctx, const JSBigInt *a, + unsigned int shift1) +{ + int d, 
i, shift; + JSBigInt *r; + js_limb_t l; + + if (a->len == 1 && a->tab[0] == 0) + return js_bigint_new_si(ctx, 0); /* zero case */ + d = shift1 / JS_LIMB_BITS; + shift = shift1 % JS_LIMB_BITS; + r = js_bigint_new(ctx, a->len + d); + if (!r) + return NULL; + for(i = 0; i < d; i++) + r->tab[i] = 0; + if (shift == 0) { + for(i = 0; i < a->len; i++) { + r->tab[i + d] = a->tab[i]; + } + } else { + l = mp_shl(r->tab + d, a->tab, a->len, shift); + if (js_bigint_sign(a)) + l |= (js_limb_t)(-1) << shift; + r = js_bigint_extend(ctx, r, l); + } + return r; +} + +static JSBigInt *js_bigint_shr(JSContext *ctx, const JSBigInt *a, + unsigned int shift1) +{ + int d, i, shift, a_sign, n1; + JSBigInt *r; + + d = shift1 / JS_LIMB_BITS; + shift = shift1 % JS_LIMB_BITS; + a_sign = js_bigint_sign(a); + if (d >= a->len) + return js_bigint_new_si(ctx, -a_sign); + n1 = a->len - d; + r = js_bigint_new(ctx, n1); + if (!r) + return NULL; + if (shift == 0) { + for(i = 0; i < n1; i++) { + r->tab[i] = a->tab[i + d]; + } + /* no normalization is needed */ + } else { + mp_shr(r->tab, a->tab + d, n1, shift, -a_sign); + r = js_bigint_normalize(ctx, r); + } + return r; +} + +static JSBigInt *js_bigint_pow(JSContext *ctx, const JSBigInt *a, JSBigInt *b) +{ + uint32_t e; + int n_bits, i; + JSBigInt *r, *r1; + + /* b must be >= 0 */ + if (js_bigint_sign(b)) { + JS_ThrowRangeError(ctx, "BigInt negative exponent"); + return NULL; + } + if (b->len == 1 && b->tab[0] == 0) { + /* a^0 = 1 */ + return js_bigint_new_si(ctx, 1); + } else if (a->len == 1) { + js_limb_t v; + bool is_neg; + + v = a->tab[0]; + if (v <= 1) + return js_bigint_new_si(ctx, v); + else if (v == -1) + return js_bigint_new_si(ctx, 1 - 2 * (b->tab[0] & 1)); + is_neg = (js_slimb_t)v < 0; + if (is_neg) + v = -v; + if ((v & (v - 1)) == 0) { + uint64_t e1; + int n; + /* v = 2^n */ + n = JS_LIMB_BITS - 1 - js_limb_clz(v); + if (b->len > 1) + goto overflow; + if (b->tab[0] > INT32_MAX) + goto overflow; + e = b->tab[0]; + e1 = (uint64_t)e * n; + if (e1 > JS_BIGINT_MAX_SIZE * JS_LIMB_BITS) + goto overflow; + e = e1; + if (is_neg) + is_neg = b->tab[0] & 1; + r = js_bigint_new(ctx, + (e + JS_LIMB_BITS + 1 - is_neg) / JS_LIMB_BITS); + if (!r) + return NULL; + memset(r->tab, 0, sizeof(r->tab[0]) * r->len); + r->tab[e / JS_LIMB_BITS] = + (js_limb_t)(1 - 2 * is_neg) << (e % JS_LIMB_BITS); + return r; + } + } + if (b->len > 1) + goto overflow; + if (b->tab[0] > INT32_MAX) + goto overflow; + e = b->tab[0]; + n_bits = 32 - clz32(e); + + r = js_bigint_new(ctx, a->len); + if (!r) + return NULL; + memcpy(r->tab, a->tab, a->len * sizeof(a->tab[0])); + for(i = n_bits - 2; i >= 0; i--) { + r1 = js_bigint_mul(ctx, r, r); + if (!r1) + return NULL; + js_free(ctx, r); + r = r1; + if ((e >> i) & 1) { + r1 = js_bigint_mul(ctx, r, a); + if (!r1) + return NULL; + js_free(ctx, r); + r = r1; + } + } + return r; + overflow: + JS_ThrowRangeError(ctx, "BigInt is too large"); + return NULL; +} + +/* return (mant, exp) so that abs(a) ~ mant*2^(exp - (limb_bits - + 1). a must be != 0. 
*/ +static uint64_t js_bigint_get_mant_exp(JSContext *ctx, + int *pexp, const JSBigInt *a) +{ + js_limb_t t[4 - JS_LIMB_BITS / 32], carry, v, low_bits; + int n1, n2, sgn, shift, i, j, e; + uint64_t a1, a0; + + n2 = 4 - JS_LIMB_BITS / 32; + n1 = a->len - n2; + sgn = js_bigint_sign(a); + + /* low_bits != 0 if there are a non zero low bit in abs(a) */ + low_bits = 0; + carry = sgn; + for(i = 0; i < n1; i++) { + v = (a->tab[i] ^ (-sgn)) + carry; + carry = v < carry; + low_bits |= v; + } + /* get the n2 high limbs of abs(a) */ + for(j = 0; j < n2; j++) { + i = j + n1; + if (i < 0) { + v = 0; + } else { + v = (a->tab[i] ^ (-sgn)) + carry; + carry = v < carry; + } + t[j] = v; + } + + a1 = ((uint64_t)t[2] << 32) | t[1]; + a0 = (uint64_t)t[0] << 32; + a0 |= (low_bits != 0); + /* normalize */ + { + shift = clz64(a1); + if (shift != 0) { + a1 = (a1 << shift) | (a0 >> (64 - shift)); + a0 <<= shift; + } + } + a1 |= (a0 != 0); /* keep the bits for the final rounding */ + /* compute the exponent */ + e = a->len * JS_LIMB_BITS - shift - 1; + *pexp = e; + return a1; +} + +/* shift left with round to nearest, ties to even. n >= 1 */ +static uint64_t shr_rndn(uint64_t a, int n) +{ + uint64_t addend = ((a >> n) & 1) + ((1 << (n - 1)) - 1); + return (a + addend) >> n; +} + +/* convert to float64 with round to nearest, ties to even. Return + +/-infinity if too large. */ +static double js_bigint_to_float64(JSContext *ctx, const JSBigInt *a) +{ + int sgn, e; + uint64_t mant; + + if (a->len == 1) { + /* fast case, including zero */ + return (double)(js_slimb_t)a->tab[0]; + } + + sgn = js_bigint_sign(a); + mant = js_bigint_get_mant_exp(ctx, &e, a); + if (e > 1023) { + /* overflow: return infinity */ + mant = 0; + e = 1024; + } else { + mant = (mant >> 1) | (mant & 1); /* avoid overflow in rounding */ + mant = shr_rndn(mant, 10); + /* rounding can cause an overflow */ + if (mant >= ((uint64_t)1 << 53)) { + mant >>= 1; + e++; + } + mant &= (((uint64_t)1 << 52) - 1); + } + return uint64_as_float64(((uint64_t)sgn << 63) | + ((uint64_t)(e + 1023) << 52) | + mant); +} + +/* return (1, NULL) if not an integer, (2, NULL) if NaN or Infinity, + (0, n) if an integer, (0, NULL) in case of memory error */ +static JSBigInt *js_bigint_from_float64(JSContext *ctx, int *pres, double a1) +{ + uint64_t a = float64_as_uint64(a1); + int sgn, e, shift; + uint64_t mant; + JSBigIntBuf buf; + JSBigInt *r; + + sgn = a >> 63; + e = (a >> 52) & ((1 << 11) - 1); + mant = a & (((uint64_t)1 << 52) - 1); + if (e == 2047) { + /* NaN, Infinity */ + *pres = 2; + return NULL; + } + if (e == 0 && mant == 0) { + /* zero */ + *pres = 0; + return js_bigint_new_si(ctx, 0); + } + e -= 1023; + /* 0 < a < 1 : not an integer */ + if (e < 0) + goto not_an_integer; + mant |= (uint64_t)1 << 52; + if (e < 52) { + shift = 52 - e; + /* check that there is no fractional part */ + if (mant & (((uint64_t)1 << shift) - 1)) { + not_an_integer: + *pres = 1; + return NULL; + } + mant >>= shift; + e = 0; + } else { + e -= 52; + } + if (sgn) + mant = -mant; + /* the integer is mant*2^e */ + r = js_bigint_set_si64(&buf, (int64_t)mant); + *pres = 0; + return js_bigint_shl(ctx, r, e); +} + +/* return -1, 0, 1 or (2) (unordered) */ +static int js_bigint_float64_cmp(JSContext *ctx, const JSBigInt *a, + double b) +{ + int b_sign, a_sign, e, f; + uint64_t mant, b1, a_mant; + + b1 = float64_as_uint64(b); + b_sign = b1 >> 63; + e = (b1 >> 52) & ((1 << 11) - 1); + mant = b1 & (((uint64_t)1 << 52) - 1); + a_sign = js_bigint_sign(a); + if (e == 2047) { + if (mant != 0) { + /* NaN */ + 
return 2; + } else { + /* +/- infinity */ + return 2 * b_sign - 1; + } + } else if (e == 0 && mant == 0) { + /* b = +/-0 */ + if (a->len == 1 && a->tab[0] == 0) + return 0; + else + return 1 - 2 * a_sign; + } else if (a->len == 1 && a->tab[0] == 0) { + /* a = 0, b != 0 */ + return 2 * b_sign - 1; + } else if (a_sign != b_sign) { + return 1 - 2 * a_sign; + } else { + e -= 1023; + /* Note: handling denormals is not necessary because we + compare to integers hence f >= 0 */ + /* compute f so that 2^f <= abs(a) < 2^(f+1) */ + a_mant = js_bigint_get_mant_exp(ctx, &f, a); + if (f != e) { + if (f < e) + return -1; + else + return 1; + } else { + mant = (mant | ((uint64_t)1 << 52)) << 11; /* align to a_mant */ + if (a_mant < mant) + return 2 * a_sign - 1; + else if (a_mant > mant) + return 1 - 2 * a_sign; + else + return 0; + } + } +} + +/* return -1, 0 or 1 */ +static int js_bigint_cmp(JSContext *ctx, const JSBigInt *a, + const JSBigInt *b) +{ + int a_sign, b_sign, res, i; + a_sign = js_bigint_sign(a); + b_sign = js_bigint_sign(b); + if (a_sign != b_sign) { + res = 1 - 2 * a_sign; + } else { + /* we assume the numbers are normalized */ + if (a->len != b->len) { + if (a->len < b->len) + res = 2 * a_sign - 1; + else + res = 1 - 2 * a_sign; + } else { + res = 0; + for(i = a->len -1; i >= 0; i--) { + if (a->tab[i] != b->tab[i]) { + if (a->tab[i] < b->tab[i]) + res = -1; + else + res = 1; + break; + } + } + } + } + return res; +} + +/* contains 10^i */ +static const js_limb_t js_pow_dec[JS_LIMB_DIGITS + 1] = { + 1U, + 10U, + 100U, + 1000U, + 10000U, + 100000U, + 1000000U, + 10000000U, + 100000000U, + 1000000000U, +}; + +/* syntax: [-]digits in base radix. Return NULL if memory error. radix + = 10, 2, 8 or 16. */ +static JSBigInt *js_bigint_from_string(JSContext *ctx, + const char *str, int radix) +{ + const char *p = str; + int is_neg, n_digits, n_limbs, len, log2_radix, n_bits, i; + JSBigInt *r; + js_limb_t v, c, h; + + is_neg = 0; + if (*p == '-') { + is_neg = 1; + p++; + } + while (*p == '0') + p++; + n_digits = strlen(p); + log2_radix = 32 - clz32(radix - 1); /* ceil(log2(radix)) */ + /* compute the maximum number of limbs */ + /* XXX: overflow */ + if (radix == 10) { + n_bits = (n_digits * 27 + 7) / 8; /* >= ceil(n_digits * log2(10)) */ + } else { + n_bits = n_digits * log2_radix; + } + /* we add one extra bit for the sign */ + n_limbs = max_int(1, n_bits / JS_LIMB_BITS + 1); + r = js_bigint_new(ctx, n_limbs); + if (!r) + return NULL; + if (radix == 10) { + int digits_per_limb = JS_LIMB_DIGITS; + len = 1; + r->tab[0] = 0; + for(;;) { + /* XXX: slow */ + v = 0; + for(i = 0; i < digits_per_limb; i++) { + c = to_digit(*p); + if (c >= radix) + break; + p++; + v = v * 10 + c; + } + if (i == 0) + break; + if (len == 1 && r->tab[0] == 0) { + r->tab[0] = v; + } else { + h = mp_mul1(r->tab, r->tab, len, js_pow_dec[i], v); + if (h != 0) { + r->tab[len++] = h; + } + } + } + /* add one extra limb to have the correct sign*/ + if ((r->tab[len - 1] >> (JS_LIMB_BITS - 1)) != 0) + r->tab[len++] = 0; + r->len = len; + } else { + unsigned int bit_pos, shift, pos; + + /* power of two base: no multiplication is needed */ + r->len = n_limbs; + memset(r->tab, 0, sizeof(r->tab[0]) * n_limbs); + for(i = 0; i < n_digits; i++) { + c = to_digit(p[n_digits - 1 - i]); + assert(c < radix); + bit_pos = i * log2_radix; + shift = bit_pos & (JS_LIMB_BITS - 1); + pos = bit_pos / JS_LIMB_BITS; + r->tab[pos] |= c << shift; + /* if log2_radix does not divide JS_LIMB_BITS, needed an + additional op */ + if (shift + log2_radix > 
JS_LIMB_BITS) { + r->tab[pos + 1] |= c >> (JS_LIMB_BITS - shift); + } + } + } + r = js_bigint_normalize(ctx, r); + /* XXX: could do it in place */ + if (is_neg) { + JSBigInt *r1; + r1 = js_bigint_neg(ctx, r); + js_free(ctx, r); + r = r1; + } + return r; +} + +/* 2 <= base <= 36 */ +static char const digits[36] = { + '0','1','2','3','4','5','6','7','8','9', + 'a','b','c','d','e','f','g','h','i','j', + 'k','l','m','n','o','p','q','r','s','t', + 'u','v','w','x','y','z' +}; + +/* special version going backwards */ +/* XXX: use dtoa.c */ +static char *js_u64toa(char *q, int64_t n, unsigned int base) +{ + int digit; + if (base == 10) { + /* division by known base uses multiplication */ + do { + digit = (uint64_t)n % 10; + n = (uint64_t)n / 10; + *--q = '0' + digit; + } while (n != 0); + } else { + do { + digit = (uint64_t)n % base; + n = (uint64_t)n / base; + *--q = digits[digit]; + } while (n != 0); + } + return q; +} + +/* len >= 1. 2 <= radix <= 36 */ +static char *limb_to_a(char *q, js_limb_t n, unsigned int radix, int len) +{ + int digit, i; + + if (radix == 10) { + /* specific case with constant divisor */ + /* XXX: optimize */ + for(i = 0; i < len; i++) { + digit = (js_limb_t)n % 10; + n = (js_limb_t)n / 10; + *--q = digit + '0'; + } + } else { + for(i = 0; i < len; i++) { + digit = (js_limb_t)n % radix; + n = (js_limb_t)n / radix; + *--q = digits[digit]; + } + } + return q; +} + +#define JS_RADIX_MAX 36 + +static const uint8_t digits_per_limb_table[JS_RADIX_MAX - 1] = { +32,20,16,13,12,11,10,10, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +}; + +static const js_limb_t radix_base_table[JS_RADIX_MAX - 1] = { + 0x00000000, 0xcfd41b91, 0x00000000, 0x48c27395, + 0x81bf1000, 0x75db9c97, 0x40000000, 0xcfd41b91, + 0x3b9aca00, 0x8c8b6d2b, 0x19a10000, 0x309f1021, + 0x57f6c100, 0x98c29b81, 0x00000000, 0x18754571, + 0x247dbc80, 0x3547667b, 0x4c4b4000, 0x6b5a6e1d, + 0x94ace180, 0xcaf18367, 0x0b640000, 0x0e8d4a51, + 0x1269ae40, 0x17179149, 0x1cb91000, 0x23744899, + 0x2b73a840, 0x34e63b41, 0x40000000, 0x4cfa3cc1, + 0x5c13d840, 0x6d91b519, 0x81bf1000, +}; + +static JSValue js_bigint_to_string1(JSContext *ctx, JSValueConst val, int radix) +{ + if (JS_VALUE_GET_TAG(val) == JS_TAG_SHORT_BIG_INT) { + char buf[66]; + int len; + len = i64toa_radix(buf, JS_VALUE_GET_SHORT_BIG_INT(val), radix); + return js_new_string8_len(ctx, buf, len); + } else { + JSBigInt *r, *tmp = NULL; + char *buf, *q, *buf_end; + int is_neg, n_bits, log2_radix, n_digits; + bool is_binary_radix; + JSValue res; + + assert(JS_VALUE_GET_TAG(val) == JS_TAG_BIG_INT); + r = JS_VALUE_GET_PTR(val); + if (r->len == 1 && r->tab[0] == 0) { + /* '0' case */ + return js_new_string8_len(ctx, "0", 1); + } + is_binary_radix = ((radix & (radix - 1)) == 0); + is_neg = js_bigint_sign(r); + if (is_neg) { + tmp = js_bigint_neg(ctx, r); + if (!tmp) + return JS_EXCEPTION; + r = tmp; + } else if (!is_binary_radix) { + /* need to modify 'r' */ + tmp = js_bigint_new(ctx, r->len); + if (!tmp) + return JS_EXCEPTION; + memcpy(tmp->tab, r->tab, r->len * sizeof(r->tab[0])); + r = tmp; + } + log2_radix = 31 - clz32(radix); /* floor(log2(radix)) */ + n_bits = r->len * JS_LIMB_BITS - js_limb_clz(r->tab[r->len - 1]); + /* n_digits is exact only if radix is a power of + two. 
Otherwise it is >= the exact number of digits */ + n_digits = (n_bits + log2_radix - 1) / log2_radix; + /* XXX: could directly build the JSString */ + buf = js_malloc(ctx, n_digits + is_neg + 1); + if (!buf) { + js_free(ctx, tmp); + return JS_EXCEPTION; + } + q = buf + n_digits + is_neg + 1; + *--q = '\0'; + buf_end = q; + if (!is_binary_radix) { + int len; + js_limb_t radix_base, v; + radix_base = radix_base_table[radix - 2]; + len = r->len; + for(;;) { + /* remove leading zero limbs */ + while (len > 1 && r->tab[len - 1] == 0) + len--; + if (len == 1 && r->tab[0] < radix_base) { + v = r->tab[0]; + if (v != 0) { + q = js_u64toa(q, v, radix); + } + break; + } else { + v = mp_div1(r->tab, r->tab, len, radix_base, 0); + q = limb_to_a(q, v, radix, digits_per_limb_table[radix - 2]); + } + } + } else { + int i, shift; + unsigned int bit_pos, pos, c; + + /* radix is a power of two */ + for(i = 0; i < n_digits; i++) { + bit_pos = i * log2_radix; + pos = bit_pos / JS_LIMB_BITS; + shift = bit_pos % JS_LIMB_BITS; + if (likely((shift + log2_radix) <= JS_LIMB_BITS)) { + c = r->tab[pos] >> shift; + } else { + c = (r->tab[pos] >> shift) | + (r->tab[pos + 1] << (JS_LIMB_BITS - shift)); + } + c &= (radix - 1); + *--q = digits[c]; + } + } + if (is_neg) + *--q = '-'; + js_free(ctx, tmp); + res = js_new_string8_len(ctx, q, buf_end - q); + js_free(ctx, buf); + return res; + } +} + +/* if possible, transform a BigInt into a short bigint and free it; otherwise + return a normal bigint */ +static JSValue JS_CompactBigInt(JSContext *ctx, JSBigInt *p) +{ + JSValue res; + if (p->len == 1) { + res = __JS_NewShortBigInt(ctx, (js_slimb_t)p->tab[0]); + js_free(ctx, p); + return res; + } else { + return JS_MKPTR(JS_TAG_BIG_INT, p); + } +} + /* XXX: remove */ static double js_strtod(const char *str, int radix, bool is_float) { @@ -10536,7 +12052,10 @@ static double js_strtod(const char *str, int radix, bool is_float) n_max = ((uint64_t)-1 - (radix - 1)) / radix; /* XXX: could be more precise */ int_exp = 0; - while ((c = to_digit(*p)) < radix) { + while (*p != '\0') { + c = to_digit((uint8_t)*p); + if (c >= radix) + break; if (n <= n_max) { n = n * radix + c; } else { @@ -10559,23 +12078,6 @@ static double js_strtod(const char *str, int radix, bool is_float) return d; } -static JSValue js_string_to_bigint(JSContext *ctx, const char *buf, int radix) -{ - bf_t *a; - int ret; - JSValue val; - val = JS_NewBigInt(ctx); - if (JS_IsException(val)) - return val; - a = JS_GetBigInt(val); - ret = bf_atof(a, buf, NULL, radix, BF_PREC_INF, BF_RNDZ); - if (ret & BF_ST_MEM_ERROR) { - JS_FreeValue(ctx, val); - return JS_ThrowOutOfMemory(ctx); - } - return JS_CompactBigInt1(ctx, val); -} - /* `js_atof(ctx, p, len, pp, radix, flags)` Convert the string pointed to by `p` to a number value. Return an exception in case of memory error.
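For the non-power-of-two radices above, js_bigint_to_string1 peels off digits_per_limb_table[radix - 2] digits per round: one mp_div1 by radix_base (the largest power of the radix that fits in one limb, 10^9 for decimal) leaves a 32-bit remainder that limb_to_a formats, so the multi-precision division runs once per nine decimal digits rather than once per digit, and the buffer is filled backwards to avoid a reversal pass. A self-contained sketch of the same chunked scheme, fixed to radix 10 (print_limbs_decimal is an invented name and the buffer sizing is simplified for the demo):

#include <stdint.h>
#include <stdio.h>

/* print a little-endian array of 32-bit limbs in decimal (clobbers tab) */
static void print_limbs_decimal(uint32_t *tab, int len)
{
    char buf[128], *q = buf + sizeof(buf); /* demo-sized; filled backwards */
    const uint32_t base = 1000000000u; /* 10^9: largest power of 10 < 2^32 */
    uint64_t r;
    int i;

    *--q = '\0';
    for (;;) {
        while (len > 1 && tab[len - 1] == 0)
            len--; /* drop leading zero limbs, as in the diff */
        if (len == 1 && tab[0] < base) {
            uint32_t v = tab[0];
            do { *--q = '0' + v % 10; v /= 10; } while (v != 0);
            break; /* last chunk: no leading zeros */
        }
        /* one pass of the mp_div1 idea: divide the whole array by 10^9 */
        r = 0;
        for (i = len - 1; i >= 0; i--) {
            uint64_t a = (r << 32) | tab[i];
            tab[i] = (uint32_t)(a / base);
            r = a % base;
        }
        for (i = 0; i < 9; i++) { *--q = '0' + (int)(r % 10); r /= 10; }
    }
    puts(q);
}

int main(void)
{
    uint32_t n[2] = { 0, 1 }; /* little endian: 2^32 */
    print_limbs_decimal(n, 2); /* prints 4294967296 */
    return 0;
}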
@@ -10718,8 +12220,15 @@ static JSValue js_atof(JSContext *ctx, const char *p, size_t len, } if (flags & ATOD_WANT_BIG_INT) { - if (!is_float) - val = js_string_to_bigint(ctx, buf, radix); + JSBigInt *r; + if (!is_float) { + r = js_bigint_from_string(ctx, buf, radix); + if (!r) { + val = JS_ThrowOutOfMemory(ctx); + goto done; + } + val = JS_CompactBigInt(ctx, r); + } } else { d = js_strtod(buf, radix, is_float); val = js_number(d); /* return int or float64 */ @@ -10755,6 +12264,7 @@ static JSValue JS_ToNumberHintFree(JSContext *ctx, JSValue val, tag = JS_VALUE_GET_NORM_TAG(val); switch(tag) { case JS_TAG_BIG_INT: + case JS_TAG_SHORT_BIG_INT: if (flag != TON_FLAG_NUMERIC) { JS_FreeValue(ctx, val); return JS_ThrowTypeError(ctx, "cannot convert BigInt to number"); @@ -10788,6 +12298,7 @@ static JSValue JS_ToNumberHintFree(JSContext *ctx, JSValue val, JS_FreeValue(ctx, val); if (!str) return JS_EXCEPTION; + // TODO(saghul): Sync with bellard/quickjs ? flags = ATOD_TRIM_SPACES | ATOD_ACCEPT_EMPTY | ATOD_ACCEPT_FLOAT | ATOD_ACCEPT_INFINITY | ATOD_ACCEPT_HEX_PREFIX | ATOD_ACCEPT_BIN_OCT | @@ -10829,10 +12340,8 @@ static __exception int __JS_ToFloat64Free(JSContext *ctx, double *pres, uint32_t tag; val = JS_ToNumberFree(ctx, val); - if (JS_IsException(val)) { - *pres = NAN; - return -1; - } + if (JS_IsException(val)) + goto fail; tag = JS_VALUE_GET_NORM_TAG(val); switch(tag) { case JS_TAG_INT: @@ -10841,21 +12350,14 @@ static __exception int __JS_ToFloat64Free(JSContext *ctx, double *pres, case JS_TAG_FLOAT64: d = JS_VALUE_GET_FLOAT64(val); break; - case JS_TAG_BIG_INT: - { - JSBigInt *p = JS_VALUE_GET_PTR(val); - /* XXX: there can be a double rounding issue with some - primitives (such as JS_ToUint8ClampFree()), but it is - not critical to fix it. */ - bf_get_float64(&p->num, &d, BF_RNDN); - JS_FreeValue(ctx, val); - } - break; default: abort(); } *pres = d; return 0; +fail: + *pres = NAN; + return -1; } static inline int JS_ToFloat64Free(JSContext *ctx, double *pres, JSValue val) @@ -11239,21 +12741,6 @@ static __exception int JS_ToArrayLengthFree(JSContext *ctx, uint32_t *plen, len = v; } break; - case JS_TAG_BIG_INT: - { - JSBigInt *p = JS_VALUE_GET_PTR(val); - bf_t a; - bool res; - bf_get_int32((int32_t *)&len, &p->num, BF_GET_INT_MOD); - bf_init(ctx->bf_ctx, &a); - bf_set_ui(&a, len); - res = bf_cmp_eq(&a, &p->num); - bf_delete(&a); - JS_FreeValue(ctx, val); - if (!res) - goto fail; - } - break; default: if (JS_TAG_IS_FLOAT64(tag)) { double d; @@ -11359,42 +12846,18 @@ static bool JS_NumberIsNegativeOrMinusZero(JSContext *ctx, JSValueConst val) u.d = JS_VALUE_GET_FLOAT64(val); return (u.u64 >> 63); } + case JS_TAG_SHORT_BIG_INT: + return (JS_VALUE_GET_SHORT_BIG_INT(val) < 0); case JS_TAG_BIG_INT: { JSBigInt *p = JS_VALUE_GET_PTR(val); - /* Note: integer zeros are not necessarily positive */ - return p->num.sign && !bf_is_zero(&p->num); + return js_bigint_sign(p); } default: return false; } } -static JSValue js_bigint_to_string1(JSContext *ctx, JSValueConst val, int radix) -{ - JSValue ret; - bf_t a_s, *a; - char *str; - int saved_sign; - size_t len; - - a = JS_ToBigInt(ctx, &a_s, val); - if (!a) - return JS_EXCEPTION; - saved_sign = a->sign; - if (a->expn == BF_EXP_ZERO) - a->sign = 0; - str = bf_ftoa(&len, a, radix, 0, BF_RNDZ | BF_FTOA_FORMAT_FRAC | - BF_FTOA_JS_QUIRKS); - a->sign = saved_sign; - JS_FreeBigInt(ctx, a, &a_s); - if (!str) - return JS_ThrowOutOfMemory(ctx); - ret = js_new_string8_len(ctx, str, len); - bf_free(ctx->bf_ctx, str); - return ret; -} - static JSValue 
js_bigint_to_string(JSContext *ctx, JSValueConst val) { return js_bigint_to_string1(ctx, val, 10); @@ -11867,6 +13330,7 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, } case JS_TAG_FLOAT64: return js_dtoa(ctx, JS_VALUE_GET_FLOAT64(val), 0, JS_DTOA_TOSTRING); + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: return js_bigint_to_string(ctx, val); case JS_TAG_UNINITIALIZED: @@ -12152,14 +13616,26 @@ static __maybe_unused void JS_DumpValue(JSRuntime *rt, JSValueConst val) case JS_TAG_FLOAT64: printf("%.14g", JS_VALUE_GET_FLOAT64(val)); break; + case JS_TAG_SHORT_BIG_INT: + printf("%" PRId64 "n", (int64_t)JS_VALUE_GET_SHORT_BIG_INT(val)); + break; case JS_TAG_BIG_INT: { JSBigInt *p = JS_VALUE_GET_PTR(val); - char *str; - str = bf_ftoa(NULL, &p->num, 10, 0, - BF_RNDZ | BF_FTOA_FORMAT_FRAC); - printf("%sn", str); - bf_realloc(&rt->bf_ctx, str, 0); + int sgn, i; + /* In order to avoid allocations we just dump the limbs */ + sgn = js_bigint_sign(p); + if (sgn) + printf("BigInt.asIntN(%d,", p->len * JS_LIMB_BITS); + printf("0x"); + for(i = p->len - 1; i >= 0; i--) { + if (i != p->len - 1) + printf("_"); + printf("%08x", p->tab[i]); + } + printf("n"); + if (sgn) + printf(")"); } break; case JS_TAG_STRING: @@ -12242,76 +13718,28 @@ static double js_math_pow(double a, double b) JSValue JS_NewBigInt64(JSContext *ctx, int64_t v) { - JSValue val; - bf_t *a; - val = JS_NewBigInt(ctx); - if (JS_IsException(val)) - return val; - a = JS_GetBigInt(val); - if (bf_set_si(a, v)) { - JS_FreeValue(ctx, val); - return JS_ThrowOutOfMemory(ctx); + if (v >= JS_SHORT_BIG_INT_MIN && v <= JS_SHORT_BIG_INT_MAX) { + return __JS_NewShortBigInt(ctx, v); + } else { + JSBigInt *p; + p = js_bigint_new_si64(ctx, v); + if (!p) + return JS_EXCEPTION; + return JS_MKPTR(JS_TAG_BIG_INT, p); } - return val; } JSValue JS_NewBigUint64(JSContext *ctx, uint64_t v) { - JSValue val; - bf_t *a; - val = JS_NewBigInt(ctx); - if (JS_IsException(val)) - return val; - a = JS_GetBigInt(val); - if (bf_set_ui(a, v)) { - JS_FreeValue(ctx, val); - return JS_ThrowOutOfMemory(ctx); - } - - return val; -} - -/* if the returned bigint is allocated it is equal to - 'buf'. Otherwise it is a pointer to the bigint in 'val'. Return - NULL in case of error. */ -// TODO(bnoordhuis) Merge with JS_ToBigInt() -static bf_t *JS_ToBigInt1(JSContext *ctx, bf_t *buf, JSValueConst val) -{ - uint32_t tag; - bf_t *r; - JSBigInt *p; - - tag = JS_VALUE_GET_NORM_TAG(val); - switch(tag) { - case JS_TAG_INT: - case JS_TAG_BOOL: - case JS_TAG_NULL: - r = buf; - bf_init(ctx->bf_ctx, r); - if (bf_set_si(r, JS_VALUE_GET_INT(val))) - goto fail; - break; - case JS_TAG_FLOAT64: - r = buf; - bf_init(ctx->bf_ctx, r); - if (bf_set_float64(r, JS_VALUE_GET_FLOAT64(val))) { - fail: - bf_delete(r); - return NULL; - } - break; - case JS_TAG_BIG_INT: - p = JS_VALUE_GET_PTR(val); - r = &p->num; - break; - case JS_TAG_UNDEFINED: - default: - r = buf; - bf_init(ctx->bf_ctx, r); - bf_set_nan(r); - break; + if (v <= JS_SHORT_BIG_INT_MAX) { + return __JS_NewShortBigInt(ctx, v); + } else { + JSBigInt *p; + p = js_bigint_new_ui64(ctx, v); + if (!p) + return JS_EXCEPTION; + return JS_MKPTR(JS_TAG_BIG_INT, p); } - return r; } /* return NaN if bad bigint literal */ @@ -12325,6 +13753,7 @@ static JSValue JS_StringToBigInt(JSContext *ctx, JSValue val) JS_FreeValue(ctx, val); if (!str) return JS_EXCEPTION; + // TODO(saghul): sync with bellard/quickjs ? 
flags = ATOD_WANT_BIG_INT | ATOD_TRIM_SPACES | ATOD_ACCEPT_EMPTY | ATOD_ACCEPT_HEX_PREFIX | ATOD_ACCEPT_BIN_OCT | @@ -12342,106 +13771,69 @@ static JSValue JS_StringToBigIntErr(JSContext *ctx, JSValue val) return val; } -/* if the returned bigint is allocated it is equal to - 'buf'. Otherwise it is a pointer to the bigint in 'val'. */ -static bf_t *JS_ToBigIntFree(JSContext *ctx, bf_t *buf, JSValue val) +/* JS Numbers are not allowed */ +static JSValue JS_ToBigIntFree(JSContext *ctx, JSValue val) { uint32_t tag; - bf_t *r; - JSBigInt *p; redo: tag = JS_VALUE_GET_NORM_TAG(val); switch(tag) { + case JS_TAG_SHORT_BIG_INT: + case JS_TAG_BIG_INT: + break; case JS_TAG_INT: case JS_TAG_NULL: case JS_TAG_UNDEFINED: case JS_TAG_FLOAT64: goto fail; case JS_TAG_BOOL: - r = buf; - bf_init(ctx->bf_ctx, r); - bf_set_si(r, JS_VALUE_GET_INT(val)); - break; - case JS_TAG_BIG_INT: - p = JS_VALUE_GET_PTR(val); - r = &p->num; + val = __JS_NewShortBigInt(ctx, JS_VALUE_GET_INT(val)); break; case JS_TAG_STRING: val = JS_StringToBigIntErr(ctx, val); if (JS_IsException(val)) - return NULL; + return val; goto redo; case JS_TAG_OBJECT: val = JS_ToPrimitiveFree(ctx, val, HINT_NUMBER); if (JS_IsException(val)) - return NULL; + return val; goto redo; default: fail: JS_FreeValue(ctx, val); - JS_ThrowTypeError(ctx, "cannot convert to BigInt"); - return NULL; - } - return r; -} - -static bf_t *JS_ToBigInt(JSContext *ctx, bf_t *buf, JSValueConst val) -{ - return JS_ToBigIntFree(ctx, buf, js_dup(val)); -} - -static __maybe_unused JSValue JS_ToBigIntValueFree(JSContext *ctx, JSValue val) -{ - if (JS_VALUE_GET_TAG(val) == JS_TAG_BIG_INT) { - return val; - } else { - bf_t a_s, *a, *r; - int ret; - JSValue res; - - res = JS_NewBigInt(ctx); - if (JS_IsException(res)) - return JS_EXCEPTION; - a = JS_ToBigIntFree(ctx, &a_s, val); - if (!a) { - JS_FreeValue(ctx, res); - return JS_EXCEPTION; - } - r = JS_GetBigInt(res); - ret = bf_set(r, a); - JS_FreeBigInt(ctx, a, &a_s); - if (ret) { - JS_FreeValue(ctx, res); - return JS_ThrowOutOfMemory(ctx); - } - return JS_CompactBigInt(ctx, res); + return JS_ThrowTypeError(ctx, "cannot convert to bigint"); } + return val; } -/* free the bf_t allocated by JS_ToBigInt */ -static void JS_FreeBigInt(JSContext *ctx, bf_t *a, bf_t *buf) +static JSValue JS_ToBigInt(JSContext *ctx, JSValueConst val) { - if (a == buf) { - bf_delete(a); - } else { - JSBigInt *p = (JSBigInt *)((uint8_t *)a - offsetof(JSBigInt, num)); - JS_FreeValue(ctx, JS_MKPTR(JS_TAG_BIG_INT, p)); - } + return JS_ToBigIntFree(ctx, js_dup(val)); } /* XXX: merge with JS_ToInt64Free with a specific flag */ static int JS_ToBigInt64Free(JSContext *ctx, int64_t *pres, JSValue val) { - bf_t a_s, *a; + uint64_t res; - a = JS_ToBigIntFree(ctx, &a_s, val); - if (!a) { + val = JS_ToBigIntFree(ctx, val); + if (JS_IsException(val)) { *pres = 0; return -1; } - bf_get_int64(pres, a, BF_GET_INT_MOD); - JS_FreeBigInt(ctx, a, &a_s); + if (JS_VALUE_GET_TAG(val) == JS_TAG_SHORT_BIG_INT) { + res = JS_VALUE_GET_SHORT_BIG_INT(val); + } else { + JSBigInt *p = JS_VALUE_GET_PTR(val); + /* return the value mod 2^64 */ + res = p->tab[0]; + if (p->len >= 2) + res |= (uint64_t)p->tab[1] << 32; + JS_FreeValue(ctx, val); + } + *pres = res; return 0; } @@ -12455,103 +13847,6 @@ int JS_ToBigUint64(JSContext *ctx, uint64_t *pres, JSValueConst val) return JS_ToBigInt64Free(ctx, (int64_t *)pres, js_dup(val)); } -static JSValue JS_NewBigInt(JSContext *ctx) -{ - JSBigInt *p; - p = js_malloc(ctx, sizeof(*p)); - if (!p) - return JS_EXCEPTION; - p->header.ref_count = 1; - 
bf_init(ctx->bf_ctx, &p->num); - return JS_MKPTR(JS_TAG_BIG_INT, p); -} - -static JSValue JS_CompactBigInt1(JSContext *ctx, JSValue val) -{ - if (JS_VALUE_GET_TAG(val) != JS_TAG_BIG_INT) - return val; /* fail safe */ - bf_t *a = JS_GetBigInt(val); - if (a->expn == BF_EXP_ZERO && a->sign) { - assert(((JSBigInt*)JS_VALUE_GET_PTR(val))->header.ref_count == 1); - a->sign = 0; - } - return val; -} - -/* Nnormalize the zero representation. Could also be used to convert the bigint - to a short bigint value. The reference count of the value must be - 1. Cannot fail */ -static JSValue JS_CompactBigInt(JSContext *ctx, JSValue val) -{ - return JS_CompactBigInt1(ctx, val); -} - -static JSValue throw_bf_exception(JSContext *ctx, int status) -{ - const char *str; - if (status & BF_ST_MEM_ERROR) - return JS_ThrowOutOfMemory(ctx); - if (status & BF_ST_DIVIDE_ZERO) { - str = "division by zero"; - } else if (status & BF_ST_INVALID_OP) { - str = "invalid operation"; - } else { - str = "integer overflow"; - } - return JS_ThrowRangeError(ctx, "%s", str); -} - -static int js_unary_arith_bigint(JSContext *ctx, - JSValue *pres, OPCodeEnum op, JSValue op1) -{ - bf_t a_s, *r, *a; - int ret, v; - JSValue res; - - if (op == OP_plus) { - JS_ThrowTypeError(ctx, "BigInt argument with unary +"); - JS_FreeValue(ctx, op1); - return -1; - } - res = JS_NewBigInt(ctx); - if (JS_IsException(res)) { - JS_FreeValue(ctx, op1); - return -1; - } - r = JS_GetBigInt(res); - a = JS_ToBigIntFree(ctx, &a_s, op1); // infallible, always a bigint - ret = 0; - switch(op) { - case OP_inc: - case OP_dec: - v = 2 * (op - OP_dec) - 1; - ret = bf_add_si(r, a, v, BF_PREC_INF, BF_RNDZ); - break; - case OP_plus: - ret = bf_set(r, a); - break; - case OP_neg: - ret = bf_set(r, a); - bf_neg(r); - break; - case OP_not: - ret = bf_add_si(r, a, 1, BF_PREC_INF, BF_RNDZ); - bf_neg(r); - break; - default: - abort(); - } - JS_FreeBigInt(ctx, a, &a_s); - if (unlikely(ret)) { - JS_FreeValue(ctx, res); - throw_bf_exception(ctx, ret); - return -1; - } - res = JS_CompactBigInt(ctx, res); - *pres = res; - return 0; -} - static no_inline __exception int js_unary_arith_slow(JSContext *ctx, JSValue *sp, OPCodeEnum op) @@ -12559,12 +13854,13 @@ static no_inline __exception int js_unary_arith_slow(JSContext *ctx, JSValue op1; int v; uint32_t tag; + JSBigIntBuf buf1; + JSBigInt *p1; op1 = sp[-1]; /* fast path for float64 */ if (JS_TAG_IS_FLOAT64(JS_VALUE_GET_TAG(op1))) goto handle_float64; - op1 = JS_ToNumericFree(ctx, op1); if (JS_IsException(op1)) goto exception; @@ -12593,17 +13889,80 @@ static no_inline __exception int js_unary_arith_slow(JSContext *ctx, default: abort(); } - sp[-1] = js_int64(v64); + sp[-1] = JS_NewInt64(ctx, v64); + } + break; + case JS_TAG_SHORT_BIG_INT: + { + int64_t v; + v = JS_VALUE_GET_SHORT_BIG_INT(op1); + switch(op) { + case OP_plus: + JS_ThrowTypeError(ctx, "bigint argument with unary +"); + goto exception; + case OP_inc: + if (v == JS_SHORT_BIG_INT_MAX) + goto bigint_slow_case; + sp[-1] = __JS_NewShortBigInt(ctx, v + 1); + break; + case OP_dec: + if (v == JS_SHORT_BIG_INT_MIN) + goto bigint_slow_case; + sp[-1] = __JS_NewShortBigInt(ctx, v - 1); + break; + case OP_neg: + v = JS_VALUE_GET_SHORT_BIG_INT(op1); + if (v == JS_SHORT_BIG_INT_MIN) { + bigint_slow_case: + p1 = js_bigint_set_short(&buf1, op1); + goto bigint_slow_case1; + } + sp[-1] = __JS_NewShortBigInt(ctx, -v); + break; + default: + abort(); + } } break; case JS_TAG_BIG_INT: - if (js_unary_arith_bigint(ctx, sp - 1, op, op1)) - goto exception; + { + JSBigInt *r; + p1 = 
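The rewritten js_unary_arith_slow above keeps ++ and -- on short bigints branch-cheap: only the two extreme values can leave the inline range, so exactly those fall through to the limb-based slow case. A standalone sketch with an assumed 32-bit bound:

    #include <stdbool.h>
    #include <stdint.h>

    #define SHORT_MAX INT32_MAX   /* assumed; stands in for JS_SHORT_BIG_INT_MAX */

    /* returns false when the caller must take the heap-bigint path */
    static bool fast_inc(int64_t v, int64_t *out)
    {
        if (v == SHORT_MAX)
            return false;
        *out = v + 1;
        return true;
    }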
JS_VALUE_GET_PTR(op1); + bigint_slow_case1: + switch(op) { + case OP_plus: + JS_ThrowTypeError(ctx, "bigint argument with unary +"); + JS_FreeValue(ctx, op1); + goto exception; + case OP_inc: + case OP_dec: + { + JSBigIntBuf buf2; + JSBigInt *p2; + p2 = js_bigint_set_si(&buf2, 2 * (op - OP_dec) - 1); + r = js_bigint_add(ctx, p1, p2, 0); + } + break; + case OP_neg: + r = js_bigint_neg(ctx, p1); + break; + case OP_not: + r = js_bigint_not(ctx, p1); + break; + default: + abort(); + } + JS_FreeValue(ctx, op1); + if (!r) + goto exception; + sp[-1] = JS_CompactBigInt(ctx, r); + } break; default: handle_float64: { - double d = JS_VALUE_GET_FLOAT64(op1); + double d; + d = JS_VALUE_GET_FLOAT64(op1); switch(op) { case OP_inc: case OP_dec: @@ -12649,17 +14008,24 @@ static no_inline int js_not_slow(JSContext *ctx, JSValue *sp) { JSValue op1; - op1 = JS_ToNumericFree(ctx, sp[-1]); + op1 = sp[-1]; + op1 = JS_ToNumericFree(ctx, op1); if (JS_IsException(op1)) goto exception; - if (JS_VALUE_GET_TAG(op1) == JS_TAG_BIG_INT) { - if (js_unary_arith_bigint(ctx, sp - 1, OP_not, op1)) + if (JS_VALUE_GET_TAG(op1) == JS_TAG_SHORT_BIG_INT) { + sp[-1] = __JS_NewShortBigInt(ctx, ~JS_VALUE_GET_SHORT_BIG_INT(op1)); + } else if (JS_VALUE_GET_TAG(op1) == JS_TAG_BIG_INT) { + JSBigInt *r; + r = js_bigint_not(ctx, JS_VALUE_GET_PTR(op1)); + JS_FreeValue(ctx, op1); + if (!r) goto exception; + sp[-1] = JS_CompactBigInt(ctx, r); } else { int32_t v1; if (unlikely(JS_ToInt32Free(ctx, &v1, op1))) goto exception; - sp[-1] = js_int32(~v1); + sp[-1] = JS_NewInt32(ctx, ~v1); } return 0; exception: @@ -12667,107 +14033,6 @@ static no_inline int js_not_slow(JSContext *ctx, JSValue *sp) return -1; } -static int js_binary_arith_bigint(JSContext *ctx, OPCodeEnum op, - JSValue *pres, JSValue op1, JSValue op2) -{ - bf_t a_s, b_s, *r, *a, *b; - int ret; - JSValue res; - - a = JS_ToBigIntFree(ctx, &a_s, op1); - if (!a) { - JS_FreeValue(ctx, op2); - return -1; - } - b = JS_ToBigIntFree(ctx, &b_s, op2); - if (!b) { - JS_FreeBigInt(ctx, a, &a_s); - return -1; - } - res = JS_NewBigInt(ctx); - if (JS_IsException(res)) { - JS_FreeBigInt(ctx, a, &a_s); - JS_FreeBigInt(ctx, b, &b_s); - return -1; - } - r = JS_GetBigInt(res); - ret = 0; - switch(op) { - case OP_add: - ret = bf_add(r, a, b, BF_PREC_INF, BF_RNDZ); - break; - case OP_sub: - ret = bf_sub(r, a, b, BF_PREC_INF, BF_RNDZ); - break; - case OP_mul: - ret = bf_mul(r, a, b, BF_PREC_INF, BF_RNDZ); - break; - case OP_div: - { - bf_t rem_s, *rem = &rem_s; - bf_init(ctx->bf_ctx, rem); - ret = bf_divrem(r, rem, a, b, BF_PREC_INF, BF_RNDZ, BF_RNDZ); - bf_delete(rem); - } - break; - case OP_mod: - ret = bf_rem(r, a, b, BF_PREC_INF, BF_RNDZ, - BF_RNDZ) & BF_ST_INVALID_OP; - break; - case OP_pow: - if (b->sign) { - ret = BF_ST_INVALID_OP; - } else { - ret = bf_pow(r, a, b, BF_PREC_INF, BF_RNDZ | BF_POW_JS_QUIRKS); - } - break; - - /* logical operations */ - case OP_shl: - case OP_sar: - { - slimb_t v2; -#if LIMB_BITS == 32 - bf_get_int32(&v2, b, 0); - if (v2 == INT32_MIN) - v2 = INT32_MIN + 1; -#else - bf_get_int64(&v2, b, 0); - if (v2 == INT64_MIN) - v2 = INT64_MIN + 1; -#endif - if (op == OP_sar) - v2 = -v2; - ret = bf_set(r, a); - ret |= bf_mul_2exp(r, v2, BF_PREC_INF, BF_RNDZ); - if (v2 < 0) { - ret |= bf_rint(r, BF_RNDD) & (BF_ST_OVERFLOW | BF_ST_MEM_ERROR); - } - } - break; - case OP_and: - ret = bf_logic_and(r, a, b); - break; - case OP_or: - ret = bf_logic_or(r, a, b); - break; - case OP_xor: - ret = bf_logic_xor(r, a, b); - break; - default: - abort(); - } - JS_FreeBigInt(ctx, a, &a_s); - 
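js_bigint_set_short and js_bigint_set_si above avoid allocating for one-off operands: the short value is expanded into a caller-owned stack buffer shaped like a heap bigint, so the general limb routines accept either representation. The field layout below is purely illustrative, not the real JSBigIntBuf:

    #include <stdint.h>

    typedef struct {          /* stand-in for JSBigIntBuf */
        int len;
        uint32_t tab[2];      /* room for one 64-bit value in 32-bit limbs */
    } FakeBigInt;

    static FakeBigInt *set_short(FakeBigInt *buf, int64_t v)
    {
        buf->tab[0] = (uint32_t)v;
        buf->tab[1] = (uint32_t)(v >> 32);
        buf->len = 2;
        return buf;           /* points at the stack buffer; never freed */
    }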
JS_FreeBigInt(ctx, b, &b_s); - if (unlikely(ret)) { - JS_FreeValue(ctx, res); - throw_bf_exception(ctx, ret); - return -1; - } - *pres = JS_CompactBigInt(ctx, res); - return 0; -} - static no_inline __exception int js_binary_arith_slow(JSContext *ctx, JSValue *sp, OPCodeEnum op) { @@ -12785,7 +14050,50 @@ static no_inline __exception int js_binary_arith_slow(JSContext *ctx, JSValue *s d2 = JS_VALUE_GET_FLOAT64(op2); goto handle_float64; } - + /* fast path for short big int operations */ + if (tag1 == JS_TAG_SHORT_BIG_INT && tag2 == JS_TAG_SHORT_BIG_INT) { + js_slimb_t v1, v2; + js_sdlimb_t v; + v1 = JS_VALUE_GET_SHORT_BIG_INT(op1); + v2 = JS_VALUE_GET_SHORT_BIG_INT(op2); + switch(op) { + case OP_sub: + v = (js_sdlimb_t)v1 - (js_sdlimb_t)v2; + break; + case OP_mul: + v = (js_sdlimb_t)v1 * (js_sdlimb_t)v2; + break; + case OP_div: + if (v2 == 0 || + ((js_limb_t)v1 == (js_limb_t)1 << (JS_LIMB_BITS - 1) && + v2 == -1)) { + goto slow_big_int; + } + sp[-2] = __JS_NewShortBigInt(ctx, v1 / v2); + return 0; + case OP_mod: + if (v2 == 0 || + ((js_limb_t)v1 == (js_limb_t)1 << (JS_LIMB_BITS - 1) && + v2 == -1)) { + goto slow_big_int; + } + sp[-2] = __JS_NewShortBigInt(ctx, v1 % v2); + return 0; + case OP_pow: + goto slow_big_int; + default: + abort(); + } + if (likely(v >= JS_SHORT_BIG_INT_MIN && v <= JS_SHORT_BIG_INT_MAX)) { + sp[-2] = __JS_NewShortBigInt(ctx, v); + } else { + JSBigInt *r = js_bigint_new_di(ctx, v); + if (!r) + goto exception; + sp[-2] = JS_MKPTR(JS_TAG_BIG_INT, r); + } + return 0; + } op1 = JS_ToNumericFree(ctx, op1); if (JS_IsException(op1)) { JS_FreeValue(ctx, op2); @@ -12816,7 +14124,7 @@ static no_inline __exception int js_binary_arith_slow(JSContext *ctx, JSValue *s } break; case OP_div: - sp[-2] = js_float64((double)v1 / (double)v2); + sp[-2] = js_number((double)v1 / (double)v2); return 0; case OP_mod: if (v1 < 0 || v2 <= 0) { @@ -12833,9 +14141,47 @@ static no_inline __exception int js_binary_arith_slow(JSContext *ctx, JSValue *s abort(); } sp[-2] = js_int64(v); - } else if (tag1 == JS_TAG_BIG_INT || tag2 == JS_TAG_BIG_INT) { - if (js_binary_arith_bigint(ctx, op, sp - 2, op1, op2)) + } else if ((tag1 == JS_TAG_SHORT_BIG_INT || tag1 == JS_TAG_BIG_INT) && + (tag2 == JS_TAG_SHORT_BIG_INT || tag2 == JS_TAG_BIG_INT)) { + JSBigInt *p1, *p2, *r; + JSBigIntBuf buf1, buf2; + slow_big_int: + /* bigint result */ + if (JS_VALUE_GET_TAG(op1) == JS_TAG_SHORT_BIG_INT) + p1 = js_bigint_set_short(&buf1, op1); + else + p1 = JS_VALUE_GET_PTR(op1); + if (JS_VALUE_GET_TAG(op2) == JS_TAG_SHORT_BIG_INT) + p2 = js_bigint_set_short(&buf2, op2); + else + p2 = JS_VALUE_GET_PTR(op2); + switch(op) { + case OP_add: + r = js_bigint_add(ctx, p1, p2, 0); + break; + case OP_sub: + r = js_bigint_add(ctx, p1, p2, 1); + break; + case OP_mul: + r = js_bigint_mul(ctx, p1, p2); + break; + case OP_div: + r = js_bigint_divrem(ctx, p1, p2, false); + break; + case OP_mod: + r = js_bigint_divrem(ctx, p1, p2, true); + break; + case OP_pow: + r = js_bigint_pow(ctx, p1, p2); + break; + default: + abort(); + } + JS_FreeValue(ctx, op1); + JS_FreeValue(ctx, op2); + if (!r) goto exception; + sp[-2] = JS_CompactBigInt(ctx, r); } else { double dr; /* float64 result */ @@ -12892,6 +14238,23 @@ static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp) sp[-2] = js_float64(d1 + d2); return 0; } + /* fast path for short bigint */ + if (tag1 == JS_TAG_SHORT_BIG_INT && tag2 == JS_TAG_SHORT_BIG_INT) { + js_slimb_t v1, v2; + js_sdlimb_t v; + v1 = JS_VALUE_GET_SHORT_BIG_INT(op1); + v2 = JS_VALUE_GET_SHORT_BIG_INT(op2); + v 
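The short-bigint fast path above computes in a double-width signed type (js_sdlimb_t) and only allocates when the result leaves the inline range. The same idiom, standalone, with 32-bit limbs assumed:

    #include <stdbool.h>
    #include <stdint.h>

    /* multiply two limb-sized values; report whether the result still
       fits the short range, else the caller allocates a heap bigint */
    static bool mul_fits_short(int32_t a, int32_t b, int64_t *out)
    {
        int64_t v = (int64_t)a * (int64_t)b;   /* cannot overflow int64 */
        *out = v;
        return v >= INT32_MIN && v <= INT32_MAX;
    }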
= (js_sdlimb_t)v1 + (js_sdlimb_t)v2; + if (likely(v >= JS_SHORT_BIG_INT_MIN && v <= JS_SHORT_BIG_INT_MAX)) { + sp[-2] = __JS_NewShortBigInt(ctx, v); + } else { + JSBigInt *r = js_bigint_new_di(ctx, v); + if (!r) + goto exception; + sp[-2] = JS_MKPTR(JS_TAG_BIG_INT, r); + } + return 0; + } if (tag1 == JS_TAG_OBJECT || tag2 == JS_TAG_OBJECT) { op1 = JS_ToPrimitiveFree(ctx, op1, HINT_NONE); @@ -12935,10 +14298,26 @@ static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp) v1 = JS_VALUE_GET_INT(op1); v2 = JS_VALUE_GET_INT(op2); v = (int64_t)v1 + (int64_t)v2; - sp[-2] = js_int64(v); - } else if (tag1 == JS_TAG_BIG_INT || tag2 == JS_TAG_BIG_INT) { - if (js_binary_arith_bigint(ctx, OP_add, sp - 2, op1, op2)) + sp[-2] = JS_NewInt64(ctx, v); + } else if ((tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT) && + (tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT)) { + JSBigInt *p1, *p2, *r; + JSBigIntBuf buf1, buf2; + /* bigint result */ + if (JS_VALUE_GET_TAG(op1) == JS_TAG_SHORT_BIG_INT) + p1 = js_bigint_set_short(&buf1, op1); + else + p1 = JS_VALUE_GET_PTR(op1); + if (JS_VALUE_GET_TAG(op2) == JS_TAG_SHORT_BIG_INT) + p2 = js_bigint_set_short(&buf2, op2); + else + p2 = JS_VALUE_GET_PTR(op2); + r = js_bigint_add(ctx, p1, p2, 0); + JS_FreeValue(ctx, op1); + JS_FreeValue(ctx, op2); + if (!r) goto exception; + sp[-2] = JS_CompactBigInt(ctx, r); } else { double d1, d2; /* float64 result */ @@ -12970,6 +14349,62 @@ static no_inline __exception int js_binary_logic_slow(JSContext *ctx, tag1 = JS_VALUE_GET_NORM_TAG(op1); tag2 = JS_VALUE_GET_NORM_TAG(op2); + if (tag1 == JS_TAG_SHORT_BIG_INT && tag2 == JS_TAG_SHORT_BIG_INT) { + js_slimb_t v1, v2, v; + js_sdlimb_t vd; + v1 = JS_VALUE_GET_SHORT_BIG_INT(op1); + v2 = JS_VALUE_GET_SHORT_BIG_INT(op2); + /* bigint fast path */ + switch(op) { + case OP_and: + v = v1 & v2; + break; + case OP_or: + v = v1 | v2; + break; + case OP_xor: + v = v1 ^ v2; + break; + case OP_sar: + if (v2 > (JS_LIMB_BITS - 1)) { + goto slow_big_int; + } else if (v2 < 0) { + if (v2 < -(JS_LIMB_BITS - 1)) + goto slow_big_int; + v2 = -v2; + goto bigint_shl; + } + bigint_sar: + v = v1 >> v2; + break; + case OP_shl: + if (v2 > (JS_LIMB_BITS - 1)) { + goto slow_big_int; + } else if (v2 < 0) { + if (v2 < -(JS_LIMB_BITS - 1)) + goto slow_big_int; + v2 = -v2; + goto bigint_sar; + } + bigint_shl: + vd = (js_dlimb_t)v1 << v2; + if (likely(vd >= JS_SHORT_BIG_INT_MIN && + vd <= JS_SHORT_BIG_INT_MAX)) { + v = vd; + } else { + JSBigInt *r = js_bigint_new_di(ctx, vd); + if (!r) + goto exception; + sp[-2] = JS_MKPTR(JS_TAG_BIG_INT, r); + return 0; + } + break; + default: + abort(); + } + sp[-2] = __JS_NewShortBigInt(ctx, v); + return 0; + } op1 = JS_ToNumericFree(ctx, op1); if (JS_IsException(op1)) { JS_FreeValue(ctx, op2); @@ -12983,15 +14418,50 @@ static no_inline __exception int js_binary_logic_slow(JSContext *ctx, tag1 = JS_VALUE_GET_TAG(op1); tag2 = JS_VALUE_GET_TAG(op2); - if (tag1 == JS_TAG_BIG_INT || tag2 == JS_TAG_BIG_INT) { - if (tag1 != tag2) { - JS_FreeValue(ctx, op1); - JS_FreeValue(ctx, op2); - JS_ThrowTypeError(ctx, "both operands must be BigInt"); - goto exception; - } else if (js_binary_arith_bigint(ctx, op, sp - 2, op1, op2)) { - goto exception; + if ((tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT) && + (tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT)) { + JSBigInt *p1, *p2, *r; + JSBigIntBuf buf1, buf2; + slow_big_int: + if (JS_VALUE_GET_TAG(op1) == JS_TAG_SHORT_BIG_INT) + p1 = js_bigint_set_short(&buf1, op1); + else + p1 = JS_VALUE_GET_PTR(op1); + 
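In the logic fast path above a negative shift count for one direction is folded into a positive shift in the other direction (the bigint_shl/bigint_sar gotos), since BigInt a << -n equals a >> n. A compact sketch; the caller is assumed to have bounded |count| below the word width:

    #include <stdint.h>

    static int64_t shift_signed(int64_t v, int count)  /* count may be < 0 */
    {
        if (count >= 0)
            return (int64_t)((uint64_t)v << count); /* caller checked the range */
        return v >> -count;   /* arithmetic shift keeps the sign */
    }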
if (JS_VALUE_GET_TAG(op2) == JS_TAG_SHORT_BIG_INT) + p2 = js_bigint_set_short(&buf2, op2); + else + p2 = JS_VALUE_GET_PTR(op2); + switch(op) { + case OP_and: + case OP_or: + case OP_xor: + r = js_bigint_logic(ctx, p1, p2, op); + break; + case OP_shl: + case OP_sar: + { + js_slimb_t shift; + shift = js_bigint_get_si_sat(p2); + if (shift > INT32_MAX) + shift = INT32_MAX; + else if (shift < -INT32_MAX) + shift = -INT32_MAX; + if (op == OP_sar) + shift = -shift; + if (shift >= 0) + r = js_bigint_shl(ctx, p1, shift); + else + r = js_bigint_shr(ctx, p1, -shift); + } + break; + default: + abort(); } + JS_FreeValue(ctx, op1); + JS_FreeValue(ctx, op2); + if (!r) + goto exception; + sp[-2] = JS_CompactBigInt(ctx, r); } else { if (unlikely(JS_ToInt32Free(ctx, (int32_t *)&v1, op1))) { JS_FreeValue(ctx, op2); @@ -13027,49 +14497,96 @@ static no_inline __exception int js_binary_logic_slow(JSContext *ctx, return -1; } +/* op1 must be a bigint or int. */ +static JSBigInt *JS_ToBigIntBuf(JSContext *ctx, JSBigIntBuf *buf1, + JSValue op1) +{ + JSBigInt *p1; + + switch(JS_VALUE_GET_TAG(op1)) { + case JS_TAG_INT: + p1 = js_bigint_set_si(buf1, JS_VALUE_GET_INT(op1)); + break; + case JS_TAG_SHORT_BIG_INT: + p1 = js_bigint_set_short(buf1, op1); + break; + case JS_TAG_BIG_INT: + p1 = JS_VALUE_GET_PTR(op1); + break; + default: + abort(); + } + return p1; +} + +/* op1 and op2 must be numeric types and at least one must be a + bigint. No exception is generated. */ static int js_compare_bigint(JSContext *ctx, OPCodeEnum op, JSValue op1, JSValue op2) { - bf_t a_s, b_s, *a, *b; - int res; + int res, val, tag1, tag2; + JSBigIntBuf buf1, buf2; + JSBigInt *p1, *p2; - a = JS_ToBigInt1(ctx, &a_s, op1); - if (!a) { - JS_FreeValue(ctx, op2); - return -1; - } - b = JS_ToBigInt1(ctx, &b_s, op2); - if (!b) { - if (a == &a_s) - bf_delete(a); + tag1 = JS_VALUE_GET_NORM_TAG(op1); + tag2 = JS_VALUE_GET_NORM_TAG(op2); + if ((tag1 == JS_TAG_SHORT_BIG_INT || tag1 == JS_TAG_INT) && + (tag2 == JS_TAG_SHORT_BIG_INT || tag2 == JS_TAG_INT)) { + /* fast path */ + js_slimb_t v1, v2; + if (tag1 == JS_TAG_INT) + v1 = JS_VALUE_GET_INT(op1); + else + v1 = JS_VALUE_GET_SHORT_BIG_INT(op1); + if (tag2 == JS_TAG_INT) + v2 = JS_VALUE_GET_INT(op2); + else + v2 = JS_VALUE_GET_SHORT_BIG_INT(op2); + val = (v1 > v2) - (v1 < v2); + } else { + if (tag1 == JS_TAG_FLOAT64) { + p2 = JS_ToBigIntBuf(ctx, &buf2, op2); + val = js_bigint_float64_cmp(ctx, p2, JS_VALUE_GET_FLOAT64(op1)); + if (val == 2) + goto unordered; + val = -val; + } else if (tag2 == JS_TAG_FLOAT64) { + p1 = JS_ToBigIntBuf(ctx, &buf1, op1); + val = js_bigint_float64_cmp(ctx, p1, JS_VALUE_GET_FLOAT64(op2)); + if (val == 2) { + unordered: + JS_FreeValue(ctx, op1); + JS_FreeValue(ctx, op2); + return false; + } + } else { + p1 = JS_ToBigIntBuf(ctx, &buf1, op1); + p2 = JS_ToBigIntBuf(ctx, &buf2, op2); + val = js_bigint_cmp(ctx, p1, p2); + } JS_FreeValue(ctx, op1); - return -1; + JS_FreeValue(ctx, op2); } + switch(op) { case OP_lt: - res = bf_cmp_lt(a, b); /* if NaN return false */ + res = val < 0; break; case OP_lte: - res = bf_cmp_le(a, b); /* if NaN return false */ + res = val <= 0; break; case OP_gt: - res = bf_cmp_lt(b, a); /* if NaN return false */ + res = val > 0; break; case OP_gte: - res = bf_cmp_le(b, a); /* if NaN return false */ + res = val >= 0; break; case OP_eq: - res = bf_cmp_eq(a, b); /* if NaN return false */ + res = val == 0; break; default: abort(); } - if (a == &a_s) - bf_delete(a); - if (b == &b_s) - bf_delete(b); - JS_FreeValue(ctx, op1); - JS_FreeValue(ctx, op2); return res; } @@ 
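js_compare_bigint above collapses all relational opcodes onto one three-way comparison; each opcode then just tests the sign of val. The branch-free idiom:

    /* -1, 0 or 1, with no overflow risk from subtraction */
    static int cmp3(long v1, long v2)
    {
        return (v1 > v2) - (v1 < v2);
    }

    /* e.g. OP_lte becomes cmp3(v1, v2) <= 0 and OP_gt becomes
       cmp3(v1, v2) > 0; the float64 path returns 2 for "unordered"
       (NaN), in which case every relational op yields false */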
-13125,16 +14642,20 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp, /* fast path for float64/int */ goto float64_compare; } else { - if (((tag1 == JS_TAG_BIG_INT && tag2 == JS_TAG_STRING) || - (tag2 == JS_TAG_BIG_INT && tag1 == JS_TAG_STRING))) { + if ((((tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT) && + tag2 == JS_TAG_STRING) || + ((tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) && + tag1 == JS_TAG_STRING))) { if (tag1 == JS_TAG_STRING) { op1 = JS_StringToBigInt(ctx, op1); - if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT) + if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT && + JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT) goto invalid_bigint_string; } if (tag2 == JS_TAG_STRING) { op2 = JS_StringToBigInt(ctx, op2); - if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT) { + if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT && + JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT) { invalid_bigint_string: JS_FreeValue(ctx, op1); JS_FreeValue(ctx, op2); @@ -13158,10 +14679,9 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp, tag1 = JS_VALUE_GET_NORM_TAG(op1); tag2 = JS_VALUE_GET_NORM_TAG(op2); - if (tag1 == JS_TAG_BIG_INT || tag2 == JS_TAG_BIG_INT) { + if (tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT || + tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) { res = js_compare_bigint(ctx, op, op1, op2); - if (res < 0) - goto exception; } else { double d1, d2; @@ -13205,8 +14725,9 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp, static bool tag_is_number(uint32_t tag) { - return (tag == JS_TAG_INT || tag == JS_TAG_BIG_INT || - tag == JS_TAG_FLOAT64); + return (tag == JS_TAG_INT || + tag == JS_TAG_FLOAT64 || + tag == JS_TAG_BIG_INT || tag == JS_TAG_SHORT_BIG_INT); } static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp, @@ -13253,15 +14774,18 @@ static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp, } else if ((tag1 == JS_TAG_STRING && tag_is_number(tag2)) || (tag2 == JS_TAG_STRING && tag_is_number(tag1))) { - if ((tag1 == JS_TAG_BIG_INT || tag2 == JS_TAG_BIG_INT)) { + if (tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT || + tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) { if (tag1 == JS_TAG_STRING) { op1 = JS_StringToBigInt(ctx, op1); - if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT) + if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT && + JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT) goto invalid_bigint_string; } if (tag2 == JS_TAG_STRING) { op2 = JS_StringToBigInt(ctx, op2); - if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT) { + if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT && + JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT ) { invalid_bigint_string: JS_FreeValue(ctx, op1); JS_FreeValue(ctx, op2); @@ -13343,8 +14867,10 @@ static no_inline int js_shr_slow(JSContext *ctx, JSValue *sp) goto exception; } - if ((JS_VALUE_GET_TAG(op1) == JS_TAG_BIG_INT || - JS_VALUE_GET_TAG(op2) == JS_TAG_BIG_INT)) { + if (JS_VALUE_GET_TAG(op1) == JS_TAG_BIG_INT || + JS_VALUE_GET_TAG(op1) == JS_TAG_SHORT_BIG_INT || + JS_VALUE_GET_TAG(op2) == JS_TAG_BIG_INT || + JS_VALUE_GET_TAG(op2) == JS_TAG_SHORT_BIG_INT) { JS_ThrowTypeError(ctx, "BigInt operands are forbidden for >>>"); JS_FreeValue(ctx, op1); JS_FreeValue(ctx, op2); @@ -13453,20 +14979,27 @@ static bool js_strict_eq2(JSContext *ctx, JSValue op1, JSValue op2, res = (d1 == d2); /* if NaN return false and +0 == -0 */ } goto done_no_free; + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: { - bf_t a_s, *a, b_s, *b; - if (tag1 != tag2) { + JSBigIntBuf buf1, 
buf2; + JSBigInt *p1, *p2; + + if (tag2 != JS_TAG_SHORT_BIG_INT && + tag2 != JS_TAG_BIG_INT) { res = false; break; } - a = JS_ToBigInt1(ctx, &a_s, op1); - b = JS_ToBigInt1(ctx, &b_s, op2); - res = bf_cmp_eq(a, b); - if (a == &a_s) - bf_delete(a); - if (b == &b_s) - bf_delete(b); + + if (JS_VALUE_GET_TAG(op1) == JS_TAG_SHORT_BIG_INT) + p1 = js_bigint_set_short(&buf1, op1); + else + p1 = JS_VALUE_GET_PTR(op1); + if (JS_VALUE_GET_TAG(op2) == JS_TAG_SHORT_BIG_INT) + p2 = js_bigint_set_short(&buf2, op2); + else + p2 = JS_VALUE_GET_PTR(op2); + res = (js_bigint_cmp(ctx, p1, p2) == 0); } break; default: @@ -13605,6 +15138,7 @@ static __exception int js_operator_typeof(JSContext *ctx, JSValue op1) tag = JS_VALUE_GET_NORM_TAG(op1); switch(tag) { + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: atom = JS_ATOM_bigint; break; @@ -14130,9 +15664,13 @@ static JSValue JS_IteratorNext(JSContext *ctx, JSValueConst enum_obj, obj = JS_IteratorNext2(ctx, enum_obj, method, argc, argv, &done); if (JS_IsException(obj)) goto fail; - if (done != 2) { - *pdone = done; + if (likely(done == 0)) { + *pdone = false; return obj; + } else if (done != 2) { + JS_FreeValue(ctx, obj); + *pdone = true; + return JS_UNDEFINED; } else { done_val = JS_GetProperty(ctx, obj, JS_ATOM_done); if (JS_IsException(done_val)) @@ -14505,10 +16043,16 @@ static JSVarRef *get_var_ref(JSContext *ctx, JSStackFrame *sf, { JSVarRef *var_ref; struct list_head *el; + JSValue *pvalue; + + if (is_arg) + pvalue = &sf->arg_buf[var_idx]; + else + pvalue = &sf->var_buf[var_idx]; list_for_each(el, &sf->var_ref_list) { var_ref = list_entry(el, JSVarRef, header.link); - if (var_ref->var_idx == var_idx && var_ref->is_arg == is_arg) { + if (var_ref->pvalue == pvalue) { var_ref->header.ref_count++; return var_ref; } @@ -14519,13 +16063,8 @@ static JSVarRef *get_var_ref(JSContext *ctx, JSStackFrame *sf, return NULL; var_ref->header.ref_count = 1; var_ref->is_detached = false; - var_ref->is_arg = is_arg; - var_ref->var_idx = var_idx; list_add_tail(&var_ref->header.link, &sf->var_ref_list); - if (is_arg) - var_ref->pvalue = &sf->arg_buf[var_idx]; - else - var_ref->pvalue = &sf->var_buf[var_idx]; + var_ref->pvalue = pvalue; var_ref->value = JS_UNDEFINED; return var_ref; } @@ -14754,15 +16293,10 @@ static void close_var_refs(JSRuntime *rt, JSStackFrame *sf) { struct list_head *el, *el1; JSVarRef *var_ref; - int var_idx; list_for_each_safe(el, el1, &sf->var_ref_list) { var_ref = list_entry(el, JSVarRef, header.link); - var_idx = var_ref->var_idx; - if (var_ref->is_arg) - var_ref->value = js_dup(sf->arg_buf[var_idx]); - else - var_ref->value = js_dup(sf->var_buf[var_idx]); + var_ref->value = js_dup(*var_ref->pvalue); var_ref->pvalue = &var_ref->value; /* the reference is no longer to a local variable */ var_ref->is_detached = true; @@ -14772,13 +16306,15 @@ static void close_var_refs(JSRuntime *rt, JSStackFrame *sf) static void close_lexical_var(JSContext *ctx, JSStackFrame *sf, int var_idx) { + JSValue *pvalue; struct list_head *el, *el1; JSVarRef *var_ref; + pvalue = &sf->var_buf[var_idx]; list_for_each_safe(el, el1, &sf->var_ref_list) { var_ref = list_entry(el, JSVarRef, header.link); - if (var_idx == var_ref->var_idx && !var_ref->is_arg) { - var_ref->value = js_dup(sf->var_buf[var_idx]); + if (var_ref->pvalue == pvalue) { + var_ref->value = js_dup(*var_ref->pvalue); var_ref->pvalue = &var_ref->value; list_del(&var_ref->header.link); /* the reference is no longer to a local variable */ @@ -15128,6 +16664,10 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, 
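get_var_ref above drops the (var_idx, is_arg) pair and keys open closure references on the address of the captured slot; close_var_refs then just copies through that pointer. A reduced model of the detach step (types illustrative):

    typedef struct VarRef {
        struct VarRef *next;
        double *pvalue;       /* aliases a frame slot while open */
        double value;         /* owned copy once detached */
    } VarRef;

    static void close_refs(VarRef *list)
    {
        for (VarRef *r = list; r; r = r->next) {
            r->value = *r->pvalue;   /* capture the live value */
            r->pvalue = &r->value;   /* reference becomes self-contained */
        }
    }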
JSValueConst func_obj, *sp++ = js_int32(get_u32(pc)); pc += 4; BREAK; + CASE(OP_push_bigint_i32): + *sp++ = __JS_NewShortBigInt(ctx, (int)get_u32(pc)); + pc += 4; + BREAK; CASE(OP_push_const): *sp++ = js_dup(b->cpool[get_u32(pc)]); pc += 4; @@ -18978,8 +20518,6 @@ static const JSOpCode opcode_info[OP_COUNT + (OP_TEMP_END - OP_TEMP_START)] = { opcode_info[(op) >= OP_TEMP_START ? \ (op) + (OP_TEMP_END - OP_TEMP_START) : (op)] -static __exception int next_token(JSParseState *s); - static void free_token(JSParseState *s, JSToken *token) { switch(token->val) { @@ -19086,6 +20624,10 @@ int JS_PRINTF_FORMAT_ATTR(2, 3) js_parse_error(JSParseState *s, JS_PRINTF_FORMAT return -1; } +#ifndef QJS_DISABLE_PARSER + +static __exception int next_token(JSParseState *s); + static int js_parse_expect(JSParseState *s, int tok) { char buf[ATOM_GET_STR_BUF_SIZE]; @@ -19430,6 +20972,8 @@ static __exception int js_parse_regexp(JSParseState *s) return -1; } +#endif // QJS_DISABLE_PARSER + static __exception int ident_realloc(JSContext *ctx, char **pbuf, size_t *psize, char *static_buf) { @@ -19457,6 +21001,8 @@ static __exception int ident_realloc(JSContext *ctx, char **pbuf, size_t *psize, return 0; } +#ifndef QJS_DISABLE_PARSER + /* convert a TOK_IDENT to a keyword when needed */ static void update_token_ident(JSParseState *s) { @@ -20031,6 +21577,8 @@ static __exception int next_token(JSParseState *s) return -1; } +#endif // QJS_DISABLE_PARSER + static int json_parse_error(JSParseState *s, const uint8_t *curp, const char *msg) { const uint8_t *p, *line_start; @@ -20331,6 +21879,8 @@ static __exception int json_next_token(JSParseState *s) return -1; } +#ifndef QJS_DISABLE_PARSER + /* only used for ':' and '=>', 'let' or 'function' look-ahead. *pp is only set if TOK_IMPORT is returned */ /* XXX: handle all unicode cases */ @@ -23482,7 +25032,17 @@ static __exception int js_parse_postfix_expr(JSParseState *s, int parse_flags) if (JS_VALUE_GET_TAG(val) == JS_TAG_INT) { emit_op(s, OP_push_i32); emit_u32(s, JS_VALUE_GET_INT(val)); + } else if (JS_VALUE_GET_TAG(val) == JS_TAG_SHORT_BIG_INT) { + int64_t v; + v = JS_VALUE_GET_SHORT_BIG_INT(val); + if (v >= INT32_MIN && v <= INT32_MAX) { + emit_op(s, OP_push_bigint_i32); + emit_u32(s, v); + } else { + goto large_number; + } } else { + large_number: if (emit_push_const(s, val, 0) < 0) return -1; } @@ -26237,6 +27797,8 @@ fail: return -1; } +#endif // QJS_DISABLE_PARSER + /* 'name' is freed */ static JSModuleDef *js_new_module_def(JSContext *ctx, JSAtom name) { @@ -26322,6 +27884,8 @@ static void js_free_module_def(JSContext *ctx, JSModuleDef *m) js_free(ctx, m); } +#ifndef QJS_DISABLE_PARSER + static int add_req_module_entry(JSContext *ctx, JSModuleDef *m, JSAtom module_name) { @@ -26346,6 +27910,8 @@ static int add_req_module_entry(JSContext *ctx, JSModuleDef *m, return i; } +#endif // QJS_DISABLE_PARSER + static JSExportEntry *find_export_entry(JSContext *ctx, const JSModuleDef *m, JSAtom export_name) { @@ -26390,6 +27956,8 @@ static JSExportEntry *add_export_entry2(JSContext *ctx, return me; } +#ifndef QJS_DISABLE_PARSER + static JSExportEntry *add_export_entry(JSParseState *s, JSModuleDef *m, JSAtom local_name, JSAtom export_name, JSExportTypeEnum export_type) @@ -26413,6 +27981,8 @@ static int add_star_export_entry(JSContext *ctx, JSModuleDef *m, return 0; } +#endif // QJS_DISABLE_PARSER + /* create a C module */ /* `name_str` may be pure ASCII or UTF-8 encoded */ JSModuleDef *JS_NewCModule(JSContext *ctx, const char *name_str, @@ -28061,6 +29631,8 @@ static JSValue 
js_evaluate_module(JSContext *ctx, JSModuleDef *m) return js_dup(m->promise); } +#ifndef QJS_DISABLE_PARSER + static __exception JSAtom js_parse_from_clause(JSParseState *s) { JSAtom module_name; @@ -28500,6 +30072,8 @@ static JSFunctionDef *js_new_function_def(JSContext *ctx, return fd; } +#endif // QJS_DISABLE_PARSER + static void free_bytecode_atoms(JSRuntime *rt, const uint8_t *bc_buf, int bc_len, bool use_short_opcodes) @@ -28533,6 +30107,8 @@ static void free_bytecode_atoms(JSRuntime *rt, } } +#ifndef QJS_DISABLE_PARSER + static void js_free_function_def(JSContext *ctx, JSFunctionDef *fd) { int i; @@ -28595,6 +30171,8 @@ static void js_free_function_def(JSContext *ctx, JSFunctionDef *fd) js_free(ctx, fd); } +#endif // QJS_DISABLE_PARSER + #ifdef ENABLE_DUMPS // JS_DUMP_BYTECODE_* static const char *skip_lines(const char *p, int n) { while (p && n-- > 0 && *p) { @@ -29051,6 +30629,8 @@ static __maybe_unused void js_dump_function_bytecode(JSContext *ctx, JSFunctionB } #endif +#ifndef QJS_DISABLE_PARSER + static int add_closure_var(JSContext *ctx, JSFunctionDef *s, bool is_local, bool is_arg, int var_idx, JSAtom var_name, @@ -31544,6 +33124,28 @@ static __exception int resolve_labels(JSContext *ctx, JSFunctionDef *s) push_short_int(&bc_out, val); break; + case OP_push_bigint_i32: + { + /* transform i32(val) neg -> i32(-val) */ + val = get_i32(bc_buf + pos + 1); + if (val != INT32_MIN + && code_match(&cc, pos_next, OP_neg, -1)) { + if (cc.line_num >= 0) line_num = cc.line_num; + if (cc.col_num >= 0) col_num = cc.col_num; + if (code_match(&cc, cc.pos, OP_drop, -1)) { + if (cc.line_num >= 0) line_num = cc.line_num; + if (cc.col_num >= 0) col_num = cc.col_num; + } else { + add_pc2line_info(s, bc_out.size, line_num, col_num); + dbuf_putc(&bc_out, OP_push_bigint_i32); + dbuf_put_u32(&bc_out, -val); + } + pos_next = cc.pos; + break; + } + } + goto no_change; + case OP_push_const: case OP_fclosure: { @@ -32336,8 +33938,10 @@ static JSValue js_create_function(JSContext *ctx, JSFunctionDef *fd) are used to compile the eval and they must be ordered by scope, so it is necessary to create the closure variables before any other variable lookup is done. */ +#ifndef QJS_DISABLE_PARSER if (fd->has_eval_call) add_eval_variables(ctx, fd); +#endif // QJS_DISABLE_PARSER /* add the module global variables in the closure */ if (fd->module) { @@ -32496,6 +34100,8 @@ static JSValue js_create_function(JSContext *ctx, JSFunctionDef *fd) return JS_EXCEPTION; } +#endif // QJS_DISABLE_PARSER + static void free_function_bytecode(JSRuntime *rt, JSFunctionBytecode *b) { int i; @@ -32530,6 +34136,8 @@ static void free_function_bytecode(JSRuntime *rt, JSFunctionBytecode *b) } } +#ifndef QJS_DISABLE_PARSER + static __exception int js_parse_directives(JSParseState *s) { char str[20]; @@ -33341,6 +34949,8 @@ static __exception int js_parse_program(JSParseState *s) return 0; } +#endif // QJS_DISABLE_PARSER + static void js_parse_init(JSContext *ctx, JSParseState *s, const char *input, size_t input_len, const char *filename, int line) @@ -33396,6 +35006,8 @@ JSValue JS_EvalFunction(JSContext *ctx, JSValue fun_obj) return JS_EvalFunctionInternal(ctx, fun_obj, ctx->global_obj, NULL, NULL); } +#ifndef QJS_DISABLE_PARSER + /* 'input' must be zero terminated i.e. input[input_len] = '\0'. 
*/ /* `export_name` and `input` may be pure ASCII or UTF-8 encoded */ static JSValue __JS_EvalInternal(JSContext *ctx, JSValueConst this_obj, @@ -33512,6 +35124,8 @@ static JSValue __JS_EvalInternal(JSContext *ctx, JSValueConst this_obj, return JS_EXCEPTION; } +#endif // QJS_DISABLE_PARSER + /* the indirection is needed to make 'eval' optional */ static JSValue JS_EvalInternal(JSContext *ctx, JSValueConst this_obj, const char *input, size_t input_len, @@ -33748,7 +35362,7 @@ typedef enum BCTagEnum { BC_TAG_SYMBOL, } BCTagEnum; -#define BC_VERSION 19 +#define BC_VERSION 20 typedef struct BCWriterState { JSContext *ctx; @@ -33798,6 +35412,13 @@ static const char * const bc_tag_str[] = { "Set", "Symbol", }; + +static const char *bc_tag_name(uint8_t tag) +{ + if (tag >= countof(bc_tag_str)) + return "<bad tag>"; + return bc_tag_str[tag]; +} #endif static void bc_put_u8(BCWriterState *s, uint8_t v) @@ -34018,71 +35639,44 @@ static void JS_WriteString(BCWriterState *s, JSString *p) static int JS_WriteBigInt(BCWriterState *s, JSValueConst obj) { - uint32_t tag, tag1; - int64_t e; - JSBigInt *bf = JS_VALUE_GET_PTR(obj); - bf_t *a = &bf->num; - size_t len, i, n1, j; - limb_t v; + JSBigIntBuf buf; + JSBigInt *p; + uint32_t len, i; + js_limb_t v, b; + int shift; - tag = JS_VALUE_GET_TAG(obj); - switch(tag) { - case JS_TAG_BIG_INT: - tag1 = BC_TAG_BIG_INT; - break; - default: - abort(); - } - bc_put_u8(s, tag1); + bc_put_u8(s, BC_TAG_BIG_INT); - /* sign + exponent */ - if (a->expn == BF_EXP_ZERO) - e = 0; - else if (a->expn == BF_EXP_INF) - e = 1; - else if (a->expn == BF_EXP_NAN) - e = 2; - else if (a->expn >= 0) - e = a->expn + 3; + if (JS_VALUE_GET_TAG(obj) == JS_TAG_SHORT_BIG_INT) + p = js_bigint_set_short(&buf, obj); else - e = a->expn; - e = (e * 2) | a->sign; - if (e < INT32_MIN || e > INT32_MAX) { - JS_ThrowRangeError(s->ctx, "maximum BigInt size exceeded"); - return -1; - } - bc_put_sleb128(s, e); - - /* mantissa */ - if (a->len != 0) { - i = 0; - while (i < a->len && a->tab[i] == 0) - i++; - assert(i < a->len); - v = a->tab[i]; - n1 = sizeof(limb_t); - while ((v & 0xff) == 0) { - n1--; - v >>= 8; - } - i++; - len = (a->len - i) * sizeof(limb_t) + n1; - if (len > INT32_MAX) { - JS_ThrowRangeError(s->ctx, "maximum BigInt size exceeded"); - return -1; + p = JS_VALUE_GET_PTR(obj); + if (p->len == 1 && p->tab[0] == 0) { + /* zero case */ + len = 0; + } else { + /* compute the length of the two's complement representation + in bytes */ + len = p->len * (JS_LIMB_BITS / 8); + v = p->tab[p->len - 1]; + shift = JS_LIMB_BITS - 8; + while (shift > 0) { + b = (v >> shift) & 0xff; + if (b != 0x00 && b != 0xff) + break; + if ((b & 1) != ((v >> (shift - 1)) & 1)) + break; + shift -= 8; + len--; } - bc_put_leb128(s, len); - /* always saved in byte based little endian representation */ - for(j = 0; j < n1; j++) { - bc_put_u8(s, v >> (j * 8)); + } + bc_put_leb128(s, len); + if (len > 0) { + for(i = 0; i < (len / (JS_LIMB_BITS / 8)); i++) { + bc_put_u32(s, p->tab[i]); } - for(; i < a->len; i++) { - limb_t v = a->tab[i]; -#if LIMB_BITS == 32 - bc_put_u32(s, v); -#else - bc_put_u64(s, v); -#endif + for(i = 0; i < len % (JS_LIMB_BITS / 8); i++) { + bc_put_u8(s, (p->tab[p->len - 1] >> (i * 8)) & 0xff); } } return 0; @@ -34505,6 +36099,7 @@ static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj) goto fail; } break; + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: if (JS_WriteBigInt(s, obj)) goto fail; @@ -34850,6 +36445,10 @@ static JSString *JS_ReadString(BCReaderState *s) return NULL; is_wide_char = len 
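JS_WriteBigInt above now serializes the limb array as a minimal two's-complement byte string: leading bytes that are pure sign extension are trimmed as long as the sign of the remaining value still agrees. The same length rule, shown for a plain int64_t:

    #include <stdint.h>

    static int min_twos_complement_len(int64_t v)
    {
        int len = 8;
        while (len > 1) {
            uint8_t top   = (uint64_t)v >> ((len - 1) * 8);
            int below_msb = ((uint64_t)v >> ((len - 2) * 8 + 7)) & 1;
            if ((top == 0x00 && below_msb == 0) ||
                (top == 0xff && below_msb == 1))
                len--;        /* byte is redundant sign extension */
            else
                break;
        }
        return len;           /* e.g. 127 -> 1, 128 -> 2, -128 -> 1 */
    }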
& 1; len >>= 1; + if (len > JS_STRING_LEN_MAX) { + JS_ThrowInternalError(s->ctx, "string too long"); + return NULL; + } p = js_alloc_string(s->ctx, len, is_wide_char); if (!p) { s->error_state = -1; @@ -34947,77 +36546,46 @@ static int JS_ReadFunctionBytecode(BCReaderState *s, JSFunctionBytecode *b, static JSValue JS_ReadBigInt(BCReaderState *s) { - JSValue obj; + JSValue obj = JS_UNDEFINED; + uint32_t len, i, n; + JSBigInt *p; + js_limb_t v; uint8_t v8; - int32_t e; - uint32_t len; - limb_t l, i, n; - limb_t v; - bf_t *a; - obj = JS_NewBigInt(s->ctx); - if (JS_IsException(obj)) + if (bc_get_leb128(s, &len)) goto fail; - - /* sign + exponent */ - if (bc_get_sleb128(s, &e)) + bc_read_trace(s, "len=%" PRId64 "\n", (int64_t)len); + if (len == 0) { + /* zero case */ + bc_read_trace(s, "}\n"); + return __JS_NewShortBigInt(s->ctx, 0); + } + p = js_bigint_new(s->ctx, (len - 1) / (JS_LIMB_BITS / 8) + 1); + if (!p) goto fail; - - a = JS_GetBigInt(obj); - a->sign = e & 1; - e >>= 1; - if (e == 0) - a->expn = BF_EXP_ZERO; - else if (e == 1) - a->expn = BF_EXP_INF; - else if (e == 2) - a->expn = BF_EXP_NAN; - else if (e >= 3) - a->expn = e - 3; - else - a->expn = e; - - /* mantissa */ - if (a->expn != BF_EXP_ZERO && - a->expn != BF_EXP_INF && - a->expn != BF_EXP_NAN) { - if (bc_get_leb128(s, &len)) - goto fail; - bc_read_trace(s, "len=%" PRId64 "\n", (int64_t)len); - if (len == 0) { - JS_ThrowRangeError(s->ctx, "maximum BigInt size exceeded"); - goto fail; - } - l = (len + sizeof(limb_t) - 1) / sizeof(limb_t); - if (bf_resize(a, l)) { - JS_ThrowOutOfMemory(s->ctx); + for(i = 0; i < len / (JS_LIMB_BITS / 8); i++) { + if (bc_get_u32(s, &v)) goto fail; - } - n = len & (sizeof(limb_t) - 1); - if (n != 0) { - v = 0; - for(i = 0; i < n; i++) { - if (bc_get_u8(s, &v8)) - goto fail; - v |= (limb_t)v8 << ((sizeof(limb_t) - n + i) * 8); - } - a->tab[0] = v; - i = 1; - } else { - i = 0; - } - for(; i < l; i++) { -#if LIMB_BITS == 32 - if (bc_get_u32(s, &v)) - goto fail; -#else - if (bc_get_u64(s, &v)) + p->tab[i] = v; + } + n = len % (JS_LIMB_BITS / 8); + if (n != 0) { + int shift; + v = 0; + for(i = 0; i < n; i++) { + if (bc_get_u8(s, &v8)) goto fail; -#endif - a->tab[i] = v; + v |= (js_limb_t)v8 << (i * 8); + } + shift = JS_LIMB_BITS - n * 8; + /* extend the sign */ + if (shift != 0) { + v = (js_slimb_t)(v << shift) >> shift; } + p->tab[p->len - 1] = v; } - return obj; + bc_read_trace(s, "}\n"); + return JS_CompactBigInt(s->ctx, p); fail: JS_FreeValue(s->ctx, obj); return JS_EXCEPTION; @@ -35707,7 +37275,7 @@ static JSValue JS_ReadObjectRec(BCReaderState *s) if (bc_get_u8(s, &tag)) return JS_EXCEPTION; - bc_read_trace(s, "%s {\n", bc_tag_str[tag]); + bc_read_trace(s, "%s {\n", bc_tag_name(tag)); switch(tag) { case BC_TAG_NULL: @@ -36284,6 +37852,7 @@ JSValue JS_ToObject(JSContext *ctx, JSValueConst val) case JS_TAG_OBJECT: case JS_TAG_EXCEPTION: return js_dup(val); + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: obj = JS_NewObjectClass(ctx, JS_CLASS_BIG_INT); goto set_value; @@ -37289,10 +38858,8 @@ static JSValue js_object_fromEntries(JSContext *ctx, JSValueConst this_val, item = JS_IteratorNext(ctx, iter, next_method, 0, NULL, &done); if (JS_IsException(item)) goto fail; - if (done) { - JS_FreeValue(ctx, item); + if (done) break; - } key = JS_UNDEFINED; value = JS_UNDEFINED; @@ -41056,13 +42623,18 @@ static JSValue js_number_constructor(JSContext *ctx, JSValueConst new_target, if (JS_IsException(val)) return val; switch(JS_VALUE_GET_TAG(val)) { + case JS_TAG_SHORT_BIG_INT: + val = JS_NewInt64(ctx, 
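JS_ReadBigInt above rebuilds the top limb from the trimmed byte string and sign-extends it with a shift pair. Standalone, for 32-bit limbs:

    #include <stdint.h>

    /* after reading n in [1,3] bytes into the low end of v */
    static int32_t sign_extend(uint32_t v, int n)
    {
        int shift = 32 - n * 8;
        return (int32_t)(v << shift) >> shift;  /* arithmetic shift back */
    }

    /* sign_extend(0x80, 1) == -128; sign_extend(0x7f, 1) == 127 */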
JS_VALUE_GET_SHORT_BIG_INT(val)); + if (JS_IsException(val)) + return val; + break; case JS_TAG_BIG_INT: { JSBigInt *p = JS_VALUE_GET_PTR(val); double d; - bf_get_float64(&p->num, &d, BF_RNDN); + d = js_bigint_to_float64(ctx, p); JS_FreeValue(ctx, val); - val = js_float64(d); + val = JS_NewFloat64(ctx, d); } break; default: @@ -42555,7 +44127,7 @@ static JSValue js_string_pad(JSContext *ctx, JSValueConst this_val, } if (n > JS_STRING_LEN_MAX) { JS_ThrowRangeError(ctx, "invalid string length"); - goto fail2; + goto fail3; } if (string_buffer_init(ctx, b, n)) goto fail3; @@ -43287,25 +44859,32 @@ static JSValue js_math_clz32(JSContext *ctx, JSValueConst this_val, return js_int32(r); } +typedef enum SumPreciseStateEnum { + SUM_PRECISE_STATE_MINUS_ZERO, + SUM_PRECISE_STATE_NOT_A_NUMBER, + SUM_PRECISE_STATE_MINUS_INFINITY, + SUM_PRECISE_STATE_PLUS_INFINITY, + SUM_PRECISE_STATE_FINITE, +} SumPreciseStateEnum; + static JSValue js_math_sumPrecise(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSValue iter, next, item, ret; - bf_t a, b; int done; double d; - int r; + xsum_small_accumulator acc; + SumPreciseStateEnum state; iter = JS_GetIterator(ctx, argv[0], /*async*/false); if (JS_IsException(iter)) return JS_EXCEPTION; - bf_init(ctx->bf_ctx, &a); - bf_init(ctx->bf_ctx, &b); ret = JS_EXCEPTION; next = JS_GetProperty(ctx, iter, JS_ATOM_next); if (JS_IsException(next)) goto fail; - bf_set_zero(&a, /*is_neg*/true); + xsum_small_init(&acc); + state = SUM_PRECISE_STATE_MINUS_ZERO; for (;;) { item = JS_IteratorNext(ctx, iter, next, 0, NULL, &done); if (JS_IsException(item)) @@ -43324,25 +44903,52 @@ static JSValue js_math_sumPrecise(JSContext *ctx, JSValueConst this_val, d = JS_VALUE_GET_FLOAT64(item); break; } - if (bf_set_float64(&b, d)) - goto oom; - // Infinity + -Infinity results in BF_ST_INVALID_OP, sets |a| to nan - if ((r = bf_add(&a, &a, &b, BF_PREC_INF, BF_RNDN))) - if (r != BF_ST_INVALID_OP) - goto oom; + + if (state != SUM_PRECISE_STATE_NOT_A_NUMBER) { + if (isnan(d)) + state = SUM_PRECISE_STATE_NOT_A_NUMBER; + else if (!isfinite(d) && d > 0.0) + if (state == SUM_PRECISE_STATE_MINUS_INFINITY) + state = SUM_PRECISE_STATE_NOT_A_NUMBER; + else + state = SUM_PRECISE_STATE_PLUS_INFINITY; + else if (!isfinite(d) && d < 0.0) + if (state == SUM_PRECISE_STATE_PLUS_INFINITY) + state = SUM_PRECISE_STATE_NOT_A_NUMBER; + else + state = SUM_PRECISE_STATE_MINUS_INFINITY; + else if (!(d == 0.0 && signbit(d)) && (state == SUM_PRECISE_STATE_MINUS_ZERO || state == SUM_PRECISE_STATE_FINITE)) { + state = SUM_PRECISE_STATE_FINITE; + xsum_small_add1(&acc, d); + } + } + } + + switch (state) { + case SUM_PRECISE_STATE_NOT_A_NUMBER: + d = NAN; + break; + case SUM_PRECISE_STATE_MINUS_INFINITY: + d = -INFINITY; + break; + case SUM_PRECISE_STATE_PLUS_INFINITY: + d = INFINITY; + break; + case SUM_PRECISE_STATE_MINUS_ZERO: + d = -0.0; + break; + case SUM_PRECISE_STATE_FINITE: + d = xsum_small_round(&acc); + break; + default: + abort(); } - bf_get_float64(&a, &d, BF_RNDN); // return value deliberately ignored ret = js_float64(d); fail: JS_IteratorClose(ctx, iter, JS_IsException(ret)); JS_FreeValue(ctx, iter); JS_FreeValue(ctx, next); - bf_delete(&a); - bf_delete(&b); return ret; -oom: - JS_ThrowOutOfMemory(ctx); - goto fail; } /* xorshift* random number generator by Marsaglia */ @@ -43965,6 +45571,14 @@ bool lre_check_stack_overflow(void *opaque, size_t alloca_size) return js_check_stack_overflow(ctx->rt, alloca_size); } +int lre_check_timeout(void *opaque) +{ + JSContext *ctx = opaque; + JSRuntime 
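Math.sumPrecise above swaps libbf for an xsum small accumulator (Radford Neal's exact summation, the new xsum.c) wrapped in a five-state machine that settles -0, NaN and the two infinities before any digit work happens. The state logic, with a plain double standing in for the exact adder:

    #include <math.h>

    typedef enum { ST_MINUS_ZERO, ST_NAN, ST_NEG_INF,
                   ST_POS_INF, ST_FINITE } St;

    static void step(St *st, double *acc, double d)
    {
        if (*st == ST_NAN)
            return;                          /* NaN is sticky */
        if (isnan(d))
            *st = ST_NAN;
        else if (isinf(d) && d > 0)
            *st = (*st == ST_NEG_INF) ? ST_NAN : ST_POS_INF;
        else if (isinf(d) && d < 0)
            *st = (*st == ST_POS_INF) ? ST_NAN : ST_NEG_INF;
        else if (!(d == 0 && signbit(d)) &&
                 (*st == ST_MINUS_ZERO || *st == ST_FINITE)) {
            *st = ST_FINITE;
            *acc += d;     /* the real code calls xsum_small_add1() */
        }
    }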
*rt = ctx->rt; + return (rt->interrupt_handler && + rt->interrupt_handler(rt, rt->interrupt_opaque)); +} + #if 0 void *lre_realloc(void *opaque, void *ptr, size_t size) { @@ -44081,7 +45695,11 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, goto fail; } } else { - JS_ThrowInternalError(ctx, "out of memory in regexp execution"); + if (rc == LRE_RET_TIMEOUT) { + JS_ThrowInterrupted(ctx); + } else { + JS_ThrowInternalError(ctx, "out of memory in regexp execution"); + } goto fail; } } else { @@ -44276,7 +45894,11 @@ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValue ar goto fail; } } else { - JS_ThrowInternalError(ctx, "out of memory in regexp execution"); + if (ret == LRE_RET_TIMEOUT) { + JS_ThrowInterrupted(ctx); + } else { + JS_ThrowInternalError(ctx, "out of memory in regexp execution"); + } goto fail; } break; @@ -45422,6 +47044,7 @@ static JSValue js_json_check(JSContext *ctx, JSONStringifyContext *jsc, case JS_TAG_FLOAT64: case JS_TAG_BOOL: case JS_TAG_NULL: + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: case JS_TAG_EXCEPTION: return val; @@ -45452,6 +47075,11 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc, tab = JS_UNDEFINED; prop = JS_UNDEFINED; + if (js_check_stack_overflow(ctx->rt, 0)) { + JS_ThrowStackOverflow(ctx); + goto exception; + } + if (JS_IsObject(val)) { p = JS_VALUE_GET_OBJ(val); cl = p->class_id; @@ -45599,6 +47227,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc, case JS_TAG_NULL: concat_value: return string_buffer_concat_value_free(jsc->b, val); + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: JS_ThrowTypeError(ctx, "BigInt are forbidden in JSON.stringify"); goto exception; @@ -47050,26 +48679,6 @@ static const JSCFunctionListEntry js_symbol_funcs[] = { /* Set/Map/WeakSet/WeakMap */ -typedef struct JSMapRecord { - int ref_count; /* used during enumeration to avoid freeing the record */ - bool empty; /* true if the record is deleted */ - struct JSMapState *map; - struct list_head link; - struct list_head hash_link; - JSValue key; - JSValue value; -} JSMapRecord; - -typedef struct JSMapState { - bool is_weak; /* true if WeakSet/WeakMap */ - struct list_head records; /* list of JSMapRecord.link */ - uint32_t record_count; - struct list_head *hash_table; - uint32_t hash_size; /* must be a power of two */ - uint32_t record_count_threshold; /* count at which a hash table - resize is needed */ -} JSMapState; - #define MAGIC_SET (1 << 0) #define MAGIC_WEAK (1 << 1) @@ -47125,10 +48734,8 @@ static JSValue js_map_constructor(JSContext *ctx, JSValueConst new_target, item = JS_IteratorNext(ctx, iter, next_method, 0, NULL, &done); if (JS_IsException(item)) goto fail; - if (done) { - JS_FreeValue(ctx, item); + if (done) break; - } if (is_set) { ret = JS_Call(ctx, adder, obj, 1, vc(&item)); if (JS_IsException(ret)) { @@ -47206,7 +48813,7 @@ static uint32_t map_hash_key(JSContext *ctx, JSValueConst key) uint32_t h; double d; JSFloat64Union u; - bf_t *a; + JSBigInt *r; switch(tag) { case JS_TAG_BOOL: @@ -47222,9 +48829,12 @@ static uint32_t map_hash_key(JSContext *ctx, JSValueConst key) case JS_TAG_INT: d = JS_VALUE_GET_INT(key); goto hash_float64; + case JS_TAG_SHORT_BIG_INT: + d = JS_VALUE_GET_SHORT_BIG_INT(key); + goto hash_float64; case JS_TAG_BIG_INT: - a = JS_GetBigInt(key); - h = hash_string8((void *)a->tab, a->len * sizeof(*a->tab), 0); + r = JS_VALUE_GET_PTR(key); + h = hash_string8((void *)r->tab, r->len * sizeof(*r->tab), 0); break; case JS_TAG_FLOAT64: d = 
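lre_check_timeout above routes the runtime's interrupt handler into the regexp engine, and LRE_RET_TIMEOUT is surfaced as JS_ThrowInterrupted. From the embedder's side the standard interrupt API now also bounds regexp execution; a deadline handler might look like this (the handler body is an example, not part of the patch):

    #include <time.h>
    #include "quickjs.h"

    static time_t deadline;

    static int on_interrupt(JSRuntime *rt, void *opaque)
    {
        (void)rt; (void)opaque;
        return time(NULL) > deadline;   /* nonzero aborts execution */
    }

    /* after JS_NewRuntime():
           deadline = time(NULL) + 5;
           JS_SetInterruptHandler(rt, on_interrupt, NULL);           */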
JS_VALUE_GET_FLOAT64(key); @@ -47261,7 +48871,6 @@ static JSMapRecord *map_find_record(JSContext *ctx, JSMapState *s, static void map_hash_resize(JSContext *ctx, JSMapState *s) { uint32_t new_hash_size, i, h; - size_t slack; struct list_head *new_hash_table, *el; JSMapRecord *mr; @@ -47270,11 +48879,10 @@ static void map_hash_resize(JSContext *ctx, JSMapState *s) new_hash_size = 4; else new_hash_size = s->hash_size * 2; - new_hash_table = js_realloc2(ctx, s->hash_table, - sizeof(new_hash_table[0]) * new_hash_size, &slack); + new_hash_table = js_realloc(ctx, s->hash_table, + sizeof(new_hash_table[0]) * new_hash_size); if (!new_hash_table) return; - new_hash_size += slack / sizeof(*new_hash_table); for(i = 0; i < new_hash_size; i++) init_list_head(&new_hash_table[i]); @@ -47674,10 +49282,10 @@ static void js_map_mark(JSRuntime *rt, JSValueConst val, s = p->u.map_state; if (s) { + assert(!s->is_weak); list_for_each(el, &s->records) { mr = list_entry(el, JSMapRecord, link); - if (!s->is_weak) - JS_MarkValue(rt, mr->key, mark_func); + JS_MarkValue(rt, mr->key, mark_func); JS_MarkValue(rt, mr->value, mark_func); } } @@ -48741,6 +50349,38 @@ static JSValue promise_reaction_job(JSContext *ctx, int argc, return res2; } +static JSValue promise_rejection_tracker_job(JSContext *ctx, int argc, + JSValueConst *argv) +{ + JSRuntime *rt; + JSPromiseData *s; + JSValueConst promise; + + assert(argc == 1); + + rt = ctx->rt; + promise = argv[0]; + s = JS_GetOpaque(promise, JS_CLASS_PROMISE); + + if (!s || s->promise_state != JS_PROMISE_REJECTED) + return JS_UNDEFINED; /* should never happen */ + + promise_trace(ctx, "promise_rejection_tracker_job\n"); + + // Check again in case the hook was removed. + if (rt->host_promise_rejection_tracker) + rt->host_promise_rejection_tracker( + ctx, promise, s->promise_result, s->is_handled, rt->host_promise_rejection_tracker_opaque); + + return JS_UNDEFINED; +} + +void JS_SetPromiseHook(JSRuntime *rt, JSPromiseHook promise_hook, void *opaque) +{ + rt->promise_hook = promise_hook; + rt->promise_hook_opaque = opaque; +} + void JS_SetHostPromiseRejectionTracker(JSRuntime *rt, JSHostPromiseRejectionTracker *cb, void *opaque) @@ -48764,11 +50404,11 @@ static void fulfill_or_reject_promise(JSContext *ctx, JSValueConst promise, promise_trace(ctx, "fulfill_or_reject_promise: is_reject=%d\n", is_reject); - if (s->promise_state == JS_PROMISE_REJECTED && !s->is_handled) { + if (s->promise_state == JS_PROMISE_FULFILLED) { JSRuntime *rt = ctx->rt; - if (rt->host_promise_rejection_tracker) { - rt->host_promise_rejection_tracker(ctx, promise, value, false, - rt->host_promise_rejection_tracker_opaque); + if (rt->promise_hook) { + rt->promise_hook(ctx, JS_PROMISE_HOOK_RESOLVE, promise, + JS_UNDEFINED, rt->promise_hook_opaque); } } @@ -48789,12 +50429,12 @@ static void fulfill_or_reject_promise(JSContext *ctx, JSValueConst promise, list_del(&rd->link); promise_reaction_data_free(ctx->rt, rd); } -} -static void reject_promise(JSContext *ctx, JSValueConst promise, - JSValueConst value) -{ - fulfill_or_reject_promise(ctx, promise, value, true); + if (s->promise_state == JS_PROMISE_REJECTED && !s->is_handled) { + JSRuntime *rt = ctx->rt; + if (rt->host_promise_rejection_tracker) + JS_EnqueueJob(ctx, promise_rejection_tracker_job, 1, &promise); + } } static JSValue js_promise_resolve_thenable_job(JSContext *ctx, @@ -48802,6 +50442,7 @@ static JSValue js_promise_resolve_thenable_job(JSContext *ctx, { JSValueConst promise, thenable, then; JSValue args[2], res; + JSRuntime *rt; promise_trace(ctx, 
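fulfill_or_reject_promise above no longer reports unhandled rejections synchronously; it queues promise_rejection_tracker_job, which re-checks promise_state and is_handled when it runs, so a handler attached later in the same tick suppresses the report. Reduced to its essence:

    typedef struct { int rejected; int handled; } P;

    /* runs from the job queue, after the current tick's code has had
       a chance to attach a rejection handler */
    static void rejection_job(P *p)
    {
        if (p->rejected && !p->handled) {
            /* report the unhandled rejection here */
        }
    }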
"js_promise_resolve_thenable_job\n"); @@ -48811,7 +50452,16 @@ static JSValue js_promise_resolve_thenable_job(JSContext *ctx, then = argv[2]; if (js_create_resolving_functions(ctx, args, promise) < 0) return JS_EXCEPTION; + rt = ctx->rt; + if (rt->promise_hook) { + rt->promise_hook(ctx, JS_PROMISE_HOOK_BEFORE, promise, JS_UNDEFINED, + rt->promise_hook_opaque); + } res = JS_Call(ctx, then, thenable, 2, vc(args)); + if (rt->promise_hook) { + rt->promise_hook(ctx, JS_PROMISE_HOOK_AFTER, promise, JS_UNDEFINED, + rt->promise_hook_opaque); + } if (JS_IsException(res)) { JSValue error = JS_GetException(ctx); res = JS_Call(ctx, args[1], JS_UNDEFINED, 1, vc(&error)); @@ -48932,7 +50582,7 @@ static JSValue js_promise_resolve_function_call(JSContext *ctx, JSValue error; fail_reject: error = JS_GetException(ctx); - reject_promise(ctx, s->promise, error); + fulfill_or_reject_promise(ctx, s->promise, error, true); JS_FreeValue(ctx, error); } else if (!JS_IsFunction(ctx, then)) { JS_FreeValue(ctx, then); @@ -48994,6 +50644,7 @@ static JSValue js_promise_constructor(JSContext *ctx, JSValueConst new_target, JSValueConst executor; JSValue obj; JSPromiseData *s; + JSRuntime *rt; JSValue args[2], ret; int i; @@ -49014,6 +50665,14 @@ static JSValue js_promise_constructor(JSContext *ctx, JSValueConst new_target, JS_SetOpaqueInternal(obj, s); if (js_create_resolving_functions(ctx, args, obj)) goto fail; + rt = ctx->rt; + if (rt->promise_hook) { + JSValueConst parent_promise = JS_UNDEFINED; + if (rt->parent_promise) + parent_promise = rt->parent_promise->value; + rt->promise_hook(ctx, JS_PROMISE_HOOK_INIT, obj, parent_promise, + rt->promise_hook_opaque); + } ret = JS_Call(ctx, executor, JS_UNDEFINED, 2, vc(args)); if (JS_IsException(ret)) { JSValue ret2, error; @@ -49071,8 +50730,7 @@ static JSValue js_new_promise_capability(JSContext *ctx, executor = js_promise_executor_new(ctx); if (JS_IsException(executor)) - return executor; - + return JS_EXCEPTION; if (JS_IsUndefined(ctor)) { result_promise = js_promise_constructor(ctx, ctor, 1, vc(&executor)); } else { @@ -49523,10 +51181,8 @@ static __exception int perform_promise_then(JSContext *ctx, JSValueConst args[5]; if (s->promise_state == JS_PROMISE_REJECTED && !s->is_handled) { JSRuntime *rt = ctx->rt; - if (rt->host_promise_rejection_tracker) { - rt->host_promise_rejection_tracker(ctx, promise, s->promise_result, - true, rt->host_promise_rejection_tracker_opaque); - } + if (rt->host_promise_rejection_tracker) + JS_EnqueueJob(ctx, promise_rejection_tracker_job, 1, &promise); } i = s->promise_state - JS_PROMISE_FULFILLED; rd = rd_array[i]; @@ -49547,7 +51203,10 @@ static JSValue js_promise_then(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { JSValue ctor, result_promise, resolving_funcs[2]; + bool have_promise_hook; + JSValueLink link; JSPromiseData *s; + JSRuntime *rt; int i, ret; s = JS_GetOpaque2(ctx, this_val, JS_CLASS_PROMISE); @@ -49557,7 +51216,16 @@ static JSValue js_promise_then(JSContext *ctx, JSValueConst this_val, ctor = JS_SpeciesConstructor(ctx, this_val, JS_UNDEFINED); if (JS_IsException(ctor)) return ctor; + rt = ctx->rt; + // always restore, even if js_new_promise_capability callee removes hook + have_promise_hook = (rt->promise_hook != NULL); + if (have_promise_hook) { + link = (JSValueLink){rt->parent_promise, this_val}; + rt->parent_promise = &link; + } result_promise = js_new_promise_capability(ctx, resolving_funcs, ctor); + if (have_promise_hook) + rt->parent_promise = link.next; JS_FreeValue(ctx, ctor); if 
(JS_IsException(result_promise)) return result_promise; @@ -50804,6 +52472,9 @@ static bool string_get_digits(const uint8_t *sp, int *pp, int *pval, p_start = p; while ((c = sp[p]) >= '0' && c <= '9') { + /* arbitrary limit to 9 digits */ + if (v >= 100000000) + return false; v = v * 10 + c - '0'; p++; if (p - p_start == max_digits) @@ -50847,7 +52518,7 @@ static bool string_get_tzoffset(const uint8_t *sp, int *pp, int *tzp, bool stric sgn = sp[p++]; if (sgn == '+' || sgn == '-') { int n = p; - if (!string_get_digits(sp, &p, &hh, 1, 9)) + if (!string_get_digits(sp, &p, &hh, 1, 0)) return false; n = p - n; if (strict && n != 2 && n != 4) @@ -51041,7 +52712,7 @@ static bool js_date_parse_otherstring(const uint8_t *sp, *is_local = false; } else { p++; - if (string_get_digits(sp, &p, &val, 1, 9)) { + if (string_get_digits(sp, &p, &val, 1, 0)) { if (c == '-') { if (val == 0) return false; @@ -51052,7 +52723,7 @@ static bool js_date_parse_otherstring(const uint8_t *sp, } } } else - if (string_get_digits(sp, &p, &val, 1, 9)) { + if (string_get_digits(sp, &p, &val, 1, 0)) { if (string_skip_char(sp, &p, ':')) { /* time part */ fields[3] = val; @@ -51444,7 +53115,9 @@ void JS_AddIntrinsicDate(JSContext *ctx) void JS_AddIntrinsicEval(JSContext *ctx) { +#ifndef QJS_DISABLE_PARSER ctx->eval_internal = __JS_EvalInternal; +#endif // QJS_DISABLE_PARSER } /* BigInt */ @@ -51460,40 +53133,25 @@ static JSValue JS_ToBigIntCtorFree(JSContext *ctx, JSValue val) case JS_TAG_BOOL: val = JS_NewBigInt64(ctx, JS_VALUE_GET_INT(val)); break; + case JS_TAG_SHORT_BIG_INT: case JS_TAG_BIG_INT: break; case JS_TAG_FLOAT64: { - bf_t *a, a_s; - - a = JS_ToBigInt1(ctx, &a_s, val); - if (!bf_is_finite(a)) { - JS_FreeValue(ctx, val); - val = JS_ThrowRangeError(ctx, "cannot convert NaN or Infinity to BigInt"); - } else { - JSValue val1 = JS_NewBigInt(ctx); - bf_t *r; - int ret; - if (JS_IsException(val1)) { - JS_FreeValue(ctx, val); - return JS_EXCEPTION; - } - r = JS_GetBigInt(val1); - ret = bf_set(r, a); - ret |= bf_rint(r, BF_RNDZ); - JS_FreeValue(ctx, val); - if (ret & BF_ST_MEM_ERROR) { - JS_FreeValue(ctx, val1); - val = JS_ThrowOutOfMemory(ctx); - } else if (ret & BF_ST_INEXACT) { - JS_FreeValue(ctx, val1); + double d = JS_VALUE_GET_FLOAT64(val); + JSBigInt *r; + int res; + r = js_bigint_from_float64(ctx, &res, d); + if (!r) { + if (res == 0) { + val = JS_EXCEPTION; + } else if (res == 1) { val = JS_ThrowRangeError(ctx, "cannot convert to BigInt: not an integer"); } else { - val = JS_CompactBigInt(ctx, val1); - } + val = JS_ThrowRangeError(ctx, "cannot convert NaN or Infinity to BigInt"); } + } else { + val = JS_CompactBigInt(ctx, r); } - if (a == &a_s) - bf_delete(a); } break; case JS_TAG_STRING: @@ -51573,38 +53231,62 @@ static JSValue js_bigint_asUintN(JSContext *ctx, int argc, JSValueConst *argv, int asIntN) { uint64_t bits; - bf_t a_s, *a = &a_s, *r, mask_s, *mask = &mask_s; - JSValue res; + JSValue res, a; if (JS_ToIndex(ctx, &bits, argv[0])) return JS_EXCEPTION; - res = JS_NewBigInt(ctx); - if (JS_IsException(res)) - return JS_EXCEPTION; - a = JS_ToBigInt(ctx, &a_s, argv[1]); - if (!a) { - JS_FreeValue(ctx, res); + a = JS_ToBigInt(ctx, argv[1]); + if (JS_IsException(a)) return JS_EXCEPTION; + if (bits == 0) { + JS_FreeValue(ctx, a); + res = __JS_NewShortBigInt(ctx, 0); + } else if (JS_VALUE_GET_TAG(a) == JS_TAG_SHORT_BIG_INT) { + /* fast case */ + if (bits >= JS_SHORT_BIG_INT_BITS) { + res = a; + } else { + uint64_t v; + int shift; + shift = 64 - bits; + v = JS_VALUE_GET_SHORT_BIG_INT(a); + v = v << shift; + if 
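string_get_digits above gains a hard nine-digit cap so the running value can never overflow a 32-bit int before the max_digits check fires. The accumulation guard on its own:

    #include <stdbool.h>

    static bool get_digits(const char *s, int *val)
    {
        int v = 0;
        while (*s >= '0' && *s <= '9') {
            if (v >= 100000000)      /* 9 digits: v * 10 + d stays in int */
                return false;
            v = v * 10 + (*s++ - '0');
        }
        *val = v;
        return true;
    }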
(asIntN) + v = (int64_t)v >> shift; + else + v = v >> shift; + res = __JS_NewShortBigInt(ctx, v); + } + } else { + JSBigInt *r, *p = JS_VALUE_GET_PTR(a); + if (bits >= p->len * JS_LIMB_BITS) { + res = a; + } else { + int len, shift, i; + js_limb_t v; + len = (bits + JS_LIMB_BITS - 1) / JS_LIMB_BITS; + r = js_bigint_new(ctx, len); + if (!r) { + JS_FreeValue(ctx, a); + return JS_EXCEPTION; + } + r->len = len; + for(i = 0; i < len - 1; i++) + r->tab[i] = p->tab[i]; + shift = (-bits) & (JS_LIMB_BITS - 1); + /* 0 <= shift <= JS_LIMB_BITS - 1 */ + v = p->tab[len - 1] << shift; + if (asIntN) + v = (js_slimb_t)v >> shift; + else + v = v >> shift; + r->tab[len - 1] = v; + r = js_bigint_normalize(ctx, r); + JS_FreeValue(ctx, a); + res = JS_CompactBigInt(ctx, r); + } } - /* XXX: optimize */ - r = JS_GetBigInt(res); - bf_init(ctx->bf_ctx, mask); - bf_set_ui(mask, 1); - bf_mul_2exp(mask, bits, BF_PREC_INF, BF_RNDZ); - bf_add_si(mask, mask, -1, BF_PREC_INF, BF_RNDZ); - bf_logic_and(r, a, mask); - if (asIntN && bits != 0) { - bf_set_ui(mask, 1); - bf_mul_2exp(mask, bits - 1, BF_PREC_INF, BF_RNDZ); - if (bf_cmpu(r, mask) >= 0) { - bf_set_ui(mask, 1); - bf_mul_2exp(mask, bits, BF_PREC_INF, BF_RNDZ); - bf_sub(r, r, mask, BF_PREC_INF, BF_RNDZ); - } - } - bf_delete(mask); - JS_FreeBigInt(ctx, a, &a_s); - return JS_CompactBigInt(ctx, res); + return res; } static const JSCFunctionListEntry js_bigint_funcs[] = { @@ -51778,6 +53460,10 @@ void JS_AddIntrinsicBaseObjects(JSContext *ctx) ctx->array_ctor = js_dup(obj); JS_SetPropertyFunctionList(ctx, obj, js_array_funcs, countof(js_array_funcs)); + JS_DefineAutoInitProperty(ctx, obj, JS_ATOM_fromAsync, + JS_AUTOINIT_ID_BYTECODE, + (void *)(uintptr_t)JS_BUILTIN_ARRAY_FROMASYNC, + JS_PROP_WRITABLE|JS_PROP_CONFIGURABLE); /* XXX: create auto_initializer */ { @@ -52386,8 +54072,12 @@ static JSValue js_array_buffer_resize(JSContext *ctx, JSValueConst this_val, list_for_each(el, &abuf->array_list) { ta = list_entry(el, JSTypedArray, link); p = ta->obj; - if (p->class_id == JS_CLASS_DATAVIEW) + if (p->class_id == JS_CLASS_DATAVIEW) { + if (ta->track_rab && ta->offset < len) + ta->length = len - ta->offset; + continue; + } p->u.array.count = 0; p->u.array.u.ptr = NULL; size_log2 = typed_array_size_log2(p->class_id); @@ -53340,19 +55030,33 @@ static JSValue js_typed_array_indexOf(JSContext *ctx, JSValueConst this_val, v64 = d; is_int = (v64 == d); } - } else - if (tag == JS_TAG_BIG_INT) { - JSBigInt *p1 = JS_VALUE_GET_PTR(argv[0]); + } else if (tag == JS_TAG_BIG_INT || tag == JS_TAG_SHORT_BIG_INT) { + JSBigIntBuf buf1; + JSBigInt *p1; + int sz = (64 / JS_LIMB_BITS); + if (tag == JS_TAG_SHORT_BIG_INT) + p1 = js_bigint_set_short(&buf1, argv[0]); + else + p1 = JS_VALUE_GET_PTR(argv[0]); if (p->class_id == JS_CLASS_BIG_INT64_ARRAY) { - if (bf_get_int64(&v64, &p1->num, 0) != 0) - goto done; + if (p1->len > sz) + goto done; /* does not fit an int64 : cannot be found */ } else if (p->class_id == JS_CLASS_BIG_UINT64_ARRAY) { - if (bf_get_uint64((uint64_t *)&v64, &p1->num) != 0) + if (js_bigint_sign(p1)) + goto done; /* v < 0 */ + if (p1->len <= sz) { + /* OK */ + } else if (p1->len == sz + 1 && p1->tab[sz] == 0) { + /* 2^63 <= v <= 2^64-1 */ + } else { goto done; + } } else { goto done; } + if (JS_ToBigInt64(ctx, &v64, argv[0])) + goto exception; d = 0; is_bigint = 1; } else { @@ -54259,10 +55963,8 @@ static JSValue js_array_from_iterator(JSContext *ctx, uint32_t *plen, val = JS_IteratorNext(ctx, iter, next_method, 0, NULL, &done); if (JS_IsException(val)) goto fail; - if (done) { - 
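js_bigint_asUintN above implements both truncations with one shift pair: move bit (bits - 1) up to the most significant position, then shift back down, arithmetically for asIntN (sign-extending) and logically for asUintN (zero-filling). Standalone for the 64-bit short case; bits == 0 is handled separately, as in the hunk:

    #include <stdint.h>

    static int64_t as_int_n(uint64_t v, int bits)    /* 1 <= bits <= 64 */
    {
        int shift = 64 - bits;
        return (int64_t)(v << shift) >> shift;
    }

    static uint64_t as_uint_n(uint64_t v, int bits)  /* 1 <= bits <= 64 */
    {
        int shift = 64 - bits;
        return (v << shift) >> shift;
    }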
JS_FreeValue(ctx, val); + if (done) break; - } if (JS_CreateDataPropertyUint32(ctx, arr, k, val, JS_PROP_THROW) < 0) goto fail; k++; @@ -54776,8 +56478,7 @@ static JSValue js_dataview_setValue(JSContext *ctx, if (class_id <= JS_CLASS_UINT32_ARRAY) { if (JS_ToUint32(ctx, &v, val)) return JS_EXCEPTION; - } else - if (class_id <= JS_CLASS_BIG_UINT64_ARRAY) { + } else if (class_id <= JS_CLASS_BIG_UINT64_ARRAY) { if (JS_ToBigInt64(ctx, (int64_t *)&v64, val)) return JS_EXCEPTION; } else { @@ -55156,7 +56857,7 @@ static JSValue js_atomics_store(JSContext *ctx, return JS_EXCEPTION; if (size_log2 == 3) { int64_t v64; - ret = JS_ToBigIntValueFree(ctx, js_dup(argv[2])); + ret = JS_ToBigIntFree(ctx, js_dup(argv[2])); if (JS_IsException(ret)) return ret; if (JS_ToBigInt64(ctx, &v64, ret)) { @@ -55846,6 +57547,7 @@ static void reset_weak_ref(JSRuntime *rt, JSWeakRefRecord **first_weak_ref) assert(!mr->empty); /* no iterator on WeakMap/WeakSet */ list_del(&mr->hash_link); list_del(&mr->link); + s->record_count--; break; case JS_WEAK_REF_KIND_WEAK_REF: wrd = wr->u.weak_ref_data; @@ -56082,6 +57784,7 @@ static void _JS_AddIntrinsicCallSite(JSContext *ctx) bool JS_DetectModule(const char *input, size_t input_len) { +#ifndef QJS_DISABLE_PARSER JSRuntime *rt; JSContext *ctx; JSValue val; @@ -56110,6 +57813,9 @@ bool JS_DetectModule(const char *input, size_t input_len) JS_FreeContext(ctx); JS_FreeRuntime(rt); return is_module; +#else + return false; +#endif // QJS_DISABLE_PARSER } uintptr_t js_std_cmd(int cmd, ...) { diff --git a/lib/monoucha0/monoucha/qjs/quickjs.h b/lib/monoucha0/monoucha/qjs/quickjs.h index d1530db9..8fa81616 100644 --- a/lib/monoucha0/monoucha/qjs/quickjs.h +++ b/lib/monoucha0/monoucha/qjs/quickjs.h @@ -102,7 +102,8 @@ enum { JS_TAG_UNINITIALIZED = 4, JS_TAG_CATCH_OFFSET = 5, JS_TAG_EXCEPTION = 6, - JS_TAG_FLOAT64 = 7, + JS_TAG_SHORT_BIG_INT = 7, + JS_TAG_FLOAT64 = 8, /* any larger tag is FLOAT64 if JS_NAN_BOXING */ }; @@ -136,6 +137,7 @@ typedef const struct JSValue *JSValueConst; #define JS_MKPTR(tag, ptr) ((JSValue)((tag) | (intptr_t)(ptr))) #define JS_VALUE_GET_NORM_TAG(v) ((int)((intptr_t)(v) & 15)) #define JS_VALUE_GET_TAG(v) ((int)((intptr_t)(v) & 15)) +#define JS_VALUE_GET_SHORT_BIG_INT(v) JS_VALUE_GET_INT(v) #define JS_VALUE_GET_PTR(v) ((void *)((intptr_t)(v) & ~15)) #define JS_VALUE_GET_INT(v) ((int)((intptr_t)(v) >> 4)) #define JS_VALUE_GET_BOOL(v) ((int)((intptr_t)(v) >> 4)) @@ -148,6 +150,12 @@ static inline JSValue __JS_NewFloat64(double d) return JS_MKVAL(JS_TAG_FLOAT64, (int)d); } +static inline JSValue __JS_NewShortBigInt(JSContext *ctx, int32_t d) +{ + (void)&ctx; + return JS_MKVAL(JS_TAG_SHORT_BIG_INT, d); +} + static inline bool JS_VALUE_IS_NAN(JSValue v) { (void)&v; @@ -161,6 +169,7 @@ typedef uint64_t JSValue; #define JS_VALUE_GET_TAG(v) (int)((v) >> 32) #define JS_VALUE_GET_INT(v) (int)(v) #define JS_VALUE_GET_BOOL(v) (int)(v) +#define JS_VALUE_GET_SHORT_BIG_INT(v) (int)(v) #define JS_VALUE_GET_PTR(v) (void *)(intptr_t)(v) #define JS_MKVAL(tag, val) (((uint64_t)(tag) << 32) | (uint32_t)(val)) @@ -197,6 +206,12 @@ static inline JSValue __JS_NewFloat64(double d) return v; } +static inline JSValue __JS_NewShortBigInt(JSContext *ctx, int32_t d) +{ + (void)&ctx; + return JS_MKVAL(JS_TAG_SHORT_BIG_INT, d); +} + #define JS_TAG_IS_FLOAT64(tag) ((unsigned)((tag) - JS_TAG_FIRST) >= (JS_TAG_FLOAT64 - JS_TAG_FIRST)) /* same as JS_VALUE_GET_TAG, but return JS_TAG_FLOAT64 with NaN boxing */ @@ -223,6 +238,7 @@ typedef union JSValueUnion { int32_t int32; double float64; void *ptr; 
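+    /* immediate bigint payload; read via JS_VALUE_GET_SHORT_BIG_INT when
+       the tag is JS_TAG_SHORT_BIG_INT */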
+ int32_t short_big_int; } JSValueUnion; typedef struct JSValue { @@ -236,6 +252,7 @@ typedef struct JSValue { #define JS_VALUE_GET_INT(v) ((v).u.int32) #define JS_VALUE_GET_BOOL(v) ((v).u.int32) #define JS_VALUE_GET_FLOAT64(v) ((v).u.float64) +#define JS_VALUE_GET_SHORT_BIG_INT(v) ((v).u.short_big_int) #define JS_VALUE_GET_PTR(v) ((v).u.ptr) /* msvc doesn't understand designated initializers without /std:c++20 */ @@ -281,6 +298,15 @@ static inline JSValue __JS_NewFloat64(double d) return v; } +static inline JSValue __JS_NewShortBigInt(JSContext *ctx, int64_t d) +{ + (void)&ctx; + JSValue v; + v.tag = JS_TAG_SHORT_BIG_INT; + v.u.short_big_int = d; + return v; +} + static inline bool JS_VALUE_IS_NAN(JSValue v) { union { @@ -671,7 +697,8 @@ static inline bool JS_IsNumber(JSValueConst v) static inline bool JS_IsBigInt(JSContext *ctx, JSValueConst v) { (void)&ctx; - return JS_VALUE_GET_TAG(v) == JS_TAG_BIG_INT; + int tag = JS_VALUE_GET_TAG(v); + return tag == JS_TAG_BIG_INT || tag == JS_TAG_SHORT_BIG_INT; } static inline bool JS_IsBool(JSValueConst v) @@ -767,6 +794,10 @@ JS_EXTERN JSValue JS_NewStringLen(JSContext *ctx, const char *str1, size_t len1) static inline JSValue JS_NewString(JSContext *ctx, const char *str) { return JS_NewStringLen(ctx, str, strlen(str)); } +// makes a copy of the input; does not check if the input is valid UTF-16, +// that is the responsibility of the caller +JS_EXTERN JSValue JS_NewTwoByteString(JSContext *ctx, const uint16_t *buf, + size_t len); JS_EXTERN JSValue JS_NewAtomString(JSContext *ctx, const char *str); JS_EXTERN JSValue JS_ToString(JSContext *ctx, JSValueConst val); JS_EXTERN JSValue JS_ToPropertyKey(JSContext *ctx, JSValueConst val); @@ -886,6 +917,7 @@ JS_EXTERN JSValue JS_CallConstructor2(JSContext *ctx, JSValueConst func_obj, * wholly infallible: non-strict classic scripts may _parse_ okay as a module * but not _execute_ as one (different runtime semantics.) Use with caution. * |input| can be either ASCII or UTF-8 encoded source code. + * Returns false if QuickJS was built with -DQJS_DISABLE_PARSER. */ JS_EXTERN bool JS_DetectModule(const char *input, size_t input_len); /* 'input' must be zero terminated i.e. input[input_len] = '\0'. 
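   For example, a sketch of typical embedder usage (illustrative only;
   JS_Eval, JS_DetectModule and the JS_EVAL_TYPE_* flags are declared in
   this header, and 'ctx' is assumed to be a live JSContext):

     const char *src = "6 * 7";   // NUL-terminated, so src[5] == '\0'
     int flags = JS_DetectModule(src, strlen(src))
               ? JS_EVAL_TYPE_MODULE : JS_EVAL_TYPE_GLOBAL;
     JSValue v = JS_Eval(ctx, src, strlen(src), "<input>", flags);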
*/ @@ -985,6 +1017,23 @@ JS_EXTERN bool JS_IsPromise(JSValueConst val); JS_EXTERN JSValue JS_NewSymbol(JSContext *ctx, const char *description, bool is_global); +typedef enum JSPromiseHookType { + JS_PROMISE_HOOK_INIT, // emitted when a new promise is created + JS_PROMISE_HOOK_BEFORE, // runs right before promise.then is invoked + JS_PROMISE_HOOK_AFTER, // runs right after promise.then is invoked + JS_PROMISE_HOOK_RESOLVE, // not emitted for rejected promises +} JSPromiseHookType; + +// parent_promise is only passed in when type == JS_PROMISE_HOOK_INIT and +// is then either a promise object or JS_UNDEFINED if the new promise does +// not have a parent promise; only promises created with promise.then have +// a parent promise +typedef void JSPromiseHook(JSContext *ctx, JSPromiseHookType type, + JSValueConst promise, JSValueConst parent_promise, + void *opaque); +JS_EXTERN void JS_SetPromiseHook(JSRuntime *rt, JSPromiseHook promise_hook, + void *opaque); + /* is_handled = true means that the rejection is handled */ typedef void JSHostPromiseRejectionTracker(JSContext *ctx, JSValueConst promise, JSValueConst reason, @@ -1206,14 +1255,14 @@ JS_EXTERN int JS_AddModuleExportList(JSContext *ctx, JSModuleDef *m, const JSCFunctionListEntry *tab, int len); /* can only be called after the module is instantiated */ JS_EXTERN int JS_SetModuleExport(JSContext *ctx, JSModuleDef *m, const char *export_name, - JSValueConst val); + JSValue val); JS_EXTERN int JS_SetModuleExportList(JSContext *ctx, JSModuleDef *m, const JSCFunctionListEntry *tab, int len); /* Version */ #define QJS_VERSION_MAJOR 0 -#define QJS_VERSION_MINOR 9 +#define QJS_VERSION_MINOR 10 #define QJS_VERSION_PATCH 0 #define QJS_VERSION_SUFFIX "" diff --git a/lib/monoucha0/monoucha/qjs/xsum.c b/lib/monoucha0/monoucha/qjs/xsum.c new file mode 100644 index 00000000..98d970b8 --- /dev/null +++ b/lib/monoucha0/monoucha/qjs/xsum.c @@ -0,0 +1,1122 @@ +/* FUNCTIONS FOR EXACT SUMMATION. */ + +/* Copyright 2015, 2018, 2021, 2024 Radford M. Neal + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include <string.h> +#include <math.h> +#include "xsum.h" + + +/* ---------------------- IMPLEMENTATION ASSUMPTIONS ----------------------- */ + +/* This code makes the following assumptions: + + o The 'double' type is a IEEE-754 standard 64-bit floating-point value. + + o The 'int64_t' and 'uint64_t' types exist, for 64-bit signed and + unsigned integers. 
+
+   o The 'endianness' of 'double' and 64-bit integers is consistent
+     between these types - that is, looking at the bits of a 'double'
+     value as a 64-bit integer will have the expected result.
+
+   o Right shifts of a signed operand produce the results expected for
+     a two's complement representation.
+
+   o Rounding should be done in the "round to nearest, ties to even" mode.
+*/
+
+
+/* --------------------------- CONFIGURATION ------------------------------- */
+
+
+/* IMPLEMENTATION OPTIONS. Can be set to either 0 or 1, whichever seems
+   to be fastest. */
+
+#define USE_SIMD 1          /* Use SIMD intrinsics (SSE2/AVX) if available? */
+
+#define USE_MEMSET_SMALL 1  /* Use memset rather than a loop (for small mem)? */
+
+#define OPT_SMALL 0         /* Class of manual optimization for operations on */
+                            /* small accumulator: 0 (none), 1, 2, 3 (SIMD) */
+#define OPT_CARRY 1         /* Use manually optimized carry propagation? */
+
+#define INLINE_SMALL 1      /* Inline more of the small accumulator routines? */
+                            /* (Not currently used) */
+
+
+/* INCLUDE INTEL INTRINSICS IF USED AND AVAILABLE. */
+
+#if USE_SIMD && __SSE2__
+# include <immintrin.h>
+#endif
+
+
+/* COPY A 64-BIT QUANTITY - DOUBLE TO 64-BIT INT OR VICE VERSA. The
+   arguments are destination and source variables (not values). */
+
+#define COPY64(dst,src) memcpy(&(dst),&(src),sizeof(double))
+
+
+/* SET UP DEBUG FLAG. It's a variable if debugging is enabled, and a
+   constant if disabled (so that no code will be generated then). */
+
+int xsum_debug = 0;
+
+#ifndef DEBUG
+# define xsum_debug 0
+#endif
+
+
+/* SET UP INLINE / NOINLINE MACROS. */
+
+#if __GNUC__
+# define INLINE inline __attribute__ ((always_inline))
+# define NOINLINE __attribute__ ((noinline))
+#else
+# define INLINE inline
+# define NOINLINE
+#endif
+
+
+/* ------------------------ INTERNAL ROUTINES ------------------------------- */
+
+
+/* ADD AN INF OR NAN TO A SMALL ACCUMULATOR. This only changes the flags,
+   not the chunks in the accumulator, which retains the sum of the finite
+   terms (which is perhaps sometimes useful to access, though no function
+   to do so is defined at present). A NaN with larger payload (seen as a
+   52-bit unsigned integer) takes precedence, with the sign of the NaN always
+   being positive. This ensures that the order of summing NaN values doesn't
+   matter. */
+
+static NOINLINE void xsum_small_add_inf_nan
+  (xsum_small_accumulator *restrict sacc, xsum_int ivalue)
+{
+  xsum_int mantissa;
+  double fltv;
+
+  mantissa = ivalue & XSUM_MANTISSA_MASK;
+
+  if (mantissa == 0) /* Inf */
+  { if (sacc->Inf == 0)
+    { /* no previous Inf */
+      sacc->Inf = ivalue;
+    }
+    else if (sacc->Inf != ivalue)
+    { /* previous Inf was opposite sign */
+      COPY64 (fltv, ivalue);
+      fltv = fltv - fltv; /* result will be a NaN */
+      COPY64 (sacc->Inf, fltv);
+    }
+  }
+  else /* NaN */
+  { /* Choose the NaN with the bigger payload and clear its sign. Using <=
+       ensures that we will choose the first NaN over the previous zero. */
+    if ((sacc->NaN & XSUM_MANTISSA_MASK) <= mantissa)
+    { sacc->NaN = ivalue & ~XSUM_SIGN_MASK;
+    }
+  }
+}
+
+
+/* PROPAGATE CARRIES TO NEXT CHUNK IN A SMALL ACCUMULATOR. Needs to
+   be called often enough that accumulated carries don't overflow out
+   the top, as indicated by sacc->adds_until_propagate. Returns the
+   index of the uppermost non-zero chunk (0 if number is zero).
+
+   After carry propagation, the uppermost non-zero chunk will indicate
+   the sign of the number, and will not be -1 (all 1s). It will be in
+   the range -2^XSUM_LOW_MANTISSA_BITS to 2^XSUM_LOW_MANTISSA_BITS - 1.
+   Lower chunks will be non-negative, and in the range from 0 up to
+   2^XSUM_LOW_MANTISSA_BITS - 1. */
+
+static NOINLINE int xsum_carry_propagate (xsum_small_accumulator *restrict sacc)
+{
+  int i, u, uix;
+
+  /* Set u to the index of the uppermost non-zero (for now) chunk, or
+     return with value 0 if there is none. */
+
+# if OPT_CARRY
+
+  { u = XSUM_SCHUNKS-1;
+    switch (XSUM_SCHUNKS & 0x3) /* get u to be a multiple of 4 minus one */
+    {
+      case 3: if (sacc->chunk[u] != 0)
+              { goto found2;
+              }
+              u -= 1;                   /* XSUM_SCHUNKS is a */
+      case 2: if (sacc->chunk[u] != 0)  /* constant, so the */
+              { goto found2;            /* compiler will do */
+              }                         /* simple code here */
+              u -= 1;
+      case 1: if (sacc->chunk[u] != 0)
+              { goto found2;
+              }
+              u -= 1;
+      case 0: ;
+    }
+
+    do /* here, u should be a multiple of 4 minus one, and at least 3 */
+    {
+#     if USE_SIMD && __AVX__
+      { __m256i ch;
+        ch = _mm256_loadu_si256 ((__m256i *)(sacc->chunk+u-3));
+        if (!_mm256_testz_si256(ch,ch))
+        { goto found;
+        }
+        u -= 4;
+        if (u < 0)  /* never actually happens, because value of XSUM_SCHUNKS */
+        { break;    /* is such that u < 0 occurs at end of do loop instead */
+        }
+        ch = _mm256_loadu_si256 ((__m256i *)(sacc->chunk+u-3));
+        if (!_mm256_testz_si256(ch,ch))
+        { goto found;
+        }
+        u -= 4;
+      }
+#     else
+      { if (sacc->chunk[u] | sacc->chunk[u-1]
+          | sacc->chunk[u-2] | sacc->chunk[u-3])
+        { goto found;
+        }
+        u -= 4;
+      }
+#     endif
+
+    } while (u >= 0);
+
+    uix = 0;
+    goto done;
+
+  found:
+    if (sacc->chunk[u] != 0)
+    { goto found2;
+    }
+    u -= 1;
+    if (sacc->chunk[u] != 0)
+    { goto found2;
+    }
+    u -= 1;
+    if (sacc->chunk[u] != 0)
+    { goto found2;
+    }
+    u -= 1;
+
+  found2: ;
+  }
+
+# else /* Non-optimized search for uppermost non-zero chunk */
+
+  { for (u = XSUM_SCHUNKS-1; sacc->chunk[u] == 0; u--)
+    { if (u == 0)
+      {
+        uix = 0;
+        goto done;
+      }
+    }
+  }
+
+# endif
+
+  /* At this point, sacc->chunk[u] must be non-zero */
+
+  /* Carry propagate, starting at the low-order chunks. Note that the
+     loop limit of u may be increased inside the loop. */
+
+  i = 0; /* set to the index of the next non-zero chunk, from bottom */
+
+# if OPT_CARRY
+  {
+    /* Quickly skip over unused low-order chunks. Done here at the start
+       on the theory that there are often many unused low-order chunks,
+       justifying some overhead to begin, but later stretches of unused
+       chunks may not be as large. */
+
+    int e = u-3; /* go only to 3 before so won't access beyond chunk array */
+
+    do
+    {
+#     if USE_SIMD && __AVX__
+      { __m256i ch;
+        ch = _mm256_loadu_si256 ((__m256i *)(sacc->chunk+i));
+        if (!_mm256_testz_si256(ch,ch))
+        { break;
+        }
+        i += 4;
+        if (i >= e)
+        { break;
+        }
+        ch = _mm256_loadu_si256 ((__m256i *)(sacc->chunk+i));
+        if (!_mm256_testz_si256(ch,ch))
+        { break;
+        }
+      }
+#     else
+      { if (sacc->chunk[i] | sacc->chunk[i+1]
+          | sacc->chunk[i+2] | sacc->chunk[i+3])
+        { break;
+        }
+      }
+#     endif
+
+      i += 4;
+
+    } while (i <= e);
+  }
+# endif
+
+  uix = -1; /* indicates that a non-zero chunk has not been found yet */
+
+  do
+  { xsum_schunk c;     /* Set to the chunk at index i (next non-zero one) */
+    xsum_schunk clow;  /* Low-order bits of c */
+    xsum_schunk chigh; /* High-order bits of c */
+
+    /* Find the next non-zero chunk, setting i to its index, or break out
+       of loop if there is none. Note that the chunk at index u is not
+       necessarily non-zero - it was initially, but u or the chunk at u
+       may have changed.
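+
+       (An illustrative invariant, ours rather than the original author's:
+       chunk[i+1] carries weight 2^XSUM_LOW_MANTISSA_BITS relative to
+       chunk[i], so rewriting c as clow + (chigh << XSUM_LOW_MANTISSA_BITS)
+       and adding chigh to the chunk above preserves the represented sum.
+       E.g. with 32-bit windows, chunk[0] = 2^33 - 2 becomes clow =
+       2^32 - 2 with chigh = 1 added to chunk[1], and indeed
+       1*2^32 + (2^32 - 2) = 2^33 - 2.)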
*/ + +# if OPT_CARRY + { + c = sacc->chunk[i]; + if (c != 0) + { goto nonzero; + } + i += 1; + if (i > u) + { break; /* reaching here is only possible when u == i initially, */ + } /* with the last add to a chunk having changed it to 0 */ + + for (;;) + { c = sacc->chunk[i]; + if (c != 0) + { goto nonzero; + } + i += 1; + c = sacc->chunk[i]; + if (c != 0) + { goto nonzero; + } + i += 1; + c = sacc->chunk[i]; + if (c != 0) + { goto nonzero; + } + i += 1; + c = sacc->chunk[i]; + if (c != 0) + { goto nonzero; + } + i += 1; + } + } +# else + { + do + { c = sacc->chunk[i]; + if (c != 0) + { goto nonzero; + } + i += 1; + } while (i <= u); + + break; + } +# endif + + /* Propagate possible carry from this chunk to next chunk up. */ + + nonzero: + chigh = c >> XSUM_LOW_MANTISSA_BITS; + if (chigh == 0) + { uix = i; + i += 1; + continue; /* no need to change this chunk */ + } + + if (u == i) + { if (chigh == -1) + { uix = i; + break; /* don't propagate -1 into the region of all zeros above */ + } + u = i+1; /* we will change chunk[u+1], so we'll need to look at it */ + } + + clow = c & XSUM_LOW_MANTISSA_MASK; + if (clow != 0) + { uix = i; + } + + /* We now change chunk[i] and add to chunk[i+1]. Note that i+1 should be + in range (no bigger than XSUM_CHUNKS-1) if summing memory, since + the number of chunks is big enough to hold any sum, and we do not + store redundant chunks with values 0 or -1 above previously non-zero + chunks. But other add operations might cause overflow, in which + case we produce a NaN with all 1s as payload. (We can't reliably produce + an Inf of the right sign.) */ + + sacc->chunk[i] = clow; + if (i+1 >= XSUM_SCHUNKS) + { xsum_small_add_inf_nan (sacc, + ((xsum_int)XSUM_EXP_MASK << XSUM_MANTISSA_BITS) | XSUM_MANTISSA_MASK); + u = i; + } + else + { sacc->chunk[i+1] += chigh; /* note: this could make this chunk be zero */ + } + + i += 1; + + } while (i <= u); + + /* Check again for the number being zero, since carry propagation might + have created zero from something that initially looked non-zero. */ + + if (uix < 0) + { + uix = 0; + goto done; + } + + /* While the uppermost chunk is negative, with value -1, combine it with + the chunk below (if there is one) to produce the same number but with + one fewer non-zero chunks. */ + + while (sacc->chunk[uix] == -1 && uix > 0) + { /* Left shift of a negative number is undefined according to the standard, + so do a multiply - it's all presumably constant-folded by the compiler.*/ + sacc->chunk[uix-1] += ((xsum_schunk) -1) + * (((xsum_schunk) 1) << XSUM_LOW_MANTISSA_BITS); + sacc->chunk[uix] = 0; + uix -= 1; + } + + /* We can now add one less than the total allowed terms before the + next carry propagate. */ + +done: + sacc->adds_until_propagate = XSUM_SMALL_CARRY_TERMS-1; + + /* Return index of uppermost non-zero chunk. */ + + return uix; +} + + +/* ------------------------ EXTERNAL ROUTINES ------------------------------- */ + + +/* INITIALIZE A SMALL ACCUMULATOR TO ZERO. 
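+
+   A minimal usage sketch (illustrative only; 'vec' and 'n' stand for the
+   caller's array of doubles and its length):
+
+     xsum_small_accumulator acc;
+     xsum_small_init (&acc);
+     xsum_small_addv (&acc, vec, n);
+     double sum = xsum_small_round (&acc);
+
+   which should produce the correctly rounded value of the exact sum.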
*/ + +void xsum_small_init (xsum_small_accumulator *restrict sacc) +{ + sacc->adds_until_propagate = XSUM_SMALL_CARRY_TERMS; + sacc->Inf = sacc->NaN = 0; +# if USE_MEMSET_SMALL + { memset (sacc->chunk, 0, XSUM_SCHUNKS * sizeof(xsum_schunk)); + } +# elif USE_SIMD && __AVX__ && XSUM_SCHUNKS==67 + { xsum_schunk *ch = sacc->chunk; + __m256i z = _mm256_setzero_si256(); + _mm256_storeu_si256 ((__m256i *)(ch+0), z); + _mm256_storeu_si256 ((__m256i *)(ch+4), z); + _mm256_storeu_si256 ((__m256i *)(ch+8), z); + _mm256_storeu_si256 ((__m256i *)(ch+12), z); + _mm256_storeu_si256 ((__m256i *)(ch+16), z); + _mm256_storeu_si256 ((__m256i *)(ch+20), z); + _mm256_storeu_si256 ((__m256i *)(ch+24), z); + _mm256_storeu_si256 ((__m256i *)(ch+28), z); + _mm256_storeu_si256 ((__m256i *)(ch+32), z); + _mm256_storeu_si256 ((__m256i *)(ch+36), z); + _mm256_storeu_si256 ((__m256i *)(ch+40), z); + _mm256_storeu_si256 ((__m256i *)(ch+44), z); + _mm256_storeu_si256 ((__m256i *)(ch+48), z); + _mm256_storeu_si256 ((__m256i *)(ch+52), z); + _mm256_storeu_si256 ((__m256i *)(ch+56), z); + _mm256_storeu_si256 ((__m256i *)(ch+60), z); + _mm_storeu_si128 ((__m128i *)(ch+64), _mm256_castsi256_si128(z)); + _mm_storeu_si64 (ch+66, _mm256_castsi256_si128(z)); + } +# else + { xsum_schunk *p; + int n; + p = sacc->chunk; + n = XSUM_SCHUNKS; + do { *p++ = 0; n -= 1; } while (n > 0); + } +# endif +} + + +/* ADD ONE NUMBER TO A SMALL ACCUMULATOR ASSUMING NO CARRY PROPAGATION REQ'D. + This function is declared INLINE regardless of the setting of INLINE_SMALL + and for good performance it must be inlined by the compiler (otherwise the + procedure call overhead will result in substantial inefficiency). */ + +static INLINE void xsum_add1_no_carry (xsum_small_accumulator *restrict sacc, + xsum_flt value) +{ + xsum_int ivalue; + xsum_int mantissa; + xsum_expint exp, low_exp, high_exp; + xsum_schunk *chunk_ptr; + + /* Extract exponent and mantissa. Split exponent into high and low parts. */ + + COPY64 (ivalue, value); + + exp = (ivalue >> XSUM_MANTISSA_BITS) & XSUM_EXP_MASK; + mantissa = ivalue & XSUM_MANTISSA_MASK; + high_exp = exp >> XSUM_LOW_EXP_BITS; + low_exp = exp & XSUM_LOW_EXP_MASK; + + /* Categorize number as normal, denormalized, or Inf/NaN according to + the value of the exponent field. */ + + if (exp == 0) /* zero or denormalized */ + { /* If it's a zero (positive or negative), we do nothing. */ + if (mantissa == 0) + { return; + } + /* Denormalized mantissa has no implicit 1, but exponent is 1 not 0. */ + exp = low_exp = 1; + } + else if (exp == XSUM_EXP_MASK) /* Inf or NaN */ + { /* Just update flags in accumulator structure. */ + xsum_small_add_inf_nan (sacc, ivalue); + return; + } + else /* normalized */ + { /* OR in implicit 1 bit at top of mantissa */ + mantissa |= (xsum_int)1 << XSUM_MANTISSA_BITS; + } + + /* Use high part of exponent as index of chunk, and low part of + exponent to give position within chunk. Fetch the two chunks + that will be modified. */ + + chunk_ptr = sacc->chunk + high_exp; + + /* Separate mantissa into two parts, after shifting, and add to (or + subtract from) this chunk and the next higher chunk (which always + exists since there are three extra ones at the top). + + Note that low_mantissa will have at most XSUM_LOW_MANTISSA_BITS bits, + while high_mantissa will have at most XSUM_MANTISSA_BITS bits, since + even though the high mantissa includes the extra implicit 1 bit, it will + also be shifted right by at least one bit. 
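+
+   A worked example (ours, not part of the original comment): for the
+   value 1.0, ivalue is 0x3FF0000000000000, so exp = 1023, giving
+   high_exp = 31 and low_exp = 31; the mantissa with its implicit 1 is
+   2^52. Then (2^52 << 31) & XSUM_LOW_MANTISSA_MASK = 0 is added to
+   chunk[31], and 2^52 >> (32 - 31) = 2^51 is added to chunk[32].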
*/ + + xsum_int split_mantissa[2]; + split_mantissa[0] = ((xsum_uint)mantissa << low_exp) & XSUM_LOW_MANTISSA_MASK; + split_mantissa[1] = mantissa >> (XSUM_LOW_MANTISSA_BITS - low_exp); + + /* Add to, or subtract from, the two affected chunks. */ + +# if OPT_SMALL==1 + { xsum_int ivalue_sign = ivalue<0 ? -1 : 1; + chunk_ptr[0] += ivalue_sign * split_mantissa[0]; + chunk_ptr[1] += ivalue_sign * split_mantissa[1]; + } +# elif OPT_SMALL==2 + { xsum_int ivalue_neg + = ivalue>>(XSUM_SCHUNK_BITS-1); /* all 0s if +ve, all 1s if -ve */ + chunk_ptr[0] += (split_mantissa[0] ^ ivalue_neg) + (ivalue_neg & 1); + chunk_ptr[1] += (split_mantissa[1] ^ ivalue_neg) + (ivalue_neg & 1); + } +# elif OPT_SMALL==3 && USE_SIMD && __SSE2__ + { xsum_int ivalue_neg + = ivalue>>(XSUM_SCHUNK_BITS-1); /* all 0s if +ve, all 1s if -ve */ + _mm_storeu_si128 ((__m128i *)chunk_ptr, + _mm_add_epi64 (_mm_loadu_si128 ((__m128i *)chunk_ptr), + _mm_add_epi64 (_mm_set1_epi64((__m64)(ivalue_neg&1)), + _mm_xor_si128 (_mm_set1_epi64((__m64)ivalue_neg), + _mm_loadu_si128 ((__m128i *)split_mantissa))))); + } +# else + { if (ivalue < 0) + { chunk_ptr[0] -= split_mantissa[0]; + chunk_ptr[1] -= split_mantissa[1]; + } + else + { chunk_ptr[0] += split_mantissa[0]; + chunk_ptr[1] += split_mantissa[1]; + } + } +# endif +} + + +/* ADD ONE DOUBLE TO A SMALL ACCUMULATOR. This is equivalent to, but + somewhat faster than, calling xsum_small_addv with a vector of one + value. */ + +void xsum_small_add1 (xsum_small_accumulator *restrict sacc, xsum_flt value) +{ + if (sacc->adds_until_propagate == 0) + { (void) xsum_carry_propagate(sacc); + } + + xsum_add1_no_carry (sacc, value); + + sacc->adds_until_propagate -= 1; +} + + +/* ADD A VECTOR OF FLOATING-POINT NUMBERS TO A SMALL ACCUMULATOR. Mixes + calls of xsum_carry_propagate with calls of xsum_add1_no_carry. */ + +void xsum_small_addv (xsum_small_accumulator *restrict sacc, + const xsum_flt *restrict vec, + xsum_length n) +{ xsum_length m, i; + + while (n > 0) + { if (sacc->adds_until_propagate == 0) + { (void) xsum_carry_propagate(sacc); + } + m = n <= sacc->adds_until_propagate ? n : sacc->adds_until_propagate; + for (i = 0; i < m; i++) + { xsum_add1_no_carry (sacc, vec[i]); + } + sacc->adds_until_propagate -= m; + vec += m; + n -= m; + } +} + + +/* ADD SQUARED NORM OF VECTOR OF FLOATING-POINT NUMBERS TO SMALL ACCUMULATOR. + Mixes calls of xsum_carry_propagate with calls of xsum_add1_no_carry. */ + +void xsum_small_add_sqnorm (xsum_small_accumulator *restrict sacc, + const xsum_flt *restrict vec, + xsum_length n) +{ xsum_length m, i; + + while (n > 0) + { if (sacc->adds_until_propagate == 0) + { (void) xsum_carry_propagate(sacc); + } + m = n <= sacc->adds_until_propagate ? n : sacc->adds_until_propagate; + for (i = 0; i < m; i++) + { xsum_add1_no_carry (sacc, vec[i] * vec[i]); + } + sacc->adds_until_propagate -= m; + vec += m; + n -= m; + } +} + + +/* ADD DOT PRODUCT OF VECTORS OF FLOATING-POINT NUMBERS TO SMALL ACCUMULATOR. + Mixes calls of xsum_carry_propagate with calls of xsum_add1_no_carry. */ + +void xsum_small_add_dot (xsum_small_accumulator *restrict sacc, + const xsum_flt *vec1, const xsum_flt *vec2, + xsum_length n) +{ xsum_length m, i; + + while (n > 0) + { if (sacc->adds_until_propagate == 0) + { (void) xsum_carry_propagate(sacc); + } + m = n <= sacc->adds_until_propagate ? 
n : sacc->adds_until_propagate; + for (i = 0; i < m; i++) + { xsum_add1_no_carry (sacc, vec1[i] * vec2[i]); + } + sacc->adds_until_propagate -= m; + vec1 += m; + vec2 += m; + n -= m; + } +} + + +/* ADD A SMALL ACCUMULATOR TO ANOTHER SMALL ACCUMULATOR. The first argument + is the destination, which is modified. The second is the accumulator to + add, which may also be modified, but should still represent the same + number. Source and destination may be the same. */ + +void xsum_small_add_accumulator (xsum_small_accumulator *dst_sacc, + xsum_small_accumulator *src_sacc) +{ + int i; + + xsum_carry_propagate (dst_sacc); + + if (dst_sacc == src_sacc) + { for (i = 0; i < XSUM_SCHUNKS; i++) + { dst_sacc->chunk[i] += dst_sacc->chunk[i]; + } + } + else + { + xsum_carry_propagate (src_sacc); + + if (src_sacc->Inf) xsum_small_add_inf_nan (dst_sacc, src_sacc->Inf); + if (src_sacc->NaN) xsum_small_add_inf_nan (dst_sacc, src_sacc->NaN); + + for (i = 0; i < XSUM_SCHUNKS; i++) + { dst_sacc->chunk[i] += src_sacc->chunk[i]; + } + } + + dst_sacc->adds_until_propagate = XSUM_SMALL_CARRY_TERMS-2; +} + + +/* NEGATE THE VALUE IN A SMALL ACCUMULATOR. */ + +void xsum_small_negate (xsum_small_accumulator *restrict sacc) +{ + int i; + + for (i = 0; i < XSUM_SCHUNKS; i++) + { sacc->chunk[i] = -sacc->chunk[i]; + } + + if (sacc->Inf != 0) + { sacc->Inf ^= XSUM_SIGN_MASK; + } +} + + +/* RETURN THE RESULT OF ROUNDING A SMALL ACCUMULATOR. The rounding mode + is to nearest, with ties to even. The small accumulator may be modified + by this operation (by carry propagation being done), but the value it + represents should not change. */ + +xsum_flt xsum_small_round (xsum_small_accumulator *restrict sacc) +{ + xsum_int ivalue; + xsum_schunk lower; + int i, j, e, more; + xsum_int intv; + double fltv; + + /* See if we have a NaN from one of the numbers being a NaN, in + which case we return the NaN with largest payload, or an infinite + result (+Inf, -Inf, or a NaN if both +Inf and -Inf occurred). + Note that we do NOT return NaN if we have both an infinite number + and a sum of other numbers that overflows with opposite sign, + since there is no real ambiguity regarding the sign in such a case. */ + + if (sacc->NaN != 0) + { COPY64(fltv, sacc->NaN); + return fltv; + } + + if (sacc->Inf != 0) + { COPY64 (fltv, sacc->Inf); + return fltv; + } + + /* If none of the numbers summed were infinite or NaN, we proceed to + propagate carries, as a preliminary to finding the magnitude of + the sum. This also ensures that the sign of the result can be + determined from the uppermost non-zero chunk. + + We also find the index, i, of this uppermost non-zero chunk, as + the value returned by xsum_carry_propagate, and set ivalue to + sacc->chunk[i]. Note that ivalue will not be 0 or -1, unless + i is 0 (the lowest chunk), in which case it will be handled by + the code for denormalized numbers. */ + + i = xsum_carry_propagate(sacc); + + ivalue = sacc->chunk[i]; + + /* Handle a possible denormalized number, including zero. */ + + if (i <= 1) + { + /* Check for zero value, in which case we can return immediately. */ + + if (ivalue == 0) + { return 0.0; + } + + /* Check if it is actually a denormalized number. It always is if only + the lowest chunk is non-zero. If the highest non-zero chunk is the + next-to-lowest, we check the magnitude of the absolute value. + Note that the real exponent is 1 (not 0), so we need to shift right + by 1 here. */ + + if (i == 0) + { intv = ivalue >= 0 ? 
ivalue : -ivalue; + intv >>= 1; + if (ivalue < 0) + { intv |= XSUM_SIGN_MASK; + } + COPY64 (fltv, intv); + return fltv; + } + else + { /* Note: Left shift of -ve number is undefined, so do a multiply instead, + which is probably optimized to a shift. */ + intv = ivalue * ((xsum_int)1 << (XSUM_LOW_MANTISSA_BITS-1)) + + (sacc->chunk[0] >> 1); + if (intv < 0) + { if (intv > - ((xsum_int)1 << XSUM_MANTISSA_BITS)) + { intv = (-intv) | XSUM_SIGN_MASK; + COPY64 (fltv, intv); + return fltv; + } + } + else /* non-negative */ + { if ((xsum_uint)intv < (xsum_uint)1 << XSUM_MANTISSA_BITS) + { + COPY64 (fltv, intv); + return fltv; + } + } + /* otherwise, it's not actually denormalized, so fall through to below */ + } + } + + /* Find the location of the uppermost 1 bit in the absolute value of + the upper chunk by converting it (as a signed integer) to a + floating point value, and looking at the exponent. Then set + 'more' to the number of bits from the lower chunk (and maybe the + next lower) that are needed to fill out the mantissa of the + result (including the top implicit 1 bit), plus two extra bits to + help decide on rounding. For negative numbers, it may turn out + later that we need another bit, because negating a negative value + may carry out of the top here, but not carry out of the top once + more bits are shifted into the bottom later on. */ + + fltv = (xsum_flt) ivalue; /* finds position of topmost 1 bit of |ivalue| */ + COPY64 (intv, fltv); + e = (intv >> XSUM_MANTISSA_BITS) & XSUM_EXP_MASK; /* e-bias is in 0..32 */ + more = 2 + XSUM_MANTISSA_BITS + XSUM_EXP_BIAS - e; + + /* Change 'ivalue' to put in 'more' bits from lower chunks into the bottom. + Also set 'j' to the index of the lowest chunk from which these bits came, + and 'lower' to the remaining bits of that chunk not now in 'ivalue'. + Note that 'lower' initially has at least one bit in it, which we can + later move into 'ivalue' if it turns out that one more bit is needed. */ + + ivalue *= (xsum_int)1 << more; /* multiply, since << of negative undefined */ + + j = i-1; + lower = sacc->chunk[j]; /* must exist, since denormalized if i==0 */ + if (more >= XSUM_LOW_MANTISSA_BITS) + { more -= XSUM_LOW_MANTISSA_BITS; + ivalue += lower << more; + j -= 1; + lower = j < 0 ? 0 : sacc->chunk[j]; + } + ivalue += lower >> (XSUM_LOW_MANTISSA_BITS - more); + lower &= ((xsum_schunk)1 << (XSUM_LOW_MANTISSA_BITS - more)) - 1; + + /* Decide on rounding, with separate code for positive and negative values. + + At this point, 'ivalue' has the signed mantissa bits, plus two extra + bits, with 'e' recording the exponent position for these within their + top chunk. For positive 'ivalue', the bits in 'lower' and chunks + below 'j' add to the absolute value; for negative 'ivalue' they + subtract. + + After setting 'ivalue' to the tentative unsigned mantissa + (shifted left 2), and 'intv' to have the correct sign, this + code goes to done_rounding if it finds that just discarding lower + order bits is correct, and to round_away_from_zero if instead the + magnitude should be increased by one in the lowest mantissa bit. 
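+
+   (Summary of the positive branch below, added for orientation; the
+   negative branch mirrors it with the lower bits subtracting. Extra
+   bits 00 or 01 mean round down; 11 means round up; 10 with any lower
+   bits set means the value is just above halfway, so round up; an
+   exact 10 tie rounds to even, i.e. up only if the low mantissa bit
+   is odd.)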
*/ + + if (ivalue >= 0) /* number is positive, lower bits are added to magnitude */ + { + intv = 0; /* positive sign */ + + if ((ivalue & 2) == 0) /* extra bits are 0x */ + { + goto done_rounding; + } + + if ((ivalue & 1) != 0) /* extra bits are 11 */ + { + goto round_away_from_zero; + } + + if ((ivalue & 4) != 0) /* low bit is 1 (odd), extra bits are 10 */ + { + goto round_away_from_zero; + } + + if (lower == 0) /* see if any lower bits are non-zero */ + { while (j > 0) + { j -= 1; + if (sacc->chunk[j] != 0) + { lower = 1; + break; + } + } + } + + if (lower != 0) /* low bit 0 (even), extra bits 10, non-zero lower bits */ + { + goto round_away_from_zero; + } + else /* low bit 0 (even), extra bits 10, all lower bits 0 */ + { + goto done_rounding; + } + } + + else /* number is negative, lower bits are subtracted from magnitude */ + { + /* Check for a negative 'ivalue' that when negated doesn't contain a full + mantissa's worth of bits, plus one to help rounding. If so, move one + more bit into 'ivalue' from 'lower' (and remove it from 'lower'). + This happens when the negation of the upper part of 'ivalue' has the + form 10000... but the negation of the full 'ivalue' is not 10000... */ + + if (((-ivalue) & ((xsum_int)1 << (XSUM_MANTISSA_BITS+2))) == 0) + { int pos = (xsum_schunk)1 << (XSUM_LOW_MANTISSA_BITS - 1 - more); + ivalue *= 2; /* note that left shift undefined if ivalue is negative */ + if (lower & pos) + { ivalue += 1; + lower &= ~pos; + } + e -= 1; + } + + intv = XSUM_SIGN_MASK; /* negative sign */ + ivalue = -ivalue; /* ivalue now contains the absolute value */ + + if ((ivalue & 3) == 3) /* extra bits are 11 */ + { + goto round_away_from_zero; + } + + if ((ivalue & 3) <= 1) /* extra bits are 00 or 01 */ + { + goto done_rounding; + } + + if ((ivalue & 4) == 0) /* low bit is 0 (even), extra bits are 10 */ + { + goto done_rounding; + } + + if (lower == 0) /* see if any lower bits are non-zero */ + { while (j > 0) + { j -= 1; + if (sacc->chunk[j] != 0) + { lower = 1; + break; + } + } + } + + if (lower != 0) /* low bit 1 (odd), extra bits 10, non-zero lower bits */ + { + goto done_rounding; + } + else /* low bit 1 (odd), extra bits are 10, lower bits are all 0 */ + { + goto round_away_from_zero; + } + + } + +round_away_from_zero: + + /* Round away from zero, then check for carry having propagated out the + top, and shift if so. */ + + ivalue += 4; /* add 1 to low-order mantissa bit */ + if (ivalue & ((xsum_int)1 << (XSUM_MANTISSA_BITS+3))) + { ivalue >>= 1; + e += 1; + } + +done_rounding: ; + + /* Get rid of the bottom 2 bits that were used to decide on rounding. */ + + ivalue >>= 2; + + /* Adjust to the true exponent, accounting for where this chunk is. */ + + e += (i<<XSUM_LOW_EXP_BITS) - XSUM_EXP_BIAS - XSUM_MANTISSA_BITS; + + /* If exponent has overflowed, change to plus or minus Inf and return. */ + + if (e >= XSUM_EXP_MASK) + { intv |= (xsum_int) XSUM_EXP_MASK << XSUM_MANTISSA_BITS; + COPY64 (fltv, intv); + + return fltv; + } + + /* Put exponent and mantissa into intv, which already has the sign, + then copy into fltv. */ + + intv += (xsum_int)e << XSUM_MANTISSA_BITS; + intv += ivalue & XSUM_MANTISSA_MASK; /* mask out the implicit 1 bit */ + COPY64 (fltv, intv); + + if (xsum_debug) + { + if ((ivalue >> XSUM_MANTISSA_BITS) != 1) abort(); + } + + return fltv; +} + + +/* FIND RESULT OF DIVIDING SMALL ACCUMULATOR BY UNSIGNED INTEGER. 
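+
+   For instance (our illustration, not part of the original comment):
+   after summing n doubles into acc, xsum_small_div_unsigned (&acc, n)
+   should give their mean with a single rounding, instead of the two
+   roundings incurred by computing xsum_small_round (&acc) / n.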
+*/
+
+xsum_flt xsum_small_div_unsigned
+  (xsum_small_accumulator *restrict sacc, unsigned div)
+{
+  xsum_flt result;
+  unsigned rem;
+  double fltv;
+  int sign;
+  int i, j;
+
+  /* Return NaN or an Inf if that's what's in the superaccumulator. */
+
+  if (sacc->NaN != 0)
+  { COPY64(fltv, sacc->NaN);
+    return fltv;
+  }
+
+  if (sacc->Inf != 0)
+  { COPY64 (fltv, sacc->Inf);
+    return fltv;
+  }
+
+  /* Make a copy of the superaccumulator, so we can change it here without
+     changing *sacc. */
+
+  xsum_small_accumulator tacc = *sacc;
+
+  /* Carry propagate in the temporary copy of the superaccumulator.
+     Sets 'i' to the index of the topmost nonzero chunk. */
+
+  i = xsum_carry_propagate(&tacc);
+
+  /* Check for division by zero, and if so, return +Inf, -Inf, or NaN,
+     depending on whether the superaccumulator is positive, negative,
+     or zero. */
+
+  if (div == 0)
+  {
+    return tacc.chunk[i] > 0 ? INFINITY : tacc.chunk[i] < 0 ? -INFINITY : NAN;
+  }
+
+  /* Record sign of accumulator, and if it's negative, negate and
+     re-propagate so that it will be positive. */
+
+  sign = +1;
+
+  if (tacc.chunk[i] < 0)
+  { xsum_small_negate(&tacc);
+    i = xsum_carry_propagate(&tacc);
+    if (xsum_debug)
+    {
+      if (tacc.chunk[i] < 0) abort();
+    }
+    sign = -1;
+  }
+
+  /* Do the division in the small accumulator, putting the remainder after
+     dividing the bottom chunk in 'rem'. */
+
+  rem = 0;
+  for (j = i; j>=0; j--)
+  { xsum_uint num = ((xsum_uint) rem << XSUM_LOW_MANTISSA_BITS) + tacc.chunk[j];
+    xsum_uint quo = num / div;
+    rem = num - quo*div;
+    tacc.chunk[j] = quo;
+  }
+
+  /* Find new top chunk. */
+
+  while (i > 0 && tacc.chunk[i] == 0)
+  { i -= 1;
+  }
+
+  /* Do rounding, with separate approaches for a normal number with biased
+     exponent greater than 1, and for a normal number with exponent of 1
+     or a denormalized number (also having true biased exponent of 1). */
+
+  if (i > 1 || tacc.chunk[1] >= (1 << (XSUM_HIGH_MANTISSA_BITS+2)))
+  {
+    /* Normalized number with at least two bits at bottom of chunk 0
+       below the mantissa. Just need to 'or' in a 1 at the bottom if
+       remainder is non-zero to break a tie if bits below bottom of
+       mantissa are exactly 1/2. */
+
+    if (rem > 0)
+    { tacc.chunk[0] |= 1;
+    }
+  }
+  else
+  {
+    /* Denormalized number or normal number with biased exponent of 1.
+       Lowest bit of bottom chunk is just below lowest bit of
+       mantissa. Need to explicitly round here using the bottom bit
+       and the remainder - round up if lower > 1/2 or >= 1/2 and
+       odd. */
+
+    if (tacc.chunk[0] & 1) /* lower part is >= 1/2 */
+    {
+      if (tacc.chunk[0] & 2) /* lowest bit of mantissa is 1 (odd) */
+      { tacc.chunk[0] += 2; /* round up */
+      }
+      else /* lowest bit of mantissa is 0 (even) */
+      { if (rem > 0) /* lower part is > 1/2 */
+        { tacc.chunk[0] += 2; /* round up */
+        }
+      }
+
+      tacc.chunk[0] &= ~1; /* clear low bit (but should anyway be ignored) */
+    }
+  }
+
+  /* Do the final rounding, with the lowest bit set as above. */
+
+  result = xsum_small_round (&tacc);
+
+  return sign*result;
+}
+
+
+/* FIND RESULT OF DIVIDING SMALL ACCUMULATOR BY SIGNED INTEGER. */
+
+xsum_flt xsum_small_div_int
+  (xsum_small_accumulator *restrict sacc, int div)
+{
+  if (div < 0)
+  { return -xsum_small_div_unsigned (sacc, (unsigned) -div);
+  }
+  else
+  { return xsum_small_div_unsigned (sacc, (unsigned) div);
+  }
+}
diff --git a/lib/monoucha0/monoucha/qjs/xsum.h b/lib/monoucha0/monoucha/qjs/xsum.h
new file mode 100644
index 00000000..2372cac6
--- /dev/null
+++ b/lib/monoucha0/monoucha/qjs/xsum.h
@@ -0,0 +1,133 @@
+/* INTERFACE TO FUNCTIONS FOR EXACT SUMMATION. */
+
+/* Copyright 2015, 2018, 2021 Radford M. Neal
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef XSUM_H
+#define XSUM_H
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+
+
+/* CONSTANTS DEFINING THE FLOATING POINT FORMAT. */
+
+typedef double xsum_flt;          /* C floating point type sums are done for */
+
+typedef int64_t xsum_int;         /* Signed integer type for a fp value */
+typedef uint64_t xsum_uint;       /* Unsigned integer type for a fp value */
+typedef int_fast16_t xsum_expint; /* Integer type for holding an exponent */
+
+#define XSUM_MANTISSA_BITS 52 /* Bits in fp mantissa, excludes implicit 1 */
+#define XSUM_EXP_BITS 11      /* Bits in fp exponent */
+
+#define XSUM_MANTISSA_MASK \
+  (((xsum_int)1 << XSUM_MANTISSA_BITS) - 1) /* Mask for mantissa bits */
+
+#define XSUM_EXP_MASK \
+  ((1 << XSUM_EXP_BITS) - 1) /* Mask for exponent */
+
+#define XSUM_EXP_BIAS \
+  ((1 << (XSUM_EXP_BITS-1)) - 1) /* Bias added to signed exponent */
+
+#define XSUM_SIGN_BIT \
+  (XSUM_MANTISSA_BITS + XSUM_EXP_BITS) /* Position of sign bit */
+
+#define XSUM_SIGN_MASK \
+  ((xsum_uint)1 << XSUM_SIGN_BIT) /* Mask for sign bit */
+
+
+/* CONSTANTS DEFINING THE SMALL ACCUMULATOR FORMAT. */
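+
+/* (Derived values, worked out here for illustration: with
+   XSUM_LOW_EXP_BITS = 5 and the IEEE-754 double parameters above,
+   XSUM_LOW_MANTISSA_BITS = 2^5 = 32, XSUM_HIGH_EXP_BITS = 11 - 5 = 6,
+   XSUM_SCHUNKS = 2^6 + 3 = 67, XSUM_SMALL_CARRY_BITS = 63 - 52 = 11,
+   and XSUM_SMALL_CARRY_TERMS = 2^11 - 1 = 2047 additions allowed
+   between carry propagations.) */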
+
+#define XSUM_SCHUNK_BITS 64    /* Bits in chunk of the small accumulator */
+typedef int64_t xsum_schunk;   /* Integer type of small accumulator chunk */
+
+#define XSUM_LOW_EXP_BITS 5    /* # of low bits of exponent, in one chunk */
+
+#define XSUM_LOW_EXP_MASK \
+  ((1 << XSUM_LOW_EXP_BITS) - 1) /* Mask for low-order exponent bits */
+
+#define XSUM_HIGH_EXP_BITS \
+  (XSUM_EXP_BITS - XSUM_LOW_EXP_BITS) /* # of high exponent bits for index */
+
+#define XSUM_HIGH_EXP_MASK \
+  ((1 << XSUM_HIGH_EXP_BITS) - 1) /* Mask for high-order exponent bits */
+
+#define XSUM_SCHUNKS \
+  ((1 << XSUM_HIGH_EXP_BITS) + 3) /* # of chunks in small accumulator */
+
+#define XSUM_LOW_MANTISSA_BITS \
+  (1 << XSUM_LOW_EXP_BITS) /* Bits in low part of mantissa */
+
+#define XSUM_HIGH_MANTISSA_BITS \
+  (XSUM_MANTISSA_BITS - XSUM_LOW_MANTISSA_BITS) /* Bits in high part */
+
+#define XSUM_LOW_MANTISSA_MASK \
+  (((xsum_int)1 << XSUM_LOW_MANTISSA_BITS) - 1) /* Mask for low bits */
+
+#define XSUM_SMALL_CARRY_BITS \
+  ((XSUM_SCHUNK_BITS-1) - XSUM_MANTISSA_BITS) /* Bits sums can carry into */
+
+#define XSUM_SMALL_CARRY_TERMS \
+  ((1 << XSUM_SMALL_CARRY_BITS) - 1) /* # terms can add before need prop. */
+
+typedef struct
+{ xsum_schunk chunk[XSUM_SCHUNKS]; /* Chunks making up small accumulator */
+  xsum_int Inf;                    /* If non-zero, +Inf, -Inf, or NaN */
+  xsum_int NaN;                    /* If non-zero, a NaN value with payload */
+  int adds_until_propagate;        /* Number of remaining adds before carry */
+} xsum_small_accumulator;          /*   propagation must be done again */
+
+
+/* TYPE FOR LENGTHS OF ARRAYS. Must be a signed integer type. Set to
+   ptrdiff_t here on the assumption that this will be big enough, but
+   not unnecessarily big, which seems to be true. */
+
+typedef ptrdiff_t xsum_length;
+
+
+/* FUNCTIONS FOR EXACT SUMMATION, WITH POSSIBLE DIVISION BY AN INTEGER. */
+
+void xsum_small_init (xsum_small_accumulator *restrict);
+void xsum_small_add1 (xsum_small_accumulator *restrict, xsum_flt);
+void xsum_small_addv (xsum_small_accumulator *restrict,
+                      const xsum_flt *restrict, xsum_length);
+void xsum_small_add_sqnorm (xsum_small_accumulator *restrict,
+                            const xsum_flt *restrict, xsum_length);
+void xsum_small_add_dot (xsum_small_accumulator *restrict,
+                         const xsum_flt *, const xsum_flt *, xsum_length);
+void xsum_small_add_accumulator (xsum_small_accumulator *,
+                                 xsum_small_accumulator *);
+void xsum_small_negate (xsum_small_accumulator *restrict);
+xsum_flt xsum_small_round (xsum_small_accumulator *restrict);
+
+xsum_flt xsum_small_div_unsigned (xsum_small_accumulator *restrict, unsigned);
+xsum_flt xsum_small_div_int (xsum_small_accumulator *restrict, int);
+
+
+/* DEBUG FLAG. Set to non-zero for debug output. Ignored unless xsum.c
+   is compiled with -DDEBUG.
*/ + +extern int xsum_debug; + +#endif diff --git a/lib/monoucha0/monoucha/quickjs.nim b/lib/monoucha0/monoucha/quickjs.nim index 2154b42a..4ffefae7 100644 --- a/lib/monoucha0/monoucha/quickjs.nim +++ b/lib/monoucha0/monoucha/quickjs.nim @@ -20,7 +20,7 @@ else: {.passl: "-lpthread".} {.compile("qjs/quickjs.c", CFLAGS).} -{.compile("qjs/libbf.c", CFLAGS).} +{.compile("qjs/xsum.c", CFLAGS).} {.passc: "-I" & currentSourcePath().parentDir().} diff --git a/res/license.md b/res/license.md index 23e8fbc4..5efcb054 100644 --- a/res/license.md +++ b/res/license.md @@ -26,6 +26,7 @@ Table of contents: * [JebP](#jebp) * [NanoSVG](#nanosvg) * [QuickJS-NG](#quickjs-ng) + * [xsum](#xsum) * [Punycode library](#punycode-library) * [GNU Unifont](#gnu-unifont) @@ -161,6 +162,33 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ``` +### xsum + +QuickJS-NG also includes xsum.c by Radford M. Neal for Math.sumPrecise. + +``` +Copyright 2015, 2018, 2021, 2024 Radford M. Neal + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +``` + ## Punycode library We vendor the punycode library, which is no longer included in the Nim |