From bac188ce07b957807d4c649cb5d4e5e253360278 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Fri, 16 Aug 2024 14:24:20 -0400
Subject: Change division and modulus to use euclidean division, plus fix up a
 few integer bugs

---
 builtins/integers.c | 27 +++++++++++++-----------
 builtins/integers.h | 59 ++++++++++++++++++++++++++++++++++++++++++++---------
 environment.c       | 14 ++++++++++++-
 test/integers.tm    | 11 ++++++++--
 4 files changed, 86 insertions(+), 25 deletions(-)

diff --git a/builtins/integers.c b/builtins/integers.c
index bb82fab6..0bf7dc22 100644
--- a/builtins/integers.c
+++ b/builtins/integers.c
@@ -152,17 +152,20 @@ public Int_t Int$slow_times(Int_t x, Int_t y) {
     return Int$from_mpz(result);
 }
 
-public Int_t Int$slow_divided_by(Int_t x, Int_t y) {
-    mpz_t result;
-    mpz_init_set_int(result, x);
-    if (y.small & 1) {
-        mpz_t y_mpz;
-        mpz_init_set_si(y_mpz, y.small >> 2);
-        mpz_cdiv_q(result, result, y_mpz);
-    } else {
-        mpz_cdiv_q(result, result, *y.big);
+public Int_t Int$slow_divided_by(Int_t dividend, Int_t divisor) {
+    // Slow (GMP) path for Euclidean division, where the quotient is chosen so the remainder is never negative, see: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+    mpz_t quotient, remainder;
+    mpz_init_set_int(quotient, dividend); // holds the dividend on input; mpz_tdiv_qr() overwrites it with the quotient
+    mpz_init_set_int(remainder, divisor); // holds the divisor on input; mpz_tdiv_qr() overwrites it with the remainder
+    mpz_tdiv_qr(quotient, remainder, quotient, remainder); // truncating division: the remainder takes the sign of the dividend
+    if (mpz_sgn(remainder) < 0) { // a negative remainder means the truncated quotient is off by one step
+        bool d_positive = __builtin_expect(divisor.small & 1, 1) ? divisor.small > 0x1 : mpz_sgn(*divisor.big) > 0; // low bit set: tagged small int, (value<<2)|1 > 1 iff value > 0; otherwise ask GMP for the bignum's sign
+        if (d_positive)
+            mpz_sub_ui(quotient, quotient, 1); // positive divisor: step the quotient down
+        else
+            mpz_add_ui(quotient, quotient, 1); // negative divisor: step the quotient up
     }
-    return Int$from_mpz(result);
+    return Int$from_mpz(quotient);
 }
 
 public Int_t Int$slow_modulo(Int_t x, Int_t modulus)
@@ -359,7 +362,7 @@ public const TypeInfo $Int = {
     } \
     public CORD KindOfInt ## $format(c_type i, Int_t digits_int) { \
         int64_t digits = Int_to_Int64(digits_int, false); \
-        return CORD_asprintf("%0*" fmt, (int)digits, i); \
+        return CORD_asprintf("%0*ld", (int)digits, (int64_t)i); \
     } \
     public CORD KindOfInt ## $hex(c_type i, Int_t digits_int, bool uppercase, bool prefix) { \
         int64_t digits = Int_to_Int64(digits_int, false); \
@@ -427,7 +430,7 @@ public const TypeInfo $Int = {
         .CustomInfo={.compare=(void*)KindOfInt##$compare, .as_text=(void*)KindOfInt##$as_text}, \
     };
 
-DEFINE_INT_TYPE(int64_t,  Int64,  "ld",     INT64_MIN, INT64_MAX);
+DEFINE_INT_TYPE(int64_t,  Int64,  "ld_i64", INT64_MIN, INT64_MAX);
 DEFINE_INT_TYPE(int32_t,  Int32,  "d_i32",  INT32_MIN, INT32_MAX);
 DEFINE_INT_TYPE(int16_t,  Int16,  "d_i16",  INT16_MIN, INT16_MAX);
 DEFINE_INT_TYPE(int8_t,   Int8,   "d_i8",   INT8_MIN,  INT8_MAX);
diff --git a/builtins/integers.h b/builtins/integers.h
index 898469e2..e6b5b1fb 100644
--- a/builtins/integers.h
+++ b/builtins/integers.h
@@ -35,7 +35,26 @@
     Range_t type_name ## $to(c_type from, c_type to); \
     c_type type_name ## $from_text(CORD text, CORD *the_rest); \
     extern const c_type type_name ## $min, type_name##$max; \
-    extern const TypeInfo $ ## type_name;
+    extern const TypeInfo $ ## type_name; \
+    static inline c_type type_name ## $divided_by(c_type D, c_type d) { /* Euclidean quotient of D (dividend) by d (divisor) */ \
+        c_type q = D/d, r = D%d; /* C's truncating quotient and remainder; NOTE(review): d == 0 is not guarded here -- confirm callers rule it out */ \
+        if (r < 0) { /* negative remainder: move q one step so that D - q*d is non-negative */ \
+            if (d > 0) q = q-1; \
+            else q = q+1; \
+        } \
+        return q; \
+    } \
+    static inline c_type type_name ## $modulo(c_type D, c_type d) { /* Euclidean remainder of D by d */ \
+        c_type r = D%d; /* C's remainder takes the sign of D, so it can be negative */ \
+        if (r < 0) { /* shift a negative remainder up by |d| */ \
+            if (d > 0) r = r + d; \
+            else r = r - d; \
+        } \
+        return r; \
+    } \
+    static inline c_type type_name ## $modulo1(c_type D, c_type d) { /* $modulo shifted by one: modulo(D-1, d) + 1 */ \
+        return type_name ## $modulo(D-1, d) + 1; \
+    }
 
 DEFINE_INT_TYPE(int64_t, Int64);
 DEFINE_INT_TYPE(int32_t, Int32);
@@ -128,27 +147,47 @@ static inline Int_t Int$times(Int_t x, Int_t y) {
 
 static inline Int_t Int$divided_by(Int_t x, Int_t y) {
     if (__builtin_expect(((x.small & y.small) & 1) != 0, 1)) {
-        const int64_t z = ((x.small>>1) / (y.small>>1)) << 2;
-        if (__builtin_expect(z == (int32_t)z, 1))
-            return (Int_t){.small=z|1};
+        // Fast path (both operands carry the low tag bit). Euclidean division, see: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+        const int64_t D = (x.small>>2); // untagged dividend (tagged form is (value<<2)|1)
+        const int64_t d = (y.small>>2); // untagged divisor; NOTE(review): d == 0 is not guarded here -- confirm callers rule it out
+        int64_t q = D/d; // C's truncating quotient
+        int64_t r = D%d; // C's remainder, which takes the sign of D
+        if (r < 0) { // negative remainder: move q one step so that D - q*d is non-negative
+            if (d > 0) q = q-1;
+            else q = q+1;
+        }
+        if (__builtin_expect(q == (int32_t)q, 1)) // expected case: quotient fits in 32 bits, so re-tag and return it directly
+            return (Int_t){.small=(q<<2)|1};
     }
     return Int$slow_divided_by(x, y);
 }
 
 static inline Int_t Int$modulo(Int_t x, Int_t y) {
     if (__builtin_expect(((x.small & y.small) & 1) != 0, 1)) {
-        int64_t mod = (x.small>>2) % (y.small>>2);
-        if (mod < 0) mod += (y.small>>2);
-        return (Int_t){.small=(mod<<2)+1};
+        // Fast path (both operands carry the low tag bit). Euclidean modulus, see: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+        const int64_t D = (x.small>>2); // untagged dividend (tagged form is (value<<2)|1)
+        const int64_t d = (y.small>>2); // untagged divisor; NOTE(review): d == 0 is not guarded here -- confirm callers rule it out
+        int64_t r = D%d; // C's remainder takes the sign of D, so it can be negative
+        if (r < 0) { // shift a negative remainder up by |d|
+            if (d > 0) r = r + d;
+            else r = r - d;
+        }
+        return (Int_t){.small=(r<<2)|1}; // re-tag the result as a small int
     }
     return Int$slow_modulo(x, y);
 }
 
 static inline Int_t Int$modulo1(Int_t x, Int_t y) {
     if (__builtin_expect(((x.small & y.small) & 1) != 0, 1)) {
-        int64_t mod = ((x.small>>2)-1) % (y.small>>2);
-        if (mod < 0) mod += (y.small>>2);
-        return (Int_t){.small=((mod+1)<<2)+1};
+        // Fast path (both operands carry the low tag bit). Euclidean modulus of (x-1) plus one, see: https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/divmodnote-letter.pdf
+        const int64_t D = (x.small>>2)-1; // untagged dividend, shifted down by one before taking the modulus
+        const int64_t d = (y.small>>2); // untagged divisor; NOTE(review): d == 0 is not guarded here -- confirm callers rule it out
+        int64_t r = D%d; // C's remainder takes the sign of D, so it can be negative
+        if (r < 0) { // shift a negative remainder up by |d|
+            if (d > 0) r = r + d;
+            else r = r - d;
+        }
+        return (Int_t){.small=((r+1)<<2)|1}; // add the one back, then re-tag as a small int
     }
     return Int$slow_modulo1(x, y);
 }
diff --git a/environment.c b/environment.c
index 5fa1b965..becf1291 100644
--- a/environment.c
+++ b/environment.c
@@ -115,13 +115,16 @@ env_t *new_compilation_unit(CORD *libname)
             {"format", "Int64$format", "func(i:Int64, digits=0)->Text"},
             {"hex", "Int64$hex", "func(i:Int64, digits=0, uppercase=yes, prefix=yes)->Text"},
             {"octal", "Int64$octal", "func(i:Int64, digits=0, prefix=yes)->Text"},
-            {"random", "Int64$random", "func(min=-0x8000000000000000, max=0x7FFFFFFFFFFFFFFF)->Int64"},
+            {"random", "Int64$random", "func(min=-0x8000000000000000_i64, max=0x7FFFFFFFFFFFFFFF_i64)->Int64"},
             {"from_text", "Int64$from_text", "func(text:Text, the_rest=!&Text)->Int64"},
             {"bits", "Int64$bits", "func(x:Int64)->[Bool]"},
             {"abs", "labs", "func(i:Int64)->Int64"},
             {"min", "Int64$min", "Int64"},
             {"max", "Int64$max", "Int64"},
             {"to", "Int64$to", "func(from:Int64,to:Int64)->Range"},
+            {"divided_by", "Int64$divided_by", "func(x:Int64,y:Int64)->Int64"},
+            {"modulo", "Int64$modulo", "func(x:Int64,y:Int64)->Int64"},
+            {"modulo1", "Int64$modulo1", "func(x:Int64,y:Int64)->Int64"},
         )},
         {"Int32", Type(IntType, .bits=32), "Int32_t", "$Int32", TypedArray(ns_entry_t,
             {"format", "Int32$format", "func(i:Int32, digits=0)->Text"},
@@ -134,6 +137,9 @@ env_t *new_compilation_unit(CORD *libname)
             {"min", "Int32$min", "Int32"},
             {"max", "Int32$max", "Int32"},
             {"to", "Int32$to", "func(from:Int32,to:Int32)->Range"},
+            {"divided_by", "Int32$divided_by", "func(x:Int32,y:Int32)->Int32"},
+            {"modulo", "Int32$modulo", "func(x:Int32,y:Int32)->Int32"},
+            {"modulo1", "Int32$modulo1", "func(x:Int32,y:Int32)->Int32"},
         )},
         {"Int16", Type(IntType, .bits=16), "Int16_t", "$Int16", TypedArray(ns_entry_t,
             {"format", "Int16$format", "func(i:Int16, digits=0)->Text"},
@@ -146,6 +152,9 @@ env_t *new_compilation_unit(CORD *libname)
             {"min", "Int16$min", "Int16"},
             {"max", "Int16$max", "Int16"},
             {"to", "Int16$to", "func(from:Int16,to:Int16)->Range"},
+            {"divided_by", "Int16$divided_by", "func(x:Int16,y:Int16)->Int16"},
+            {"modulo", "Int16$modulo", "func(x:Int16,y:Int16)->Int16"},
+            {"modulo1", "Int16$modulo1", "func(x:Int16,y:Int16)->Int16"},
         )},
         {"Int8", Type(IntType, .bits=8), "Int8_t", "$Int8", TypedArray(ns_entry_t,
             {"format", "Int8$format", "func(i:Int8, digits=0)->Text"},
@@ -158,6 +167,9 @@ env_t *new_compilation_unit(CORD *libname)
             {"min", "Int8$min", "Int8"},
             {"max", "Int8$max", "Int8"},
             {"to", "Int8$to", "func(from:Int8,to:Int8)->Range"},
+            {"divided_by", "Int8$divided_by", "func(x:Int8,y:Int8)->Int8"},
+            {"modulo", "Int8$modulo", "func(x:Int8,y:Int8)->Int8"},
+            {"modulo1", "Int8$modulo1", "func(x:Int8,y:Int8)->Int8"},
         )},
 #define C(name) {#name, "M_"#name, "Num"}
 #define F(name) {#name, #name, "func(n:Num)->Num"}
diff --git a/test/integers.tm b/test/integers.tm
index 55683500..36119f01 100644
--- a/test/integers.tm
+++ b/test/integers.tm
@@ -42,9 +42,9 @@ func main():
 
 	>> Int.random(1, 100)
 	>> Int64.min
-	= -9223372036854775808
+	= -9223372036854775808_i64
 	>> Int64.max
-	= 9223372036854775807
+	= 9223372036854775807_i64
 
 
 	>> 123_i32:hex()
@@ -75,3 +75,11 @@ func main():
 		>> super_big + 1
 		= 10000000000000000000000
 
+	do:
+		for in 20:
+			>> n := Int.random(-999999, 999999)
+			>> d := Int.random(-999, 999)
+			if d == 0: skip // the range includes 0; dividing by it would crash the test intermittently
+			//! n={n}, d={d}:
+			>> (n/d)*d + (n mod d) == n
+			= yes
-- 
cgit v1.2.3