Skip to content

Commit 10dc32c

Browse files
committed
wip
1 parent d1467e4 commit 10dc32c

21 files changed

Lines changed: 2201 additions & 1698 deletions

=q

Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
package float16
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestGetVersion(t *testing.T) {
8+
version := GetVersion()
9+
if version == "" {
10+
t.Error("Expected version string, got empty string")
11+
}
12+
}
13+
14+
func TestZero(t *testing.T) {
15+
if Zero() != PositiveZero {
16+
t.Error("Zero() should return PositiveZero")
17+
}
18+
}
19+
20+
func TestOne(t *testing.T) {
21+
if One() != 0x3C00 {
22+
t.Error("One() should return 0x3C00")
23+
}
24+
}
25+
26+
func TestInf(t *testing.T) {
27+
if Inf(1) != PositiveInfinity {
28+
t.Error("Inf(1) should return PositiveInfinity")
29+
}
30+
if Inf(-1) != NegativeInfinity {
31+
t.Error("Inf(-1) should return NegativeInfinity")
32+
}
33+
}
34+
35+
func TestIsInf(t *testing.T) {
36+
if !PositiveInfinity.IsInf(1) {
37+
t.Error("PositiveInfinity.IsInf(1) should be true")
38+
}
39+
if !NegativeInfinity.IsInf(-1) {
40+
t.Error("NegativeInfinity.IsInf(-1) should be true")
41+
}
42+
if PositiveInfinity.IsInf(-1) {
43+
t.Error("PositiveInfinity.IsInf(-1) should be false")
44+
}
45+
if !PositiveInfinity.IsInf(0) {
46+
t.Error("PositiveInfinity.IsInf(0) should be true")
47+
}
48+
}
49+
50+
func TestIsNaN(t *testing.T) {
51+
if !QuietNaN.IsNaN() {
52+
t.Error("QuietNaN.IsNaN() should be true")
53+
}
54+
if !SignalingNaN.IsNaN() {
55+
t.Error("SignalingNaN.IsNaN() should be true")
56+
}
57+
if PositiveZero.IsNaN() {
58+
t.Error("PositiveZero.IsNaN() should be false")
59+
}
60+
}
61+
62+
func TestSignbit(t *testing.T) {
63+
if PositiveZero.Signbit() {
64+
t.Error("PositiveZero.Signbit() should be false")
65+
}
66+
if NegativeZero.Signbit() == false {
67+
t.Error("NegativeZero.Signbit() should be true")
68+
}
69+
}
70+
71+
func TestIsFinite(t *testing.T) {
72+
if !PositiveZero.IsFinite() {
73+
t.Error("PositiveZero.IsFinite() should be true")
74+
}
75+
if PositiveInfinity.IsFinite() {
76+
t.Error("PositiveInfinity.IsFinite() should be false")
77+
}
78+
if QuietNaN.IsFinite() {
79+
t.Error("QuietNaN.IsFinite() should be false")
80+
}
81+
}
82+
83+
func TestIsNormal(t *testing.T) {
84+
if !One().IsNormal() {
85+
t.Error("One().IsNormal() should be true")
86+
}
87+
if PositiveZero.IsNormal() {
88+
t.Error("PositiveZero.IsNormal() should be false")
89+
}
90+
if SmallestSubnormal.IsNormal() {
91+
t.Error("SmallestSubnormal.IsNormal() should be false")
92+
}
93+
}
94+
95+
func TestIsSubnormal(t *testing.T) {
96+
if !SmallestSubnormal.IsSubnormal() {
97+
t.Error("SmallestSubnormal.IsSubnormal() should be true")
98+
}
99+
if One().IsSubnormal() {
100+
t.Error("One().IsSubnormal() should be false")
101+
}
102+
}
103+
104+
func TestFpClassify(t *testing.T) {
105+
if FpClassify(One()) != ClassPositiveNormal {
106+
t.Error("FpClassify(One()) should be ClassPositiveNormal")
107+
}
108+
if FpClassify(PositiveZero) != ClassPositiveZero {
109+
t.Error("FpClassify(PositiveZero) should be ClassPositiveZero")
110+
}
111+
if FpClassify(SmallestSubnormal) != ClassPositiveSubnormal {
112+
t.Error("FpClassify(SmallestSubnormal) should be ClassPositiveSubnormal")
113+
}
114+
if FpClassify(PositiveInfinity) != ClassPositiveInfinity {
115+
t.Error("FpClassify(PositiveInfinity) should be ClassPositiveInfinity")
116+
}
117+
if FpClassify(QuietNaN) != ClassQuietNaN {
118+
t.Error("FpClassify(QuietNaN) should be ClassQuietNaN")
119+
}
120+
}
121+
122+
func TestGetBenchmarkOperations(t *testing.T) {
123+
ops := GetBenchmarkOperations()
124+
if len(ops) == 0 {
125+
t.Error("Expected benchmark operations, got empty slice")
126+
}
127+
}
128+
129+
func TestValidateSliceLength(t *testing.T) {
130+
tests := []struct {
131+
name string
132+
a, b []Float16
133+
expectError bool
134+
}{
135+
{"equal lengths", []Float16{1}, []Float16{2}, false},
136+
{"unequal lengths", []Float16{1}, []Float16{2, 3}, true},
137+
}
138+
139+
for _, tt := range tests {
140+
t.Run(tt.name, func(t *testing.T) {
141+
err := ValidateSliceLength(tt.a, tt.b)
142+
if (err != nil) != tt.expectError {
143+
t.Errorf("ValidateSliceLength() error = %v, expectError %v", err, tt.expectError)
144+
}
145+
})
146+
}
147+
}
148+
149+
func TestFastAdd(t *testing.T) {
150+
a := One()
151+
b := One()
152+
result := FastAdd(a, b)
153+
if result != 0x4000 {
154+
t.Errorf("FastAdd(1, 1) = %v, want 2", result)
155+
}
156+
}
157+
158+
func TestFastMul(t *testing.T) {
159+
a := FromInt(2)
160+
b := FromInt(3)
161+
result := FastMul(a, b)
162+
if result != 0x4600 {
163+
t.Errorf("FastMul(2, 3) = %v, want 6", result)
164+
}
165+
}
166+
167+
func TestVectorAdd(t *testing.T) {
168+
a := []Float16{One(), One()}
169+
b := []Float16{One(), One()}
170+
result := VectorAdd(a, b)
171+
if result[0] != 0x4000 || result[1] != 0x4000 {
172+
t.Errorf("VectorAdd([1, 1], [1, 1]) = %v, want [2, 2]", result)
173+
}
174+
}
175+
176+
func TestVectorMul(t *testing.T) {
177+
a := []Float16{FromInt(2), FromInt(3)}
178+
b := []Float16{FromInt(3), FromInt(4)}
179+
result := VectorMul(a, b)
180+
if result[0] != 0x4600 || result[1] != 0x4900 {
181+
t.Errorf("VectorMul([2, 3], [3, 4]) = %v, want [6 (0x4600), 12 (0x4900)]", result)
182+
}
183+
}
184+
185+
func TestToSlice16(t *testing.T) {
186+
input := []float32{0.0, 1.0, 2.0, -1.0}
187+
expected := []Float16{PositiveZero, ToFloat16(1.0), ToFloat16(2.0), ToFloat16(-1.0)}
188+
189+
result := ToSlice16(input)
190+
if len(result) != len(expected) {
191+
t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected))
192+
}
193+
194+
for i := range result {
195+
if result[i] != expected[i] {
196+
t.Errorf("ToSlice16[%d] = 0x%04x, expected 0x%04x", i, result[i], expected[i])
197+
}
198+
}
199+
}
200+
201+
func TestToSlice32(t *testing.T) {
202+
input := []Float16{PositiveZero, ToFloat16(1.0), ToFloat16(2.0), ToFloat16(-1.0)}
203+
expected := []float32{0.0, 1.0, 2.0, -1.0}
204+
205+
result := ToSlice32(input)
206+
if len(result) != len(expected) {
207+
t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected))
208+
}
209+
210+
for i := range result {
211+
if result[i] != expected[i] {
212+
t.Errorf("ToSlice32[%d] = %g, expected %g", i, result[i], expected[i])
213+
}
214+
}
215+
}
216+
217+
func TestToSlice32Empty(t *testing.T) {
218+
input := []Float16{}
219+
result := ToSlice32(input)
220+
if len(result) != 0 {
221+
t.Errorf("Expected empty slice, got %v", result)
222+
}
223+
}

arithmetic.go

Lines changed: 20 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
package float16
22

33
import (
4-
"math/bits"
4+
"fmt"
55
)
66

77
// Global arithmetic settings
88
var (
99
DefaultArithmeticMode = ModeIEEEArithmetic
10-
DefaultRounding = RoundNearestEven
10+
DefaultRounding = DefaultRoundingMode
1111
)
1212

1313
// ArithmeticMode defines the precision/performance trade-off for arithmetic operations
@@ -331,121 +331,12 @@ func DivWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa
331331

332332
// addIEEE754 implements full IEEE 754 addition
333333
func addIEEE754(a, b Float16, rounding RoundingMode) (Float16, error) {
334-
// Extract components
335-
signA, expA, mantA := a.extractComponents()
336-
signB, expB, mantB := b.extractComponents()
337-
338-
// Ensure a has the larger magnitude for simpler logic
339-
if expA < expB || (expA == expB && mantA < mantB) {
340-
signA, expA, mantA, signB, expB, mantB = signB, expB, mantB, signA, expA, mantA
341-
}
342-
343-
// Handle subnormal numbers by normalizing
344-
if expA == 0 && mantA != 0 {
345-
// Normalize a
346-
shift := leadingZeros10(mantA)
347-
mantA <<= (shift + 1)
348-
mantA &= MantissaMask
349-
expA = uint16(1 - shift)
350-
} else if expA != 0 {
351-
// Add implicit leading 1 for normal numbers
352-
mantA |= (1 << MantissaLen)
353-
expA = expA
354-
}
355-
356-
if expB == 0 && mantB != 0 {
357-
// Normalize b
358-
shift := leadingZeros10(mantB)
359-
mantB <<= (shift + 1)
360-
mantB &= MantissaMask
361-
expB = uint16(1 - shift)
362-
} else if expB != 0 {
363-
// Add implicit leading 1 for normal numbers
364-
mantB |= (1 << MantissaLen)
365-
}
366-
367-
// Align mantissas by shifting the smaller one
368-
expDiff := int(expA) - int(expB)
369-
if expDiff > 0 {
370-
if expDiff >= 24 {
371-
// b is too small to affect the result
372-
return a, nil
373-
}
374-
mantB >>= expDiff
375-
}
376-
377-
var resultSign uint16
378-
var resultMant uint32
379-
var resultExp int
380-
381-
if signA == signB {
382-
// Same sign: add magnitudes
383-
resultSign = signA
384-
resultMant = uint32(mantA) + uint32(mantB)
385-
resultExp = int(expA)
386-
} else {
387-
// Different signs: subtract magnitudes
388-
if mantA >= mantB {
389-
resultSign = signA
390-
resultMant = uint32(mantA) - uint32(mantB)
391-
} else {
392-
resultSign = signB
393-
resultMant = uint32(mantB) - uint32(mantA)
394-
}
395-
resultExp = int(expA)
396-
}
397-
398-
// Handle zero result
399-
if resultMant == 0 {
400-
return PositiveZero, nil
401-
}
402-
403-
// Normalize result
404-
if resultMant >= (1 << (MantissaLen + 1)) {
405-
// Overflow: shift right and increment exponent
406-
resultMant >>= 1
407-
resultExp++
408-
} else {
409-
// Find leading 1 and normalize
410-
leadingZeros := 31 - bits.Len32(resultMant)
411-
if leadingZeros > 0 {
412-
shift := leadingZeros - (31 - MantissaLen - 1)
413-
if shift > 0 {
414-
resultMant <<= shift
415-
resultExp -= shift
416-
}
417-
}
418-
}
419-
420-
// Check for overflow
421-
if resultExp >= ExponentInfinity {
422-
if resultSign != 0 {
423-
return NegativeInfinity, nil
424-
}
425-
return PositiveInfinity, nil
426-
}
427-
428-
// Check for underflow
429-
if resultExp <= 0 {
430-
// Convert to subnormal or zero
431-
shift := 1 - resultExp
432-
if shift >= 24 {
433-
// Underflow to zero
434-
if resultSign != 0 {
435-
return NegativeZero, nil
436-
}
437-
return PositiveZero, nil
438-
}
439-
resultMant >>= shift
440-
resultExp = 0
441-
}
442-
443-
// Remove implicit leading 1 for normal numbers
444-
if resultExp > 0 {
445-
resultMant &= MantissaMask
446-
}
447-
448-
return packComponents(resultSign, uint16(resultExp), uint16(resultMant)), nil
334+
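// For addition, convert both operands to float32, add them there, and round the sum back to float16.
335+
// float32's 24-bit significand is wide enough that this double rounding is harmless for round-to-nearest; the float32 add itself always rounds to nearest, so directed rounding modes may not be correctly rounded (TODO confirm).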
336+
f32a := a.ToFloat32()
337+
f32b := b.ToFloat32()
338+
result := f32a + f32b
339+
return ToFloat16WithMode(result, ModeIEEE, rounding)
449340
}
450341

451342
// mulIEEE754 implements full IEEE 754 multiplication
@@ -542,7 +433,13 @@ func Min(a, b Float16) Float16 {
542433
if b.IsNaN() {
543434
return a
544435
}
545-
436+
// Handle -0 and +0
437+
if a.IsZero() && b.IsZero() {
438+
if a.Signbit() {
439+
return a // a is -0
440+
}
441+
return b // b is -0, or both are +0
442+
}
546443
if Less(a, b) {
547444
return a
548445
}
@@ -601,8 +498,12 @@ func MulSlice(a, b []Float16) []Float16 {
601498

602499
result := make([]Float16, len(a))
603500
for i := range a {
604-
result[i] = Mul(a[i], b[i])
501+
product := Mul(a[i], b[i])
502+
result[i] = product
503+
// Debug print
504+
fmt.Printf("MulSlice: a[%d]=%v (0x%04X), b[%d]=%v (0x%04X), product=%v (0x%04X)\n", i, a[i], uint16(a[i]), i, b[i], uint16(b[i]), product, uint16(product))
605505
}
506+
fmt.Printf("MulSlice: result=%v\n", result)
606507
return result
607508
}
608509

0 commit comments

Comments
 (0)