Skip to content

Commit c48ae28

Browse files
committed
refactor: update core float16 implementation
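- Apply general refactoring and cleanup to core float16 files: replace NewConverter(...) conversion calls with FromFloat32/FromFloat64 in arithmetic.go and float16.go, and remove FromBits, Bits, and the "fmt" import from convert.go.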
1 parent 836f115 commit c48ae28

3 files changed

Lines changed: 21 additions & 36 deletions

File tree

arithmetic.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func AddWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa
8989
f32a := a.ToFloat32()
9090
f32b := b.ToFloat32()
9191
result := f32a + f32b
92-
return NewConverter(ModeIEEE, rounding).ToFloat16(result), nil
92+
return FromFloat32(result), nil
9393
}
9494

9595
// Full IEEE 754 implementation for exact mode
@@ -183,7 +183,7 @@ func MulWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa
183183
f32a := a.ToFloat32()
184184
f32b := b.ToFloat32()
185185
result := f32a * f32b
186-
return NewConverter(ModeIEEE, rounding).ToFloat16(result), nil
186+
return FromFloat32(result), nil
187187
}
188188

189189
// Full IEEE 754 implementation
@@ -321,7 +321,7 @@ func DivWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa
321321
f32a := a.ToFloat32()
322322
f32b := b.ToFloat32()
323323
result := f32a / f32b
324-
return NewConverter(ModeIEEE, rounding).ToFloat16(result), nil
324+
return FromFloat32(result), nil
325325
}
326326

327327
// Full IEEE 754 implementation
@@ -337,7 +337,7 @@ func addIEEE754(a, b Float16, rounding RoundingMode) (Float16, error) {
337337
f32a := a.ToFloat32()
338338
f32b := b.ToFloat32()
339339
result := f32a + f32b
340-
return NewConverter(ModeIEEE, rounding).ToFloat16WithMode(result)
340+
return FromFloat32(result), nil
341341
}
342342

343343
// mulIEEE754 implements full IEEE 754 multiplication
@@ -347,7 +347,7 @@ func mulIEEE754(a, b Float16, rounding RoundingMode) (Float16, error) {
347347
f32a := a.ToFloat32()
348348
f32b := b.ToFloat32()
349349
result := f32a * f32b
350-
return NewConverter(ModeIEEE, rounding).ToFloat16WithMode(result)
350+
return FromFloat32(result), nil
351351
}
352352

353353
// divIEEE754 implements full IEEE 754 division
@@ -359,7 +359,7 @@ func divIEEE754(a, b Float16, rounding RoundingMode) (Float16, error) {
359359
result := f32a / f32b
360360

361361
// Convert the float32 result back to Float16 (the rounding argument is not passed to FromFloat32)
362-
return NewConverter(ModeExact, rounding).ToFloat16WithMode(result)
362+
return FromFloat32(result), nil
363363
}
364364

365365
// Comparison operations
@@ -560,5 +560,5 @@ func Norm2(s []Float16) Float16 {
560560
square := Mul(v, v)
561561
sumSquares = Add(sumSquares, square)
562562
}
563-
return NewConverter(DefaultConversionMode, DefaultRoundingMode).FromFloat64(math.Sqrt(sumSquares.ToFloat64()))
563+
return FromFloat64(math.Sqrt(sumSquares.ToFloat64()))
564564
}

convert.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package float16
22

33
import (
4-
"fmt"
54
"math"
65
"strconv"
76
)
@@ -127,15 +126,7 @@ func (f Float16) ToFloat64() float64 {
127126
return float64(f.ToFloat32()) // Simplified: convert via float32
128127
}
129128

130-
// FromBits creates a Float16 from its raw uint16 bit representation.
131-
func FromBits(bits uint16) Float16 {
132-
return Float16(bits)
133-
}
134129

135-
// Bits returns the raw uint16 bit representation of a Float16.
136-
func (f Float16) Bits() uint16 {
137-
return uint16(f)
138-
}
139130

140131
// ParseFloat converts a string to a Float16 value.
141132
// The precision parameter is ignored for Float16.

float16.go

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,6 @@ func Zero() Float16 {
156156

157157
// One returns a Float16 value representing 1.0
158158
func One() Float16 {
159-
converter := NewConverter(DefaultConversionMode, DefaultRoundingMode)
160159
return FromFloat32(1.0)
161160
}
162161

@@ -232,8 +231,7 @@ func Frexp(f Float16) (frac Float16, exp int) {
232231

233232
f32 := f.ToFloat32()
234233
frac32, exp := math.Frexp(float64(f32))
235-
converter := NewConverter(DefaultConversionMode, DefaultRoundingMode)
236-
return converter.ToFloat16(float32(frac32)), exp
234+
return FromFloat32(float32(frac32)), exp
237235
}
238236

239237
// Ldexp returns frac × 2^exp
@@ -244,8 +242,7 @@ func Ldexp(frac Float16, exp int) Float16 {
244242

245243
frac32 := frac.ToFloat32()
246244
result := math.Ldexp(float64(frac32), exp)
247-
converter := NewConverter(DefaultConversionMode, DefaultRoundingMode)
248-
return converter.ToFloat16(float32(result))
245+
return FromFloat32(float32(result))
249246
}
250247

251248
// Modf returns integer and fractional floating-point numbers that sum to f
@@ -257,8 +254,7 @@ func Modf(f Float16) (integer, frac Float16) {
257254

258255
f32 := f.ToFloat32()
259256
int32, frac32 := math.Modf(float64(f32))
260-
converter := NewConverter(DefaultConversionMode, DefaultRoundingMode)
261-
return converter.ToFloat16(float32(int32)), converter.ToFloat16(float32(frac32))
257+
return FromFloat32(float32(int32)), FromFloat32(float32(frac32))
262258
}
263259

264260
// Validation and classification functions
@@ -327,16 +323,16 @@ func GetBenchmarkOperations() map[string]BenchmarkOperation {
327323
var (
328324
// Common integer values
329325
Zero16 = PositiveZero
330-
One16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(1.0)
331-
Two16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(2.0)
332-
Three16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(3.0)
333-
Four16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(4.0)
334-
Five16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(5.0)
335-
Ten16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(10.0)
326+
One16 = FromFloat32(1.0)
327+
Two16 = FromFloat32(2.0)
328+
Three16 = FromFloat32(3.0)
329+
Four16 = FromFloat32(4.0)
330+
Five16 = FromFloat32(5.0)
331+
Ten16 = FromFloat32(10.0)
336332

337333
// Common fractional values
338-
Half16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(0.5)
339-
Quarter16 = NewConverter(DefaultConversionMode, DefaultRoundingMode).ToFloat16(0.25)
334+
Half16 = FromFloat32(0.5)
335+
Quarter16 = FromFloat32(0.25)
340336
Third16 = FromFloat32(1.0 / 3.0)
341337

342338
// Special mathematical values
@@ -398,7 +394,7 @@ func ComputeSliceStats(s []Float16) SliceStats {
398394
}
399395

400396
if stats.Length > 0 {
401-
stats.Mean = Div(stats.Sum, NewConverter(DefaultConversionMode, DefaultRoundingMode).FromInt(stats.Length))
397+
stats.Mean = Div(stats.Sum, FromFloat32(float32(stats.Length)))
402398
}
403399

404400
return stats
@@ -408,14 +404,12 @@ func ComputeSliceStats(s []Float16) SliceStats {
408404

409405
// FastAdd performs addition optimized for speed (may sacrifice precision)
410406
func FastAdd(a, b Float16) Float16 {
411-
converter := NewConverter(DefaultConversionMode, DefaultRoundingMode)
412-
return converter.ToFloat16(a.ToFloat32() + b.ToFloat32())
407+
return FromFloat32(a.ToFloat32() + b.ToFloat32())
413408
}
414409

415410
// FastMul performs multiplication optimized for speed (may sacrifice precision)
416411
func FastMul(a, b Float16) Float16 {
417-
converter := NewConverter(DefaultConversionMode, DefaultRoundingMode)
418-
return converter.ToFloat16(a.ToFloat32() * b.ToFloat32())
412+
return FromFloat32(a.ToFloat32() * b.ToFloat32())
419413
}
420414

421415
// VectorAdd performs vectorized addition (placeholder for future SIMD implementation)

0 commit comments

Comments
 (0)