Skip to content

Commit f6bffda

Browse files
authored
Implement Bytes.__new__ (#943)
* Implement Bytes.__new__
* Add tests for Bytes.__new__
* Support __bytes__ for all Bytes construction code paths
* Optimize construction of bytes from bytes
1 parent e630a0a commit f6bffda

8 files changed

Lines changed: 218 additions & 77 deletions

File tree

Src/IronPython/Runtime/ByteArray.cs

Lines changed: 7 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ public void __init__(CodeContext context, object? source) {
9292
}
9393
IEnumerator ie = PythonOps.GetEnumerator(context, source);
9494
while (ie.MoveNext()) {
95-
Add(GetByte(ie.Current));
95+
Add(ByteOps.GetByte(ie.Current));
9696
}
9797
}
9898
}
@@ -127,7 +127,7 @@ public void append(int item) {
127127

128128
public void append(object? item) {
129129
lock (this) {
130-
_bytes.Add(GetByte(item));
130+
_bytes.Add(ByteOps.GetByte(item));
131131
}
132132
}
133133

@@ -138,13 +138,13 @@ public void extend([NotNull]IEnumerable<byte> seq) {
138138
}
139139
}
140140

141-
public void extend(object? seq) {
141+
public void extend(CodeContext context, object? seq) {
142142
// We don't make use of the length hint when extending the byte array.
143143
// However, in order to match CPython behavior with invalid length hints we
144144
need to go through the motions and get the length hint and attempt
145145
// to convert it to an int.
146146

147-
extend(GetBytes(seq, useHint: true));
147+
extend(ByteOps.GetBytes(seq, useHint: true, context));
148148
}
149149

150150
public void insert(int index, int value) {
@@ -205,7 +205,7 @@ public void remove(int value) {
205205

206206
public void remove(object? value) {
207207
lock (this) {
208-
RemoveByte(GetByte(value));
208+
RemoveByte(ByteOps.GetByte(value));
209209
}
210210
}
211211

@@ -1214,7 +1214,7 @@ public object? this[int index] {
12141214
}
12151215
set {
12161216
lock (this) {
1217-
_bytes[PythonOps.FixIndex(index, _bytes.Count)] = GetByte(value);
1217+
_bytes[PythonOps.FixIndex(index, _bytes.Count)] = ByteOps.GetByte(value);
12181218
}
12191219
}
12201220
}
@@ -1251,7 +1251,7 @@ public object? this[[NotNull]Slice slice] {
12511251
// integers, longs, etc... - fill in an array of 0 bytes
12521252
// list of bytes, indexables, etc...
12531253

1254-
IList<byte> list = GetBytes(value, useHint: false);
1254+
IList<byte> list = ByteOps.GetBytes(value, useHint: false);
12551255

12561256
lock (this) {
12571257
slice.indices(_bytes.Count, out int start, out int stop, out int step);
@@ -1345,41 +1345,6 @@ private void SliceNoStep(int start, int stop, IList<byte> other) {
13451345
}
13461346
}
13471347

1348-
private static byte GetByte(object? value) {
1349-
if (Converter.TryConvertToIndex(value, out object index)) {
1350-
switch (index) {
1351-
case int i: return i.ToByteChecked();
1352-
case BigInteger bi: return bi.ToByteChecked();
1353-
default: throw new InvalidOperationException(); // unreachable
1354-
}
1355-
}
1356-
throw PythonOps.TypeError("an integer is required");
1357-
}
1358-
1359-
internal static IList<byte> GetBytes(object? value, bool useHint) {
1360-
switch (value) {
1361-
case IList<byte> lob when !(lob is ListGenericWrapper<byte>):
1362-
return lob;
1363-
case IBufferProtocol bp:
1364-
using (IPythonBuffer buf = bp.GetBuffer()) {
1365-
return buf.AsReadOnlySpan().ToArray();
1366-
}
1367-
case ReadOnlyMemory<byte> rom:
1368-
return rom.ToArray();
1369-
case Memory<byte> mem:
1370-
return mem.ToArray();
1371-
default:
1372-
int len = 0;
1373-
if (useHint) PythonOps.TryInvokeLengthHint(DefaultContext.Default, value, out len);
1374-
List<byte> ret = new List<byte>(len);
1375-
IEnumerator ie = PythonOps.GetEnumerator(value);
1376-
while (ie.MoveNext()) {
1377-
ret.Add(GetByte(ie.Current));
1378-
}
1379-
return ret;
1380-
}
1381-
}
1382-
13831348
#endregion
13841349

13851350
#region IList<byte> Members

Src/IronPython/Runtime/Bytes.cs

Lines changed: 111 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,14 @@
1111
using System.Linq;
1212
using System.Linq.Expressions;
1313
using System.Numerics;
14-
using System.Runtime.InteropServices;
1514
using System.Text;
1615

1716
using Microsoft.Scripting.Runtime;
1817
using Microsoft.Scripting.Utils;
1918

2019
using IronPython.Runtime.Operations;
2120
using IronPython.Runtime.Types;
22-
using IronPython.Hosting;
21+
using NotNullWhenAttribute = System.Diagnostics.CodeAnalysis.NotNullWhenAttribute;
2322

2423
namespace IronPython.Runtime {
2524
[PythonType("bytes"), Serializable]
@@ -31,53 +30,110 @@ public Bytes() {
3130
_bytes = new byte[0];
3231
}
3332

34-
public Bytes([NotNull]IEnumerable<byte> bytes) {
33+
public Bytes([NotNull] Bytes bytes) {
34+
_bytes = bytes._bytes;
35+
}
36+
37+
public Bytes([NotNull] IEnumerable<byte> bytes) {
3538
_bytes = bytes.ToArray();
3639
}
3740

38-
public Bytes([BytesLike, NotNull]IBufferProtocol source) {
41+
public Bytes([NotNull] IBufferProtocol source) {
3942
using IPythonBuffer buffer = source.GetBuffer(BufferFlags.FullRO);
4043
_bytes = buffer.ToArray();
4144
}
4245

43-
public Bytes(CodeContext context, object? source) {
44-
if (PythonTypeOps.TryInvokeUnaryOperator(context, source, "__bytes__", out object? res)) {
45-
if (res is Bytes bytes) {
46-
_bytes = bytes._bytes;
46+
[StaticExtensionMethod]
47+
public static object __new__(CodeContext context, [NotNull] PythonType cls) {
48+
if (cls == TypeCache.Bytes) {
49+
return Empty;
50+
} else {
51+
return cls.CreateInstance(context);
52+
}
53+
}
54+
55+
[StaticExtensionMethod]
56+
public static object __new__(CodeContext context, [NotNull] PythonType cls, [NotNull] IBufferProtocol source) {
57+
if (cls == TypeCache.Bytes) {
58+
if (source.GetType() == typeof(Bytes)) {
59+
return source;
60+
} else if (TryInvokeBytesOperator(context, source, out Bytes? res)) {
61+
return res;
4762
} else {
48-
throw PythonOps.TypeError("__bytes__ returned non-bytes (got '{0}' from type '{1}')", PythonOps.GetPythonTypeName(res), PythonOps.GetPythonTypeName(source));
63+
return new Bytes(source);
4964
}
50-
} else if (Converter.TryConvertToIndex(source, throwOverflowError: true, out int size)) {
51-
if (size < 0) throw PythonOps.ValueError("negative count");
52-
_bytes = new byte[size];
5365
} else {
54-
_bytes = ByteArray.GetBytes(source, useHint: true).ToArray();
66+
return cls.CreateInstance(context, __new__(context, TypeCache.Bytes, source));
5567
}
5668
}
5769

58-
public Bytes([NotNull]IEnumerable<object?> source) {
59-
_bytes = source.Select(b => ((int)PythonOps.Index(b)).ToByteChecked()).ToArray();
70+
[StaticExtensionMethod]
71+
public static object __new__(CodeContext context, [NotNull] PythonType cls, object? @object) {
72+
if (cls == TypeCache.Bytes) {
73+
return FromObject(context, @object);
74+
} else {
75+
return cls.CreateInstance(context, __new__(context, TypeCache.Bytes, @object));
76+
}
6077
}
6178

62-
public Bytes([NotNull]PythonList bytes) {
63-
_bytes = ByteOps.GetBytes(bytes).ToArray();
79+
[StaticExtensionMethod]
80+
public static object __new__(CodeContext context, [NotNull] PythonType cls, [NotNull] Extensible<int> size) {
81+
if (cls == TypeCache.Bytes) {
82+
if (TryInvokeBytesOperator(context, size, out Bytes? res)) {
83+
return res;
84+
} else {
85+
if (size < 0) throw PythonOps.ValueError("negative count");
86+
return new Bytes(new byte[size]);
87+
}
88+
} else {
89+
return cls.CreateInstance(context, __new__(context, TypeCache.Bytes, size));
90+
}
6491
}
6592

66-
public Bytes(int size) {
67-
if (size < 0) throw PythonOps.ValueError("negative count");
68-
_bytes = new byte[size];
93+
[StaticExtensionMethod]
94+
public static object __new__(CodeContext context, [NotNull] PythonType cls, int size) {
95+
if (cls == TypeCache.Bytes) {
96+
if (size < 0) throw PythonOps.ValueError("negative count");
97+
return new Bytes(new byte[size]);
98+
} else {
99+
return cls.CreateInstance(context, __new__(context, TypeCache.Bytes, size));
100+
}
69101
}
70102

71-
public Bytes([NotNull]string @string) {
103+
[StaticExtensionMethod]
104+
public static object __new__(CodeContext context, [NotNull] PythonType cls, [NotNull] ExtensibleString @string) {
105+
if (cls == TypeCache.Bytes) {
106+
if (TryInvokeBytesOperator(context, @string, out Bytes? res)) {
107+
return res;
108+
} else {
109+
throw PythonOps.TypeError("string argument without an encoding");
110+
}
111+
} else {
112+
return cls.CreateInstance(context, __new__(context, TypeCache.Bytes, @string));
113+
}
114+
}
115+
116+
[StaticExtensionMethod]
117+
public static object __new__(CodeContext context, [NotNull] PythonType cls, [NotNull] string @string) {
72118
throw PythonOps.TypeError("string argument without an encoding");
73119
}
74120

75-
public Bytes(CodeContext context, [NotNull]string @string, [NotNull]string encoding) {
76-
_bytes = StringOps.encode(context, @string, encoding, "strict").UnsafeByteArray;
121+
[StaticExtensionMethod]
122+
public static object __new__(CodeContext context, [NotNull] PythonType cls, [NotNull] string @string, [NotNull] string encoding) {
123+
if (cls == TypeCache.Bytes) {
124+
return StringOps.encode(context, @string, encoding);
125+
} else {
126+
return cls.CreateInstance(context, __new__(context, TypeCache.Bytes, @string, encoding));
127+
}
77128
}
78129

79-
public Bytes(CodeContext context, [NotNull]string @string, [NotNull]string encoding, [NotNull]string errors) {
80-
_bytes = StringOps.encode(context, @string, encoding, errors).UnsafeByteArray;
130+
[StaticExtensionMethod]
131+
public static object __new__(CodeContext context, [NotNull] PythonType cls, [NotNull] string @string, [NotNull] string encoding, [NotNull] string errors) {
132+
if (cls == TypeCache.Bytes) {
133+
return StringOps.encode(context, @string, encoding, errors);
134+
} else {
135+
return cls.CreateInstance(context, __new__(context, TypeCache.Bytes, @string, encoding, errors));
136+
}
81137
}
82138

83139
private Bytes(byte[] bytes) {
@@ -89,6 +145,21 @@ private Bytes(byte[] bytes) {
89145
internal static Bytes FromByte(byte b)
90146
=> oneByteBytes[b];
91147

148+
internal static Bytes FromObject(CodeContext context, object? o) {
149+
if (o == null) {
150+
throw PythonOps.TypeError("cannot convert 'NoneType' object to bytes");
151+
} else if (o.GetType() == typeof(Bytes)) {
152+
return (Bytes)o;
153+
} else if (TryInvokeBytesOperator(context, o, out Bytes? res)) {
154+
return res;
155+
} else if (Converter.TryConvertToIndex(o, throwOverflowError: true, out int size)) {
156+
if (size < 0) throw PythonOps.ValueError("negative count");
157+
return new Bytes(new byte[size]);
158+
} else {
159+
return new Bytes(ByteOps.GetBytes(o, useHint: true, context).ToArray());
160+
}
161+
}
162+
92163
internal static Bytes Make(byte[] bytes)
93164
=> new Bytes(bytes);
94165

@@ -364,7 +435,7 @@ public Bytes join(object? sequence) {
364435

365436
public Bytes join([NotNull]PythonList sequence) {
366437
if (sequence.__len__() == 0) {
367-
return new Bytes();
438+
return Empty;
368439
} else if (sequence.__len__() == 1) {
369440
return JoinOne(sequence[0]);
370441
}
@@ -892,6 +963,20 @@ internal ReadOnlyMemory<byte> AsMemory() {
892963
return _bytes.AsMemory();
893964
}
894965

966+
private static bool TryInvokeBytesOperator(CodeContext context, object? obj, [NotNullWhen(true)] out Bytes? bytes) {
967+
if (PythonTypeOps.TryInvokeUnaryOperator(context, obj, "__bytes__", out object? res)) {
968+
if (res is Bytes b) {
969+
bytes = b;
970+
return true;
971+
} else {
972+
throw PythonOps.TypeError("__bytes__ returned non-bytes (got '{0}' from type '{1}')", PythonOps.GetPythonTypeName(res), PythonOps.GetPythonTypeName(obj));
973+
}
974+
} else {
975+
bytes = null;
976+
return false;
977+
}
978+
}
979+
895980
private static Bytes JoinOne(object? curVal) {
896981
if (curVal?.GetType() == typeof(Bytes)) {
897982
return (Bytes)curVal;

Src/IronPython/Runtime/Operations/ByteOps.cs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,31 @@ internal static IList<byte> CoerceBytes(object? obj) {
106106
throw PythonOps.TypeError("a bytes-like object is required, not '{0}'", PythonTypeOps.GetName(obj));
107107
}
108108

109-
internal static List<byte> GetBytes(ICollection bytes) {
110-
return bytes.Select(GetByte).ToList();
109+
internal static IList<byte> GetBytes(object? value, bool useHint, CodeContext? context = null) {
110+
switch (value) {
111+
case IList<byte> lob when !(lob is ListGenericWrapper<byte>):
112+
return lob;
113+
case IBufferProtocol bp:
114+
using (IPythonBuffer buf = bp.GetBuffer()) {
115+
return buf.AsReadOnlySpan().ToArray();
116+
}
117+
case ReadOnlyMemory<byte> rom:
118+
return rom.ToArray();
119+
case Memory<byte> mem:
120+
return mem.ToArray();
121+
default:
122+
int len = 0;
123+
if (useHint) PythonOps.TryInvokeLengthHint(context ?? DefaultContext.Default, value, out len);
124+
List<byte> ret = new List<byte>(len);
125+
IEnumerator ie = PythonOps.GetEnumerator(value);
126+
while (ie.MoveNext()) {
127+
ret.Add(GetByte(ie.Current));
128+
}
129+
return ret;
130+
}
111131
}
112132

113-
private static byte GetByte(object? o) {
133+
internal static byte GetByte(object? o) {
114134
// TODO: move fast paths to TryConvertToIndex?
115135
switch (o) {
116136
case int ii:
@@ -135,8 +155,13 @@ private static byte GetByte(object? o) {
135155
return ((BigInteger)ui).ToByteChecked();
136156
}
137157

138-
if (Converter.TryConvertToIndex(o, out int i))
139-
return i.ToByteChecked();
158+
if (Converter.TryConvertToIndex(o, out object index)) {
159+
switch (index) {
160+
case int i: return i.ToByteChecked();
161+
case BigInteger bi: return bi.ToByteChecked();
162+
default: throw new InvalidOperationException(); // unreachable
163+
}
164+
}
140165

141166
throw PythonOps.TypeError($"'{PythonTypeOps.GetName(o)}' object cannot be interpreted as an integer");
142167
}

Src/IronPython/Runtime/Operations/IntOps.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ public static BigInteger from_bytes(CodeContext context, object bytes, string by
545545
bool isLittle = byteorder == "little";
546546
if (!isLittle && byteorder != "big") throw PythonOps.ValueError("byteorder must be either 'little' or 'big'");
547547

548-
return FromBytes(new Bytes(context, bytes), isLittle, signed);
548+
return FromBytes(Bytes.FromObject(context, bytes), isLittle, signed);
549549
}
550550

551551
private static BigInteger FromBytes(IList<byte> bytes, bool isLittle, bool signed) {

Src/IronPython/Runtime/Types/TypeCache.Generated.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ public static class TypeCache {
2828
private static PythonType setcollection;
2929
private static PythonType pythontype;
3030
private static PythonType str;
31+
private static PythonType bytes;
3132
private static PythonType pythontuple;
3233
private static PythonType weakreference;
3334
private static PythonType pythonlist;
@@ -123,6 +124,13 @@ public static PythonType String {
123124
}
124125
}
125126

127+
public static PythonType Bytes {
128+
get {
129+
if (bytes == null) bytes = DynamicHelpers.GetPythonTypeFromType(typeof(Bytes));
130+
return bytes;
131+
}
132+
}
133+
126134
public static PythonType PythonTuple {
127135
get {
128136
if (pythontuple == null) pythontuple = DynamicHelpers.GetPythonTypeFromType(typeof(PythonTuple));

Src/Scripts/generate_typecache.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def __init__(self, type, name=None, typeType='PythonType', entryName=None):
4444
TypeData('SetCollection', entryName='Set'),
4545
TypeData('PythonType'),
4646
TypeData('String', 'str'),
47+
TypeData('Bytes'),
4748
TypeData('PythonTuple'),
4849
TypeData('WeakReference'),
4950
TypeData('PythonList'),

0 commit comments

Comments
 (0)