Skip to content

Commit 8d89552

Browse files
Fixed #222 . Added DataSeries>> #encodeOneHot.
1 parent 5958fc0 commit 8d89552

2 files changed

Lines changed: 65 additions & 0 deletions

File tree

src/DataFrame-Tests/DataSeriesTest.class.st

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,52 @@ DataSeriesTest >> testEighth [
12731273
self assert: series eighth equals: 10
12741274
]
12751275

1276+
{ #category : #'tests - converting' }
1277+
DataSeriesTest >> testEncodeOneHot [
1278+
1279+
| actual expected |
1280+
actual := #( 1 2 3 4 ) asDataSeries encodeOneHot.
1281+
expected := #(
1282+
#(1 0 0 0)
1283+
#(0 1 0 0)
1284+
#(0 0 1 0)
1285+
#(0 0 0 1)
1286+
) asDataSeries.
1287+
1288+
self assert: actual equals: expected
1289+
]
1290+
1291+
{ #category : #'tests - converting' }
1292+
DataSeriesTest >> testEncodeOneHotRomanNumbers [
1293+
1294+
| actual expected |
1295+
actual := (#( I XIV VII XXXII ) collect: [ :each | each romanNumber ])
1296+
asDataSeries encodeOneHot.
1297+
expected := #(
1298+
#(1 0 0 0)
1299+
#(0 0 1 0)
1300+
#(0 1 0 0)
1301+
#(0 0 0 1)
1302+
) asDataSeries.
1303+
1304+
self assert: actual equals: expected
1305+
]
1306+
1307+
{ #category : #'tests - converting' }
1308+
DataSeriesTest >> testEncodeOneHotStrings [
1309+
1310+
| actual expected |
1311+
actual := #( apple avocado orange banana ) asDataSeries encodeOneHot.
1312+
expected := #(
1313+
#(1 0 0 0)
1314+
#(0 1 0 0)
1315+
#(0 0 0 1)
1316+
#(0 0 1 0)
1317+
) asDataSeries.
1318+
1319+
self assert: actual equals: expected
1320+
]
1321+
12761322
{ #category : #'tests - comparing' }
12771323
DataSeriesTest >> testEquality [
12781324

src/DataFrame/DataSeries.class.st

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,25 @@ DataSeries >> eighth [
319319
^ self atIndex: 8
320320
]
321321

322+
{ #category : #converting }
323+
DataSeries >> encodeOneHot [
324+
"Encode the values of the DataSeries into one-hot vectors."
325+
326+
| uniqueValues encodingDataSeries oneHotValues |
327+
uniqueValues := self removeDuplicates sortIfPossible.
328+
encodingDataSeries := DataSeries new.
329+
uniqueValues withIndexDo: [ :value :index |
330+
encodingDataSeries at: value put: index ].
331+
oneHotValues := self values collect: [ :value |
332+
| oneHot |
333+
oneHot := encodingDataSeries keys collect: [ :key |
334+
value = key
335+
ifTrue: [ 1 ]
336+
ifFalse: [ 0 ] ].
337+
oneHot ].
338+
^ DataSeries withKeys: self keys values: oneHotValues name: self name
339+
]
340+
322341
{ #category : #private }
323342
DataSeries >> errorKeyNotFound: aKey [
324343

0 commit comments

Comments
 (0)