Skip to content

Commit 0a4a4a7

Browse files
Merge branch 'PolyMathOrg:master' into describe
2 parents 30c616a + a667793 commit 0a4a4a7

6 files changed

Lines changed: 235 additions & 42 deletions

File tree

src/DataFrame-IO-Tests/DataFrameCsvReaderTest.class.st

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,10 @@ DataFrameCsvReaderTest >> testReadFromString [
106106
actualDataFrame := DataFrameCsvReader new readFromString: TestCsvStrings commaCsvString.
107107

108108
self
109-
assertCollection: actualDataFrame columns first
109+
assertCollection: actualDataFrame asArrayOfColumns first
110110
hasSameElements: #('1:10 am' '1:30 am' '1:50 am' '2:10 am' '2:30 am').
111111
self
112-
assertCollection: actualDataFrame columns last
112+
assertCollection: actualDataFrame asArrayOfColumns last
113113
hasSameElements: #('rain' 'rain' 'snow' '-' 'rain')
114114
]
115115

src/DataFrame-IO/DataFrameTypeDetector.class.st

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ DataFrameTypeDetector >> detectColumnTypeAndConvert: anArray [
146146
{ #category : #'public API' }
147147
DataFrameTypeDetector >> detectTypesAndConvert: aDataFrame [
148148

149-
aDataFrame columns with: aDataFrame columnNames do: [ :column :columnName |
149+
aDataFrame asArrayOfColumns with: aDataFrame columnNames do: [ :column :columnName |
150150
| thisColumnType |
151151
"Get the user given column type for this column name and if it wasn't
152152
given then use the default type detection"

src/DataFrame-Tests/DataFrameTest.class.st

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -758,11 +758,10 @@ DataFrameTest >> testColumnTransformNotFound [
758758
DataFrameTest >> testColumns [
759759

760760
| expectedCollection |
761-
762-
expectedCollection := #(
763-
(Barcelona Dubai London)
764-
(1.609 2.789 8.788)
765-
(true true false)).
761+
expectedCollection := {
762+
(df columnAt: 1).
763+
(df columnAt: 2).
764+
(df columnAt: 3) } asArray.
766765

767766
self assert: df columns equals: expectedCollection
768767
]
@@ -2845,6 +2844,36 @@ DataFrameTest >> testNumberOfNils [
28452844
self assert: df numberOfNils equals: expected
28462845
]
28472846

2847+
{ #category : #tests }
2848+
DataFrameTest >> testNumericalColumnNames [
2849+
"numericalColumnNames is expected to answer only the names of the columns that hold numbers (Age and Score in this fixture), as an OrderedCollection"
2850+
| dataFrame expected |
2851+
dataFrame := DataFrame withRows:
2852+
#( #( Male 21 Argentina 94 ) #( Female 20 France 97 )
2853+
#( Female 22 Spain 95 ) #( Male 24 Portugal 99 ) ).
2854+
dataFrame columnNames: #( Gender Age Country Score ).
2855+
2856+
expected := #( Age Score ) asOrderedCollection.
2857+
2858+
self assert: dataFrame numericalColumnNames equals: expected
2859+
]
2860+
2861+
{ #category : #tests }
2862+
DataFrameTest >> testNumericalColumns [
2863+
"numericalColumns is expected to answer the numerical columns themselves (the 2nd and 4th columns of this fixture), collected in an Array"
2864+
| dataFrame expectedCollection |
2865+
dataFrame := DataFrame withRows:
2866+
#( #( Male 21 Argentina 94 ) #( Female 20 France 97 )
2867+
#( Female 22 Spain 95 ) #( Male 24 Portugal 99 ) ).
2868+
dataFrame columnNames: #( Gender Age Country Score ).
2869+
2870+
expectedCollection := {
2871+
(dataFrame columnAt: 2).
2872+
(dataFrame columnAt: 4) } asArray.
2873+
2874+
self assert: dataFrame numericalColumns equals: expectedCollection
2875+
]
2876+
28482877
{ #category : #splitjoin }
28492878
DataFrameTest >> testOuterJoin [
28502879
| df2 expected |
@@ -3889,6 +3918,30 @@ DataFrameTest >> testReplaceNilsWithMode [
38893918
self assert: df replaceNilsWithMode equals: expected
38903919
]
38913920

3921+
{ #category : #replacing }
3922+
DataFrameTest >> testReplaceNilsWithNextRowValue [
3923+
"Each nil is expected to be replaced by the value found in the following row of the same column. A nil in the last row has no following row and is expected to stay nil."
3924+
| expected |
3925+
3926+
df := DataFrame withRows: #(
3927+
(7 1 nil)
3928+
(8 nil 3)
3929+
(nil 2 nil)).
3930+
3931+
df rowNames: #(A B C).
3932+
df columnNames: #(Column1 Column2 Column3).
3933+
3934+
expected := DataFrame withRows: #(
3935+
(7 1 3)
3936+
(8 2 3)
3937+
(nil 2 nil)).
3938+
3939+
expected rowNames: #(A B C).
3940+
expected columnNames: #(Column1 Column2 Column3).
3941+
3942+
self assert: df replaceNilsWithNextRowValue equals: expected
3943+
]
3944+
38923945
{ #category : #replacing }
38933946
DataFrameTest >> testReplaceNilsWithPreviousRowValue [
38943947

src/DataFrame-Tests/DataSeriesTest.class.st

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -874,10 +874,11 @@ DataSeriesTest >> testCategoricalIsNotNumerical [
874874

875875
{ #category : #'tests - categorical' }
876876
DataSeriesTest >> testCategoricalUniqueValues [
877+
877878
| aSeries expected actual |
878-
aSeries := DataSeries withKeys: #(a b c d e) values: #(z y y z x).
879-
expected := #(x y z).
880-
actual := aSeries uniqueValues.
879+
aSeries := DataSeries withKeys: #( a b c d e ) values: #( z y y z x ).
880+
expected := #( x y z ).
881+
actual := aSeries removeDuplicates.
881882
self assert: actual equals: expected
882883
]
883884

@@ -1587,6 +1588,42 @@ DataSeriesTest >> testMathTan [
15871588
self assert: a tan closeTo: b
15881589
]
15891590

1591+
{ #category : #'tests - statistics' }
1592+
DataSeriesTest >> testMax [
1593+
"max of a series without nils is expected to be its largest value"
1594+
self assert: #( 1 2 3 4 ) asDataSeries max equals: 4
1595+
]
1596+
1597+
{ #category : #'tests - statistics' }
1598+
DataSeriesTest >> testMaxWithNils [
1599+
"max is expected to skip nil entries and answer the largest of the remaining values"
1600+
self assert: #( 1 nil 3 nil ) asDataSeries max equals: 3
1601+
]
1602+
1603+
{ #category : #'tests - statistics' }
1604+
DataSeriesTest >> testMedian [
1605+
"median of an odd-sized series without nils is expected to be its middle value"
1606+
self assert: #( 1 2 3 4 5 ) asDataSeries median equals: 3
1607+
]
1608+
1609+
{ #category : #'tests - statistics' }
1610+
DataSeriesTest >> testMedianWithNils [
1611+
"median is expected to skip nil entries: the non-nil values here are 1 2 3 4 5, whose middle value is 3"
1612+
self assert: #( 1 2 nil 3 4 nil 5 ) asDataSeries median equals: 3
1613+
]
1614+
1615+
{ #category : #'tests - statistics' }
1616+
DataSeriesTest >> testMin [
1617+
"min of a series without nils is expected to be its smallest value"
1618+
self assert: #( 1 2 3 4 ) asDataSeries min equals: 1
1619+
]
1620+
1621+
{ #category : #'tests - statistics' }
1622+
DataSeriesTest >> testMinWithNils [
1623+
"min is expected to skip nil entries and answer the smallest of the remaining values"
1624+
self assert: #( nil 2 nil 4 ) asDataSeries min equals: 2
1625+
]
1626+
15901627
{ #category : #'tests - creation' }
15911628
DataSeriesTest >> testNewFrom [
15921629

@@ -1680,6 +1717,20 @@ DataSeriesTest >> testRemoveAtIndex [
16801717
self assert: series equals: expected
16811718
]
16821719

1720+
{ #category : #'tests - removing' }
1721+
DataSeriesTest >> testRemoveDuplicates [
1722+
"removeDuplicates on a series whose values contain the repeated element 1 is expected to answer the Array #( 7 1 3 ), in which every value appears once"
1723+
| dataseries expected |
1724+
dataseries := DataSeries
1725+
withKeys: #( 'A' 'B' 'C' 'D' 'E' )
1726+
values: #( 7 1 1 1 3 )
1727+
name: 'ExampleSeries'.
1728+
1729+
expected := #( 7 1 3 ).
1730+
1731+
self assert: dataseries removeDuplicates equals: expected
1732+
]
1733+
16831734
{ #category : #'tests - removing' }
16841735
DataSeriesTest >> testRemoveNils [
16851736

@@ -2161,6 +2212,18 @@ DataSeriesTest >> testStatsZerothQuartileEqualsMin [
21612212
self assert: series zerothQuartile equals: series min
21622213
]
21632214

2215+
{ #category : #'tests - statistics' }
2216+
DataSeriesTest >> testStdev [
2217+
"For the values 1 2 3 the expected result 1 is the sample standard deviation (squared deviations 1 + 0 + 1 divided by n - 1 = 2)"
2218+
self assert: #( 1 2 3 ) asDataSeries stdev equals: 1
2219+
]
2220+
2221+
{ #category : #'tests - statistics' }
2222+
DataSeriesTest >> testStdevWithNils [
2223+
"stdev is expected to skip nil entries: the non-nil values here are 1 2 3, for which the expected result is 1"
2224+
self assert: #( 1 nil 2 nil 3 ) asDataSeries stdev equals: 1
2225+
]
2226+
21642227
{ #category : #'tests - arithmetic' }
21652228
DataSeriesTest >> testSum [
21662229

src/DataFrame/DataFrame.class.st

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,7 @@ DataFrame >> average [
543543
{ #category : #'data-types' }
544544
DataFrame >> calculateDataTypes [
545545

546-
self columns doWithIndex: [ :column :i | self dataTypes at: (self columnNames at: i) put: column calculateDataType ]
546+
self asArrayOfColumns doWithIndex: [ :column :i | self dataTypes at: (self columnNames at: i) put: column calculateDataType ]
547547
]
548548

549549
{ #category : #comparing }
@@ -722,10 +722,9 @@ DataFrame >> columnNames: aCollection [
722722

723723
{ #category : #accessing }
724724
DataFrame >> columns [
725-
726725
"Returns a collection of all columns"
727726

728-
^ self asArrayOfColumns
727+
^ (1 to: self numberOfColumns) collect: [ :j | self columnAt: j ]
729728
]
730729

731730
{ #category : #accessing }
@@ -1098,7 +1097,7 @@ DataFrame >> hasNils [
10981097
"Returns true if there is at least one nil value in the data frame. Returns false if there are no nil values in the data frame"
10991098

11001099
| arrayOfColumns |
1101-
arrayOfColumns := self columns.
1100+
arrayOfColumns := self asArrayOfColumns.
11021101
1 to: self numberOfColumns do: [ :column |
11031102
1 to: self numberOfRows do: [ :row |
11041103
((arrayOfColumns at: column) at: row) ifNil: [ ^ true ] ] ].
@@ -1442,7 +1441,7 @@ DataFrame >> normalized [
14421441
'DataFrame will remove the dependency over normalization in the next version. You can use pharo-ai/data-preprocessing project to normalize your DataFrame and even more!'.
14431442
normalizers := (1 to: self anyOne size) collect: [ :e | self class defaultNormalizerClass new ].
14441443

1445-
normalizedColumns := self columns with: normalizers collect: [ :col :normalizer | col normalizedUsing: normalizer ].
1444+
normalizedColumns := self asArrayOfColumns with: normalizers collect: [ :col :normalizer | col normalizedUsing: normalizer ].
14461445

14471446
^ self class withColumns: normalizedColumns columnNames: self columnNames
14481447
]
@@ -1473,6 +1472,22 @@ DataFrame >> numberOfRows [
14731472
^ contents numberOfRows
14741473
]
14751474

1475+
{ #category : #accessing }
1476+
DataFrame >> numericalColumnNames [
1477+
"Returns the names of all numerical columns of the data frame, i.e. the column names whose entry in dataTypes is Number or one of its subclasses. The names are filtered from columnNames, so they keep the order they have there."
1478+
1479+
^ self columnNames select: [ :columnName |
1480+
(self dataTypes at: columnName) includesBehavior: Number ]
1481+
]
1482+
1483+
{ #category : #accessing }
1484+
DataFrame >> numericalColumns [
1485+
"Returns all numerical columns of the data frame, i.e. the columns whose data type, looked up in dataTypes under the column's own name, is Number or one of its subclasses. The columns are filtered from the result of columns."
1486+
1487+
^ self columns select: [ :column |
1488+
(self dataTypes at: column name) includesBehavior: Number ]
1489+
]
1490+
14761491
{ #category : #splitjoin }
14771492
DataFrame >> outerJoin: aDataFrame [
14781493
"Performs outer join on aDataFrame with rowNames as keys"
@@ -1838,6 +1853,18 @@ DataFrame >> replaceNilsWithMode [
18381853
modeOfColumn := nil ]
18391854
]
18401855

1856+
{ #category : #replacing }
1857+
DataFrame >> replaceNilsWithNextRowValue [
1858+
"Replaces, in place, every nil value of the data frame with the value of the next row in the same column. Each column is walked from the last row up to the first, so a run of consecutive nils is filled with the first non-nil value below it. The last row is never modified, so nils with no non-nil value below them stay nil."
1859+
| value numberOfRows |
1860+
numberOfRows := self numberOfRows.
1861+
1 to: self numberOfColumns do: [ :i |
1862+
numberOfRows to: 1 by: -1 do: [ :j |
1863+
j < numberOfRows ifTrue: [
1864+
(self at: j at: i) ifNil: [ self at: j at: i put: value ] ].
1865+
value := self at: j at: i ] ]
1866+
]
1867+
18411868
{ #category : #replacing }
18421869
DataFrame >> replaceNilsWithPreviousRowValue [
18431870
"Replaces all nil values of a data frame with the previous non-nil value of the column in which it is present"

0 commit comments

Comments
 (0)