Skip to content

Commit 43fde11

Browse files
authored
Merge pull request #226 from Joshua-Dias-Barreto/describe
Added describe method for numeric dataframes
2 parents a667793 + e13f4d5 commit 43fde11

2 files changed

Lines changed: 43 additions & 0 deletions

File tree

src/DataFrame-Tests/DataFrameTest.class.st

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1552,6 +1552,26 @@ DataFrameTest >> testDataTypesWithNil [
15521552
self assert: df dataTypes equals: expected
15531553
]
15541554

1555+
{ #category : #tests }
DataFrameTest >> testDescribe [
	"Check that #describe answers one row of summary statistics per column of a small numeric data frame. Column B contains a nil and its expected count is 2, so the nil cell must not be counted."

	| input summary |
	input := DataFrame
		         withRows: #( #( 1 1 ) #( 2 nil ) #( 3 1 ) )
		         columnNames: #( 'A' 'B' ).

	summary := DataFrame withRows:
		           #( #( 3 2 1 1 1 2 3 3 SmallInteger )
		              #( 2 1 0 1 1 1 1 1 SmallInteger ) ).
	summary
		rowNames: #( 'A' 'B' );
		columnNames:
			#( 'count' 'mean' 'std' 'min' '25%' '50%' '75%' 'max' 'dtype' ).

	"Inside a literal array a bare SmallInteger is read as the symbol #SmallInteger, not the class, so the dtype cells (column 9) of both rows are overwritten with the actual class."
	1 to: 2 do: [ :rowIndex |
		summary at: rowIndex at: 9 put: SmallInteger ].

	self assert: input describe equals: summary
]
1574+
15551575
{ #category : #tests }
15561576
DataFrameTest >> testDetect [
15571577
| actual expected |

src/DataFrame/DataFrame.class.st

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -941,6 +941,29 @@ DataFrame >> defaultHeadTailSize [
941941
^ 5
942942
]
943943

944+
{ #category : #statistics }
DataFrame >> describe [
	"Answer a new data frame of descriptive statistics for the receiver.
	The result has one row per element of #numericalColumns, labelled with #numericalColumnNames.
	Each row holds, in order, the answers of #countNonNils, #average, #stdev, #min, #firstQuartile, #secondQuartile, #thirdQuartile, #max and #calculateDataType sent to that column; the result's columns are named count, mean, std, min, 25%, 50%, 75%, max and dtype respectively."

	| statisticNames summaryRows |
	"Header of the answered data frame; the order must match the order of the values gathered per column below."
	statisticNames := #( count mean std min '25%' '50%' '75%' max dtype ).

	summaryRows := self numericalColumns collect: [ :each |
		               {
			               each countNonNils.
			               each average.
			               each stdev.
			               each min.
			               each firstQuartile.
			               each secondQuartile.
			               each thirdQuartile.
			               each max.
			               each calculateDataType } ].

	^ self class
		  withRows: summaryRows
		  rowNames: self numericalColumnNames
		  columnNames: statisticNames
]
966+
944967
{ #category : #accessing }
945968
DataFrame >> dimensions [
946969
"Returns the number of rows and number of columns in a DataFrame"

0 commit comments

Comments
 (0)