Skip to content

Commit 30c616a

Browse files
Added describe method for numeric dataframes
1 parent 39f003e commit 30c616a

2 files changed

Lines changed: 65 additions & 0 deletions

File tree

src/DataFrame-Tests/DataFrameTest.class.st

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,6 +1553,26 @@ DataFrameTest >> testDataTypesWithNil [
15531553
self assert: df dataTypes equals: expected
15541554
]
15551555

1556+
{ #category : #tests }
DataFrameTest >> testDescribe [
	"describe answers one summary row per column of the receiver. Column B holds a nil, which is left out of both the count and the statistics."

	| input summary statColumns |
	statColumns := #( 'count' 'mean' 'std' 'min' '25%' '50%' '75%' 'max' 'dtype' ).

	input := DataFrame
		         withRows: #( #( 1 1 ) #( 2 nil ) #( 3 1 ) )
		         columnNames: #( 'A' 'B' ).

	summary := DataFrame withRows: #(
		           #( 3 2 1 1 1 2 3 3 SmallInteger )
		           #( 2 1 0 1 1 1 1 1 SmallInteger ) ).
	summary
		rowNames: #( 'A' 'B' );
		columnNames: statColumns.

	"Inside a literal array the bare word SmallInteger is read as a Symbol, so the dtype cells are overwritten with the actual class."
	1 to: 2 do: [ :row | summary at: row at: 9 put: SmallInteger ].

	self assert: input describe equals: summary
]
1575+
15561576
{ #category : #tests }
15571577
DataFrameTest >> testDetect [
15581578
| actual expected |

src/DataFrame/DataFrame.class.st

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,51 @@ DataFrame >> defaultHeadTailSize [
942942
^ 5
943943
]
944944

945+
{ #category : #statistics }
DataFrame >> describe [
	"Answer a new DataFrame that statistically summarises the receiver, which is expected to hold numeric columns.
	The result has one row per column of the receiver (row names are the receiver's column names) and these columns:
	count (number of non-nil values), mean, std, min, 25%, 50%, 75%, max and dtype (the result of calculateDataType).
	Nils are removed from a column before its statistics are computed. A column that contains only nils gets a count of 0, nil in every statistic cell, and the dtype computed on the unfiltered column."

	| statNames describeDF |
	statNames := #( 'count' 'mean' 'std' 'min' '25%' '50%' '75%' 'max' 'dtype' ).

	"The result is laid out as (columns of the receiver) rows by (number of statistics) columns."
	describeDF := self class new: self numberOfColumns @ statNames size.
	describeDF columnNames: statNames.
	describeDF rowNames: self columnNames.

	1 to: self numberOfColumns do: [ :i |
		| col count stats |
		col := self columnAt: i.
		count := col countNonNils.

		"Start every column from an explicit all-nil set of statistics instead of relying on loop-local temporaries being reset between iterations."
		stats := Array new: 7.
		count = 0 ifFalse: [
			col := col removeNils.
			stats := {
				         col average.
				         col stdev.
				         col min.
				         col firstQuartile.
				         col secondQuartile.
				         col thirdQuartile.
				         col max } ].

		describeDF at: i at: 1 put: count.
		"Statistics occupy the cells between count (first) and dtype (last)."
		stats doWithIndex: [ :each :k |
			describeDF at: i at: k + 1 put: each ].
		describeDF at: i at: statNames size put: col calculateDataType ].

	^ describeDF
]
989+
945990
{ #category : #accessing }
946991
DataFrame >> dimensions [
947992
"Returns the number of rows and number of columns in a DataFrame"

0 commit comments

Comments
 (0)