Skip to content

Commit e13f4d5

Browse files
Implemented a cleaner and more efficient describe method.
1 parent 0a4a4a7 commit e13f4d5

1 file changed

Lines changed: 19 additions & 41 deletions

File tree

src/DataFrame/DataFrame.class.st

Lines changed: 19 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -943,47 +943,25 @@ DataFrame >> defaultHeadTailSize [
943943

944944
{ #category : #statistics }
945945
DataFrame >> describe [
946-
"method to statistically describe a numerical dataframe"
947-
948-
| nCol nRow describeDF col count dtype |
949-
nCol := self numberOfColumns.
950-
nRow := self numberOfRows.
951-
describeDF := self class new: nCol @ 9.
952-
describeDF columnNames:
953-
#( 'count' 'mean' 'std' 'min' '25%' '50%' '75%' 'max' 'dtype' ).
954-
describeDF rowNames: self columnNames.
955-
1 to: nCol do: [ :i |
956-
| mean std mini fQ sQ tQ maxi |
957-
col := self columnAt: i.
958-
count := col countNonNils.
959-
count = 0 ifFalse: [
960-
col := col removeNils.
961-
mean := col average.
962-
std := col stdev.
963-
mini := col min.
964-
fQ := col firstQuartile.
965-
sQ := col secondQuartile.
966-
tQ := col thirdQuartile.
967-
maxi := col max ].
968-
dtype := col calculateDataType.
969-
describeDF at: i at: 1 put: count.
970-
971-
describeDF at: i at: 2 put: mean.
972-
973-
describeDF at: i at: 3 put: std.
974-
975-
describeDF at: i at: 4 put: mini.
976-
977-
describeDF at: i at: 5 put: fQ.
978-
979-
describeDF at: i at: 6 put: sQ.
980-
981-
describeDF at: i at: 7 put: tQ.
982-
983-
describeDF at: i at: 8 put: maxi.
984-
985-
describeDF at: i at: 9 put: dtype ].
986-
^ describeDF
946+
"Answer a new data frame of summary statistics with one row per numerical column of the receiver (rows are named by numericalColumnNames). The columns are count (non-nil values), mean, std, min, 25%, 50%, 75%, max and dtype. NOTE(review): unlike the removed implementation, nils are not removed before the statistics are computed and all-nil columns are not special-cased - confirm that the column statistics tolerate nils."
947+
948+
| content |
949+
content := self numericalColumns collect: [ :column |
950+
{
951+
column countNonNils.
952+
column average.
953+
column stdev.
954+
column min.
955+
column firstQuartile.
956+
column secondQuartile.
957+
column thirdQuartile.
958+
column max.
959+
column calculateDataType } ].
960+
961+
^ self class
962+
withRows: content
963+
rowNames: self numericalColumnNames
964+
columnNames: #( count mean std min '25%' '50%' '75%' max dtype )
987965
]
988966

989967
{ #category : #accessing }

0 commit comments

Comments
 (0)