Skip to content

Commit bcf6718

Browse files
Added runnable examples for statistical methods
1 parent 671dcab commit bcf6718

1 file changed

Lines changed: 54 additions & 11 deletions

File tree

src/DataFrame/DataSeries.class.st

Lines changed: 54 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -212,6 +212,8 @@ DataSeries >> atIndex: aNumber transform: aBlock [
212212
DataSeries >> average [
213213
"Returns the average of the values that remain after removeNils, so nil entries do not take part in the calculation"
214214

215+
"(#(1 2 nil 3) asDataSeries average) >>> 2"
216+
215217
^ self removeNils values average
216218
]
217219

@@ -281,13 +283,21 @@ DataSeries >> correlationWith: otherSeries using: aCorrelationCoefficient [
281283
DataSeries >> countNils [
282284
"Returns the number of elements in the data series that answer true to isNil. A string such as 'nil' is an ordinary value and is not counted, as the second example shows"
283285

286+
"(#(1 nil 2 nil nil) asDataSeries countNils) >>> 3"
287+
288+
"(#('A' 'nil' nil 'B') asDataSeries countNils) >>> 1"
289+
284290
^ self count: [ :each | each isNil ]
285291
]
286292

287293
{ #category : #statistics }
288294
DataSeries >> countNonNils [
289295
"Returns the number of elements in the data series that answer true to isNotNil. A string such as 'nil' is an ordinary value and is counted, as the second example shows"
290296

297+
"(#(1 nil 2 nil nil) asDataSeries countNonNils) >>> 2"
298+
299+
"(#('A' 'nil' nil 'B') asDataSeries countNonNils) >>> 3"
300+
291301
^ self count: [ :each | each isNotNil ]
292302
]
293303

@@ -316,12 +326,16 @@ DataSeries >> crossTabulateWith: aSeries [
316326
DataSeries >> cumulativeSum [
317327
"Calculate the cumulative sum of a data series and return a new data series with keys as self keys and values as cumulative sum. The running sum starts at 0 and a nil element leaves it unchanged, so the position of a nil receives the sum accumulated so far (0 when no non-nil value has been seen yet)"
318328

329+
"(#(1 nil 2 3 4) asDataSeries cumulativeSum) >>> (#(1 1 3 6 10) asDataSeries)"
330+
331+
"(#(nil nil 10 90) asDataSeries cumulativeSum) >>> (#(0 0 10 100) asDataSeries)"
332+
319333
| sum |
320334
sum := 0.
321335

322336
^ self collect: [ :each |
323-
each ifNotNil: [ sum := sum + each ].
324-
sum ]
337+
each ifNotNil: [ sum := sum + each ].
338+
sum ]
325339
]
326340

327341
{ #category : #defaults }
@@ -392,7 +406,9 @@ DataSeries >> first [
392406

393407
{ #category : #statistics }
394408
DataSeries >> firstQuartile [
395-
"25% of the values in a set are smaller than or equal to the first Quartile of that set"
409+
"25% of the values in a set are smaller than or equal to the first Quartile of that set. Answered by delegating to quartile: 1"
410+
411+
"(#(7 4 20) asDataSeries firstQuartile) >>> 4"
396412

397413
^ self quartile: 1
398414
]
@@ -406,7 +422,9 @@ DataSeries >> fourth [
406422

407423
{ #category : #statistics }
408424
DataSeries >> fourthQuartile [
409-
"Fourth Quartile is the maximum value in a set of values"
425+
"Fourth Quartile is the maximum value in a set of values. Answered by delegating to quartile: 4"
426+
427+
"(#(7 4 20) asDataSeries fourthQuartile) >>> 20"
410428

411429
^ self quartile: 4
412430
]
@@ -522,6 +540,8 @@ DataSeries >> initialize: aCapacity [
522540
DataSeries >> interquartileRange [
523541
"The Inter Quartile Range is the difference between the third Quartile and the first Quartile, i.e. thirdQuartile - firstQuartile of the receiver"
524542

543+
"(#(7 4 20) asDataSeries interquartileRange) >>> 16"
544+
525545
^ self thirdQuartile - self firstQuartile
526546
]
527547

@@ -584,20 +604,26 @@ DataSeries >> makeNumerical [
584604
DataSeries >> max [
585605
"Returns the maximum of the values that remain after removeNils, so nil entries are not compared"
586606

607+
"(#(7 4 20) asDataSeries max) >>> 20"
608+
587609
^ self removeNils values max
588610
]
589611

590612
{ #category : #statistics }
591613
DataSeries >> median [
592614
"Returns the median of the values that remain after removeNils, so nil entries do not take part in the calculation"
593615

616+
"(#(7 4 20) asDataSeries median) >>> 7"
617+
594618
^ self removeNils values median
595619
]
596620

597621
{ #category : #statistics }
598622
DataSeries >> min [
599623
"Returns the minimum of the values that remain after removeNils, so nil entries are not compared"
600624

625+
"(#(7 4 20) asDataSeries min) >>> 4"
626+
601627
^ self removeNils values min
602628
]
603629

@@ -636,6 +662,8 @@ DataSeries >> quantile: aNumber [
636662
"A quantile determines how many values in a distribution are above or below a certain limit.
637663
Eg: if the parameter aNumber is 85, a value from the data series is returned which is greater than or equal to 85% of the values in the data series"
638664

665+
"(#(7 4 20) asDataSeries quantile: 50) >>> 7"
666+
639667
| sortedSeries index |
640668
sortedSeries := self withoutNils sorted.
641669

@@ -649,8 +677,10 @@ Eg: if the parameter aNumber is 85, a value from the data series is returned whi
649677
DataSeries >> quartile: aNumber [
650678
"Quartiles are three values that split sorted data into four parts, each with an equal number of observations.
651679
E.g. if the parameter aNumber is 3, the Third Quartile of the data series is returned. The result is obtained by asking the receiver for quantile: 25 * aNumber"
652-
653-
^ self quantile: (25 * aNumber)
680+
681+
"(#(7 4 20) asDataSeries quartile: 3) >>> 20"
682+
683+
^ self quantile: 25 * aNumber
654684
]
655685

656686
{ #category : #enumerating }
@@ -754,7 +784,9 @@ DataSeries >> second [
754784

755785
{ #category : #statistics }
756786
DataSeries >> secondQuartile [
757-
"50% of the values in a set are smaller than or equal to the second Quartile of that set. It is also known as the median"
787+
"50% of the values in a set are smaller than or equal to the second Quartile of that set. It is also known as the median. Answered by delegating to quartile: 2"
788+
789+
"(#(7 4 20) asDataSeries secondQuartile) >>> 7"
758790

759791
^ self quartile: 2
760792
]
@@ -846,6 +878,8 @@ DataSeries >> sortedDescending [
846878
DataSeries >> stdev [
847879
"Returns the standard deviation of the values that remain after removeNils, as computed by the stdev of that collection. NOTE(review): the example below matches the sample (n - 1) formula - confirm against the collection's stdev implementation"
848880

881+
"(#(10 20 30) asDataSeries stdev) >>> 10"
882+
849883
^ self removeNils values stdev
850884
]
851885

@@ -904,7 +938,9 @@ DataSeries >> third [
904938

905939
{ #category : #statistics }
906940
DataSeries >> thirdQuartile [
907-
"75% of the values in a set are smaller than or equal to the third Quartile of that set"
941+
"75% of the values in a set are smaller than or equal to the third Quartile of that set. Answered by delegating to quartile: 3"
942+
943+
"(#(7 4 20) asDataSeries thirdQuartile) >>> 20"
908944

909945
^ self quartile: 3
910946
]
@@ -923,14 +959,19 @@ DataSeries >> uniqueValues [
923959
{ #category : #statistics }
924960
DataSeries >> valueCounts [
925961
"Calculates the frequency of each value in the data series and returns a data series in descending order of frequencies. As the example shows, the result is keyed by the distinct values and holds the number of occurrences of each"
926-
962+
963+
"(#(7 20 20) asDataSeries valueCounts) >>> (DataSeries withKeys: #(20 7) values: #(2 1))"
964+
927965
^ (self groupByUniqueValuesAndAggregateUsing: #size) sortDescending
928966
]
929967

930968
{ #category : #statistics }
931969
DataSeries >> valueFrequencies [
932970
"Calculates the relative frequency of values in the data series. Relative frequency is the ratio of the number of times a value occurs in a set to the total number of values in the set"
933-
971+
972+
"(#(7 20 20) asDataSeries valueFrequencies) >>> (DataSeries withKeys: #( 20 7 ) values: {
973+
(2 / 3). (1 / 3) })"
974+
934975
| count freq |
935976
count := self valueCounts.
936977
freq := count / self size.
@@ -1083,7 +1124,9 @@ DataSeries >> withoutNils [
10831124

10841125
{ #category : #statistics }
10851126
DataSeries >> zerothQuartile [
1086-
"Zeroth Quartile is the minimum value in a set of values"
1127+
"Zeroth Quartile is the minimum value in a set of values. Answered by delegating to quartile: 0"
1128+
1129+
"(#(7 4 20) asDataSeries zerothQuartile) >>> 4"
10871130

10881131
^ self quartile: 0
10891132
]

0 commit comments

Comments
 (0)