Skip to content

Commit 6fdbd01

Browse files
authored
Merge pull request #261 from Joshua-Dias-Barreto/chainedSorting
Added runnable examples for methods in the DataSeries class.
2 parents 29cf558 + 757245e commit 6fdbd01

1 file changed

Lines changed: 129 additions & 13 deletions

File tree

src/DataFrame/DataSeries.class.st

Lines changed: 129 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,21 @@ DataSeries >> adaptToCollection: rcvr andSend: selector [
115115
DataSeries >> argmax [
116116
"Returns the key which corresponds to the maximum value of the dataseries"
117117

118+
"(#(100 10 20 30) asDataSeries argmax) >>> 1"
119+
120+
"((DataSeries withKeys: #(A B C) values: #(1 2 40)) argmax) >>> #C"
121+
118122
^ self keyAtValue: self max
119123
]
120124

121125
{ #category : #statistics }
122126
DataSeries >> argmin [
123127
"Returns the key which corresponds to the minimum value of the dataseries"
124128

129+
"(#(100 10 20 30) asDataSeries argmin) >>> 2"
130+
131+
"((DataSeries withKeys: #(A B C) values: #(1 2 40)) argmin) >>> #A"
132+
125133
^ self keyAtValue: self min
126134
]
127135

@@ -138,7 +146,13 @@ DataSeries >> asDataFrame [
138146
{ #category : #accessing }
139147
DataSeries >> at: aKey transform: aBlock [
140148
"Evaluate aBlock on the value at aKey and replace that value with the result. Signal an exception if aKey was not found"
141-
self at: aKey transform: aBlock ifAbsent: [ self errorKeyNotFound: aKey ]
149+
150+
"((DataSeries withKeys: #(A B C) values: #(1 4 3)) at: #C transform: [ :x | x * x ]) >>> (DataSeries withKeys: #(A B C) values: #(1 4 9))."
151+
152+
self
153+
at: aKey
154+
transform: aBlock
155+
ifAbsent: [ self errorKeyNotFound: aKey ]
142156
]
143157

144158
{ #category : #accessing }
@@ -162,7 +176,11 @@ DataSeries >> atAll: aCollectionOfIndexes [
162176
{ #category : #accessing }
163177
DataSeries >> atIndex: aNumber [
164178
"Answer the element of the receiver at index aNumber"
165-
179+
180+
"(#(1 4 9) asDataSeries atIndex: 2) >>> 4"
181+
182+
"((DataSeries withKeys: #(A B C) values: #(1 2 40)) atIndex: 3) >>> 40"
183+
166184
^ self at: (self keys at: aNumber)
167185
]
168186

@@ -176,6 +194,15 @@ DataSeries >> atIndex: aNumber put: aValue [
176194
{ #category : #accessing }
177195
DataSeries >> atIndex: aNumber transform: aBlock [
178196
"Evaluate aBlock on the value at aNumber and replace that value with the result"
197+
198+
" ((DataSeries withKeys: #( A B C ) values: #( 1 4 3 ))
199+
atIndex: 3
200+
transform: [ :x | x * x ])
201+
>>> (DataSeries withKeys: #( A B C ) values: #( 1 4 9 ))"
202+
203+
"(#( 1 3 3 ) asDataSeries atIndex: 2 transform: [ :x | x - 1 ])
204+
>>> (#( 1 2 3 ) asDataSeries)"
205+
179206
| key |
180207
key := self keys at: aNumber.
181208
self at: key transform: aBlock
@@ -185,6 +212,8 @@ DataSeries >> atIndex: aNumber transform: aBlock [
185212
DataSeries >> average [
186213
"Returns the average without including nils"
187214

215+
"(#(1 2 nil 3) asDataSeries average) >>> 2"
216+
188217
^ self removeNils values average
189218
]
190219

@@ -254,13 +283,21 @@ DataSeries >> correlationWith: otherSeries using: aCorrelationCoefficient [
254283
DataSeries >> countNils [
255284
"Returns the number of nil values in the data series"
256285

286+
"(#(1 nil 2 nil nil) asDataSeries countNils) >>> 3"
287+
288+
"(#('A' 'nil' nil 'B') asDataSeries countNils) >>> 1"
289+
257290
^ self count: [ :each | each isNil ]
258291
]
259292

260293
{ #category : #statistics }
261294
DataSeries >> countNonNils [
262295
"Returns the number of non-nil values in the data series"
263296

297+
"(#(1 nil 2 nil nil) asDataSeries countNonNils) >>> 2"
298+
299+
"(#('A' 'nil' nil 'B') asDataSeries countNonNils) >>> 3"
300+
264301
^ self count: [ :each | each isNotNil ]
265302
]
266303

@@ -289,12 +326,16 @@ DataSeries >> crossTabulateWith: aSeries [
289326
DataSeries >> cumulativeSum [
290327
"Calculate the cumulative sum of the data series and return a new data series with the same keys as the receiver and the running sums as values. Nil values do not add to the running sum"
291328

329+
"(#(1 nil 2 3 4) asDataSeries cumulativeSum) >>> (#(1 1 3 6 10) asDataSeries)"
330+
331+
"(#(nil nil 10 90) asDataSeries cumulativeSum) >>> (#(0 0 10 100) asDataSeries)"
332+
292333
| sum |
293334
sum := 0.
294335

295336
^ self collect: [ :each |
296-
each ifNotNil: [ sum := sum + each ].
297-
sum ]
337+
each ifNotNil: [ sum := sum + each ].
338+
sum ]
298339
]
299340

300341
{ #category : #defaults }
@@ -316,6 +357,9 @@ DataSeries >> defaultPrecision [
316357
DataSeries >> eighth [
317358
"Answer the eighth element of the receiver.
318359
Raise an error if there are not enough elements."
360+
361+
"(#(a b c d e f g h i j) asDataSeries eighth) >>> #h"
362+
319363
^ self atIndex: 8
320364
]
321365

@@ -353,19 +397,27 @@ DataSeries >> errorKeysMismatch [
353397
DataSeries >> fifth [
354398
"Answer the fifth element of the receiver.
355399
Raise an error if there are not enough elements."
400+
401+
"(#(a b c d e f g h i j) asDataSeries fifth) >>> #e"
402+
356403
^ self atIndex: 5
357404
]
358405

359406
{ #category : #accessing }
360407
DataSeries >> first [
361408
"Answer the first element of the receiver.
362409
Raise an error if there are not enough elements."
410+
411+
"(#(a b c d e f g h i j) asDataSeries first) >>> #a"
412+
363413
^ self atIndex: 1
364414
]
365415

366416
{ #category : #statistics }
367417
DataSeries >> firstQuartile [
368-
"25% of the values in a set are smaller than or equal to the first Quartile of that set"
418+
"25% of the values in a set are smaller than or equal to the first Quartile of that set"
419+
420+
"(#(7 4 20) asDataSeries firstQuartile) >>> 4"
369421

370422
^ self quartile: 1
371423
]
@@ -374,12 +426,17 @@ DataSeries >> firstQuartile [
374426
DataSeries >> fourth [
375427
"Answer the fourth element of the receiver.
376428
Raise an error if there are not enough elements."
429+
430+
"(#(a b c d e f g h i j) asDataSeries fourth) >>> #d"
431+
377432
^ self atIndex: 4
378433
]
379434

380435
{ #category : #statistics }
381436
DataSeries >> fourthQuartile [
382-
"Fourth Quartile is the maximum value in a set of values"
437+
"Fourth Quartile is the maximum value in a set of values"
438+
439+
"(#(7 4 20) asDataSeries fourthQuartile) >>> 20"
383440

384441
^ self quartile: 4
385442
]
@@ -495,20 +552,34 @@ DataSeries >> initialize: aCapacity [
495552
DataSeries >> interquartileRange [
496553
"The Inter Quartile Range is the difference between the third Quartile and the first Quartile"
497554

555+
"(#(7 4 20) asDataSeries interquartileRange) >>> 16"
556+
498557
^ self thirdQuartile - self firstQuartile
499558
]
500559

501560
{ #category : #'categorical-numerical' }
502561
DataSeries >> isCategorical [
503562
"Returns true if at least one value of the data series is non-numerical and returns false otherwise"
504563

564+
"(#(a 1 2 3) asDataSeries isCategorical) >>> true"
565+
566+
"(#(0 1 2 3) asDataSeries isCategorical) >>> false"
567+
568+
"(#(a b c d) asDataSeries isCategorical) >>> true"
569+
505570
^ self isNumerical not
506571
]
507572

508573
{ #category : #'categorical-numerical' }
509574
DataSeries >> isNumerical [
510575
"Returns true if all values of the data series are numerical values and returns false otherwise"
511576

577+
"(#(a 1 2 3) asDataSeries isNumerical) >>> false"
578+
579+
"(#(0 1 2.2 3) asDataSeries isNumerical) >>> true"
580+
581+
"((#( I XIV VII XII ) collect: [ :each | each romanNumber ]) asDataSeries isNumerical) >>> true"
582+
512583
^ forcedIsNumerical ifNil: [
513584
(self removeDuplicates copyWithout: nil) allSatisfy: [ :each |
514585
each isNumber ] ]
@@ -531,6 +602,9 @@ DataSeries >> keys: anArrayOfKeys [
531602
DataSeries >> last [
532603
"Answer the last element of the receiver.
533604
Raise an error if there are not enough elements."
605+
606+
"(#(a b c d e f g h i j) asDataSeries last) >>> #j"
607+
534608
^ self atIndex: self size
535609
]
536610

@@ -557,27 +631,37 @@ DataSeries >> makeNumerical [
557631
DataSeries >> max [
558632
"Returns the maximum value of the dataseries without including nils"
559633

634+
"(#(7 4 20) asDataSeries max) >>> 20"
635+
560636
^ self removeNils values max
561637
]
562638

563639
{ #category : #statistics }
564640
DataSeries >> median [
565641
"Returns the median without including nils"
566642

643+
"(#(7 4 20) asDataSeries median) >>> 7"
644+
567645
^ self removeNils values median
568646
]
569647

570648
{ #category : #statistics }
571649
DataSeries >> min [
572650
"Returns the minimum value of the dataseries without including nils"
573651

652+
"(#(7 4 20) asDataSeries min) >>> 4"
653+
574654
^ self removeNils values min
575655
]
576656

577657
{ #category : #accessing }
578658
DataSeries >> mode [
579659
"The mode of a set of values is the value that appears most often. "
580660

661+
"(#(a j a j e j g j i j) asDataSeries mode) >>> #j"
662+
663+
"(#(1 2 3 2) asDataSeries mode) >>> 2"
664+
581665
| valueCounts |
582666
valueCounts := self valueCounts.
583667
^ valueCounts keyAtValue: valueCounts max
@@ -601,6 +685,9 @@ DataSeries >> name: anObject [
601685
DataSeries >> ninth [
602686
"Answer the ninth element of the receiver.
603687
Raise an error if there are not enough elements."
688+
689+
"(#(a b c d e f g h i j) asDataSeries ninth) >>> #i"
690+
604691
^ self atIndex: 9
605692
]
606693

@@ -609,6 +696,8 @@ DataSeries >> quantile: aNumber [
609696
"A quantile determines how many values in a distribution are above or below a certain limit.
610697
E.g., if the parameter aNumber is 85, the value returned from the data series is greater than or equal to 85% of the values in the data series"
611698

699+
"(#(7 4 20) asDataSeries quantile: 50) >>> 7"
700+
612701
| sortedSeries index |
613702
sortedSeries := self withoutNils sorted.
614703

@@ -622,8 +711,10 @@ Eg: if the parameter aNumber is 85, a value from the data series is returned whi
622711
DataSeries >> quartile: aNumber [
623712
"Quartiles are three values that split sorted data into four parts, each with an equal number of observations.
624713
E.g., if the parameter aNumber is 3, the Third Quartile of the data series is returned"
625-
626-
^ self quantile: (25 * aNumber)
714+
715+
"(#(7 4 20) asDataSeries quartile: 3) >>> 20"
716+
717+
^ self quantile: 25 * aNumber
627718
]
628719

629720
{ #category : #enumerating }
@@ -722,12 +813,17 @@ DataSeries >> replaceNilsWithZeros [
722813
DataSeries >> second [
723814
"Answer the second element of the receiver.
724815
Raise an error if there are not enough elements."
816+
817+
"(#(a b c d e f g h i j) asDataSeries second) >>> #b"
818+
725819
^ self atIndex: 2
726820
]
727821

728822
{ #category : #statistics }
729823
DataSeries >> secondQuartile [
730-
"50% of the values in a set are smaller than or equal to the second Quartile of that set. It is also known as the median"
824+
"50% of the values in a set are smaller than or equal to the second Quartile of that set. It is also known as the median"
825+
826+
"(#(7 4 20) asDataSeries secondQuartile) >>> 7"
731827

732828
^ self quartile: 2
733829
]
@@ -744,13 +840,19 @@ DataSeries >> select: aBlock [
744840
DataSeries >> seventh [
745841
"Answer the seventh element of the receiver.
746842
Raise an error if there are not enough elements."
843+
844+
"(#(a b c d e f g h i j) asDataSeries seventh) >>> #g"
845+
747846
^ self atIndex: 7
748847
]
749848

750849
{ #category : #accessing }
751850
DataSeries >> sixth [
752851
"Answer the sixth element of the receiver.
753852
Raise an error if there are not enough elements."
853+
854+
"(#(a b c d e f g h i j) asDataSeries sixth) >>> #f"
855+
754856
^ self atIndex: 6
755857
]
756858

@@ -819,6 +921,8 @@ DataSeries >> sortedDescending [
819921
DataSeries >> stdev [
820922
"Returns the standard deviation of the dataseries without including nils"
821923

924+
"(#(10 20 30) asDataSeries stdev) >>> 10"
925+
822926
^ self removeNils values stdev
823927
]
824928

@@ -872,12 +976,17 @@ DataSeries >> tail: aNumber [
872976
DataSeries >> third [
873977
"Answer the third element of the receiver.
874978
Raise an error if there are not enough elements."
979+
980+
"(#(a b c d e f g h i j) asDataSeries third) >>> #c"
981+
875982
^ self atIndex: 3
876983
]
877984

878985
{ #category : #statistics }
879986
DataSeries >> thirdQuartile [
880-
"75% of the values in a set are smaller than or equal to the third Quartile of that set"
987+
"75% of the values in a set are smaller than or equal to the third Quartile of that set"
988+
989+
"(#(7 4 20) asDataSeries thirdQuartile) >>> 20"
881990

882991
^ self quartile: 3
883992
]
@@ -896,14 +1005,19 @@ DataSeries >> uniqueValues [
8961005
{ #category : #statistics }
8971006
DataSeries >> valueCounts [
8981007
"Calculates the frequency of each value in the data series and returns a data series in descending order of frequencies"
899-
1008+
1009+
"(#(7 20 20) asDataSeries valueCounts) >>> (DataSeries withKeys: #(20 7) values: #(2 1))"
1010+
9001011
^ (self groupByUniqueValuesAndAggregateUsing: #size) sortDescending
9011012
]
9021013

9031014
{ #category : #statistics }
9041015
DataSeries >> valueFrequencies [
9051016
"Calculates the relative frequency of values in the data series. Relative frequency is the ratio of the number of times a value occurs in a set to the total number of values in the set"
906-
1017+
1018+
"(#(7 20 20) asDataSeries valueFrequencies) >>> (DataSeries withKeys: #( 20 7 ) values: {
1019+
(2 / 3). (1 / 3) })"
1020+
9071021
| count freq |
9081022
count := self valueCounts.
9091023
freq := count / self size.
@@ -1056,7 +1170,9 @@ DataSeries >> withoutNils [
10561170

10571171
{ #category : #statistics }
10581172
DataSeries >> zerothQuartile [
1059-
"Zeroth Quartile is the minimum value in a set of values"
1173+
"Zeroth Quartile is the minimum value in a set of values"
1174+
1175+
"(#(7 4 20) asDataSeries zerothQuartile) >>> 4"
10601176

10611177
^ self quartile: 0
10621178
]

0 commit comments

Comments
 (0)