@@ -115,13 +115,21 @@ DataSeries >> adaptToCollection: rcvr andSend: selector [
115115DataSeries >> argmax [
116116 " Answer the key which corresponds to the maximum value of the data series, i.e. the key that keyAtValue: finds for self max"
117117
118+ " (#(100 10 20 30) asDataSeries argmax) >>> 1"
119+
120+ " ((DataSeries withKeys: #(A B C) values: #(1 2 40)) argmax) >>> #C"
121+
118122 ^ self keyAtValue: self max
119123]
120124
121125{ #category : #statistics }
122126DataSeries >> argmin [
123127 " Answer the key which corresponds to the minimum value of the data series, i.e. the key that keyAtValue: finds for self min"
124128
129+ " (#(100 10 20 30) asDataSeries argmin) >>> 2"
130+
131+ " ((DataSeries withKeys: #(A B C) values: #(1 2 40)) argmin) >>> #A"
132+
125133 ^ self keyAtValue: self min
126134]
127135
@@ -138,7 +146,13 @@ DataSeries >> asDataFrame [
138146{ #category : #accessing }
139147DataSeries >> at: aKey transform: aBlock [
140148 " Evaluate aBlock on the value at aKey and replace that value with the result. Signal an exception (through errorKeyNotFound:) if aKey was not found"
141- self at: aKey transform: aBlock ifAbsent: [ self errorKeyNotFound: aKey ]
149+
150+ " ((DataSeries withKeys: #(A B C) values: #(1 4 3)) at: #C transform: [ :x | x * x ]) >>> (DataSeries withKeys: #(A B C) values: #(1 4 9))"
151+
152+ self
153+ at: aKey
154+ transform: aBlock
155+ ifAbsent: [ self errorKeyNotFound: aKey ]
142156]
143157
144158{ #category : #accessing }
@@ -162,7 +176,11 @@ DataSeries >> atAll: aCollectionOfIndexes [
162176{ #category : #accessing }
163177DataSeries >> atIndex: aNumber [
164178 " Answer the element of the receiver at positional index aNumber: the value stored under the key found at position aNumber of self keys"
165-
179+
180+ " (#(1 4 9) asDataSeries atIndex: 2) >>> 4"
181+
182+ " ((DataSeries withKeys: #(A B C) values: #(1 2 40)) atIndex: 3) >>> 40"
183+
166184 ^ self at: (self keys at: aNumber)
167185]
168186
@@ -176,6 +194,15 @@ DataSeries >> atIndex: aNumber put: aValue [
176194{ #category : #accessing }
177195DataSeries >> atIndex: aNumber transform: aBlock [
178196 " Evaluate aBlock on the value at aNumber and replace that value with the result"
197+
198+ " ((DataSeries withKeys: #( A B C ) values: #( 1 4 3 ))
199+ atIndex: 3
200+ transform: [ :x | x * x ])
201+ >>> (DataSeries withKeys: #( A B C ) values: #( 1 4 9 ))"
202+
203+ " (#( 1 3 3 ) asDataSeries atIndex: 2 transform: [ :x | x - 1 ])
204+ >>> (#( 1 2 3 ) asDataSeries)"
205+
179206 | key |
180207 key := self keys at: aNumber.
181208 self at: key transform: aBlock
@@ -185,6 +212,8 @@ DataSeries >> atIndex: aNumber transform: aBlock [
185212DataSeries >> average [
186213 " Answer the average of the values that remain after removeNils, so nil entries are not included. NOTE(review): quantile: uses withoutNils for the same purpose; confirm that removeNils does not alter the receiver"
187214
215+ " (#(1 2 nil 3) asDataSeries average) >>> 2"
216+
188217 ^ self removeNils values average
189218]
190219
@@ -254,13 +283,21 @@ DataSeries >> correlationWith: otherSeries using: aCorrelationCoefficient [
254283DataSeries >> countNils [
255284 " Answer the number of elements of the data series that are nil (isNil answers true). A string such as 'nil' is not a nil value and is not counted"
256285
286+ " (#(1 nil 2 nil nil) asDataSeries countNils) >>> 3"
287+
288+ " (#('A' 'nil' nil 'B') asDataSeries countNils) >>> 1"
289+
257290 ^ self count: [ :each | each isNil ]
258291]
259292
260293{ #category : #statistics }
261294DataSeries >> countNonNils [
262295 " Answer the number of elements of the data series that are not nil (isNotNil answers true). A string such as 'nil' is a non-nil value and is counted"
263296
297+ " (#(1 nil 2 nil nil) asDataSeries countNonNils) >>> 2"
298+
299+ " (#('A' 'nil' nil 'B') asDataSeries countNonNils) >>> 3"
300+
264301 ^ self count: [ :each | each isNotNil ]
265302]
266303
@@ -289,12 +326,16 @@ DataSeries >> crossTabulateWith: aSeries [
289326DataSeries >> cumulativeSum [
290327 " Calculate the cumulative sum of a data series and return a new data series with keys as self keys and values as cumulative sum. A nil value adds nothing: the running sum reached so far is answered at its position (0 when no number has been met yet)"
291328
329+ " (#(1 nil 2 3 4) asDataSeries cumulativeSum) >>> (#(1 1 3 6 10) asDataSeries)"
330+
331+ " (#(nil nil 10 90) asDataSeries cumulativeSum) >>> (#(0 0 10 100) asDataSeries)"
332+
292333 | sum |
293334 sum := 0 .
294335
295336 ^ self collect: [ :each |
296- each ifNotNil: [ sum := sum + each ].
297- sum ]
337+ each ifNotNil: [ sum := sum + each ].
338+ sum ]
298339]
299340
300341{ #category : #defaults }
@@ -316,6 +357,9 @@ DataSeries >> defaultPrecision [
316357DataSeries >> eighth [
317358 " Answer the eighth element of the receiver, i.e. the value at positional index 8 (looked up with atIndex:, not by key).
318359 Raise an error if there are not enough elements."
360+
361+ " (#(a b c d e f g h i j) asDataSeries eighth) >>> #h"
362+
319363 ^ self atIndex: 8
320364]
321365
@@ -353,19 +397,27 @@ DataSeries >> errorKeysMismatch [
353397DataSeries >> fifth [
354398 " Answer the fifth element of the receiver, i.e. the value at positional index 5 (looked up with atIndex:, not by key).
355399 Raise an error if there are not enough elements."
400+
401+ " (#(a b c d e f g h i j) asDataSeries fifth) >>> #e"
402+
356403 ^ self atIndex: 5
357404]
358405
359406{ #category : #accessing }
360407DataSeries >> first [
361408 " Answer the first element of the receiver, i.e. the value at positional index 1 (looked up with atIndex:, not by key).
362409 Raise an error if there are not enough elements."
410+
411+ " (#(a b c d e f g h i j) asDataSeries first) >>> #a"
412+
363413 ^ self atIndex: 1
364414]
365415
366416{ #category : #statistics }
367417DataSeries >> firstQuartile [
368- " 25% of the values in a set are smaller than or equal to the first Quartile of that set"
418+ " 25% of the values in a set are smaller than or equal to the first Quartile of that set. Answered by quartile: 1"
419+
420+ " (#(7 4 20) asDataSeries firstQuartile) >>> 4"
369421
370422 ^ self quartile: 1
371423]
@@ -374,12 +426,17 @@ DataSeries >> firstQuartile [
374426DataSeries >> fourth [
375427 " Answer the fourth element of the receiver, i.e. the value at positional index 4 (looked up with atIndex:, not by key).
376428 Raise an error if there are not enough elements."
429+
430+ " (#(a b c d e f g h i j) asDataSeries fourth) >>> #d"
431+
377432 ^ self atIndex: 4
378433]
379434
380435{ #category : #statistics }
381436DataSeries >> fourthQuartile [
382- " Fourth Quartile is the maximum value in a set of values"
437+ " Fourth Quartile is the maximum value in a set of values. Answered by quartile: 4"
438+
439+ " (#(7 4 20) asDataSeries fourthQuartile) >>> 20"
383440
384441 ^ self quartile: 4
385442]
@@ -495,20 +552,34 @@ DataSeries >> initialize: aCapacity [
495552DataSeries >> interquartileRange [
496553 " Answer the interquartile range: the third quartile (thirdQuartile) minus the first quartile (firstQuartile)"
497554
555+ " (#(7 4 20) asDataSeries interquartileRange) >>> 16"
556+
498557 ^ self thirdQuartile - self firstQuartile
499558]
500559
501560{ #category : #' categorical-numerical' }
502561DataSeries >> isCategorical [
503562 " Returns true if at least one value of the data series is non numerical and returns false otherwise. This is exactly the negation of isNumerical"
504563
564+ " (#(a 1 2 3) asDataSeries isCategorical) >>> true"
565+
566+ " (#(0 1 2 3) asDataSeries isCategorical) >>> false"
567+
568+ " (#(a b c d) asDataSeries isCategorical) >>> true"
569+
505570 ^ self isNumerical not
506571]
507572
508573{ #category : #' categorical-numerical' }
509574DataSeries >> isNumerical [
510575 " Returns true if all values of the data series are numerical values and returns false otherwise"
511576
577+ " (#(a 1 2 3) asDataSeries isNumerical) >>> false"
578+
579+ " (#(0 1 2.2 3) asDataSeries isNumerical) >>> true"
580+
581+ " ((#( I XIV VII XII ) collect: [ :each | each romanNumber ]) asDataSeries isNumerical) >>> true"
582+
512583 ^ forcedIsNumerical ifNil: [
513584 (self removeDuplicates copyWithout: nil ) allSatisfy: [ :each |
514585 each isNumber ] ]
@@ -531,6 +602,9 @@ DataSeries >> keys: anArrayOfKeys [
531602DataSeries >> last [
532603 " Answer the last element of the receiver, i.e. the value at positional index self size (looked up with atIndex:, not by key).
533604 Raise an error if there are not enough elements."
605+
606+ " (#(a b c d e f g h i j) asDataSeries last) >>> #j"
607+
534608 ^ self atIndex: self size
535609]
536610
@@ -557,27 +631,37 @@ DataSeries >> makeNumerical [
557631DataSeries >> max [
558632 " Answer the maximum of the values that remain after removeNils, so nil entries are not included. NOTE(review): quantile: uses withoutNils for the same purpose; confirm that removeNils does not alter the receiver"
559633
634+ " (#(7 4 20) asDataSeries max) >>> 20"
635+
560636 ^ self removeNils values max
561637]
562638
563639{ #category : #statistics }
564640DataSeries >> median [
565641 " Answer the median of the values that remain after removeNils, so nil entries are not included. NOTE(review): quantile: uses withoutNils for the same purpose; confirm that removeNils does not alter the receiver"
566642
643+ " (#(7 4 20) asDataSeries median) >>> 7"
644+
567645 ^ self removeNils values median
568646]
569647
570648{ #category : #statistics }
571649DataSeries >> min [
572650 " Answer the minimum of the values that remain after removeNils, so nil entries are not included. NOTE(review): quantile: uses withoutNils for the same purpose; confirm that removeNils does not alter the receiver"
573651
652+ " (#(7 4 20) asDataSeries min) >>> 4"
653+
574654 ^ self removeNils values min
575655]
576656
577657{ #category : #accessing }
578658DataSeries >> mode [
579659 " The mode of a set of values is the value that appears most often. "
580660
661+ " (#(a j a j e j g j i j) asDataSeries mode) >>> #j"
662+
663+ " (#(1 2 3 2) asDataSeries mode) >>> 2"
664+
581665 | valueCounts |
582666 valueCounts := self valueCounts.
583667 ^ valueCounts keyAtValue: valueCounts max
@@ -601,6 +685,9 @@ DataSeries >> name: anObject [
601685DataSeries >> ninth [
602686 " Answer the ninth element of the receiver, i.e. the value at positional index 9 (looked up with atIndex:, not by key).
603687 Raise an error if there are not enough elements."
688+
689+ " (#(a b c d e f g h i j) asDataSeries ninth) >>> #i"
690+
604691 ^ self atIndex: 9
605692]
606693
@@ -609,6 +696,8 @@ DataSeries >> quantile: aNumber [
609696 " A quantile determines how many values in a distribution are above or below a certain limit.
610697Eg: if the parameter aNumber is 85, a value from the data series is returned which is greater than or equal to 85% of the values in the data series"
611698
699+ " (#(7 4 20) asDataSeries quantile: 50) >>> 7"
700+
612701 | sortedSeries index |
613702 sortedSeries := self withoutNils sorted.
614703
@@ -622,8 +711,10 @@ Eg: if the parameter aNumber is 85, a value from the data series is returned whi
622711DataSeries >> quartile: aNumber [
623712 " Quartiles are three values that split sorted data into four parts, each with an equal number of observations.
624713Eg: if the parameter aNumber is 3, the Third Quartile of the data series is returned. The answer is obtained from quantile: with 25 * aNumber as its argument (75 for the Third Quartile)"
625-
626- ^ self quantile: (25 * aNumber)
714+
715+ " (#(7 4 20) asDataSeries quartile: 3) >>> 20"
716+
717+ ^ self quantile: 25 * aNumber
627718]
628719
629720{ #category : #enumerating }
@@ -722,12 +813,17 @@ DataSeries >> replaceNilsWithZeros [
722813DataSeries >> second [
723814 " Answer the second element of the receiver, i.e. the value at positional index 2 (looked up with atIndex:, not by key).
724815 Raise an error if there are not enough elements."
816+
817+ " (#(a b c d e f g h i j) asDataSeries second) >>> #b"
818+
725819 ^ self atIndex: 2
726820]
727821
728822{ #category : #statistics }
729823DataSeries >> secondQuartile [
730- " 50% of the values in a set are smaller than or equal to the second Quartile of that set. It is also known as the median"
824+ " 50% of the values in a set are smaller than or equal to the second Quartile of that set. It is also known as the median. Answered by quartile: 2"
825+
826+ " (#(7 4 20) asDataSeries secondQuartile) >>> 7"
731827
732828 ^ self quartile: 2
733829]
@@ -744,13 +840,19 @@ DataSeries >> select: aBlock [
744840DataSeries >> seventh [
745841 " Answer the seventh element of the receiver, i.e. the value at positional index 7 (looked up with atIndex:, not by key).
746842 Raise an error if there are not enough elements."
843+
844+ " (#(a b c d e f g h i j) asDataSeries seventh) >>> #g"
845+
747846 ^ self atIndex: 7
748847]
749848
750849{ #category : #accessing }
751850DataSeries >> sixth [
752851 " Answer the sixth element of the receiver, i.e. the value at positional index 6 (looked up with atIndex:, not by key).
753852 Raise an error if there are not enough elements."
853+
854+ " (#(a b c d e f g h i j) asDataSeries sixth) >>> #f"
855+
754856 ^ self atIndex: 6
755857]
756858
@@ -819,6 +921,8 @@ DataSeries >> sortedDescending [
819921DataSeries >> stdev [
820922 " Answer the standard deviation of the values that remain after removeNils, so nil entries are not included. NOTE(review): quantile: uses withoutNils for the same purpose; confirm that removeNils does not alter the receiver"
821923
924+ " (#(10 20 30) asDataSeries stdev) >>> 10"
925+
822926 ^ self removeNils values stdev
823927]
824928
@@ -872,12 +976,17 @@ DataSeries >> tail: aNumber [
872976DataSeries >> third [
873977 " Answer the third element of the receiver, i.e. the value at positional index 3 (looked up with atIndex:, not by key).
874978 Raise an error if there are not enough elements."
979+
980+ " (#(a b c d e f g h i j) asDataSeries third) >>> #c"
981+
875982 ^ self atIndex: 3
876983]
877984
878985{ #category : #statistics }
879986DataSeries >> thirdQuartile [
880- " 75% of the values in a set are smaller than or equal to the third Quartile of that set"
987+ " 75% of the values in a set are smaller than or equal to the third Quartile of that set. Answered by quartile: 3"
988+
989+ " (#(7 4 20) asDataSeries thirdQuartile) >>> 20"
881990
882991 ^ self quartile: 3
883992]
@@ -896,14 +1005,19 @@ DataSeries >> uniqueValues [
8961005{ #category : #statistics }
8971006DataSeries >> valueCounts [
8981007 " Calculates the number of occurrences of each distinct value in the data series (groups by unique value and aggregates with #size) and returns a data series, keyed by value, in descending order of those counts"
899-
1008+
1009+ " (#(7 20 20) asDataSeries valueCounts) >>> (DataSeries withKeys: #(20 7) values: #(2 1))"
1010+
9001011 ^ (self groupByUniqueValuesAndAggregateUsing: #size ) sortDescending
9011012]
9021013
9031014{ #category : #statistics }
9041015DataSeries >> valueFrequencies [
9051016 " Calculates the relative frequency of values in the data series. Relative frequency is the ratio of the number of times a value occurs in a set to the total number of values in the set"
906-
1017+
1018+ " (#(7 20 20) asDataSeries valueFrequencies) >>> (DataSeries withKeys: #( 20 7 ) values: {
1019+ (2 / 3). (1 / 3) })"
1020+
9071021 | count freq |
9081022 count := self valueCounts.
9091023 freq := count / self size.
@@ -1056,7 +1170,9 @@ DataSeries >> withoutNils [
10561170
10571171{ #category : #statistics }
10581172DataSeries >> zerothQuartile [
1059- " Zeroth Quartile is the minimum value in a set of values"
1173+ " Zeroth Quartile is the minimum value in a set of values. Answered by quartile: 0"
1174+
1175+ " (#(7 4 20) asDataSeries zerothQuartile) >>> 4"
10601176
10611177 ^ self quartile: 0
10621178]
0 commit comments