@@ -1190,6 +1190,12 @@ DataFrame >> groupBy: columnName aggregate: anArrayOfUsingAsStatements [
11901190DataFrame >> hasNils [
11911191 " Returns true if there is at least one nil value in the data frame. Returns false if there are no nil values in the data frame"
11921192
1193+ " (#(#(nil 2) #(nil 4)) asDataFrame hasNils) >>> true"
1194+
1195+ " (#(#('nil' 'nil') #('nil' 'nil')) asDataFrame hasNils) >>> false"
1196+
1197+ " (#(#(nil 'nil') #('nil' 'nil')) asDataFrame hasNils) >>> true"
1198+
11931199 | arrayOfColumns |
11941200 arrayOfColumns := self asArrayOfColumns.
11951201 1 to: self numberOfColumns do: [ :column |
@@ -1200,9 +1206,14 @@ DataFrame >> hasNils [
12001206
12011207{ #category : #replacing }
12021208DataFrame >> hasNilsByColumn [
1203-
12041209 " Returns a dictionary that maps each column name to true if that column contains at least one nil value, and to false otherwise"
12051210
1211+ " (#(#(1 2) #(nil 4)) asDataFrame hasNilsByColumn) >>> (Dictionary newFrom: {(1 -> true).(2 -> false)})"
1212+
1213+ " (#(#('nil' 'nil') #('nil' 'nil')) asDataFrame hasNilsByColumn) >>> (Dictionary newFrom: {(1 -> false).(2 -> false)})"
1214+
1215+ " (#(#(nil 'nil') #('nil' 'nil')) asDataFrame hasNilsByColumn) >>> (Dictionary newFrom: {(1 -> true).(2 -> false)})"
1216+
12061217 | dictionary |
12071218 dictionary := Dictionary new .
12081219 self columnNames do: [ :each |
@@ -1559,6 +1570,12 @@ DataFrame >> numberOfColumns [
15591570DataFrame >> numberOfNils [
15601571 " Returns a dictionary that maps each column name to the number of nil values in that column"
15611572
1573+ " (#(#(nil 2) #(nil 4)) asDataFrame numberOfNils) >>> (Dictionary newFrom: {(1 -> 2).(2 -> 0)})"
1574+
1575+ " (#(#('nil' 'nil') #('nil' 'nil')) asDataFrame numberOfNils) >>> (Dictionary newFrom: {(1 -> 0).(2 -> 0)})"
1576+
1577+ " (#(#(nil 'nil') #('nil' 'nil')) asDataFrame numberOfNils) >>> (Dictionary newFrom: {(1 -> 1).(2 -> 0)})"
1578+
15621579 | dictionary count |
15631580 dictionary := Dictionary new .
15641581 self columnNames do: [ :each |
@@ -1974,53 +1991,76 @@ DataFrame >> replaceAllNilsWithZeros [
19741991{ #category : #replacing }
19751992DataFrame >> replaceNilsWith: anObject [
19761993 " Replaces all nil values of a data frame with the object anObject"
1977-
1994+
1995+ " (#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWith: 5) >>> (#(#(5 2) #(3 5)) asDataFrame)"
1996+
1997+ " (#(#('nil' 'nil') #('nil' 'nil')) asDataFrame replaceNilsWith: 5) >>> (#(#('nil' 'nil') #('nil' 'nil')) asDataFrame)"
1998+
1999+ " (#(#(nil 'nil') #('nil' 'nil')) asDataFrame replaceNilsWith: 5) >>> (#(#(5 'nil') #('nil' 'nil')) asDataFrame)"
2000+
19782001 1 to: self numberOfColumns do: [ :columnIndex |
19792002 1 to: self numberOfRows do: [ :rowIndex |
1980- (self at: rowIndex at: columnIndex) ifNil:
1981- [ self at: rowIndex at: columnIndex put: anObject ] ]
1982- ]
2003+ (self at: rowIndex at: columnIndex) ifNil: [
2004+ self at: rowIndex at: columnIndex put: anObject ] ] ]
19832005]
19842006
19852007{ #category : #replacing }
19862008DataFrame >> replaceNilsWithAverage [
19872009 " Replaces all nil values of a data frame with the average value of the column in which it is present"
19882010
2011+ " (#(#(nil 2) #(3 nil) #(5 6)) asDataFrame replaceNilsWithAverage) >>> (#(#(4 2) #(3 4) #(5 6)) asDataFrame)"
2012+
2013+ " (#(#(1 2) #(3 4)) asDataFrame replaceNilsWithAverage) >>> (#(#(1 2) #(3 4)) asDataFrame)"
2014+
19892015 | averageOfColumn |
19902016 1 to: self numberOfColumns do: [ :i |
1991- averageOfColumn := ((self columnAt: i) select: [ :ele | ele isNotNil ]) average.
2017+ averageOfColumn := ((self columnAt: i) select: [ :ele |
2018+ ele isNotNil ]) average.
19922019 1 to: self numberOfRows do: [ :j |
1993- (self at: j at: i) ifNil: [
1994- self at: j at: i put: averageOfColumn ] ].
1995- ]
2020+ (self at: j at: i) ifNil: [ self at: j at: i put: averageOfColumn ] ] ]
19962021]
19972022
19982023{ #category : #replacing }
19992024DataFrame >> replaceNilsWithMedian [
20002025 " Replaces all nil values of a data frame with the median of the column in which it is present"
20012026
2027+ " (#(#(nil 2) #(3 nil) #(5 6) #(7 8)) asDataFrame replaceNilsWithMedian) >>> (#(#(5 2) #(3 6) #(5 6) #(7 8)) asDataFrame)"
2028+
2029+ " (#(#(1 2) #(3 4)) asDataFrame replaceNilsWithMedian) >>> (#(#(1 2) #(3 4)) asDataFrame)"
2030+
20022031 | medianOfColumn |
20032032 1 to: self numberOfColumns do: [ :i |
2004- medianOfColumn := ((self columnAt: i) select: [ :ele | ele isNotNil ]) median.
2033+ medianOfColumn := ((self columnAt: i) select: [ :ele | ele isNotNil ])
2034+ median.
20052035 1 to: self numberOfRows do: [ :j |
2006- (self at: j at: i) ifNil: [
2007- self at: j at: i put: medianOfColumn ] ].
2008- ]
2036+ (self at: j at: i) ifNil: [ self at: j at: i put: medianOfColumn ] ] ]
20092037]
20102038
20112039{ #category : #replacing }
20122040DataFrame >> replaceNilsWithMode [
20132041 " Replaces all nil values of a data frame with the mode of the column in which it is present"
20142042
2043+ " (#(#(nil 2) #(3 nil) #(3 2)) asDataFrame replaceNilsWithMode) >>> (#(#(3 2) #(3 2) #(3 2)) asDataFrame)"
2044+
2045+ " (#(#(1 2) #(3 4)) asDataFrame replaceNilsWithMode) >>> (#(#(1 2) #(3 4)) asDataFrame)"
2046+
20152047 1 to: self numberOfColumns do: [ :i |
20162048 | modeOfColumn |
20172049 1 to: self numberOfRows do: [ :j |
2018- (self at: j at: i) ifNil: [ self at: j at: i put: (modeOfColumn ifNil: [ modeOfColumn := ((self columnAt: i) select: [ :ele | ele isNotNil ]) mode ]) ] ].
2050+ (self at: j at: i) ifNil: [
2051+ self at: j at: i put: (modeOfColumn ifNil: [
2052+ modeOfColumn := ((self columnAt: i) select: [ :ele |
2053+ ele isNotNil ]) mode ]) ] ].
20192054 modeOfColumn := nil ]
20202055]
20212056
20222057{ #category : #replacing }
20232058DataFrame >> replaceNilsWithNextRowValue [
2059+ " Replaces all nil values of a data frame with the next non-nil value of the column in which it is present. If there is no non-nil value after it, it is not replaced"
2060+
2061+ " (#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWithNextRowValue) >>> (#(#(3 2) #(3 nil)) asDataFrame)"
2062+
2063+ " (#(#(1 2) #(3 4)) asDataFrame replaceNilsWithNextRowValue) >>> (#(#(1 2) #(3 4)) asDataFrame)"
20242064
20252065 | value numberOfRows |
20262066 numberOfRows := self numberOfRows.
@@ -2033,24 +2073,27 @@ DataFrame >> replaceNilsWithNextRowValue [
20332073
20342074{ #category : #replacing }
20352075DataFrame >> replaceNilsWithPreviousRowValue [
2036- " Replaces all nil values of a data frame with the previous non-nil value of the column in which it is present"
2076+ " Replaces all nil values of a data frame with the previous non-nil value of the column in which it is present. If there is no non-nil value before it, it is not replaced"
2077+
2078+ " (#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWithPreviousRowValue) >>> (#(#(nil 2) #(3 2)) asDataFrame)"
20372079
2038- |value |
2080+ " (#(#(1 2) #(3 4)) asDataFrame replaceNilsWithPreviousRowValue) >>> (#(#(1 2) #(3 4)) asDataFrame)"
2081+
2082+ | value |
20392083 1 to: self numberOfColumns do: [ :i |
20402084 1 to: self numberOfRows do: [ :j |
2041-
2042- j > 1
2043- ifTrue: [
2044- (self at: j at: i) ifNil: [ self at: j at: i put: value ].
2045- ].
2046- value: = self at: j at: i.
2047- ].
2048- ]
2085+ j > 1 ifTrue: [
2086+ (self at: j at: i) ifNil: [ self at: j at: i put: value ] ].
2087+ value := self at: j at: i ] ]
20492088]
20502089
20512090{ #category : #replacing }
20522091DataFrame >> replaceNilsWithZero [
2053- " Replaces all nil values of a data frame with xero"
2092+ " Replaces all nil values of a data frame with zero"
2093+
2094+ " (#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWithZero) >>> (#(#(0 2) #(3 0)) asDataFrame)"
2095+
2096+ " (#(#(1 2) #(3 4)) asDataFrame replaceNilsWithZero) >>> (#(#(1 2) #(3 4)) asDataFrame)"
20542097
20552098 self replaceNilsWith: 0
20562099]
0 commit comments