Skip to content

Commit 9bbfeae

Browse files
authored
Merge pull request #277 from hernanmd/readCSV_skip_header
Add method to read CSV string with option to skip N lines from header
2 parents 8492ebf + a33b126 commit 9bbfeae

3 files changed

Lines changed: 72 additions & 0 deletions

File tree

src/DataFrame-IO-Tests/DataFrameCsvReaderTest.class.st

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,49 @@ DataFrameCsvReaderTest >> testReadCsvWithSeparatorTab [
9999
self assert: actualDataFrame equals: dataFrameWithoutRowNames
100100
]
101101

102+
{ #category : #tests }
103+
DataFrameCsvReaderTest >> testReadCsvWithSeparatorTabSkip1 [
"Prepend one free-text description line to the tab-separated fixture, read it through DataFrame class>>readFromCsv:withSeparator:skip: with skip: 1, and compare the result with a hand-built data frame of five rows."
"The control rows hold string literals. The method under test also sends calculateDataTypes to its result, so the comparison presumably relies on DataFrame equality semantics - TODO confirm."
104+
105+
| doubleHeaderString controlDataFrame dataFrameWithoutFirstLine |
106+
107+
doubleHeaderString := 'a description header
108+
' , TestCsvStrings tabCsvString.
109+
dataFrameWithoutFirstLine := DataFrame readFromCsv: doubleHeaderString withSeparator: Character tab skip: 1.
110+
111+
controlDataFrame := DataFrame
112+
withRows: #(
113+
#('1:10 am' '2.4' 'true' 'rain')
114+
#('1:30 am' '0.5' 'true' 'rain')
115+
#('1:50 am' '-1.2' 'true' 'snow')
116+
#('2:10 am' '-2.3' 'false' '-')
117+
#('2:30 am' '3.2' 'true' 'rain' ))
118+
columnNames: #(nil temperature precipitation type ).
119+
120+
"NOTE(review): the control value is passed as the first argument here, whereas the neighbouring test passes the actual data frame first - presumably the arguments are swapped; confirm against SUnit assert:equals:."
self assert: controlDataFrame equals: dataFrameWithoutFirstLine
121+
]
122+
123+
{ #category : #tests }
124+
DataFrameCsvReaderTest >> testReadCsvWithSeparatorTabSkipN [
"Prepend two free-text description lines to the tab-separated fixture, read it through DataFrame class>>readFromCsv:withSeparator:skip: with skip: 2, and compare the result with a hand-built data frame of five rows."
"The control rows hold string literals. The method under test also sends calculateDataTypes to its result, so the comparison presumably relies on DataFrame equality semantics - TODO confirm."
125+
126+
| doubleHeaderString controlDataFrame dataFrameWithoutTwoFirstLines |
127+
128+
doubleHeaderString := 'a description header
129+
another description header
130+
' , TestCsvStrings tabCsvString.
131+
dataFrameWithoutTwoFirstLines := DataFrame readFromCsv: doubleHeaderString withSeparator: Character tab skip: 2.
132+
133+
controlDataFrame := DataFrame
134+
withRows: #(
135+
#('1:10 am' '2.4' 'true' 'rain')
136+
#('1:30 am' '0.5' 'true' 'rain')
137+
#('1:50 am' '-1.2' 'true' 'snow')
138+
#('2:10 am' '-2.3' 'false' '-')
139+
#('2:30 am' '3.2' 'true' 'rain' ))
140+
columnNames: #(nil temperature precipitation type ).
141+
142+
"NOTE(review): the control value is passed as the first argument here, whereas the neighbouring test passes the actual data frame first - presumably the arguments are swapped; confirm against SUnit assert:equals:."
self assert: controlDataFrame equals: dataFrameWithoutTwoFirstLines
143+
]
144+
102145
{ #category : #tests }
103146
DataFrameCsvReaderTest >> testReadFromString [
104147

src/DataFrame-IO/DataFrame.extension.st

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,18 @@ DataFrame class >> readFromCsv: aFileReference withSeparator: aSeparator [
4545
^ self readFrom: aFileReference using: reader
4646
]
4747

48+
{ #category : #'*DataFrame-IO' }
49+
DataFrame class >> readFromCsv: aFileReference withSeparator: aCharacter skip: anInteger [
"Build a data frame by delegating to DataFrameCsvReader>>readFromString:withSeparator:skip:, then send calculateDataTypes to the result and answer it."
"aCharacter is the field separator and anInteger is the number of leading lines to skip; both are forwarded unchanged to the reader."
"NOTE(review): despite its name, aFileReference is handed to readFromString:, and the tests in this commit pass a String - presumably this argument is CSV text, not a file reference; confirm and consider renaming."
50+
51+
| df reader |
52+
53+
reader := DataFrameCsvReader new.
54+
df := reader readFromString: aFileReference withSeparator: aCharacter skip: anInteger.
55+
df calculateDataTypes.
56+
^ df
57+
58+
]
59+
4860
{ #category : #'*DataFrame-IO' }
4961
DataFrame class >> readFromCsvWithRowNames: aFileReference [
5062
| reader |

src/DataFrame-IO/DataFrameCsvReader.class.st

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,23 @@ DataFrameCsvReader >> readFromString: aString withSeparator: aSeparator [
148148
^ df
149149
]
150150

151+
{ #category : #reading }
152+
DataFrameCsvReader >> readFromString: aCSVString withSeparator: aSeparator skip: nRows [
153+
"Read a data frame from aCSVString, skipping nRows records at the top before the column-name record. A NeoCSVReader is opened on a read stream over aCSVString and configured with aSeparator. After skipping, the column names and then the rows are read through the receiver, the reader is closed, and the data frame built by createDataFrame is answered."
154+
155+
| reader df |
156+
157+
reader := NeoCSVReader on: aCSVString readStream.
158+
reader separator: aSeparator.
"Each skipHeader presumably consumes one record (one line) from the stream - confirm against NeoCSV."
159+
nRows timesRepeat: [ reader skipHeader ].
160+
161+
self readColumnNamesWith: reader.
162+
self readRowsWith: reader.
163+
reader close.
164+
df := self createDataFrame.
165+
^ df
166+
]
167+
151168
{ #category : #reading }
152169
DataFrameCsvReader >> readFromString: aString withSeparator: aSeparator withHeader: hasHeader [
153170
"Read data frame from aString"

0 commit comments

Comments
 (0)