Skip to content

Commit 306c123

Browse files
committed
fix bugs; added array format ; added nullobject ; added emitall
1 parent 6a2d572 commit 306c123

9 files changed

Lines changed: 184 additions & 31 deletions

File tree

.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,5 @@ node_js:
33
- "10"
44
- "8"
55
- "6"
6-
- "4"
76
after_success: 'npm run coveralls'
87
script: "npm run travis"

bin/options.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@
7676
"--alwaysSplitAtEOL":{
7777
"desc": "Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.",
7878
"type": "boolean"
79+
},
80+
"--nullObject":{
81+
"desc":"How to parse if a csv cell contains 'null'. Default false will keep 'null' as string. Change to true if a null object is needed.",
82+
"type":"boolean"
83+
},
84+
"--downstreamFormat":{
85+
"desc":"Option to set what JSON array format is needed by downstream. 'line' is also called ndjson format. This format will write lines of JSON (without square brackets and commas) to downstream. 'array' will write complete JSON array string to downstream (suitable for file writable stream etc). Default 'line'",
86+
"type":"string"
7987
}
8088
},
8189
"examples": [

readme.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,10 @@ Following parameters are supported:
246246
* **includeColumns**: This parameter instructs the parser to include only those columns as specified by the regular expression. Example: /(name|age)/ will parse and include columns whose header contains "name" or "age"
247247
* **ignoreColumns**: This parameter instructs the parser to ignore columns as specified by the regular expression. Example: /(name|age)/ will ignore columns whose header contains "name" or "age"
248248
* **colParser**: Allows override parsing logic for a specific column. It accepts a JSON object with fields like: `headName: <String | Function | ColParser>` . e.g. {field1:'number'} will use built-in number parser to convert value of the `field1` column to number. For more information See [details below](#column-parser)
249-
* **alwaysSplitAtEOL**: Always interpret each line (as defined by `eol`) as a row. This will prevent `eol` characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.
250-
249+
* **alwaysSplitAtEOL**: Always interpret each line (as defined by `eol`, e.g. `\n`) as a row. This will prevent `eol` characters from being used within a row (even inside a quoted field). Default is false. Change to true only if you are confident the data contains no inline line breaks (such as a line break inside a cell that holds multi-line text).
250+
* **nullObject**: How to parse if a csv cell contains "null". Default false will keep "null" as string. Change to true if a null object is needed.
251+
* **downstreamFormat**: Option to set what JSON array format is needed by downstream. "line" is also called ndjson format. This format will write lines of JSON (without square brackets and commas) to downstream. "array" will write complete JSON array string to downstream (suitable for file writable stream etc). Default "line"
252+
* **needEmitAll**: Parser will build the JSON result if `.then` is called (or `await` is used). If this is not desired, set this to false. Default is true.
251253
All parameters can be used in Command Line tool.
252254

253255
## Asynchronouse Result Process

src/Converter.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import { bufFromString } from "./util";
1616

1717

1818

19-
export class Converter extends Transform implements PromiseLike<Array<any>> {
19+
export class Converter extends Transform implements PromiseLike<any[]> {
2020
preRawData(onRawData: PreRawDataCallback): Converter {
2121
this.runtime.preRawDataHook = onRawData;
2222
return this;
@@ -115,11 +115,11 @@ export class Converter extends Transform implements PromiseLike<Array<any>> {
115115
// }
116116
this.once("error", (err: any) => {
117117
// console.log("BBB");
118-
119-
setTimeout(() => {
118+
//wait for next cycle to emit the errors.
119+
setImmediate(() => {
120120
this.result.processError(err);
121121
this.emit("done", err);
122-
}, 0);
122+
});
123123

124124
});
125125
this.once("done", () => {

src/Parameters.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,26 @@ export interface CSVParseParam {
6666
*/
6767
eol?: string;
6868
/**
69-
* Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). This ensures that misplaced quotes only break on row, and not all ensuing rows.
69+
* Always interpret each line (as defined by eol) as a row. This will prevent eol characters from being used within a row (even inside a quoted field). Default is false. Change to true only if you are confident the data contains no inline line breaks (such as a line break inside a cell that holds multi-line text).
7070
*/
7171
alwaysSplitAtEOL: boolean;
7272
/**
7373
* The format to be converted to. "json" (default) -- convert csv to json. "csv" -- convert csv to csv row array. "line" -- convert csv to csv line string
7474
*/
7575
output: "json" | "csv" | "line";
76+
77+
/**
78+
* Convert string "null" to null object in JSON outputs. Default is false.
79+
*/
80+
nullObject:boolean;
81+
/**
82+
* Define the format required by downstream (this parameter does not work if objectMode is on). `line` -- each JSON object is emitted on its own line, separated by a line break, like "json1\njson2". `array` -- downstream requires array format like "[json1,json2]". Default is line.
83+
*/
84+
downstreamFormat: "line" | "array";
85+
/**
86+
* Define whether .then(callback) returns all JSON data in its callback. Default is true. Change to false to save memory if subscribing json lines.
87+
*/
88+
needEmitAll: boolean;
7689
}
7790

7891
export type CellParser = (item: string, head: string, resultRow: any, row: string[], columnIndex: number) => any;
@@ -101,7 +114,10 @@ export function mergeParams(params?: Partial<CSVParseParam>): CSVParseParam {
101114
colParser: {},
102115
eol: undefined,
103116
alwaysSplitAtEOL: false,
104-
output: "json"
117+
output: "json",
118+
nullObject: false,
119+
downstreamFormat:"line",
120+
needEmitAll:true
105121
}
106122
if (!params) {
107123
params = {};

src/Result.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import {Result} from "./Result";
2+
import { Converter } from "./Converter";
3+
import P from "bluebird";
4+
import {readFileSync} from "fs";
5+
import path from "path";
6+
import assert from "assert";
7+
import { JSONResult } from "./lineToJson";
8+
const dataDir=path.join(__dirname,"../test/data/");
9+
10+
describe("Result",()=>{
11+
it ("should return need push downstream based on needEmitAll parameter",function (){
12+
const conv=new Converter();
13+
const res=new Result(conv);
14+
assert.equal(res["needEmitAll"],false);
15+
conv.then();
16+
assert.equal(res["needEmitAll"],true);
17+
conv.parseParam.needEmitAll=false;
18+
assert.equal(res["needEmitAll"],false);
19+
});
20+
21+
})
22+

src/Result.ts

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { Converter } from "./Converter";
22
import { ProcessLineResult } from "./Processor";
33
import P from "bluebird";
44
import CSVError from "./CSVError";
5-
5+
import { EOL } from "os";
66
export class Result {
77
private get needEmitLine(): boolean {
88
return !!this.converter.parseRuntime.subscribe && !!this.converter.parseRuntime.subscribe.onNext || this.needPushDownstream
@@ -15,12 +15,18 @@ export class Result {
1515
return this._needPushDownstream;
1616
}
1717
private get needEmitAll(): boolean {
18-
return !!this.converter.parseRuntime.then;
18+
return !!this.converter.parseRuntime.then && this.converter.parseParam.needEmitAll;
19+
// return !!this.converter.parseRuntime.then;
1920
}
2021
private finalResult: any[] = [];
2122
constructor(private converter: Converter) { }
2223
processResult(resultLines: ProcessLineResult[]): P<any> {
2324
const startPos = this.converter.parseRuntime.parsedLineNumber;
25+
if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") {
26+
if (startPos === 0) {
27+
pushDownstream(this.converter, "[" + EOL);
28+
}
29+
}
2430
// let prom: P<any>;
2531
return new P((resolve, reject) => {
2632
if (this.needEmitLine) {
@@ -60,14 +66,20 @@ export class Result {
6066
}
6167
}
6268
endProcess() {
63-
if (this.needEmitAll) {
69+
6470
if (this.converter.parseRuntime.then && this.converter.parseRuntime.then.onfulfilled) {
65-
this.converter.parseRuntime.then.onfulfilled(this.finalResult);
71+
if (this.needEmitAll) {
72+
this.converter.parseRuntime.then.onfulfilled(this.finalResult);
73+
}else{
74+
this.converter.parseRuntime.then.onfulfilled([]);
75+
}
6676
}
67-
}
6877
if (this.converter.parseRuntime.subscribe && this.converter.parseRuntime.subscribe.onCompleted) {
6978
this.converter.parseRuntime.subscribe.onCompleted();
7079
}
80+
if (this.needPushDownstream && this.converter.parseParam.downstreamFormat === "array") {
81+
pushDownstream(this.converter, "]" + EOL);
82+
}
7183
}
7284
}
7385

@@ -94,15 +106,15 @@ function processLineByLine(
94106
}, cb);
95107
} else {
96108
// processRecursive(lines, hook, conv, offset, needPushDownstream, cb, nextLine, false);
97-
if (needPushDownstream){
98-
pushDownstream(conv,nextLine);
109+
if (needPushDownstream) {
110+
pushDownstream(conv, nextLine);
99111
}
100-
while (offset<lines.length){
101-
const line=lines[offset];
112+
while (offset < lines.length) {
113+
const line = lines[offset];
102114
hook(line, conv.parseRuntime.parsedLineNumber + offset);
103115
offset++;
104-
if (needPushDownstream){
105-
pushDownstream(conv,line);
116+
if (needPushDownstream) {
117+
pushDownstream(conv, line);
106118
}
107119
}
108120
cb();
@@ -116,11 +128,11 @@ function processLineByLine(
116128
// }
117129
} else {
118130
if (needPushDownstream) {
119-
while (offset<lines.length) {
131+
while (offset < lines.length) {
120132
const line = lines[offset++];
121133
pushDownstream(conv, line);
122134
}
123-
135+
124136
}
125137
cb();
126138
}
@@ -144,7 +156,8 @@ function processRecursive(
144156
}
145157
function pushDownstream(conv: Converter, res: ProcessLineResult) {
146158
if (typeof res === "object" && !conv.options.objectMode) {
147-
conv.push(JSON.stringify(res) + "\n", "utf8");
159+
const data = JSON.stringify(res);
160+
conv.push(data + (conv.parseParam.downstreamFormat === "array" ? "," + EOL : EOL), "utf8");
148161
} else {
149162
conv.push(res);
150163
}

src/lineToJson.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,18 @@ function setPath(resultJson: any, head: string, value: any, conv: Converter,head
114114
if (conv.parseParam.flatKeys) {
115115
conv.parseRuntime.columnValueSetter[headIdx] = flatSetter;
116116
} else {
117+
117118
if (head.indexOf(".") > -1) {
118-
if (conv.parseParam.colParser[head] && (conv.parseParam.colParser[head] as ColumnParam).flat) {
119+
const headArr=head.split(".");
120+
let jsonHead=true;
121+
while(headArr.length>0){
122+
const headCom=headArr.shift();
123+
if (headCom!.length===0){
124+
jsonHead=false;
125+
break;
126+
}
127+
}
128+
if (!jsonHead || conv.parseParam.colParser[head] && (conv.parseParam.colParser[head] as ColumnParam).flat) {
119129
conv.parseRuntime.columnValueSetter[headIdx] = flatSetter;
120130
} else {
121131
conv.parseRuntime.columnValueSetter[headIdx] = jsonSetter;
@@ -125,6 +135,9 @@ function setPath(resultJson: any, head: string, value: any, conv: Converter,head
125135
}
126136
}
127137
}
138+
if (conv.parseParam.nullObject ===true && value ==="null"){
139+
value=null;
140+
}
128141
conv.parseRuntime.columnValueSetter[headIdx](resultJson, head, value);
129142
// flatSetter(resultJson, head, value);
130143

test/testCSVConverter3.ts

Lines changed: 87 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import csv from "../src";
2-
var assert = require("assert");
2+
import assert from "assert";
33
var fs = require("fs");
44
import { sandbox } from "sinon";
55
import CSVError from "../src/CSVError";
@@ -231,13 +231,93 @@ describe("testCSVConverter3", function () {
231231
it("should parse header with quotes correctly", function () {
232232
var testData = __dirname + "/data/csvWithUnclosedHeader";
233233
return csv({
234-
headers:["exam_date","sample_no","status","sample_type","patient_id","last_name","first_name","gender_of_patient","patient_birth_date","patient_note","patient_department","accession_number","sample_site","physician","operator","department","note","test_order_code","draw_time","approval_status","approval_time","report_layout","patient_account_number","none_1","errors_detected_during_measurement","age","error_code_01","weight","error_code_02","height","error_code_03","hcg_beta_p","error_code_04","troponin_i_p","error_code_05","ck_mb_p","error_code_06","d_dimer_p","error_code_07","hscrp_p","error_code_08","myoglobin_p","error_code_09","nt_probnp","error_code_10","crp","error_code_11","bnp","error_code_12","tnt","error_code_13","demo_p","error_code_14","pct","error_code_15"]
234+
headers: ["exam_date", "sample_no", "status", "sample_type", "patient_id", "last_name", "first_name", "gender_of_patient", "patient_birth_date", "patient_note", "patient_department", "accession_number", "sample_site", "physician", "operator", "department", "note", "test_order_code", "draw_time", "approval_status", "approval_time", "report_layout", "patient_account_number", "none_1", "errors_detected_during_measurement", "age", "error_code_01", "weight", "error_code_02", "height", "error_code_03", "hcg_beta_p", "error_code_04", "troponin_i_p", "error_code_05", "ck_mb_p", "error_code_06", "d_dimer_p", "error_code_07", "hscrp_p", "error_code_08", "myoglobin_p", "error_code_09", "nt_probnp", "error_code_10", "crp", "error_code_11", "bnp", "error_code_12", "tnt", "error_code_13", "demo_p", "error_code_14", "pct", "error_code_15"]
235235
})
236-
.fromFile(testData)
236+
.fromFile(testData)
237+
.then((d) => {
238+
assert.equal(d.length, 2);
239+
assert.equal(d[0].sample_no, "12669");
240+
})
241+
242+
});
243+
it ("should stream json string correctly",function(done){
244+
const data=`a,b,c
245+
1,2,3
246+
4,5,6`
247+
let hasLeftBracket=false;
248+
let hasRightBracket=false;
249+
csv({
250+
downstreamFormat:"array"
251+
})
252+
.fromString(data)
253+
.on("data",(d)=>{
254+
const str=d.toString();
255+
if (str[0]==="[" && str.length ===2){
256+
hasLeftBracket=true;
257+
}else if (str[0]==="]" && str.length===2){
258+
hasRightBracket=true;
259+
}else{
260+
assert.equal(str[str.length-2],",");
261+
}
262+
263+
})
264+
.on("end",()=>{
265+
assert.equal(hasLeftBracket,true);
266+
assert.equal(hasRightBracket,true);
267+
done();
268+
})
269+
})
270+
it ("should stream json line correctly",function(done){
271+
const data=`a,b,c
272+
1,2,3
273+
4,5,6`
274+
csv({
275+
downstreamFormat:"line"
276+
})
277+
.fromString(data)
278+
.on("data",(d)=>{
279+
const str=d.toString();
280+
281+
assert.notEqual(str[str.length-2],",");
282+
})
283+
.on("end",()=>{
284+
done();
285+
})
286+
})
287+
it ("should not send json if needEmitAll is false",async function(){
288+
const data=`a,b,c
289+
1,2,3
290+
4,5,6`
291+
return csv({
292+
needEmitAll:false
293+
})
294+
.fromString(data)
237295
.then((d)=>{
238-
assert.equal(d.length,2);
239-
assert.equal(d[0].sample_no,"12669");
296+
assert(d.length===0);
240297
})
241-
242-
});
298+
})
299+
it ("should convert null to null object",async function(){
300+
const data=`a,b,c
301+
null,2,3
302+
4,5,6`
303+
return csv({
304+
nullObject:true
305+
})
306+
.fromString(data)
307+
.then((d)=>{
308+
assert.equal(d[0].a,null)
309+
})
310+
})
311+
it ("should process period properly",async function(){
312+
const data=`a..,b,c
313+
1,2,3
314+
4,5,6`
315+
return csv({
316+
})
317+
.fromString(data)
318+
.then((d)=>{
319+
assert.equal(d[0]["a.."],1);
320+
assert.equal(d[1]["a.."],4);
321+
})
322+
})
243323
});

0 commit comments

Comments
 (0)