Skip to content

Commit 96b8c58

Browse files
committed
add test for groupby
1 parent c18ea63 commit 96b8c58

1 file changed

Lines changed: 252 additions & 0 deletions

File tree

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
import { assert } from "chai";
2+
import { Console } from "console";
3+
import { DataFrame, Series } from '../../dist/danfojs-node/src';
4+
5+
6+
describe("groupby", function () {
  /**
   * Builds a fresh three-column numeric DataFrame with columns "A", "B", "C".
   * A new instance (and new row arrays, when the default is used) is created
   * on every call so that tests never share state.
   *
   * @param {number[][]} [rows] - Row-major values. Defaults to the four-row
   *   matrix used by most tests, in which every value of "A" is distinct, so
   *   each group holds exactly one row.
   * @returns {DataFrame} The constructed frame.
   */
  function numericFrame(rows = [ [ 1, 2, 3 ], [ 4, 5, 6 ], [ 20, 30, 40 ], [ 39, 89, 78 ] ]) {
    return new DataFrame(rows, { columns: [ "A", "B", "C" ] });
  }

  /**
   * Builds a fresh DataFrame from a column-oriented object: string label
   * columns "A" and "B", numeric columns "C" and "D", eight rows each.
   *
   * @returns {DataFrame} The constructed frame.
   */
  function labelledFrame() {
    return new DataFrame({
      'A': [ 'foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo' ],
      'B': [ 'one', 'one', 'two', 'three', 'two', 'two', 'one', 'three' ],
      'C': [ 1, 3, 2, 4, 5, 2, 6, 7 ],
      'D': [ 3, 2, 4, 1, 5, 6, 7, 8 ]
    });
  }

  it("Check group by One column data", function () {
    const grouped = numericFrame().groupby([ "A" ]);

    // colDict is expected to be keyed by the stringified value of "A".
    const expected = {
      '1': { A: [ 1 ], B: [ 2 ], C: [ 3 ] },
      '4': { A: [ 4 ], B: [ 5 ], C: [ 6 ] },
      '20': { A: [ 20 ], B: [ 30 ], C: [ 40 ] },
      '39': { A: [ 39 ], B: [ 89 ], C: [ 78 ] }
    };
    assert.deepEqual(grouped.colDict, expected);
  });

  it("Obtain the DataFrame of one of the group", function () {
    const grouped = numericFrame().groupby([ "A" ]);
    const expected = [ [ 1, 2, 3 ] ];
    assert.deepEqual(grouped.get_group([ 1 ]).values, expected);
  });

  it("Check group by Two column data", function () {
    const grouped = numericFrame().groupby([ "A", "B" ]);

    // With two grouping columns the keys are expected in "<A>-<B>" form.
    const expected = {
      '1-2': { A: [ 1 ], B: [ 2 ], C: [ 3 ] },
      '4-5': { A: [ 4 ], B: [ 5 ], C: [ 6 ] },
      '20-30': { A: [ 20 ], B: [ 30 ], C: [ 40 ] },
      '39-89': { A: [ 39 ], B: [ 89 ], C: [ 78 ] }
    };
    assert.deepEqual(grouped.colDict, expected);
  });

  it("Obtain the DataFrame of one of the group, grouped by two column", function () {
    const grouped = numericFrame().groupby([ "A", "B" ]);
    const expected = [ [ 1, 2, 3 ] ];

    assert.deepEqual(grouped.get_group([ 1, 2 ]).values, expected);
  });

  it("Count column in group", function () {
    const grouped = numericFrame().groupby([ "A", "B" ]);

    // Expected row layout: [ A, B, count(C) ].
    const expected = [
      [ 1, 2, 1 ],
      [ 4, 5, 1 ],
      [ 20, 30, 1 ],
      [ 39, 89, 1 ]
    ];

    assert.deepEqual(grouped.col([ "C" ]).count().values, expected);
  });

  it("sum column element in group", function () {
    const grouped = numericFrame().groupby([ "A", "B" ]);

    // Expected row layout: [ A, B, sum(C) ].
    const expected = [
      [ 1, 2, 3 ],
      [ 4, 5, 6 ],
      [ 20, 30, 40 ],
      [ 39, 89, 78 ]
    ];
    assert.deepEqual(grouped.col([ "C" ]).sum().values, expected);
  });

  it("sum column element group by two column", function () {
    // The first two rows share A = 1 but differ in B, so grouping on both
    // columns must still yield four separate groups.
    const frame = numericFrame([ [ 1, 2, 3 ], [ 1, 5, 6 ], [ 20, 30, 40 ], [ 39, 89, 78 ] ]);
    const grouped = frame.groupby([ "A", "B" ]);

    // Expected row layout: [ A, B, sum(B), sum(C) ].
    const expected = [
      [ 1, 2, 2, 3 ],
      [ 1, 5, 5, 6 ],
      [ 20, 30, 30, 40 ],
      [ 39, 89, 89, 78 ]
    ];

    assert.deepEqual(grouped.col([ "B", "C" ]).sum().values, expected);
  });

  it("Perform aggregate on column for groupby", function () {
    const grouped = numericFrame().groupby([ "A", "B" ]);

    // Expected row layout: [ A, B, mean(B), count(C) ].
    const expected = [
      [ 1, 2, 2, 1 ],
      [ 4, 5, 5, 1 ],
      [ 20, 30, 30, 1 ],
      [ 39, 89, 89, 1 ]
    ];

    assert.deepEqual(grouped.agg({ B: "mean", C: "count" }).values, expected);
  });

  it("cummulative sum for groupby", function () {
    const grouped = numericFrame().groupby([ "A", "B" ]);

    // Expected row layout: [ A, B, cumsum(B), cumsum(C) ].
    const expected = [
      [ 1, 2, 2, 3 ],
      [ 4, 5, 5, 6 ],
      [ 20, 30, 30, 40 ],
      [ 39, 89, 89, 78 ]
    ];
    assert.deepEqual(grouped.col([ "B", "C" ]).cumsum().values, expected);
  });

  it("cummulative max for groupby", function () {
    const grouped = numericFrame().groupby([ "A" ]);

    // Expected row layout: [ A, cummax(C) ].
    const expected = [ [ 1, 3 ], [ 4, 6 ], [ 20, 40 ], [ 39, 78 ] ];

    assert.deepEqual(grouped.col([ "C" ]).cummax().values, expected);
  });

  it("cummulative min for groupby", function () {
    const grouped = numericFrame().groupby([ "A" ]);

    // Expected row layout: [ A, cummin(C) ].
    const expected = [ [ 1, 3 ], [ 4, 6 ], [ 20, 40 ], [ 39, 78 ] ];

    assert.deepEqual(grouped.col([ "C" ]).cummin().values, expected);
  });

  it("cummulative prod for groupby", function () {
    const grouped = numericFrame().groupby([ "A" ]);

    // Expected row layout: [ A, cumprod(C) ].
    const expected = [ [ 1, 3 ], [ 4, 6 ], [ 20, 40 ], [ 39, 78 ] ];

    assert.deepEqual(grouped.col([ "C" ]).cumprod().values, expected);
  });

  it("mean for groupby", function () {
    const grouped = numericFrame().groupby([ "A", "B" ]);

    // Expected row layout: [ A, B, mean(B), mean(C) ].
    const expected = [
      [ 1, 2, 2, 3 ],
      [ 4, 5, 5, 6 ],
      [ 20, 30, 30, 40 ],
      [ 39, 89, 89, 78 ]
    ];

    assert.deepEqual(grouped.col([ "B", "C" ]).mean().values, expected);
  });

  it("should apply grouby operation to all column", function () {
    const grouped = labelledFrame().groupby([ "A", "B" ]);

    // Expected row layout: [ A, B, count(C), count(D) ].
    const expected = [
      [ 'foo', 'one', 2, 2 ],
      [ 'foo', 'two', 2, 2 ],
      [ 'foo', 'three', 1, 1 ],
      [ 'bar', 'one', 1, 1 ],
      [ 'bar', 'three', 1, 1 ],
      [ 'bar', 'two', 1, 1 ]
    ];

    assert.deepEqual(grouped.count().values, expected);
  });

  it("should apply function to specific column", function () {
    const grouped = labelledFrame().groupby([ "A" ]);

    // Expected row layout: [ D + 2, C + 2, A ], with all "foo" rows listed
    // before the "bar" rows.
    const expected = [
      [ 5, 3, 'foo' ],
      [ 6, 4, 'foo' ],
      [ 7, 7, 'foo' ],
      [ 9, 8, 'foo' ],
      [ 10, 9, 'foo' ],
      [ 4, 5, 'bar' ],
      [ 3, 6, 'bar' ],
      [ 8, 4, 'bar' ]
    ];
    assert.deepEqual(grouped.col([ 'D', 'C' ]).apply((x) => x.add(2)).values, expected);
  });

  it("should apply function to group column", function () {
    const grouped = labelledFrame().groupby([ "A", "B" ]);

    // Expected row layout: four per-column counts followed by the group's
    // A and B labels.
    const expected = [
      [ 2, 2, 2, 2, 'foo', 'one' ],
      [ 2, 2, 2, 2, 'foo', 'two' ],
      [ 1, 1, 1, 1, 'foo', 'three' ],
      [ 1, 1, 1, 1, 'bar', 'one' ],
      [ 1, 1, 1, 1, 'bar', 'three' ],
      [ 1, 1, 1, 1, 'bar', 'two' ]
    ];
    assert.deepEqual(grouped.apply((x) => x.count({ axis: 0 })).values, expected);
  });
});

0 commit comments

Comments
 (0)