describe("groupby", function () {
  // ---------------------------------------------------------------------
  // Fixture factories. Each call returns brand-new arrays/objects so that
  // no test can observe data touched by another test.
  // ---------------------------------------------------------------------

  /** Rows of the 4x3 numeric fixture in which every value of column A is distinct. */
  function uniqueRows() {
    return [[1, 2, 3], [4, 5, 6], [20, 30, 40], [39, 89, 78]];
  }

  /** Wraps the given rows in a DataFrame whose columns are labelled A, B and C. */
  function numericFrame(rows) {
    return new dfd.DataFrame(rows, { columns: ["A", "B", "C"] });
  }

  /**
   * Builds the eight-row foo/bar DataFrame (columns A, B, C, D).
   * Only column B differs between the tests, so it is supplied by the caller.
   */
  function labelledFrame(columnB) {
    return new dfd.DataFrame({
      A: ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
      B: columnB,
      C: [1, 3, 2, 4, 5, 2, 6, 7],
      D: [3, 2, 4, 1, 5, 6, 7, 8]
    });
  }

  /** Column B variant where the third entry is "two" (two rows fall in foo/one). */
  function labelsWithTwo() {
    return ["one", "one", "two", "three", "two", "two", "one", "three"];
  }

  /** Column B variant where the third entry is "one" (three rows fall in foo/one). */
  function labelsWithOne() {
    return ["one", "one", "one", "three", "two", "two", "one", "three"];
  }

  // ---------------------------------------------------------------------
  // Grouping structure on the numeric fixture
  // ---------------------------------------------------------------------

  it("Check group by One column data", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A"]);

    // One single-row group per distinct value of A, keyed by that value.
    const expected = {
      "1": { A: [1], B: [2], C: [3] },
      "4": { A: [4], B: [5], C: [6] },
      "20": { A: [20], B: [30], C: [40] },
      "39": { A: [39], B: [89], C: [78] }
    };

    assert.deepEqual(grouped.colDict, expected);
  });

  it("Obtain the DataFrame of one of the group", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A"]);
    const selected = grouped.get_group([1]);

    assert.deepEqual(selected.values, [[1, 2, 3]]);
  });

  it("Check group by Two column data", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A", "B"]);

    // With two grouping columns the expected keys join both values with "-".
    const expected = {
      "1-2": { A: [1], B: [2], C: [3] },
      "4-5": { A: [4], B: [5], C: [6] },
      "20-30": { A: [20], B: [30], C: [40] },
      "39-89": { A: [39], B: [89], C: [78] }
    };

    assert.deepEqual(grouped.colDict, expected);
  });

  it("Obtain the DataFrame of one of the group, grouped by two column", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A", "B"]);
    const selected = grouped.get_group([1, 2]);

    assert.deepEqual(selected.values, [[1, 2, 3]]);
  });

  // ---------------------------------------------------------------------
  // Aggregations on the numeric fixture
  // ---------------------------------------------------------------------

  it("Count column in group", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A", "B"]);
    const counted = grouped.col(["C"]).count();

    const expected = [
      [1, 2, 1],
      [4, 5, 1],
      [20, 30, 1],
      [39, 89, 1]
    ];

    assert.deepEqual(counted.values, expected);
  });

  it("sum column element in group", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A", "B"]);
    const summed = grouped.col(["C"]).sum();

    const expected = [
      [1, 2, 3],
      [4, 5, 6],
      [20, 30, 40],
      [39, 89, 78]
    ];

    assert.deepEqual(summed.values, expected);
  });

  it("sum column element group by two column", function () {
    // This fixture repeats the value 1 in column A for the first two rows.
    const rows = [[1, 2, 3], [1, 5, 6], [20, 30, 40], [39, 89, 78]];
    const grouped = numericFrame(rows).groupby(["A", "B"]);
    const summed = grouped.col(["B", "C"]).sum();

    const expected = [
      [1, 2, 2, 3],
      [1, 5, 5, 6],
      [20, 30, 30, 40],
      [39, 89, 89, 78]
    ];

    assert.deepEqual(summed.values, expected);
  });

  it("Perform aggregate on column for groupby", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A", "B"]);
    const aggregated = grouped.agg({ B: "mean", C: "count" });

    const expected = [
      [1, 2, 2, 1],
      [4, 5, 5, 1],
      [20, 30, 30, 1],
      [39, 89, 89, 1]
    ];

    assert.deepEqual(aggregated.values, expected);
  });

  // ---------------------------------------------------------------------
  // Cumulative operations on the numeric fixture
  // ---------------------------------------------------------------------

  it("cummulative sum for groupby", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A", "B"]);
    const running = grouped.col(["B", "C"]).cumsum();

    const expected = [
      [1, 2, 2, 3],
      [4, 5, 5, 6],
      [20, 30, 30, 40],
      [39, 89, 89, 78]
    ];

    assert.deepEqual(running.values, expected);
  });

  it("cummulative max for groupby", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A"]);
    const running = grouped.col(["C"]).cummax();

    const expected = [[1, 3], [4, 6], [20, 40], [39, 78]];

    assert.deepEqual(running.values, expected);
  });

  it("cummulative min for groupby", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A"]);
    const running = grouped.col(["C"]).cummin();

    const expected = [[1, 3], [4, 6], [20, 40], [39, 78]];

    assert.deepEqual(running.values, expected);
  });

  it("cummulative prod for groupby", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A"]);
    const running = grouped.col(["C"]).cumprod();

    const expected = [[1, 3], [4, 6], [20, 40], [39, 78]];

    assert.deepEqual(running.values, expected);
  });

  it("mean for groupby", function () {
    const grouped = numericFrame(uniqueRows()).groupby(["A", "B"]);
    const averaged = grouped.col(["B", "C"]).mean();

    const expected = [
      [1, 2, 2, 3],
      [4, 5, 5, 6],
      [20, 30, 30, 40],
      [39, 89, 89, 78]
    ];

    assert.deepEqual(averaged.values, expected);
  });

  // ---------------------------------------------------------------------
  // foo/bar fixture, column B with "two" in third position
  // ---------------------------------------------------------------------

  it("should apply grouby operation to all column", function () {
    const grouped = labelledFrame(labelsWithTwo()).groupby(["A", "B"]);

    const expected = [
      ["foo", "one", 2, 2],
      ["foo", "two", 2, 2],
      ["foo", "three", 1, 1],
      ["bar", "one", 1, 1],
      ["bar", "three", 1, 1],
      ["bar", "two", 1, 1]
    ];

    assert.deepEqual(grouped.count().values, expected);
  });

  it("should apply function to specific column", function () {
    const grouped = labelledFrame(labelsWithTwo()).groupby(["A"]);
    const shifted = grouped.col(["D", "C"]).apply((part) => part.add(2));

    // Each row is [A, D + 2, C + 2]: the selected columns appear in the
    // order they were requested.
    const expected = [
      ["foo", 5, 3],
      ["foo", 6, 4],
      ["foo", 7, 7],
      ["foo", 9, 8],
      ["foo", 10, 9],
      ["bar", 4, 5],
      ["bar", 3, 6],
      ["bar", 8, 4]
    ];

    assert.deepEqual(shifted.values, expected);
  });

  it("should apply function to group column", function () {
    const grouped = labelledFrame(labelsWithTwo()).groupby(["A", "B"]);
    const counted = grouped.apply((part) => part.count({ axis: 0 }));

    const expected = [
      ["foo", "one", 2, 2, 2, 2],
      ["foo", "two", 2, 2, 2, 2],
      ["foo", "three", 1, 1, 1, 1],
      ["bar", "one", 1, 1, 1, 1],
      ["bar", "three", 1, 1, 1, 1],
      ["bar", "two", 1, 1, 1, 1]
    ];

    assert.deepEqual(counted.values, expected);
  });

  it("should obtain the number of groups", function () {
    const grouped = labelledFrame(labelsWithTwo()).groupby(["A", "B"]);

    assert.equal(grouped.ngroups, 6);
  });

  it("should obtain all groups", function () {
    const grouped = labelledFrame(labelsWithTwo()).groupby(["A", "B"]);

    const expected = {
      "foo-one": {
        A: ["foo", "foo"],
        B: ["one", "one"],
        C: [1, 6],
        D: [3, 7]
      },
      "bar-one": { A: ["bar"], B: ["one"], C: [3], D: [2] },
      "foo-two": {
        A: ["foo", "foo"],
        B: ["two", "two"],
        C: [2, 5],
        D: [4, 5]
      },
      "bar-three": { A: ["bar"], B: ["three"], C: [4], D: [1] },
      "bar-two": { A: ["bar"], B: ["two"], C: [2], D: [6] },
      "foo-three": { A: ["foo"], B: ["three"], C: [7], D: [8] }
    };

    assert.deepEqual(grouped.groups, expected);
  });

  // ---------------------------------------------------------------------
  // foo/bar fixture, column B with "one" in third position
  // ---------------------------------------------------------------------

  it("should obtain the first row of all groups", function () {
    const grouped = labelledFrame(labelsWithOne()).groupby(["A", "B"]);

    const expected = [
      ["foo", "one", "foo", "one", 1, 3],
      ["foo", "two", "foo", "two", 5, 5],
      ["foo", "three", "foo", "three", 7, 8],
      ["bar", "one", "bar", "one", 3, 2],
      ["bar", "three", "bar", "three", 4, 1],
      ["bar", "two", "bar", "two", 2, 6]
    ];

    assert.deepEqual(grouped.first().values, expected);
  });

  it("should obtain the last row of all groups", function () {
    const grouped = labelledFrame(labelsWithOne()).groupby(["A", "B"]);

    const expected = [
      ["foo", "one", "foo", "one", 6, 7],
      ["foo", "two", "foo", "two", 5, 5],
      ["foo", "three", "foo", "three", 7, 8],
      ["bar", "one", "bar", "one", 3, 2],
      ["bar", "three", "bar", "three", 4, 1],
      ["bar", "two", "bar", "two", 2, 6]
    ];

    assert.deepEqual(grouped.last().values, expected);
  });

  it("should obtain the number of rows of each groups", function () {
    const grouped = labelledFrame(labelsWithOne()).groupby(["A", "B"]);

    const expected = [
      ["foo", "one", 3],
      ["foo", "two", 1],
      ["foo", "three", 1],
      ["bar", "one", 1],
      ["bar", "three", 1],
      ["bar", "two", 1]
    ];

    assert.deepEqual(grouped.size().values, expected);
  });
});
0 commit comments