1+ import { assert } from "chai" ;
2+ import { Console } from "console" ;
3+ import { DataFrame , Series } from '../../dist/danfojs-node/src' ;
4+
5+
/**
 * Mocha suite for `DataFrame.groupby` and the grouped object it returns.
 *
 * Covers the `colDict` grouping table, `get_group`, column selection through
 * `col`, the aggregations `count` / `sum` / `mean` / `agg`, the cumulative
 * operations `cumsum` / `cummax` / `cummin` / `cumprod`, and `apply`.
 */
describe("groupby", function () {
  /**
   * Default numeric rows used by most tests.
   * A new nested array is returned on each call so tests never share input data.
   *
   * @returns {number[][]} four rows of three numbers each
   */
  function defaultRows() {
    return [[1, 2, 3], [4, 5, 6], [20, 30, 40], [39, 89, 78]];
  }

  /**
   * Builds a DataFrame with columns "A", "B" and "C".
   *
   * @param {number[][]} [rows] - row data; falls back to `defaultRows()`
   * @returns {DataFrame} freshly constructed frame
   */
  function numericFrame(rows = defaultRows()) {
    const columnNames = ["A", "B", "C"];
    return new DataFrame(rows, { columns: columnNames });
  }

  /**
   * Builds the frame with string columns A/B and numeric columns C/D that the
   * `count` and `apply` tests at the end of the suite operate on.
   *
   * @returns {DataFrame} freshly constructed frame
   */
  function labelledFrame() {
    const columnData = {
      'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
      'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
      'C': [1, 3, 2, 4, 5, 2, 6, 7],
      'D': [3, 2, 4, 1, 5, 6, 7, 8]
    };
    return new DataFrame(columnData);
  }

  it("Check group by One column data", function () {
    const grouped = numericFrame().groupby(["A"]);

    // One entry per distinct value of A, each holding that group's column arrays.
    const expectedGroups = {
      '1': { A: [1], B: [2], C: [3] },
      '4': { A: [4], B: [5], C: [6] },
      '20': { A: [20], B: [30], C: [40] },
      '39': { A: [39], B: [89], C: [78] }
    };
    assert.deepEqual(grouped.colDict, expectedGroups);
  });

  it("Obtain the DataFrame of one of the group", function () {
    const grouped = numericFrame().groupby(["A"]);
    const expectedRows = [[1, 2, 3]];

    assert.deepEqual(grouped.get_group([1]).values, expectedRows);
  });

  it("Check group by Two column data", function () {
    const grouped = numericFrame().groupby(["A", "B"]);

    // The expected keys are the A and B values joined with "-".
    const expectedGroups = {
      '1-2': { A: [1], B: [2], C: [3] },
      '4-5': { A: [4], B: [5], C: [6] },
      '20-30': { A: [20], B: [30], C: [40] },
      '39-89': { A: [39], B: [89], C: [78] }
    };
    assert.deepEqual(grouped.colDict, expectedGroups);
  });

  it("Obtain the DataFrame of one of the group, grouped by two column", function () {
    const grouped = numericFrame().groupby(["A", "B"]);
    const expectedRows = [[1, 2, 3]];

    assert.deepEqual(grouped.get_group([1, 2]).values, expectedRows);
  });

  it("Count column in group", function () {
    const grouped = numericFrame().groupby(["A", "B"]);

    // Expected rows: A key, B key, then the count of C (every group has one row).
    const expectedRows = [
      [1, 2, 1],
      [4, 5, 1],
      [20, 30, 1],
      [39, 89, 1]
    ];
    assert.deepEqual(grouped.col(["C"]).count().values, expectedRows);
  });

  it("sum column element in group", function () {
    const grouped = numericFrame().groupby(["A", "B"]);

    // Expected rows: A key, B key, then the sum of C within the group.
    const expectedRows = [
      [1, 2, 3],
      [4, 5, 6],
      [20, 30, 40],
      [39, 89, 78]
    ];
    assert.deepEqual(grouped.col(["C"]).sum().values, expectedRows);
  });

  it("sum column element group by two column", function () {
    // The first two rows share A = 1 but differ in B, so they stay separate groups.
    const rows = [[1, 2, 3], [1, 5, 6], [20, 30, 40], [39, 89, 78]];
    const grouped = numericFrame(rows).groupby(["A", "B"]);

    const expectedRows = [
      [1, 2, 2, 3],
      [1, 5, 5, 6],
      [20, 30, 30, 40],
      [39, 89, 89, 78]
    ];
    assert.deepEqual(grouped.col(["B", "C"]).sum().values, expectedRows);
  });

  it("Perform aggregate on column for groupby", function () {
    const grouped = numericFrame().groupby(["A", "B"]);

    // Expected rows: A key, B key, mean of B, count of C.
    const expectedRows = [
      [1, 2, 2, 1],
      [4, 5, 5, 1],
      [20, 30, 30, 1],
      [39, 89, 89, 1]
    ];
    const aggregated = grouped.agg({ B: "mean", C: "count" });
    assert.deepEqual(aggregated.values, expectedRows);
  });

  it("cummulative sum for groupby", function () {
    const grouped = numericFrame().groupby(["A", "B"]);

    const expectedRows = [
      [1, 2, 2, 3],
      [4, 5, 5, 6],
      [20, 30, 30, 40],
      [39, 89, 89, 78]
    ];
    assert.deepEqual(grouped.col(["B", "C"]).cumsum().values, expectedRows);
  });

  it("cummulative max for groupby", function () {
    const grouped = numericFrame().groupby(["A"]);
    const expectedRows = [[1, 3], [4, 6], [20, 40], [39, 78]];

    assert.deepEqual(grouped.col(["C"]).cummax().values, expectedRows);
  });

  it("cummulative min for groupby", function () {
    const grouped = numericFrame().groupby(["A"]);
    const expectedRows = [[1, 3], [4, 6], [20, 40], [39, 78]];

    assert.deepEqual(grouped.col(["C"]).cummin().values, expectedRows);
  });

  it("cummulative prod for groupby", function () {
    const grouped = numericFrame().groupby(["A"]);
    const expectedRows = [[1, 3], [4, 6], [20, 40], [39, 78]];

    assert.deepEqual(grouped.col(["C"]).cumprod().values, expectedRows);
  });

  it("mean for groupby", function () {
    const grouped = numericFrame().groupby(["A", "B"]);

    const expectedRows = [
      [1, 2, 2, 3],
      [4, 5, 5, 6],
      [20, 30, 30, 40],
      [39, 89, 89, 78]
    ];
    assert.deepEqual(grouped.col(["B", "C"]).mean().values, expectedRows);
  });

  it("should apply grouby operation to all column", function () {
    const grouped = labelledFrame().groupby(["A", "B"]);

    // Expected rows: A key, B key, then the per-group counts of C and D.
    const expectedRows = [
      ['foo', 'one', 2, 2],
      ['foo', 'two', 2, 2],
      ['foo', 'three', 1, 1],
      ['bar', 'one', 1, 1],
      ['bar', 'three', 1, 1],
      ['bar', 'two', 1, 1]
    ];
    assert.deepEqual(grouped.count().values, expectedRows);
  });

  it("should apply function to specific column", function () {
    const grouped = labelledFrame().groupby(["A"]);

    // Expected rows: D + 2, C + 2, then the A key of the group the row came from.
    const expectedRows = [
      [5, 3, 'foo'],
      [6, 4, 'foo'],
      [7, 7, 'foo'],
      [9, 8, 'foo'],
      [10, 9, 'foo'],
      [4, 5, 'bar'],
      [3, 6, 'bar'],
      [8, 4, 'bar']
    ];
    const addTwo = (x) => x.add(2);
    assert.deepEqual(grouped.col(['D', 'C']).apply(addTwo).values, expectedRows);
  });

  it("should apply function to group column", function () {
    const grouped = labelledFrame().groupby(["A", "B"]);

    // Expected rows: four count values followed by the A and B keys of the group.
    const expectedRows = [
      [2, 2, 2, 2, 'foo', 'one'],
      [2, 2, 2, 2, 'foo', 'two'],
      [1, 1, 1, 1, 'foo', 'three'],
      [1, 1, 1, 1, 'bar', 'one'],
      [1, 1, 1, 1, 'bar', 'three'],
      [1, 1, 1, 1, 'bar', 'two']
    ];
    const countRows = (x) => x.count({ axis: 0 });
    assert.deepEqual(grouped.apply(countRows).values, expectedRows);
  });
});
0 commit comments