@@ -65,6 +65,130 @@ rumble = RumbleSession.builder.getOrCreate();
6565# Just to improve readability when invoking Spark methods
6666spark = rumble
6767
68+ ##############################
69+ ###### Your first query ######
70+ ##############################
71+
72+ # Even though RumbleDB uses Spark internally, it can be used without any knowledge of Spark.
73+
74+ # Executing a query is done with rumble.jsoniq() like so. A query returns a sequence
75+ # of items, here the sequence with just the integer item 2.
76+ items = rumble.jsoniq('1+1')
77+
78+ # A sequence of items can simply be converted to a list of Python values with json().
79+ # Since there is only one value in the sequence output by this query, we get a singleton list with the integer 2.
80+ python_list = items.json()
81+
82+ print(python_list)
83+
84+ ############################################
85+ ##### More complex, standalone queries #####
86+ ############################################
87+
88+ # JSONiq is very powerful and expressive. You will find tutorials as well as a reference on JSONiq.org.
89+
90+ seq = rumble.jsoniq("""
91+
92+ let $stores :=
93+ [
94+ { "store number" : 1, "state" : "MA" },
95+ { "store number" : 2, "state" : "MA" },
96+ { "store number" : 3, "state" : "CA" },
97+ { "store number" : 4, "state" : "CA" }
98+ ]
99+ let $sales := [
100+ { "product" : "broiler", "store number" : 1, "quantity" : 20 },
101+ { "product" : "toaster", "store number" : 2, "quantity" : 100 },
102+ { "product" : "toaster", "store number" : 2, "quantity" : 50 },
103+ { "product" : "toaster", "store number" : 3, "quantity" : 50 },
104+ { "product" : "blender", "store number" : 3, "quantity" : 100 },
105+ { "product" : "blender", "store number" : 3, "quantity" : 150 },
106+ { "product" : "socks", "store number" : 1, "quantity" : 500 },
107+ { "product" : "socks", "store number" : 2, "quantity" : 10 },
108+ { "product" : "shirt", "store number" : 3, "quantity" : 10 }
109+ ]
110+ let $join :=
111+ for $store in $stores[], $sale in $sales[]
112+ where $store."store number" = $sale."store number"
113+ return {
114+ "nb" : $store."store number",
115+ "state" : $store.state,
116+ "sold" : $sale.product
117+ }
118+ return [$join]
119+ """);
120+
121+ print(seq.json());
122+
123+ seq = rumble.jsoniq("""
124+ for $product in json-lines("http://rumbledb.org/samples/products-small.json", 10)
125+ group by $store-number := $product.store-number
126+ order by $store-number ascending
127+ return {
128+ "store" : $store-number,
129+ "products" : [ distinct-values($product.product) ]
130+ }
131+ """);
132+ print(seq.json());
133+
134+ ############################################################
135+ ###### Binding JSONiq variables to Python values ###########
136+ ############################################################
137+
138+ # It is possible to bind a JSONiq variable to a list of native Python values
139+ # and then use it in a query.
140+ # In JSONiq, variables are bound to sequences of items, just like the results of JSONiq
141+ # queries are sequences of items.
142+ # A Python list will be seamlessly converted to a sequence of items by the library.
143+ # Currently we only support strs, ints, floats, booleans, None, lists, and dicts.
144+ # If you need more (such as dates, bytes, etc.), we can add them without any problem.
145+ # JSONiq has a rich type system.
146+
147+ rumble.bind('$c', [1,2,3,4, 5, 6])
148+ print(rumble.jsoniq("""
149+ for $v in $c
150+ let $parity := $v mod 2
151+ group by $parity
152+ return { switch($parity)
153+ case 0 return "even"
154+ case 1 return "odd"
155+ default return "?" : $v
156+ }
157+ """).json())
158+
159+ rumble.bind('$c', [[1,2,3],[4,5,6]])
160+ print(rumble.jsoniq("""
161+ for $i in $c
162+ return [
163+ for $j in $i
164+ return { "foo" : $j }
165+ ]
166+ """).json())
167+
168+ rumble.bind('$c', [{"foo":[1,2,3]},{"foo":[4,{"bar":[1,False, None]},6]}])
169+ print(rumble.jsoniq('{ "results" : $c.foo[[2]] }').json())
170+
171+ # It is possible to bind only one value. Then it must be provided as a singleton list.
172+ # This is because in JSONiq, an item is the same as a sequence of one item.
173+ rumble.bind('$c', [42])
174+ print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
175+
176+ # For convenience and code readability, you can also use bindOne().
177+ rumble.bindOne('$c', 42)
178+ print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
179+
180+
181+ ################################################
182+ ##### Using Pyspark DataFrames with JSONiq #####
183+ ################################################
184+
185+ # Power users can also interface our library with PySpark DataFrames.
186+ # JSONiq sequences of items can have billions of items, and our library supports this
187+ # out of the box: it can also run on clusters on AWS Elastic MapReduce for example.
188+ # But your laptop is just fine, too: it will spread the computations across your cores.
189+ # You can bind a DataFrame to a JSONiq variable. JSONiq will recognize this
190+ # DataFrame as a sequence of object items.
191+
68192# Create a DataFrame just as you would with Spark (but using the rumble object).
69193data = [("Alice", 30), ("Bob", 25), ("Charlie", 35)];
70194columns = ["Name", "Age"];
@@ -104,8 +228,8 @@ df.show();
104228
105229# A DataFrame output by JSONiq can be reused as input to a Spark SQL query.
106230# (Remember that rumble is a wrapper around a SparkSession object, so you can use rumble.sql() just like spark.sql())
107- df.createTempView("input ")
108- df2 = spark.sql("SELECT * FROM input ").toDF("name");
231+ df.createTempView("myview ")
232+ df2 = spark.sql("SELECT * FROM myview ").toDF("name");
109233df2.show();
110234
111235# A DataFrame output by Spark SQL can be reused as input to a JSONiq query.
@@ -173,95 +297,6 @@ seq.write().mode("overwrite").parquet("outputparquet");
173297seq = rumble.jsoniq("1+1");
174298seq.write().mode("overwrite").text("outputtext");
175299
176- ############################################
177- ##### More complex, standalone queries #####
178- ############################################
179-
180- seq = rumble.jsoniq("""
181-
182- let $stores :=
183- [
184- { "store number" : 1, "state" : "MA" },
185- { "store number" : 2, "state" : "MA" },
186- { "store number" : 3, "state" : "CA" },
187- { "store number" : 4, "state" : "CA" }
188- ]
189- let $sales := [
190- { "product" : "broiler", "store number" : 1, "quantity" : 20 },
191- { "product" : "toaster", "store number" : 2, "quantity" : 100 },
192- { "product" : "toaster", "store number" : 2, "quantity" : 50 },
193- { "product" : "toaster", "store number" : 3, "quantity" : 50 },
194- { "product" : "blender", "store number" : 3, "quantity" : 100 },
195- { "product" : "blender", "store number" : 3, "quantity" : 150 },
196- { "product" : "socks", "store number" : 1, "quantity" : 500 },
197- { "product" : "socks", "store number" : 2, "quantity" : 10 },
198- { "product" : "shirt", "store number" : 3, "quantity" : 10 }
199- ]
200- let $join :=
201- for $store in $stores[], $sale in $sales[]
202- where $store."store number" = $sale."store number"
203- return {
204- "nb" : $store."store number",
205- "state" : $store.state,
206- "sold" : $sale.product
207- }
208- return [$join]
209- """);
210-
211- print(seq.json());
212-
213- seq = rumble.jsoniq("""
214- for $product in json-lines("http://rumbledb.org/samples/products-small.json", 10)
215- group by $store-number := $product.store-number
216- order by $store-number ascending
217- return {
218- "store" : $store-number,
219- "products" : [ distinct-values($product.product) ]
220- }
221- """);
222- print(seq.json());
223-
224- ############################################################
225- ###### Binding JSONiq variables to Python values ###########
226- ############################################################
227-
228- # It is possible to bind a variable to a list of native Python values.
229- # Remember that in JSONiq, variables are bound to sequences of items.
230- # A Python list will be seamlessly converted to a sequence of items by the library.
231- # Currently we only support strs, ints, floats, booleans, None, lists, and dicts.
232- rumble.bind('$c', [1,2,3,4, 5, 6])
233- print(rumble.jsoniq("""
234- for $v in $c
235- let $parity := $v mod 2
236- group by $parity
237- return { switch($parity)
238- case 0 return "even"
239- case 1 return "odd"
240- default return "?" : $v
241- }
242- """).json())
243-
244- rumble.bind('$c', [[1,2,3],[4,5,6]])
245- print(rumble.jsoniq("""
246- for $i in $c
247- return [
248- for $j in $i
249- return { "foo" : $j }
250- ]
251- """).json())
252-
253- rumble.bind('$c', [{"foo":[1,2,3]},{"foo":[4,{"bar":[1,False, None]},6]}])
254- print(rumble.jsoniq('{ "results" : $c.foo[[2]] }').json())
255-
256- # It is possible to bind only one value. The it must be provided as a singleton list.
257- # This is because in JSONiq, an item is the same a sequence of one item.
258- rumble.bind('$c', [42])
259- print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
260-
261- # For convenience and code readability, you can also use bindOne().
262- rumble.bindOne('$c', 42)
263- print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
264-
265300```
266301# How to learn JSONiq, and more query examples
267302
0 commit comments