Skip to content

Commit b4b4224

Browse files
Update README.md
1 parent 37b29c0 commit b4b4224

1 file changed

Lines changed: 126 additions & 91 deletions

File tree

README.md

Lines changed: 126 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,130 @@ rumble = RumbleSession.builder.getOrCreate();
6565
# Just to improve readability when invoking Spark methods
6666
spark = rumble
6767
68+
##############################
69+
###### Your first query ######
70+
##############################
71+
72+
# Even though RumbleDB uses Spark internally, it can be used without any knowledge of Spark.
73+
74+
# Executing a query is done with rumble.jsoniq() like so. A query returns a sequence
75+
# of items, here the sequence with just the integer item 2.
76+
items = rumble.jsoniq('1+1')
77+
78+
# A sequence of items can simply be converted to a list of Python values with json().
79+
# Since there is only one value in the sequence output by this query, we get a singleton list with the integer 2.
80+
python_list = items.json()
81+
82+
print(python_list)
83+
84+
############################################
85+
##### More complex, standalone queries #####
86+
############################################
87+
88+
# JSONiq is very powerful and expressive. You will find tutorials as well as a reference on JSONiq.org.
89+
90+
seq = rumble.jsoniq("""
91+
92+
let $stores :=
93+
[
94+
{ "store number" : 1, "state" : "MA" },
95+
{ "store number" : 2, "state" : "MA" },
96+
{ "store number" : 3, "state" : "CA" },
97+
{ "store number" : 4, "state" : "CA" }
98+
]
99+
let $sales := [
100+
{ "product" : "broiler", "store number" : 1, "quantity" : 20 },
101+
{ "product" : "toaster", "store number" : 2, "quantity" : 100 },
102+
{ "product" : "toaster", "store number" : 2, "quantity" : 50 },
103+
{ "product" : "toaster", "store number" : 3, "quantity" : 50 },
104+
{ "product" : "blender", "store number" : 3, "quantity" : 100 },
105+
{ "product" : "blender", "store number" : 3, "quantity" : 150 },
106+
{ "product" : "socks", "store number" : 1, "quantity" : 500 },
107+
{ "product" : "socks", "store number" : 2, "quantity" : 10 },
108+
{ "product" : "shirt", "store number" : 3, "quantity" : 10 }
109+
]
110+
let $join :=
111+
for $store in $stores[], $sale in $sales[]
112+
where $store."store number" = $sale."store number"
113+
return {
114+
"nb" : $store."store number",
115+
"state" : $store.state,
116+
"sold" : $sale.product
117+
}
118+
return [$join]
119+
""");
120+
121+
print(seq.json());
122+
123+
seq = rumble.jsoniq("""
124+
for $product in json-lines("http://rumbledb.org/samples/products-small.json", 10)
125+
group by $store-number := $product.store-number
126+
order by $store-number ascending
127+
return {
128+
"store" : $store-number,
129+
"products" : [ distinct-values($product.product) ]
130+
}
131+
""");
132+
print(seq.json());
133+
134+
############################################################
135+
###### Binding JSONiq variables to Python values ###########
136+
############################################################
137+
138+
# It is possible to bind a JSONiq variable to a list of native Python values
139+
# and then use it in a query.
140+
# In JSONiq, variables are bound to sequences of items, just like the results of JSONiq
141+
# queries are sequences of items.
142+
# A Python list will be seamlessly converted to a sequence of items by the library.
143+
# Currently we only support strs, ints, floats, booleans, None, lists, and dicts.
144+
# But if you need more (like date, bytes, etc) we can add them without any problem.
145+
# JSONiq has a rich type system.
146+
147+
rumble.bind('$c', [1,2,3,4, 5, 6])
148+
print(rumble.jsoniq("""
149+
for $v in $c
150+
let $parity := $v mod 2
151+
group by $parity
152+
return { switch($parity)
153+
case 0 return "even"
154+
case 1 return "odd"
155+
default return "?" : $v
156+
}
157+
""").json())
158+
159+
rumble.bind('$c', [[1,2,3],[4,5,6]])
160+
print(rumble.jsoniq("""
161+
for $i in $c
162+
return [
163+
for $j in $i
164+
return { "foo" : $j }
165+
]
166+
""").json())
167+
168+
rumble.bind('$c', [{"foo":[1,2,3]},{"foo":[4,{"bar":[1,False, None]},6]}])
169+
print(rumble.jsoniq('{ "results" : $c.foo[[2]] }').json())
170+
171+
# It is possible to bind only one value. Then it must be provided as a singleton list.
172+
# This is because in JSONiq, an item is the same as a sequence of one item.
173+
rumble.bind('$c', [42])
174+
print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
175+
176+
# For convenience and code readability, you can also use bindOne().
177+
rumble.bindOne('$c', 42)
178+
print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
179+
180+
181+
################################################
182+
##### Using Pyspark DataFrames with JSONiq #####
183+
################################################
184+
185+
# Power users can also interface our library with pyspark DataFrames.
186+
# JSONiq sequences of items can have billions of items, and our library supports this
187+
# out of the box: it can also run on clusters on AWS Elastic MapReduce for example.
188+
# But your laptop is just fine, too: it will spread the computations on your cores.
189+
# You can bind a DataFrame to a JSONiq variable. JSONiq will recognize this
190+
# DataFrame as a sequence of object items.
191+
68192
# Create a data frame also similar to Spark (but using the rumble object).
69193
data = [("Alice", 30), ("Bob", 25), ("Charlie", 35)];
70194
columns = ["Name", "Age"];
@@ -104,8 +228,8 @@ df.show();
104228
105229
# A DataFrame output by JSONiq can be reused as input to a Spark SQL query.
106230
# (Remember that rumble is a wrapper around a SparkSession object, so you can use rumble.sql() just like spark.sql())
107-
df.createTempView("input")
108-
df2 = spark.sql("SELECT * FROM input").toDF("name");
231+
df.createTempView("myview")
232+
df2 = spark.sql("SELECT * FROM myview").toDF("name");
109233
df2.show();
110234
111235
# A DataFrame output by Spark SQL can be reused as input to a JSONiq query.
@@ -173,95 +297,6 @@ seq.write().mode("overwrite").parquet("outputparquet");
173297
seq = rumble.jsoniq("1+1");
174298
seq.write().mode("overwrite").text("outputtext");
175299
176-
############################################
177-
##### More complex, standalone queries #####
178-
############################################
179-
180-
seq = rumble.jsoniq("""
181-
182-
let $stores :=
183-
[
184-
{ "store number" : 1, "state" : "MA" },
185-
{ "store number" : 2, "state" : "MA" },
186-
{ "store number" : 3, "state" : "CA" },
187-
{ "store number" : 4, "state" : "CA" }
188-
]
189-
let $sales := [
190-
{ "product" : "broiler", "store number" : 1, "quantity" : 20 },
191-
{ "product" : "toaster", "store number" : 2, "quantity" : 100 },
192-
{ "product" : "toaster", "store number" : 2, "quantity" : 50 },
193-
{ "product" : "toaster", "store number" : 3, "quantity" : 50 },
194-
{ "product" : "blender", "store number" : 3, "quantity" : 100 },
195-
{ "product" : "blender", "store number" : 3, "quantity" : 150 },
196-
{ "product" : "socks", "store number" : 1, "quantity" : 500 },
197-
{ "product" : "socks", "store number" : 2, "quantity" : 10 },
198-
{ "product" : "shirt", "store number" : 3, "quantity" : 10 }
199-
]
200-
let $join :=
201-
for $store in $stores[], $sale in $sales[]
202-
where $store."store number" = $sale."store number"
203-
return {
204-
"nb" : $store."store number",
205-
"state" : $store.state,
206-
"sold" : $sale.product
207-
}
208-
return [$join]
209-
""");
210-
211-
print(seq.json());
212-
213-
seq = rumble.jsoniq("""
214-
for $product in json-lines("http://rumbledb.org/samples/products-small.json", 10)
215-
group by $store-number := $product.store-number
216-
order by $store-number ascending
217-
return {
218-
"store" : $store-number,
219-
"products" : [ distinct-values($product.product) ]
220-
}
221-
""");
222-
print(seq.json());
223-
224-
############################################################
225-
###### Binding JSONiq variables to Python values ###########
226-
############################################################
227-
228-
# It is possible to bind a variable to a list of native Python values.
229-
# Remember that in JSONiq, variables are bound to sequences of items.
230-
# A Python list will be seamlessly converted to a sequence of items by the library.
231-
# Currently we only support strs, ints, floats, booleans, None, lists, and dicts.
232-
rumble.bind('$c', [1,2,3,4, 5, 6])
233-
print(rumble.jsoniq("""
234-
for $v in $c
235-
let $parity := $v mod 2
236-
group by $parity
237-
return { switch($parity)
238-
case 0 return "even"
239-
case 1 return "odd"
240-
default return "?" : $v
241-
}
242-
""").json())
243-
244-
rumble.bind('$c', [[1,2,3],[4,5,6]])
245-
print(rumble.jsoniq("""
246-
for $i in $c
247-
return [
248-
for $j in $i
249-
return { "foo" : $j }
250-
]
251-
""").json())
252-
253-
rumble.bind('$c', [{"foo":[1,2,3]},{"foo":[4,{"bar":[1,False, None]},6]}])
254-
print(rumble.jsoniq('{ "results" : $c.foo[[2]] }').json())
255-
256-
# It is possible to bind only one value. The it must be provided as a singleton list.
257-
# This is because in JSONiq, an item is the same a sequence of one item.
258-
rumble.bind('$c', [42])
259-
print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
260-
261-
# For convenience and code readability, you can also use bindOne().
262-
rumble.bindOne('$c', 42)
263-
print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
264-
265300
```
266301
# How to learn JSONiq, and more query examples
267302

0 commit comments

Comments
 (0)