Skip to content

Commit 83b1e71

Browse files
author
Ghislain Fourny
committed
Add support for binding JSONiq variables to lists of Python/JSON values.
1 parent 289751e commit 83b1e71

4 files changed

Lines changed: 102 additions & 7 deletions

File tree

README.md

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,18 @@ from jsoniq import RumbleSession
6060
# The syntax to start a session is similar to that of Spark.
6161
# A RumbleSession is a SparkSession that additionally knows about RumbleDB.
6262
# All attributes and methods of SparkSession are also available on RumbleSession.
63-
rumble = RumbleSession.builder.appName("PyRumbleExample").getOrCreate();
63+
rumble = RumbleSession.builder.getOrCreate();
64+
65+
# Just to improve readability when invoking Spark methods
66+
spark = rumble
6467
6568
# Create a data frame also similar to Spark (but using the rumble object).
6669
data = [("Alice", 30), ("Bob", 25), ("Charlie", 35)];
6770
columns = ["Name", "Age"];
68-
df = rumble.createDataFrame(data, columns);
71+
df = spark.createDataFrame(data, columns);
6972
7073
# This is how to bind a JSONiq variable to a dataframe. You can bind as many variables as you want.
71-
rumble.bindDataFrameAsVariable('$a', df);
74+
rumble.bind('$a', df);
7275
7376
# This is how to run a query. This is similar to spark.sql().
7477
# Since variable $a was bound to a DataFrame, it is automatically declared as an external variable
@@ -102,17 +105,17 @@ df.show();
102105
# A DataFrame output by JSONiq can be reused as input to a Spark SQL query.
103106
# (Remember that rumble is a wrapper around a SparkSession object, so you can use rumble.sql() just like spark.sql())
104107
df.createTempView("input")
105-
df2 = rumble.sql("SELECT * FROM input").toDF("name");
108+
df2 = spark.sql("SELECT * FROM input").toDF("name");
106109
df2.show();
107110
108111
# A DataFrame output by Spark SQL can be reused as input to a JSONiq query.
109-
rumble.bindDataFrameAsVariable('$b', df2);
112+
rumble.bind('$b', df2);
110113
seq2 = rumble.jsoniq("for $i in 1 to 5 return $b");
111114
df3 = seq2.df();
112115
df3.show();
113116
114117
# And a DataFrame output by JSONiq can be reused as input to another JSONiq query.
115-
rumble.bindDataFrameAsVariable('$b', df3);
118+
rumble.bind('$b', df3);
116119
seq3 = rumble.jsoniq("$b[position() lt 3]");
117120
df4 = seq3.df();
118121
df4.show();
@@ -218,13 +221,59 @@ return {
218221
""");
219222
print(seq.json());
220223
224+
############################################################
225+
###### Binding JSONiq variables to Python values ###########
226+
############################################################
227+
228+
# It is possible to bind a variable to a list of native Python values.
229+
# Remember that in JSONiq, variables are bound to sequences of items.
230+
# A Python list will be seamlessly converted to a sequence of items by the library.
231+
# Currently we only support strs, ints, floats, booleans, None, lists, and dicts.
232+
rumble.bind('$c', [1,2,3,4, 5, 6])
233+
print(rumble.jsoniq("""
234+
for $v in $c
235+
let $parity := $v mod 2
236+
group by $parity
237+
return { switch($parity)
238+
case 0 return "even"
239+
case 1 return "odd"
240+
default return "?" : $v
241+
}
242+
""").json())
243+
244+
rumble.bind('$c', [[1,2,3],[4,5,6]])
245+
print(rumble.jsoniq("""
246+
for $i in $c
247+
return [
248+
for $j in $i
249+
return { "foo" : $j }
250+
]
251+
""").json())
252+
253+
rumble.bind('$c', [{"foo":[1,2,3]},{"foo":[4,{"bar":[1,False, None]},6]}])
254+
print(rumble.jsoniq('{ "results" : $c.foo[[2]] }').json())
255+
256+
# It is possible to bind only one value. Then it must be provided as a singleton list.
257+
# This is because in JSONiq, an item is the same as a sequence of one item.
258+
rumble.bind('$c', [42])
259+
print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
260+
261+
# For convenience and code readability, you can also use bindOne().
262+
rumble.bindOne('$c', 42)
263+
print(rumble.jsoniq('for $i in 1 to $c return $i*$i').json())
264+
221265
```
222266
# How to learn JSONiq, and more query examples
223267

224268
Even more queries can be found [here](https://colab.research.google.com/github/RumbleDB/rumble/blob/master/RumbleSandbox.ipynb) and you can look at the [JSONiq documentation](https://www.jsoniq.org) and tutorials.
225269

226270
# Last updates
227271

272+
## Version 0.1.0 alpha 12
273+
- Allow binding JSONiq variables to Python values (mapping Python lists to sequences of items). This makes it possible to manipulate Python values directly with JSONiq, even without any knowledge of Spark.
274+
- Rename bindDataFrameAsVariable() to bind(), which can be used both with DataFrames and Python lists.
275+
- Add bindOne() for binding a single value to a JSONiq variable.
276+
228277
## Version 0.1.0 alpha 11
229278
- Fix an issue when feeding a DataFrame output by rumble.jsoniq() back to a new JSONiq query (as a variable).
230279

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "jsoniq"
7-
version = "0.1.0a11"
7+
version = "0.1.0a12"
88
description = "Python edition of RumbleDB, a JSONiq engine"
99
requires-python = ">=3.11"
1010
dependencies = [
@@ -21,6 +21,8 @@ classifiers = [
2121
"Development Status :: 3 - Alpha",
2222
"Intended Audience :: Developers",
2323
"Programming Language :: Python :: 3.11",
24+
"Programming Language :: Python :: 3.12",
25+
"Programming Language :: Python :: 3.13",
2426
]
2527

2628
[tool.setuptools.packages.find]
1.48 MB
Binary file not shown.

src/jsoniq/session.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,50 @@ def __getattr__(self, name):
8383

8484
_builder = Builder()
8585

86+
def convert(self, value):
    """Recursively turn a native Python value into a RumbleDB item.

    Supported inputs are bool, str, int, float, None, list and dict; lists
    and dicts are converted element by element through recursive calls.

    Args:
        value: The Python value to convert.

    Returns:
        The object produced by the matching ``ItemFactory.create*Item`` call
        on the JVM side.

    Raises:
        ValueError: If ``value`` is of any other type.
    """
    def item_factory():
        # Resolved on demand, right before an item is created, so that an
        # unsupported value is rejected without touching the JVM at all.
        return self._sparksession._jvm.org.rumbledb.items.ItemFactory.getInstance()

    # bool has to be tested ahead of int: in Python, bool is a subclass of
    # int, so True/False would otherwise be turned into long items.
    if isinstance(value, bool):
        return item_factory().createBooleanItem(value)
    if isinstance(value, str):
        return item_factory().createStringItem(value)
    if isinstance(value, int):
        return item_factory().createLongItem(value)
    if isinstance(value, float):
        return item_factory().createDoubleItem(value)
    if value is None:
        return item_factory().createNullItem()

    if isinstance(value, list):
        members = self._sparksession._jvm.java.util.ArrayList()
        for element in value:
            members.add(self.convert(element))
        # NOTE(review): the meaning of the trailing False flag is not visible
        # from this file -- confirm against ItemFactory.createArrayItem.
        return item_factory().createArrayItem(members, False)

    if isinstance(value, dict):
        pairs = self._sparksession._jvm.java.util.HashMap()
        for key, member in value.items():
            # Each key is mapped to a one-element Java list holding the
            # converted value (presumably the shape createObjectItem
            # expects -- confirm against ItemFactory).
            wrapped = self._sparksession._jvm.java.util.ArrayList()
            wrapped.add(self.convert(member))
            pairs[key] = wrapped
        return item_factory().createObjectItem(pairs, False)

    raise ValueError("Cannot yet convert value of type " + str(type(value)) + " to a RumbleDB item. Please open an issue and we will look into it!")
111+
112+
def bind(self, name: str, valueToBind):
    """Bind the external JSONiq variable ``name`` to a value.

    Three kinds of values are accepted:

    * a Python ``list``: every element is converted with ``self.convert``
      and the resulting list is bound as the variable's value;
    * an object exposing ``_get_object_id`` (presumably a py4j Java object
      proxy -- confirm): passed through unchanged;
    * an object exposing ``_jdf``, such as a DataFrame: its ``_jdf``
      attribute is bound.

    Both arguments are checked before the session configuration is
    fetched, so an invalid call fails fast and leaves the configuration
    untouched.

    Args:
        name: Variable name, including the leading ``$`` (e.g. ``'$a'``).
        valueToBind: The list, Java object or DataFrame to bind.

    Returns:
        ``self``, so that calls can be chained.

    Raises:
        ValueError: If ``name`` does not start with ``$``, if
            ``valueToBind`` is of an unsupported kind, or if a list
            element cannot be converted by ``self.convert``.
    """
    if not name.startswith("$"):
        raise ValueError("Variable name must start with a dollar symbol ('$').")

    if isinstance(valueToBind, list):
        payload = [self.convert(value) for value in valueToBind]
    elif hasattr(valueToBind, "_get_object_id"):
        payload = valueToBind
    elif hasattr(valueToBind, "_jdf"):
        payload = valueToBind._jdf
    else:
        # Without this branch a scalar such as 42 would surface as an opaque
        # AttributeError about a missing '_jdf' attribute.
        raise ValueError(
            "Cannot bind a value of type " + str(type(valueToBind))
            + " to a JSONiq variable: expected a list of Python values or a DataFrame."
            + " To bind a single value, wrap it in a list or use bindOne()."
        )

    # The configuration stores the variable name without its dollar sign.
    conf = self._jrumblesession.getConfiguration()
    conf.setExternalVariableValue(name[1:], payload)
    return self
126+
127+
def bindOne(self, name: str, value):
    """Bind the JSONiq variable ``name`` to a single value.

    Convenience wrapper around ``bind``: the value is wrapped in a
    one-element list and handed over unchanged otherwise.

    Args:
        name: Variable name, forwarded to ``bind`` as is.
        value: The single value to bind.

    Returns:
        Whatever ``bind`` returns.
    """
    singleton = [value]
    return self.bind(name, singleton)
129+
86130
def bindDataFrameAsVariable(self, name: str, df):
87131
conf = self._jrumblesession.getConfiguration();
88132
if not name.startswith("$"):

0 commit comments

Comments
 (0)