|
1 | | -# python-jsoniq |
2 | | -Python version of RumbleDB |
| 1 | +This is the Python version of RumbleDB. It is currently only a prototype (alpha) and probably unstable. |
| 2 | + |
| 3 | +Install with: |
| 4 | +``` |
| 5 | +pip install jsoniq |
| 6 | +``` |
| 7 | + |
| 8 | +Sample code: |
| 9 | +``` |
| 10 | +from jsoniq import RumbleSession |
| 11 | +
|
| 12 | +# The syntax to start a session is similar to Spark. |
| 13 | +rumble = RumbleSession.builder.appName("PyRumbleExample").getOrCreate(); |
| 14 | +
|
| 15 | +# Create a data frame also similar to Spark (but using the rumble object). |
| 16 | +data = [("Alice", 30), ("Bob", 25), ("Charlie", 35)]; |
| 17 | +columns = ["Name", "Age"]; |
| 18 | +df = rumble.createDataFrame(data, columns); |
| 19 | +
|
| 20 | +# This is how to bind a JSONiq variable to a dataframe. You can bind as many variables as you want. |
| 21 | +rumble.bindDataFrameAsVariable('$a', df); |
| 22 | +
|
| 23 | +# This is how to run a query (declaring the external variable). This is similar to spark.sql(). |
| 24 | +res = rumble.jsoniq('declare variable $a external; $a.Name'); |
| 25 | +
|
| 26 | +# Returns a list containing one or several of "DataFrame", "RDD", "PUL", "Local". |
| 27 | +modes = res.availableOutputs(); |
| 28 | +
|
| 29 | +###### Parallel access ###### |
| 30 | +
|
| 31 | +# This returns a regular data frame |
| 32 | +df = res.getAsDataFrame(); |
| 33 | +df.show(); |
| 34 | +
|
| 35 | +# This returns an RDD containing JSONiq item objects (does not work yet with transformations) |
| 36 | +rdd = res.getAsRDD(); |
| 37 | +print(rdd.count()); |
| 38 | +for item in rdd.take(10): |
| 39 | + print(item.getStringValue()); |
| 40 | +
|
| 41 | +###### Local access ###### |
| 42 | +
|
| 43 | +# This materializes the rows as items. |
| 44 | +# The items are accessed with the RumbleDB Item API. |
| 45 | +list = res.getAsList(); |
| 46 | +for result in list: |
| 47 | + print(result.getStringValue()) |
| 48 | +
|
| 49 | +# This streams through the items one by one |
| 50 | +res.open(); |
| 51 | +while (res.hasNext()): |
| 52 | + print(res.next().getStringValue()); |
| 53 | +res.close(); |
| 54 | +
|
| 55 | +###### Native Python/JSON Access for bypassing the Item API (but losing on the richer JSONiq type system) ###### |
| 56 | +
|
| 57 | +# This method directly gets the result as JSON (dict, list, strings, ints, etc). |
| 58 | +jlist = res.getAsJSONList(); |
| 59 | +for str in jlist: |
| 60 | + print(str); |
| 61 | +
|
| 62 | +# This streams through the JSON values one by one. |
| 63 | +res.open(); |
| 64 | +while(res.hasNext()): |
| 65 | + print(res.nextJSON()); |
| 66 | +res.close(); |
| 67 | +
|
| 68 | +# There is still a problem to solve to make RDDs work across Python and Java |
| 69 | +#rdd = res.getAsJSONRDD(); |
| 70 | +#print(rdd.count()); |
| 71 | +#for str in rdd.take(10): |
| 72 | +# print(str); |
| 73 | +``` |
0 commit comments