Skip to content

Commit d94b4b7

Browse files
Release 2.0.0 alpha 1
Improvements2
2 parents 955145c + f6edcef commit d94b4b7

6 files changed

Lines changed: 71 additions & 13 deletions

File tree

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,11 @@ Even more queries can be found [here](https://colab.research.google.com/github/R
338338

339339
# Latest updates
340340

341+
## Version 2.0.0 alpha 1
342+
- When returning a single-column DataFrame with atomic values, the column is now named `__value` instead of `value` to avoid collisions with user-defined columns.
343+
- Improved schema inference: DataFrames can be returned in a wider range of cases.
344+
- Improved error display in notebooks for errors that occur when the results are collected, rather than when jsoniq() is called.
345+
341346
## Version 0.2.0 alpha 9
342347
- Stability improvements.
343348

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "jsoniq"
7-
version = "0.2.0a9"
7+
version = "2.0.0a1"
88
description = "Python edition of RumbleDB, a JSONiq engine"
99
requires-python = ">=3.11"
1010
dependencies = [

src/jsoniq/sequence.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,10 @@ def rdd(self):
5454
return self._rumblesession.lastResult
5555

5656
def df(self):
57-
if (not "DataFrame" in self._jsequence.availableOutputs()):
58-
sys.stderr.write(self.schema_str)
59-
return None
6057
self._rumblesession.lastResult = DataFrame(self._jsequence.getAsDataFrame(), self._sparksession)
6158
return self._rumblesession.lastResult
6259

6360
def pdf(self):
64-
if (not "DataFrame" in self._jsequence.availableOutputs()):
65-
sys.stderr.write(self.schema_str)
66-
return None
6761
self._rumblesession.lastResult = self.df().toPandas()
6862
return self._rumblesession.lastResult
6963

src/jsoniq/session.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pandas as pd
88
import importlib.resources as pkg_resources
99

10-
with pkg_resources.path("jsoniq.jars", "rumbledb-1.24.0.jar") as jar_path:
10+
with pkg_resources.path("jsoniq.jars", "rumbledb-2.0.0.jar") as jar_path:
1111
jar_path_str = "file://" + str(jar_path)
1212

1313
def get_spark_version():
@@ -88,10 +88,13 @@ def getOrCreate(self):
8888
sys.stderr.write("[Error] Could not determine Spark version. The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid path to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
8989
sys.stderr.write(f"Current value of SPARK_HOME: {os.environ.get('SPARK_HOME')}\n")
9090
sys.exit(43)
91-
elif not spark_version.startswith("4.0"):
91+
elif not os.environ.get('SPARK_HOME') is None and not spark_version.startswith("4.0"):
9292
sys.stderr.write(f"[Error] RumbleDB requires Spark 4.0, but found version {spark_version}. Please either set SPARK_HOME to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
9393
sys.exit(43)
9494
else:
95+
sys.stderr.write(f"[Error] SPARK_HOME is not set, but somehow pyspark is not falling back to the packaged Spark 4.0.0 version.\n")
96+
sys.stderr.write(f"We would appreciate a bug report with some information about your OS, setup, etc.\n")
97+
sys.stderr.write(f"In the meantime, what you could do as a workaround is download the Spark 4.0.0 zip file from spark.apache.org, unzip it to some local directory, and point SPARK_HOME to this directory.\n")
9598
raise e
9699
return RumbleSession._rumbleSession
97100

src/jsoniqmagic/magic.py

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,24 +67,80 @@ def run(self, line, cell=None, timed=False):
6767
"""
6868

6969
if(args.pyspark_data_frame):
70-
df = response.df();
70+
try:
71+
df = response.df();
72+
except Py4JJavaError as e:
73+
print(e.java_exception.getMessage())
74+
return
75+
except Exception as e:
76+
print("Query unsuccessful.")
77+
print("Usual reasons: firewall, misconfigured proxy.")
78+
print("Error message:")
79+
print(e.args[0])
80+
return
81+
except:
82+
print("Query unsuccessful.")
83+
print("Usual reasons: firewall, misconfigured proxy.")
84+
return
7185
if df is not None:
7286
df.show()
7387

7488
if (args.pandas_data_frame):
75-
pdf = response.pdf()
89+
try:
90+
pdf = response.pdf()
91+
except Py4JJavaError as e:
92+
print(e.java_exception.getMessage())
93+
return
94+
except Exception as e:
95+
print("Query unsuccessful.")
96+
print("Usual reasons: firewall, misconfigured proxy.")
97+
print("Error message:")
98+
print(e.args[0])
99+
return
100+
except:
101+
print("Query unsuccessful.")
102+
print("Usual reasons: firewall, misconfigured proxy.")
103+
return
76104
if pdf is not None:
77105
print(pdf)
78106

79107
if (args.apply_updates):
80108
if ("PUL" in response.availableOutputs()):
81-
response.applyPUL()
109+
try:
110+
response.applyPUL()
111+
except Py4JJavaError as e:
112+
print(e.java_exception.getMessage())
113+
return
114+
except Exception as e:
115+
print("Query unsuccessful.")
116+
print("Usual reasons: firewall, misconfigured proxy.")
117+
print("Error message:")
118+
print(e.args[0])
119+
return
120+
except:
121+
print("Query unsuccessful.")
122+
print("Usual reasons: firewall, misconfigured proxy.")
123+
return
82124
print("Updates applied successfully.")
83125
else:
84126
print("No Pending Update List (PUL) available to apply.")
85127

86128
if (args.json or (not args.pandas_data_frame and not args.pyspark_data_frame)):
87-
capplusone = response.take(rumble.getRumbleConf().getResultSizeCap() + 1)
129+
try:
130+
capplusone = response.take(rumble.getRumbleConf().getResultSizeCap() + 1)
131+
except Py4JJavaError as e:
132+
print(e.java_exception.getMessage())
133+
return
134+
except Exception as e:
135+
print("Query unsuccessful.")
136+
print("Usual reasons: firewall, misconfigured proxy.")
137+
print("Error message:")
138+
print(e.args[0])
139+
return
140+
except:
141+
print("Query unsuccessful.")
142+
print("Usual reasons: firewall, misconfigured proxy.")
143+
return
88144
if len(capplusone) > rumble.getRumbleConf().getResultSizeCap():
89145
count = response.count()
90146
print("The query output %s items, which is too many to display. Displaying the first %s items:" % (count, rumble.getRumbleConf().getResultSizeCap()))

0 commit comments

Comments
 (0)