Skip to content

Commit 0b137cf

Browse files
More informative error message if there is interference with another Spark version
More informative error message if there is interference with another Spark version
2 parents 2fea77c + 59a858c commit 0b137cf

4 files changed

Lines changed: 33 additions & 2 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,9 @@ Even more queries can be found [here](https://colab.research.google.com/github/R
349349

350350
# Latest updates
351351

352+
## Version 0.2.0 alpha 5
353+
- If the initialization of the Spark session fails, we now check whether SPARK_HOME is set and whether it is invalid or points to a Spark version other than 4.0, and output a more informative error message.
354+
352355
## Version 0.2.0 alpha 4
353356
- Added parameters to the jsoniq magic to select the desired output to print: -j, -df, -pdf
354357
- Added informative error message with a hint on how to fix when trying to get a DataFrame and there is no schema.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "jsoniq"
7-
version = "0.2.0a4"
7+
version = "0.2.0a5"
88
description = "Python edition of RumbleDB, a JSONiq engine"
99
requires-python = ">=3.11"
1010
dependencies = [
-8 Bytes
Binary file not shown.

src/jsoniq/session.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,15 @@
1010
# Resolve the RumbleDB jar bundled inside the package to a file:// URL so it
# can later be handed to the Spark configuration (e.g. spark.jars).
with pkg_resources.path("jsoniq.jars", "rumbledb-1.24.0.jar") as jar_path:
    jar_path_str = "file://" + str(jar_path)
1212

13+
def get_spark_version():
14+
if os.environ.get('SPARK_HOME') != None:
15+
spark_version = os.popen("spark-submit --version 2>&1").read()
16+
if "version" in spark_version:
17+
match = re.search(r'version (\d+\.\d+.\d+)', spark_version)
18+
if match:
19+
return match.group(1)
20+
return None
21+
1322
class MetaRumbleSession(type):
1423
def __getattr__(cls, item):
1524
if item == "builder":
@@ -64,7 +73,26 @@ def __init__(self):
6473

6574
def getOrCreate(self):
    """Return the singleton RumbleSession, creating it on the first call.

    Wraps the underlying Spark builder's getOrCreate() so that failures
    caused by an inconsistent SPARK_HOME (missing files, or a Spark version
    other than 4.0) produce an actionable error message instead of an
    opaque traceback. Exits with status 43 in those diagnosed cases;
    re-raises the original exception otherwise.
    """
    if RumbleSession._rumbleSession is None:
        spark_home = os.environ.get('SPARK_HOME')
        try:
            RumbleSession._rumbleSession = RumbleSession(self._sparkbuilder.getOrCreate())
        except FileNotFoundError:
            # A missing file during Spark startup typically means SPARK_HOME
            # points at a broken or nonexistent installation.
            if spark_home is not None:
                sys.stderr.write("[Error] SPARK_HOME environment variable may not be set properly. Please check that it points to a valid path to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
                sys.stderr.write(f"Current value of SPARK_HOME: {spark_home}\n")
                sys.exit(43)
            # SPARK_HOME is not involved; surface the original error.
            raise
        except TypeError:
            # A TypeError at startup can be caused by interference from an
            # incompatible Spark installation; try to diagnose its version.
            spark_version = get_spark_version()
            if spark_home is not None and spark_version is None:
                sys.stderr.write("[Error] Could not determine Spark version. The SPARK_HOME environment variable may not be set properly. Please check that it points to a valid path to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
                sys.stderr.write(f"Current value of SPARK_HOME: {spark_home}\n")
                sys.exit(43)
            elif spark_version is not None and not spark_version.startswith("4.0"):
                # BUG FIX: guard spark_version against None here. The original
                # code called spark_version.startswith(...) unconditionally,
                # which raised AttributeError (masking the real TypeError)
                # whenever SPARK_HOME was unset and no version was detected.
                sys.stderr.write(f"[Error] RumbleDB requires Spark 4.0, but found version {spark_version}. Please either set SPARK_HOME to a Spark 4.0 directory, or maybe the easiest would be to delete the environment variable SPARK_HOME completely to fall back to the installation of Spark 4.0 packaged with pyspark.\n")
                sys.exit(43)
            else:
                # Not a diagnosable Spark-version problem; re-raise as-is.
                raise
    return RumbleSession._rumbleSession
6997

7098
def create(self):

0 commit comments

Comments
 (0)