Error while creating an instance of SparkContext on a local machine (Ubuntu 20.04 LTS)
I am trying to run PySpark (3.2.0) code on a local machine (Ubuntu 20.04 LTS) with Java build 17.0.2+8-LTS-86. I am getting an error during the creation of the SparkContext. The code I am trying to execute is as follows:
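For reference, this is a minimal check of the versions in this environment; the values in the comments are what I see on my machine:
import subprocess
import pyspark

print(pyspark.__version__)            # 3.2.0
subprocess.run(["java", "-version"])  # openjdk ... build 17.0.2+8-LTS-86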
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, SparkSession
from pyspark.sql.types import StructType, StructField, DoubleType, IntegerType, StringType
sc = SparkContext.getOrCreate(SparkConf().setMaster("local[*]"))
from pyspark.sql import SparkSession
spark = SparkSession \
    .builder \
    .getOrCreate()
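(As an aside, I believe the same failure would occur with the session-only entry point below, since the SparkContext is created implicitly on the JVM side either way; I include it only as a sketch.)
from pyspark.sql import SparkSession

# Session-only entry point; the SparkContext is created implicitly by
# the builder, so I expect the same failure at the same JVM call.
spark = (
    SparkSession.builder
    .master("local[*]")
    .getOrCreate()
)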
When I run the original code above, I am getting the following error:
Py4JJavaError Traceback (most recent call last)
/tmp/ipykernel_7459/1065970851.py in <module>
2 from pyspark.sql import SQLContext, SparkSession
3 from pyspark.sql.types import StructType, StructField, DoubleType, IntegerType, StringType
----> 4 sc = SparkContext.getOrCreate(SparkConf().setMaster("local[*]"))
5 from pyspark.sql import SparkSession
6 spark = SparkSession \
~/anaconda3/envs/pyspark_env/lib/python3.9/site-packages/pyspark/context.py in getOrCreate(cls, conf)
390 with SparkContext._lock:
391 if SparkContext._active_spark_context is None:
--> 392 SparkContext(conf=conf or SparkConf())
393 return SparkContext._active_spark_context
394
~/anaconda3/envs/pyspark_env/lib/python3.9/site-packages/pyspark/context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
144 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
145 try:
--> 146 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
147 conf, jsc, profiler_cls)
148 except:
~/anaconda3/envs/pyspark_env/lib/python3.9/site-packages/pyspark/context.py in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
207
208 # Create the Java SparkContext through Py4J
--> 209 self._jsc = jsc or self._initialize_context(self._conf._jconf)
210 # Reset the SparkConf to the one actually used by the SparkContext in JVM.
211 self._conf = SparkConf(_jconf=self._jsc.sc().conf())
~/anaconda3/envs/pyspark_env/lib/python3.9/site-packages/pyspark/context.py in _initialize_context(self, jconf)
327 Initialize SparkContext in function to allow subclass specific initialization
328 """
--> 329 return self._jvm.JavaSparkContext(jconf)
330
331 @classmethod
~/anaconda3/envs/pyspark_env/lib/python3.9/site-packages/py4j/java_gateway.py in __call__(self, *args)
1571
1572 answer = self._gateway_client.send_command(command)
-> 1573 return_value = get_return_value(
1574 answer, self._gateway_client, None, self._fqn)
1575
~/anaconda3/envs/pyspark_env/lib/python3.9/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.IllegalAccessError: class org.apache.spark.storage.StorageUtils$ (in unnamed module @0x5706d6e2) cannot access class sun.nio.ch.DirectBuffer (in module java.base) because module java.base does not export sun.nio.ch to unnamed module @0x5706d6e2
at org.apache.spark.storage.StorageUtils$.<init>(StorageUtils.scala:213)
at org.apache.spark.storage.StorageUtils$.<clinit>(StorageUtils.scala)
at org.apache.spark.storage.BlockManagerMasterEndpoint.<init>(BlockManagerMasterEndpoint.scala:110)
at org.apache.spark.SparkEnv$.$anonfun$create$9(SparkEnv.scala:348)
at org.apache.spark.SparkEnv$.registerOrLookupEndpoint$1(SparkEnv.scala:287)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:336)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:191)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:277)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:460)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:238)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:833)
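From the message, the JVM module system is refusing Spark access to sun.nio.ch.DirectBuffer. As far as I understand, Spark 3.2.0 only officially supports Java 8 and 11, so this may simply be a Java 17 incompatibility. One workaround I am considering (a sketch, not a confirmed fix) is to pass the export flag the error asks for, before the driver JVM starts:
from pyspark import SparkConf, SparkContext

# Sketch of a possible workaround, not a confirmed fix: export
# sun.nio.ch to unnamed modules, which is what the IllegalAccessError
# complains about. This only takes effect if set before the driver JVM
# is launched, i.e. before the first SparkContext in this process.
conf = (
    SparkConf()
    .setMaster("local[*]")
    .set("spark.driver.extraJavaOptions",
         "--add-exports=java.base/sun.nio.ch=ALL-UNNAMED")
)
sc = SparkContext.getOrCreate(conf)
The alternative I keep seeing suggested is to downgrade to Java 11, which Spark 3.2 supports officially, but I would prefer to keep Java 17 if possible.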
The link to the notebook I am trying to run is:
https://github.com/abrar39/StackOverflow/blob/master/pysparkNotebook.ipynb
Any help would be highly appreciated. Regards.