Ich habe Probleme beim Ausführen von pyspark auf einem externen Server. Das Ausführen von Pyspark führt zu mehreren Fehlern, und ich kann nicht feststellen, wie sie zu beheben sind. Hier ist der Ausdruck:Mehrere Fehler beim Ausführen von Pyspark
[[email protected] ~]# pyspark
Python 3.6.0 |Anaconda 4.3.1 (64-bit)| (default, Dec 23 2016, 12:22:00)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux
Type "help", "copyright", "credits" or "license" for more information.
Using Spark's default log4j profile: org/apache/spark/log4j-
defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).
17/05/26 21:19:10 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable
17/05/26 21:19:10 WARN SparkContext: Another SparkContext is being
constructed (or threw an exception in its constructor). This may indicate
an error, since only one SparkContext may be running in this JVM (see SPARK-
2243). The other SparkContext was created at:
org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.lang.reflect.Constructor.newInstance(Constructor.java:423)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
py4j.Gateway.invoke(Gateway.java:236)
py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
py4j.GatewayConnection.run(GatewayConnection.java:214)
java.lang.Thread.run(Thread.java:748)
Traceback (most recent call last):File "/usr/local/spark/spark/python/pyspark/shell.py", line 43, in <module>
spark = SparkSession.builder\
File "/usr/local/spark/spark/python/pyspark/sql/session.py", line 169, in getOrCreate
sc = SparkContext.getOrCreate(sparkConf)
File "/usr/local/spark/spark/python/pyspark/context.py", line 310, in getOrCreate
SparkContext(conf=conf or SparkConf())
File "/usr/local/spark/spark/python/pyspark/context.py", line 118, in __init__
conf, jsc, profiler_cls)
File "/usr/local/spark/spark/python/pyspark/context.py", line 182, in _do_init
self._jsc = jsc or self._initialize_context(self._conf._jconf)
File "/usr/local/spark/spark/python/pyspark/context.py", line 249, in _initialize_context
return self._jvm.JavaSparkContext(jconf)
File "/usr/local/spark/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1401, in __call__
File "/usr/local/spark/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.ExceptionInInitializerError
at org.apache.spark.SparkContext.<init>(SparkContext.scala:397)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:236)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.net.UnknownHostException: spark-master: spark-master: Temporary failure in name resolution
at java.net.InetAddress.getLocalHost(InetAddress.java:1505)
at org.apache.spark.util.Utils$.findLocalInetAddress(Utils.scala:870)
at org.apache.spark.util.Utils$.org$apache$spark$util$Utils$$localIpAddress$lzycompute(Utils.scala:863)
at org.apache.spark.util.Utils$.org$apache$spark$util$Utils$$localIpAddress(Utils.scala:863)
at org.apache.spark.util.Utils$$anonfun$localHostName$1.apply(Utils.scala:920)
at org.apache.spark.util.Utils$$anonfun$localHostName$1.apply(Utils.scala:920)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.util.Utils$.localHostName(Utils.scala:920)
at org.apache.spark.internal.config.package$.<init>(package.scala:189)
at org.apache.spark.internal.config.package$.<clinit>(package.scala)
... 13 more
Caused by: java.net.UnknownHostException: spark-master: Temporary failure in name resolution
at java.net.Inet6AddressImpl.lookupAllHostAddr(Native Method)
at java.net.InetAddress$2.lookupAllHostAddr(InetAddress.java:928)
at java.net.InetAddress.getAddressesFromNameService(InetAddress.java:1323)
at java.net.InetAddress.getLocalHost(InetAddress.java:1500)
... 22 more
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/spark/spark/python/pyspark/shell.py", line 47, in <module>
spark = SparkSession.builder.getOrCreate()
File "/usr/local/spark/spark/python/pyspark/sql/session.py", line 169, in getOrCreate
sc = SparkContext.getOrCreate(sparkConf)
File "/usr/local/spark/spark/python/pyspark/context.py", line 310, in getOrCreate
SparkContext(conf=conf or SparkConf())
File "/usr/local/spark/spark/python/pyspark/context.py", line 118, in __init__
conf, jsc, profiler_cls)
File "/usr/local/spark/spark/python/pyspark/context.py", line 182, in _do_init
self._jsc = jsc or self._initialize_context(self._conf._jconf)
File "/usr/local/spark/spark/python/pyspark/context.py", line 249, in _initialize_context
return self._jvm.JavaSparkContext(jconf)
File "/usr/local/spark/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1401, in __call__
File "/usr/local/spark/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.internal.config.package$
at org.apache.spark.SparkContext.<init>(SparkContext.scala:397)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:236)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
>>>
Jede Hilfe würde sehr geschätzt werden!
Edit: Auf anderen Maschinen, die ich versuche, dies zu starten, bekomme ich eine ähnliche Reihe von Fehlern, aber ohne die UnknownHostException.
pyspark
Python 2.7.5 (default, Nov 20 2015, 02:00:19)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-4)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
17/05/30 13:12:35 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/05/30 13:12:35 WARN SparkContext: Another SparkContext is being constructed (or threw an exception in its constructor). This may indicate an error, since only one SparkContext may be running in this JVM (see SPARK-2243). The other SparkContext was created at:
org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.lang.reflect.Constructor.newInstance(Constructor.java:423)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
py4j.Gateway.invoke(Gateway.java:236)
py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
py4j.GatewayConnection.run(GatewayConnection.java:214)
java.lang.Thread.run(Thread.java:748)
Traceback (most recent call last):
File "/usr/local/spark/python/pyspark/shell.py", line 47, in <module>
spark = SparkSession.builder.getOrCreate()
File "/usr/local/spark/python/pyspark/sql/session.py", line 169, in getOrCreate
sc = SparkContext.getOrCreate(sparkConf)
File "/usr/local/spark/python/pyspark/context.py", line 310, in getOrCreate
SparkContext(conf=conf or SparkConf())
File "/usr/local/spark/python/pyspark/context.py", line 118, in __init__
conf, jsc, profiler_cls)
File "/usr/local/spark/python/pyspark/context.py", line 182, in _do_init
self._jsc = jsc or self._initialize_context(self._conf._jconf)
File "/usr/local/spark/python/pyspark/context.py", line 249, in _initialize_context
return self._jvm.JavaSparkContext(jconf)
File "/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1401, in __call__
File "/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.internal.config.package$
at org.apache.spark.SparkContext.<init>(SparkContext.scala:397)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:236)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
>>>
'UnknownHostException: spark-master: spark-master' - sieht so aus, als hätten Sie master url (in Spark config?) So konfiguriert, dass er auf' spark-master' zeigt, der nicht in eine IP-Adresse aufgelöst wird. – zero323
@ zero323 Danke! Aber das scheint nicht das ganze Problem zu sein. Auf den anderen Computern, die ich versuche, dies auszuführen, erhalte ich eine ähnliche Reihe von Fehlern, aber ohne die UnknownHostException: –
UPDATE: Behebung des Host-Problems behoben die meisten Fehler. Die normale Funkenschale läuft jetzt. Noch einige Probleme mit Pyspark, wird als separate Frage veröffentlichen. –