我有这个Scala代码,以便连接到某个FTP服务器:
import java.io.{File, FileOutputStream, InputStream}
import org.apache.commons.net.ftp.{FTPClient,FTPFile}
import scala.util.Try
/** Thin wrapper around Apache Commons Net's [[FTPClient]].
  *
  * All methods delegate to a single underlying client instance, so this
  * class is stateful and not thread-safe (the client tracks the current
  * connection and working directory).
  */
final class FTP() {
  private val client = new FTPClient

  /** Attempts to log in with the given credentials.
    * @return Success(true/false) from the server, Failure if the client throws
    *         (e.g. when not connected yet).
    */
  def login(username: String, password: String): Try[Boolean] = Try {
    client.login(username, password)
  }

  /** Connects to `host` and switches to local passive mode (client opens the
    * data connection — usually required behind NAT/firewalls).
    */
  def connect(host: String): Try[Unit] = Try {
    client.connect(host)
    client.enterLocalPassiveMode()
  }

  /** Whether the underlying client currently holds an open connection. */
  def connected: Boolean = client.isConnected

  /** Closes the connection; side-effecting, hence the parentheses. */
  def disconnect(): Unit = client.disconnect()

  /** Probes whether a connection to `host` can be established, then tears it
    * down again.
    *
    * Bug fix: the original let `client.connect` propagate exceptions
    * (e.g. `java.net.UnknownHostException`), so an unreachable host crashed
    * the caller instead of yielding `false`. Both the probe and the cleanup
    * are now wrapped in `Try`.
    */
  def canConnect(host: String): Boolean = {
    val connectionWasEstablished = Try {
      client.connect(host)
      connected
    }.getOrElse(false)
    // Best-effort cleanup; a failure here must not mask the probe result.
    Try(client.disconnect())
    connectionWasEstablished
  }

  /** Lists files in `dir`, or in the current working directory when None. */
  def listFiles(dir: Option[String] = None): List[FTPFile] =
    dir.fold(client.listFiles)(client.listFiles).toList

  /** Convenience: connect then authenticate (anonymous by default).
    * @return the login result; Failure if either step throws.
    */
  def connectWithAuth(host: String,
                      username: String = "anonymous",
                      password: String = "") : Try[Boolean] = {
    for {
      connection <- connect(host)
      login <- login(username, password)
    } yield login
  }

  /** Changes the remote working directory; true on success. */
  def cd(path: String): Boolean =
    client.changeWorkingDirectory(path)

  /** Names of the entries in the current remote directory. */
  def filesInCurrentDirectory: Seq[String] =
    listFiles().map(_.getName)

  /** Opens a stream over the remote file's contents.
    *
    * NOTE(review): per the Commons Net documentation,
    * `completePendingCommand()` must be called AFTER the returned stream has
    * been fully read and closed; calling it before the stream is consumed (as
    * here) can block or report a failed transfer for non-trivial files.
    * Left as-is because moving the call would change the caller contract —
    * confirm against actual usage.
    */
  def downloadFileStream(remote: String): InputStream = {
    val stream = client.retrieveFileStream(remote)
    client.completePendingCommand()
    stream
  }

  /** Downloads the remote file to a local file of the same path.
    *
    * Bug fix: the original never closed the `FileOutputStream`, leaking the
    * file handle (and buffered bytes) on every call and on any exception.
    */
  def downloadFile(remote: String): Boolean = {
    val os = new FileOutputStream(new File(remote))
    try client.retrieveFile(remote, os)
    finally os.close()
  }

  /** Uploads `input` to the remote path; true on success. */
  def uploadFile(remote: String, input: InputStream): Boolean =
    client.storeFile(remote, input)
}// end of final class
// Demo: connect to the FTP server, authenticate, and report the outcome.
val myFTP = new FTP
val myBool: Try[Boolean] =
  myFTP.connectWithAuth("MY_FTP_SERVER", "USER", "PASS")
println("Can connect? " + myBool)
当我在本地spark-shell中运行此代码时,我有这个:
scala> val myFTP = new FTP
myFTP: FTP = $iwC$$iwC$FTP@1077598b
scala>
scala> val myBool : Try[Boolean] = myFTP.connectWithAuth("MY_FTP_SERVER","USER","PASS")
myBool: scala.util.Try[Boolean] = Failure(java.net.UnknownHostException: MY_FTP_SERVER: nodename nor servname provided, or not known)
scala> println("Can connect? " + myBool)
Can connect? Failure(java.net.UnknownHostException: MY_FTP_SERVER: nodename nor servname provided, or not known)
如果我在shell中导出http_proxy变量,我可以通过curl到达ftp服务器,那么,如何使用Scala以编程方式设置http_proxy变量?
MacBook-Pro-de-Alonso:spark-1.5.0-bin-hadoop2.6 aisidoro$ export http_proxy=socks5://MY_PROXY:1080
MacBook-Pro-de-Alonso:spark-1.5.0-bin-hadoop2.6 aisidoro$ curl MY_FTP_SERVER
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>301 Moved Permanently</title>
</head><body>
<h1>Moved Permanently</h1>
<p>The document has moved <a href="http://MY_FTP_SERVER">here</a>.</p>
<hr>
<address>Apache/2.2.15 (CentOS) Server at MY_FTP_SERVER Port 80</address>
</body></html>
非常感谢。
更新:2016年12月22日
我试过在运行FTP类代码的同一个spark-shell中执行此操作:
scala> System.setProperty("http_proxy","socks5://MY_PROXY_SERVER:1080")
res0: String = null
scala> System.getProperty("http_proxy")
res1: String = socks5://MY_PROXY_SERVER:1080
具有相同的结果,我无法通过代理服务器以编程方式访问此ftp服务器...
我也尝试在同一终端会话中运行spark-shell之前导出JAVA_OPTS,结果相同......
export JAVA_OPTS="Dhttp.proxyHost=socks5://MY_PROXY_HOST -Dhttp.proxyPort=1080"
答案 0(得分:1):
虽然 http_proxy 环境变量通常被 curl 和其他各种实用程序使用,但 Java 有自己的一套指定代理设置的方式(specifying proxy settings)。由于您使用的是 SOCKS 代理,请尝试设置 SOCKS 代理属性,例如通过命令行/环境变量:
-DsocksProxyHost=MY_PROXY_HOST -DsocksProxyPort=1080
如果使用 spark-shell,您可以参考这个关于如何向 spark-shell 传递此类 JVM 选项的 StackOverflow 问题(this StackOverflow question)中的示例。
此外,Apache Commons 的 FtpClient 并不会显式读取环境变量,而是直接使用 socket.connect 连接到 FTP 服务器。
不过,如果您想通过 HTTP 代理隧道传输 FTP 流量,可以使用 FTPHTTPClient 类(详见官方示例 example),基本用法如下:
ftp = new FTPHTTPClient(proxyHost, proxyPort, proxyUser, proxyPassword);