不连续的FTP下载抛出“读取超时”或“连接重置”

时间:2016-09-05 09:38:04

标签: java ftp hive ftp-client

我在包'org.apache.commons.net.ftp'中使用FTP和FTPClient从FTP服务器下载文件。

以下是我的完整示例代码:

/**
 * Streams a single remote file from an FTP server in small chunks.
 *
 * <p>Typical usage: populate the connection fields, call {@link #connect()}, call
 * {@link #readBytes()} repeatedly until it returns {@code null}, optionally call
 * {@link #validInputComplete()}, and always call {@link #close()} in a {@code finally} block
 * (also when {@link #connect()} failed, so that a half-open control connection is released).
 *
 * <p>This class is not thread-safe.
 */
public class FtpInput {

    private static final Logger LOG = Logger.getLogger(FtpInput.class);

    /** Timeout in milliseconds applied to connect, control-socket reads and data-socket reads. */
    private static final int TIMEOUT = 120000;

    /** Prefix of a successful reply to the FTP {@code SIZE} command. */
    private static final String SIZE_COMMAND_REPLY_CODE = "213 ";

    /**
     * FTP client owning the control connection; {@code null} until {@link #connect()} is called
     * and again after {@link #close()}.
     */
    private FTPClient ftpClient;

    /**
     * Size in bytes of the remote file as reported by the server's SIZE reply.
     */
    private long completeFileSize = 0;

    protected String ip = "";
    protected int port = 21;
    protected String user = "";
    protected String passwd = "";
    protected String path = "";
    protected String fileName = "";

    /**
     * Data stream of the remote file, wrapped so the number of bytes read can be counted.
     */
    private CountingInputStream is;

    /**
     * Number of bytes read from the data stream so far (updated by {@link #readBytes()}).
     */
    private long processedBytesNum;

    /** Scratch buffer the data stream is read into; never handed out to callers. */
    private final byte[] inputBuffer = new byte[1024];

    /**
     * Connects and logs in to the FTP server, changes to {@code path}, queries the size of
     * {@code fileName} and opens the data stream for it.
     *
     * @throws RuntimeException if connecting, logging in, changing directory, querying the size
     *         or opening the data stream fails; the underlying exception is attached as cause
     */
    public void connect() {

        this.ftpClient = new FTPClient();
        ftpClient.setRemoteVerificationEnabled(false);
        // Both timeouts must be configured BEFORE connect(): the connect timeout is only used
        // while the socket is being opened, and the default timeout is what protects the
        // greeting/login exchange from blocking forever.
        ftpClient.setConnectTimeout(TIMEOUT);
        ftpClient.setDefaultTimeout(TIMEOUT);
        try {
            ftpClient.connect(ip, port);
            ftpClient.setSoTimeout(TIMEOUT);
            if (!ftpClient.login(user, passwd)) {
                throw new IOException("ftp login failed!");
            }
            if (StringUtils.isNotBlank(path)) {
                if (!ftpClient.changeWorkingDirectory(path)) {
                    // NOTE(review): creating the directory on a download path looks like a
                    // leftover from upload code; kept to preserve existing behavior.
                    ftpClient.mkd(path);
                    if (!ftpClient.changeWorkingDirectory(path)) {
                        throw new IOException("ftp change working dir failed! path:" + path);
                    }
                }
            }
            ftpClient.setFileType(FTP.BINARY_FILE_TYPE);
            ftpClient.setDataTimeout(TIMEOUT);
            ftpClient.enterLocalPassiveMode();
            // NOTE(review): according to the FTPClient javadoc the control keep-alive is only
            // applied by the retrieveFile/storeFile style methods, not by retrieveFileStream(),
            // so it presumably does not keep the control channel alive for the stream opened
            // below. Kept for parity with the original configuration.
            ftpClient.setControlKeepAliveTimeout(120);
        } catch (Exception e) {
            throw new RuntimeException("ftp login failed!", e);
        }
        // get complete ftp size
        completeFileSize = getFtpFileSize();
        LOG.info(String.format("ftp file size: %d", completeFileSize));
        try {
            InputStream ftpis = this.ftpClient.retrieveFileStream(this.fileName);
            if (ftpis == null) {
                // Fail here instead of wrapping null and hitting an NPE on the first read.
                LOG.error("cannot fetch source file.");
                throw new IOException("cannot fetch source file. reply: " + ftpClient.getReplyString());
            }
            this.is = new CountingInputStream(ftpis);
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Reads the next chunk of the remote file and updates the processed byte counter.
     *
     * @return a freshly allocated array holding the bytes read, or {@code null} at end of stream
     * @throws NullPointerException if {@link #connect()} has not successfully opened the stream
     * @throws RuntimeException if reading from the stream fails
     */
    public byte[] readBytes() {
        byte[] bytes = readBytesFromStream(is, inputBuffer);
        // the bytes processed
        processedBytesNum = is.getCount();
        return bytes;
    }

    /**
     * Performs a single read from {@code stream} into {@code inputBuffer} and returns the bytes
     * that were read as a new array, so the result stays valid after the next read.
     *
     * @param stream the stream to read from; must not be {@code null}
     * @param inputBuffer non-empty scratch buffer bounding the size of one read
     * @return a new array of exactly the number of bytes read, or {@code null} at end of stream
     * @throws NullPointerException if {@code stream} is {@code null}
     * @throws IllegalArgumentException if {@code inputBuffer} is {@code null} or empty
     * @throws RuntimeException if the read fails or reports an impossible byte count
     */
    protected byte[] readBytesFromStream(InputStream stream, byte[] inputBuffer) {
        // Check the reference itself; checking the boolean "stream != null" can never fail.
        Preconditions.checkNotNull(stream, "InputStream has not been inited yet.");
        Preconditions.checkArgument(inputBuffer != null && inputBuffer.length > 0);
        final int readBytes;
        try {
            readBytes = stream.read(inputBuffer);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (readBytes == -1) {
            // Read end.
            return null;
        }
        if (readBytes <= 0 || readBytes > inputBuffer.length) {
            // Not expected for a non-empty buffer; fail loudly rather than guess.
            throw new RuntimeException(String.format("readBytesFromStream: readBytes=%s inputBuffer.length=%s",
                    readBytes, inputBuffer.length));
        }
        byte[] chunk = new byte[readBytes];
        System.arraycopy(inputBuffer, 0, chunk, 0, readBytes);
        return chunk;
    }

    /**
     * Asks the server for the size of {@code fileName} using the {@code SIZE} command.
     *
     * @return the size parsed from the reply
     * @throws RuntimeException if the command fails or the reply is not a {@code 213} reply
     */
    private long getFtpFileSize() {
        try {
            ftpClient.sendCommand("SIZE", this.fileName);
            String reply = ftpClient.getReplyString().trim();
            LOG.info(String.format("ftp file %s size reply : %s", fileName, reply));
            Preconditions.checkArgument(reply.startsWith(SIZE_COMMAND_REPLY_CODE),
                    "ftp file size reply:  %s is not success", reply);
            String sizeSubStr = reply.substring(SIZE_COMMAND_REPLY_CODE.length()).trim();
            return Long.parseLong(sizeSubStr);
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Closes the data stream, finishes the pending transfer command if one was started, then
     * logs out and disconnects. Failures are logged, never thrown, and each step is attempted
     * even if an earlier one failed. Safe to call when {@link #connect()} failed or was never
     * called.
     */
    public void close() {
        // completePendingCommand() waits for a server reply, so it must only be called when a
        // transfer was actually started; otherwise it would block on a reply that never comes.
        final boolean transferStarted = is != null;
        try {
            if (transferStarted) {
                LOG.info(String.format("already read %d bytes from ftp file %s", is.getCount(), fileName));
                is.close();
            }
        } catch (Exception e) {
            LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()), e);
        } finally {
            is = null;
        }

        if (ftpClient == null) {
            return;
        }
        try {
            if (ftpClient.isConnected()) {
                if (transferStarted && !ftpClient.completePendingCommand()) {
                    LOG.error("error happened when complete transfer of ftp");
                }
                ftpClient.logout();
            }
        } catch (Exception e) {
            LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()), e);
        } finally {
            // Always release the control socket, even if logout or the pending command failed.
            try {
                if (ftpClient.isConnected()) {
                    ftpClient.disconnect();
                }
            } catch (IOException e) {
                LOG.error(String.format("Close ftp input failed:%s,%s", e.getMessage(), e.getCause()), e);
            }
            ftpClient = null;
        }
    }

    /**
     * Verifies that the number of bytes read equals the size reported by the server.
     *
     * @throws IllegalArgumentException if the two values differ
     */
    public void validInputComplete() {
        Preconditions.checkArgument(processedBytesNum == completeFileSize, "ftp file transfer is not complete");
    }


    /**
     * Demo entry point: downloads one file chunk by chunk, sleeping between chunks to simulate
     * a slow consumer.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String ip = "***.***.***.****";
        int port = 21;
        String user = "***";
        String passwd = "***";
        String path = "/home/work";

        String fileName = "b.txt";

        FtpInput input = new FtpInput();
        try {
            input.fileName = fileName;
            input.path = path;
            input.ip = ip;
            input.port = port;
            input.user = user;
            input.passwd = passwd;

            // connect to FTP server
            input.connect();
            while (true) {
                // read bytes
                byte[] bytes = input.readBytes();
                if (bytes == null) {
                    break;
                }
                LOG.info("read " + bytes.length + " bytes at :" + new Date());
                // Attention: this is used for simulating the process of writing data into hive table
                // it maybe consume more than 1 minute;
                Thread.sleep(3000);
            }
            input.validInputComplete();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            LOG.error("interrupted while reading ftp file", e);
        } catch (Exception e) {
            LOG.error("ftp input failed", e);
        } finally {
            input.close();
        }
    }

}

这是异常消息:

java.net.SocketTimeoutException: Read timed out
or
java.net.SocketException: Connection reset

at stream.read(inputBuffer) in method readBytesFromStream

起初,我以为这是因为写入hive表太慢,导致FTP服务器关闭了连接。但实际上,写入hive表的速度已经足够快了。

现在,我需要你的帮助,我该如何解决这个问题。

1 个答案:

答案 0 :(得分:1)

从您的评论来看,下载该文件可能需要数小时才能完成。

您不能指望FTP服务器为了完成一次传输而等待数小时,尤其是在大部分时间里您并没有传输任何数据的情况下。这样会浪费服务器资源,而大多数服务器都会采取措施保护自己免受此类滥用。

你的设计有缺陷。

您应该重新设计应用程序:先将文件完整下载下来,待下载完成后再导入文件。