有些文件(例如 xyz.tar.gz.pgp)很大,无法整个加载到内存中。我们需要用 Scala 以流的方式对这些文件进行解密和解压,从而能够逐行访问文件内容,再将每一行进一步拆分并映射到数据库的相应列。
解压时,我尝试使用 TarArchiveInputStream。它适用于 tar.gz,但不适用于 tar.gz.pgp。我没能弄清楚在解密时如何将 BCPGInputStream 转换为 TarArchiveInputStream 可以接受的 InputStream。
我使用公钥/私钥对,试图通过流解密文件,但是它不起作用。
/**
 * Decrypts a public-key-encrypted OpenPGP message from `input` as a stream and passes the
 * resulting literal-data packet to `handler`.
 *
 * Nothing is buffered in full: `handler` receives the [[PGPLiteralData]] packet and reads the
 * plaintext from its stream (for a `.tar.gz.pgp` file, presumably by wrapping
 * `msg.getInputStream` in gzip/tar readers — verify against the caller).
 *
 * `handler` must consume everything it needs before returning: once it returns, the integrity
 * check runs and the decoder stream is closed. Do not return a lazy view over the stream.
 *
 * @param input   raw (binary or ASCII-armored) PGP message; closed when this method returns
 * @param handler callback that consumes the decrypted literal data and produces the result
 * @tparam U      result type produced by `handler`
 * @return whatever `handler` returned
 * @throws IllegalArgumentException if the input holds no public-key encrypted data, or the
 *                                  decrypted content is not (compressed) literal data
 * @throws IllegalStateException    if the "BC" security provider is not registered
 * @throws java.io.IOException      if the message is integrity protected and the check fails
 */
private[this] def decryptHelper[U](input: InputStream)(handler: PGPLiteralData => U): U = {
  import collection.JavaConverters._

  val fixIn = PGPUtil.getDecoderStream(input)
  try {
    val objF = new JcaPGPObjectFactory(fixIn)

    def asEncryptedList(obj: Any): Option[PGPEncryptedDataList] = obj match {
      case e: PGPEncryptedDataList => Some(e)
      case _                       => None
    }
    // The encrypted data list is either the first object or (after a leading marker packet)
    // the second one. `orElse` is by-name, so the second read only happens when needed.
    val enc = asEncryptedList(objF.nextObject)
      .orElse(asEncryptedList(objF.nextObject))
      .getOrElse(throw new IllegalArgumentException("Input does not contain PGP encrypted data."))

    val candidates = enc.getEncryptedDataObjects()
      .asInstanceOf[java.util.Iterator[AnyRef]].asScala
      .collect { case p: PGPPublicKeyEncryptedData => p }
      .toList

    // A message encrypted to several recipients carries one session-key packet per recipient:
    // prefer the one addressed to our key, and only fall back to the first packet otherwise.
    val pbe = candidates.find(_.getKeyID == this.keyID).orElse(candidates.headOption).getOrElse {
      throw new IllegalArgumentException("Secret key for message not found.")
    }
    // Diagnostic output kept from the original: report the packet's key ID on mismatch.
    if (pbe.getKeyID != this.keyID) println(pbe.getKeyID)
    val privKey = nested
    println(privKey.getKeyID)

    val clear = {
      // Security.getProvider returns null when the provider was never registered.
      val provider = Option(Security.getProvider("BC")).getOrElse {
        throw new IllegalStateException("BouncyCastle security provider \"BC\" is not registered.")
      }
      val dataDecryptorFactory = new JcePublicKeyDataDecryptorFactoryBuilder()
        .setProvider(provider)
        .setContentProvider(provider)
        .build(privKey)
      pbe.getDataStream(dataDecryptorFactory)
    }
    val plainFact = new JcaPGPObjectFactory(clear)

    // Unwraps (possibly nested) compressed packets until the literal data packet is reached.
    def extractLiteral(x: Any): PGPLiteralData = x match {
      case msg: PGPLiteralData => msg
      case cData: PGPCompressedData =>
        val compressedStream = new BufferedInputStream(cData.getDataStream)
        extractLiteral(new JcaPGPObjectFactory(compressedStream).nextObject)
      case null =>
        // nextObject yields null at end of stream.
        throw new IllegalArgumentException("Decrypted message contains no literal data.")
      case other =>
        throw new IllegalArgumentException(
          s"Unsupported PGP message content: ${other.getClass.getName}")
    }

    val msg = extractLiteral(plainFact.nextObject)
    val result = handler(msg)
    // The integrity check is only meaningful after the data has been read, hence after handler.
    if (pbe.isIntegrityProtected && !pbe.verify())
      throw new java.io.IOException("Encrypted message failed integrity check.")
    result
  } finally fixIn.close()
}
期望的结果:能够以流的方式逐行获取解密并解压后的原始内容,以便将每一行进一步拆分并映射到数据库的各列。