spark.rdd.compress及其对保存表的影响

时间:2019-06-06 10:47:42

标签: apache-spark pyspark

来自诸如 this 和 this 之类的问题,我想知道:当我将在RDD级别上分区的数据帧保存到(例如)Parquet表时,spark.rdd.compress是否会产生影响。
或者也许换句话说:spark.rdd.compress是否还会压缩我使用dataframe.write.saveAsTable(...)时创建的表?

根据官方文档(docs),spark.rdd.compress的作用如下:

  

是否压缩序列化的RDD分区(例如Java和Scala中的StorageLevel.MEMORY_ONLY_SER,或Python中的StorageLevel.MEMORY_ONLY)。可以节省大量空间,代价是花费一些额外的CPU时间。压缩将使用spark.io.compression.codec。

因此,此外,如果这种压缩有效,是否还会花费额外的CPU从这种表中再次检索数据?

1 个答案:

答案 0 :(得分:1)

  

spark.rdd.compress是否还会压缩我使用dataframe.write.saveAsTable(...)时创建的表

不会,RDD接收器也不会。

如您引用的文档中所述,它仅适用于序列化(`_SER`)缓存。它与外部存储无关。