I have a pandas DataFrame with roughly 100 million rows, over which I map my_function() in parallel. This works well on a multi-core machine, with every core running at 100% utilization. However,
the result of executor.map() is a generator, so to actually collect the processed results I iterate over that generator. That is very, very slow (hours), partly because it runs on a single core and partly because of the loop itself; in fact, it is even slower than the executor.map() step.

Is there a better way (possibly concurrent and/or vectorized)?

Edit: using pandas 0.23.4 (current latest) with Python 3.7.0.
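For reference, what I am doing looks roughly like the sketch below (simplified: the chunking, the worker count, and the body of my_function are placeholders, not my real code):

import concurrent.futures

import numpy as np
import pandas as pd

def my_function(chunk):
    # placeholder for the real per-chunk processing
    return chunk * 2

def process(df, n_workers=8):
    chunks = np.array_split(df, n_workers * 4)  # illustrative chunking
    out = pd.DataFrame()
    with concurrent.futures.ProcessPoolExecutor(max_workers=n_workers) as executor:
        # executor.map() submits the chunks to worker processes and
        # returns a generator over the processed results
        for result in executor.map(my_function, chunks):
            # collecting the results one by one is the slow, single-core part
            out = out.append(result)
    return out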
Answer 0 (score: 1)
Here is a benchmark related to your case: https://stackoverflow.com/a/31713471/5588279

As you can see, concatenating (appending) many times is inefficient. You should just do pd.concat(gen); I believe the underlying implementation will pre-allocate all the memory it needs. In your case, memory is allocated anew on every append.
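A minimal sketch of what I mean, reusing the placeholder setup from the sketch in the question:

import concurrent.futures

import numpy as np
import pandas as pd

def my_function(chunk):
    # placeholder for the real per-chunk processing
    return chunk * 2

def process_concat(df, n_workers=8):
    chunks = np.array_split(df, n_workers * 4)  # illustrative chunking
    with concurrent.futures.ProcessPoolExecutor(max_workers=n_workers) as executor:
        gen = executor.map(my_function, chunks)  # generator of processed chunks
        # hand the generator straight to pd.concat: the final frame is assembled
        # in one pass instead of re-allocating and copying on every append
        return pd.concat(gen)

The explicit collection loop disappears entirely; pd.concat consumes the generator and builds the result in a single concatenation.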