我有一个大型数据集,其中有标记为 1 - 65 的列(此外还有其他标题列),我想统计每一行中有多少列包含字符串(任意值)。例如,如果某一行的第 1 - 65 列全部已填充,则该行的计数应为 65;如果只填充了 10 列,则计数应为 10。
有没有简单的方法可以做到这一点?我目前正在使用以下代码,由于行数很多,因此需要很长时间。
var Co2 = require("./../models/co2.model"),
Dht = require("./../models/co2.model"),
express = require("express"),
q = require("q"),
router = express.Router();
/**
 * GET /all — responds with the records of both models merged into one array.
 *
 * Both `showAll` lookups are started right away and awaited together. The
 * response is sent exactly once: `{ message: "Done!", result: [...] }` with
 * the Co2 records followed by the Dht records, or the error serialized as
 * JSON if either lookup fails.
 *
 * NOTE(review): `Dht` is required from the same module path as `Co2` at the
 * top of this file — confirm that is intended.
 */
router.get("/all", function(req, res, next) {
  // Adapt the callback-style `showAll(function(err, results) {...})` to a
  // promise. The value handed to the callback is what we need; the return
  // value of `showAll` itself is not the data.
  var collect = function(model) {
    var deferred = q.defer();
    model.showAll(function(err, results) {
      if (err) {
        deferred.reject(err);
        return;
      }
      // Treat a missing result set as "no rows" instead of crashing later.
      deferred.resolve(results || []);
    });
    return deferred.promise;
  };

  // `q.fcall` turns a synchronous throw inside `showAll` into a rejection,
  // so every failure funnels into the single `.catch` below.
  q
    .all([q.fcall(collect, Co2), q.fcall(collect, Dht)])
    .then(function(results) {
      // `q.all` resolves to the plain values, in the order requested.
      var mergedData = [].concat(results[0], results[1]);
      res.json({
        message: "Done!",
        result: mergedData
      });
    })
    .catch(function(err) {
      res.json(err);
    })
    .done();
});
module.exports = router;
答案 0 :(得分:1)
您应该能够使您的问题适应此示例
假设我们有这个数据框
# Sample frame: four rows by three unnamed columns; "" marks an unfilled cell.
df = pd.DataFrame(
    data=[
        ["", "foo", "bar"],
        ["", "", "bar"],
        ["", "", ""],
        ["foo", "bar", "bar"],
    ]
)
0 1 2
0 foo bar
1 bar
2
3 foo bar bar
然后我们创建一个布尔掩码,标记出单元格 != "" 的位置,并按行将这些值相加
# Boolean mask of non-empty cells, summed across each row (True counts as 1).
df['count'] = df.ne("").sum(axis=1)
print(df)
0 1 2 count
0 foo bar 2
1 bar 1
2 0
3 foo bar bar 3
答案 1 :(得分:0)
根据我对帖子及后续评论的理解,您想知道在标签为 1 到 65 的列中,每一行有多少个字符串。这需要两个步骤:第一步是取出第 1 列到第 65 列的数据子集,第二步是统计每行中的字符串个数。做法如下:
import pandas as pd
import numpy as np
# create sample data
df = pd.DataFrame({'col1': list('abdecde'),
                   'col2': np.random.rand(7)})
# change one val of column two to string for illustration purposes.
# The column is cast to object first: assigning a str into a float64 column
# is deprecated in recent pandas releases.
df['col2'] = df['col2'].astype(object)
df.loc[3, 'col2'] = 'b'
# to create the subset of columns, you could use
# subset = [str(num) for num in list(range(1, 66))]
# and then just use df[subset]
# for each row, count the number of columns that have a string value.
# Series.map operates elementwise on each column, so we build a boolean
# frame where True means a string value was there and False means it was not.
# Summing along the rows (True counts as 1) gives the final counts.
# Series.map is used instead of DataFrame.applymap because applymap is
# deprecated in newer pandas releases.
col_str_counts = df.apply(lambda col: col.map(lambda x: isinstance(x, str))).sum(axis=1)
# we changed the column two value above, so to check that the count is 2 for that row idx:
col_str_counts[3]
# >>> 2
# and for the subset, it would simply become:
# col_str_counts = df[subset].apply(lambda col: col.map(lambda x: isinstance(x, str))).sum(axis=1)
答案 2 :(得分:0)
// Turn every entry of snapshot.docs into a { key, value } pair built from
// the entry's `id` and the result of its `data()` call.
const documentData = snapshot.docs.map((doc) => {
  return { key: doc.id, value: doc.data() };
});