Question

我有一个大型数据集，其中的列标记为 1 到 65（此外还有其他标题列），我想统计每一行中有多少列包含字符串（任意值）。例如，如果某一行的第 1 到 65 列全部已填充，则该行的计数应为 65；如果只填充了 10 列，则计数应为 10。

有没有简单的方法可以做到这一点？我目前正在使用以下代码，由于行数很多，因此需要很长时间。

var Co2 = require("./../models/co2.model"),
  Dht = require("./../models/co2.model"),
  express = require("express"),
  q = require("q"),
  router = express.Router();

/**
 * GET /all — loads every row from both models and responds with one combined
 * list as `{ message: "Done!", result: [...] }`.
 *
 * `showAll` takes a Node-style `(err, results)` callback, so each call is
 * adapted to a promise with `q.ninvoke`. That lets `q.all` actually wait for
 * both result sets before the response is written, and guarantees the
 * response is sent exactly once.
 *
 * Any failure from either model is forwarded to `next` so the application's
 * error-handling middleware decides how to answer.
 */
router.get("/all", function(req, res, next) {
  // NOTE(review): `Dht` is required from the same "co2.model" path as `Co2`
  // at the top of this file — confirm that is intended.
  var co2Rows = q.ninvoke(Co2, "showAll");
  var dhtRows = q.ninvoke(Dht, "showAll");

  q
    .all([co2Rows, dhtRows])
    .then(function(results) {
      // `q.all` resolves to the fulfilled values themselves (no `.value`
      // wrapper); fall back to an empty list if a model yields nothing.
      var mergedData = [].concat(results[0] || [], results[1] || []);
      res.json({
        message: "Done!",
        result: mergedData
      });
    })
    .catch(next)
    .done();
});

module.exports = router;

Answer 1

您应该能够使您的问题适应此示例

假设我们有这个数据框

# Sample frame: an empty string stands for a cell that has not been filled in.
df = pd.DataFrame(
    [
        ["", "foo", "bar"],
        ["", "", "bar"],
        ["", "", ""],
        ["foo", "bar", "bar"],
    ]
)

     0    1    2
0       foo  bar
1            bar
2               
3  foo  bar  bar

然后我们创建一个布尔掩码，标记出不等于 "" 的单元格，并按行对这些布尔值求和

# Boolean mask of the cells that differ from the empty string; summing it
# across the columns (axis=1) gives the number of filled cells in each row.
is_filled = df != ""
df['count'] = is_filled.sum(axis=1)
print(df)

     0    1    2  count
0       foo  bar      2
1            bar      1
2                     0
3  foo  bar  bar      3

Answer 2

根据我对帖子及后续评论的理解，您想知道在列标签 1 到 65 的范围内，每一行中有多少个字符串。这需要两个步骤：第一步是取出第 1 列到第 65 列的数据子集，第二步是计算每行中的字符串数。具体做法如下：

import pandas as pd
import numpy as np

# create sample data
# col2 is stored with object dtype from the start so that a string can be
# placed next to the floats below without relying on implicit dtype
# upcasting, which recent pandas releases deprecate
df = pd.DataFrame({'col1': list('abdecde'),
                   'col2': np.random.rand(7).astype(object)})

# change one val of column two to string for illustration purposes
df.loc[3, 'col2'] = 'b'

# to create the subset of columns, you could use
# subset = [str(num) for num in list(range(1, 66))]
# and then just use df[subset]

# for each row, count the number of columns that have a string value
# Series.map operates elementwise, so applying it to every column builds
# a boolean representation of your data, where True means a string value
# was there and False means it was not a string.
# (Series.map is used instead of DataFrame.applymap, which is deprecated.)
# we then sum along the rows to get the final counts
col_str_counts = df.apply(lambda col: col.map(lambda x: isinstance(x, str))).sum(axis=1)

# we changed the column two value above, so to check that the count is 2 for that row idx:
col_str_counts[3]
# -> 2

# and for the subset, it would simply become:
# col_str_counts = df[subset].apply(lambda col: col.map(lambda x: isinstance(x, str))).sum(axis=1)

Answer 3

// Turn every document in the snapshot into a { key, value } pair: the
// document id becomes `key` and the result of `data()` becomes `value`.
// NOTE(review): `snapshot` is presumably a Firestore QuerySnapshot — confirm.
const documentData = snapshot.docs.map((doc) => {
  return { key: doc.id, value: doc.data() };
});

pandas计算行内已填充单元格的数量

3 个答案: