Question

原始时间数据如下：

# Attempt to prefix short time strings with the year 2018 and collect the values in con_time.
# NOTE(review): con_time is not defined inside this snippet; the traceback's line
# numbering suggests `con_time = []` was executed before this loop -- confirm.
i=0
# The loop rebinds i to each element of df['time'], so the counter set above is discarded.
for i in df['time']:
    # NOTE(review): this measures the length of the whole column, not of the current element i.
    if len(df['time'])==5:
        # NOTE(review): concatenates the prefix with the whole column rather than with i.
        time = '2018'+'-'+df['time']
        con_time.append(time)
        # i is an element of the column here, not a counter; adding an int to it is the
        # kind of operation that produces the reported "TypeError: must be str, not int".
        i +=1
    else:
        # NOTE(review): appends the whole column on every iteration, not the element i.
        con_time.append(df['time']) 
        # Same str + int addition; this is the statement the reported traceback points at.
        i +=1

我希望所有这些数据都包含年份值。我使用了以下代码：

# Accumulator list that the loop fills through con_time.append(...).
con_time = []

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-78-b7d87c72f412> in <module>()
      8     else:
      9         con_time.append(df['time'])
---> 10         i +=1

TypeError: must be str, not int

发生错误：

const express = require('express');
const mongoose = require('mongoose');
const autheticate = require('../authenticate');
const multer = require('multer')
const bodyParser = require('body-parser');
/**
 * Storage configuration handed to multer: every upload is directed to
 * 'public/images' and is named after the original name reported for the file.
 */
const storage = multer.diskStorage({
    // Reports the target directory through the node-style callback.
    destination(req, file, done) {
        done(null, 'public/images');
    },
    // Reports the stored file name: the upload's original name, unchanged.
    filename(req, file, done) {
        done(null, file.originalname);
    },
});
/**
 * File filter that accepts only files whose name ends in an image extension
 * (jpg, jpeg, png or gif).
 *
 * The extension check is case-insensitive, so names such as `PHOTO.JPG` are
 * accepted as well as `photo.jpg`.
 *
 * @param {object} req - Incoming request (not read by this filter).
 * @param {{originalname: string}} file - Upload descriptor; only `originalname` is read.
 * @param {(error: Error|null, accept: boolean) => void} cb - Invoked with
 *   `(null, true)` to accept the file, or `(Error, false)` to reject it.
 */
const imageFileFilter = (req, file, cb) => {
    const hasImageExtension = /\.(jpg|jpeg|png|gif)$/i.test(file.originalname);
    if (!hasImageExtension) {
        return cb(new Error('You can upload only image files!'), false);
    }
    cb(null, true);
};
// Multer instance combining the disk storage configuration with the image-only filter.
const upload = multer({ storage: storage, fileFilter: imageFileFilter });

const uploadRouter = express.Router();
uploadRouter.use(bodyParser.json());

/**
 * Builds a handler that answers 403 and names the HTTP method that was
 * attempted, so each unsupported verb reports itself correctly.
 *
 * @param {string} method - HTTP method name to show in the response text.
 * @returns {(req: object, res: object) => void} Express request handler.
 */
const rejectMethod = (method) => (req, res) => {
    res.statusCode = 403;
    res.end(`${method} operation not supported on /imageUpload`);
};

// Every verb runs verifyUser and then verifyAdmin first; only POST performs an upload.
uploadRouter.route('/')
    .get(autheticate.verifyUser, autheticate.verifyAdmin, rejectMethod('GET'))
    .post(
        autheticate.verifyUser,
        autheticate.verifyAdmin,
        upload.single('imageFile'),
        (req, res) => {
            res.statusCode = 200;
            res.setHeader('Content-Type', 'application/json');
            // NOTE(review): the body 'End' is not valid JSON although the header
            // announces application/json -- confirm what clients expect here.
            res.end('End');
        },
    )
    .put(autheticate.verifyUser, autheticate.verifyAdmin, rejectMethod('PUT'))
    .delete(autheticate.verifyUser, autheticate.verifyAdmin, rejectMethod('DELETE'));

module.exports = uploadRouter;

这个错误很奇怪.... 实际上我想创建一个新列表，将其转换为np.array并将其连接到df中。我有更好的方法来实现目标吗？

Answer 1

既然你问到了其他方法：与其在 Python 中显式循环并填充列表，不如直接使用 DataFrame 的方法。就你的情况而言，可以写成：

# Element-wise rule: a value that is exactly 5 characters long gets '2018-' prepended;
# any other value is kept unchanged. The result of the expression is not assigned here.
df['time'].apply(lambda x: x if len(x) != 5 else '2018-'+x)

对于某些数据集，这可能会运行得更快

编辑：我实际用一个随机生成的玩具数据集做了计时基准测试，其中完整日期和不完整日期约各占 50％。简而言之：对于小型数据集，简单的 for 循环方案似乎更快；对于大型数据集，两种方法的性能相近：

# 1M examples
import random
import numpy as np

y = pd.Series(np.random.randint(0,2,1000000))
s = {0:'2015-07-08', 1:'05-11'}
y = y.map(s)

%%timeit -n100
_ = y.apply(lambda x: x if len(x) != 5 else '2018-'+x)
>>> 275 ms ± 6.42 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)

%%timeit -n100
con_time = []
for i in y:
    if len(i)==5:
        time = '2018-'+i
        con_time.append(time)
    else:
        con_time.append(i)
con_time_a = np.array(con_time)
>>> 289 ms ± 5.23 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)

# 1K examples
import random
import numpy as np

y = pd.Series(np.random.randint(0,2,1000))
s = {0:'2015-07-08', 1:'05-11'}
y = y.map(s)

%%timeit -n100
_ = y.apply(lambda x: x if len(x) != 5 else '2018-'+x)
>>> 431 µs ± 70.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

%%timeit -n100
con_time = []
for i in y:
    if len(i)==5:
        time = '2018-'+i
        con_time.append(time)
    else:
        con_time.append(i)
con_time_a = np.array(con_time)
>>> 289 µs ± 40.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

Answer 2

代码中有两个名为 i 的变量。执行 i += 1 时，用到的 i 来自

for i in df['time']

而不是

i = 0

请给 for 循环的变量换一个名字；例如，如果不需要 for 循环语句中的变量，可以将其命名为 _（下划线）。

TypeError：必须是str，而不是int无法解决

2 个答案: