原始时间数据如下:
# Question's original attempt, kept verbatim (indentation was lost in this copy).
# Intent stated in the question: make every entry of df['time'] carry a year.
i=0
# NOTE: the loop variable below reuses the name `i`, so it replaces the counter set above.
for i in df['time']:
# NOTE(review): this takes the length of the whole column, not of the current element.
if len(df['time'])==5:
# NOTE(review): this concatenates onto the whole column, not onto the current element.
time = '2018'+'-'+df['time']
con_time.append(time)
# At this point `i` is the current element of df['time'], not the integer counter.
i +=1
else:
con_time.append(df['time'])
# The traceback in the question points at this statement: TypeError: must be str, not int.
i +=1
我希望所有这些数据都包含年份值。我使用了以下代码:
# Result list that the question's loop appends to.
con_time = []
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-78-b7d87c72f412> in <module>()
8 else:
9 con_time.append(df['time'])
---> 10 i +=1
TypeError: must be str, not int
发生错误:
const express = require('express');
const mongoose = require('mongoose');
const autheticate = require('../authenticate');
const multer = require('multer')
const bodyParser = require('body-parser');
// Disk storage configuration handed to multer: every upload is placed in
// 'public/images' and stored under the name reported in file.originalname.
const storage = multer.diskStorage({
  destination(req, file, done) {
    done(null, 'public/images');
  },
  filename(req, file, done) {
    done(null, file.originalname);
  },
});
/**
 * File filter for multer that accepts only image uploads, judged by the
 * extension of the original file name.
 *
 * The extension check is case-insensitive, so names such as `PHOTO.JPG`
 * are accepted in addition to `photo.jpg`.
 *
 * @param {object} req - Incoming request (not read by this filter).
 * @param {{originalname: string}} file - Upload descriptor; only `originalname` is read.
 * @param {function(Error|null, boolean): void} cb - Called with `(null, true)` to
 *   accept the file, or with an Error and `false` to reject it.
 */
const imageFileFilter = (req, file, cb) => {
  const isImage = /\.(jpg|jpeg|png|gif)$/i.test(file.originalname);
  if (!isImage) {
    return cb(new Error('You can upload only image files!'), false);
  }
  cb(null, true);
};
// Upload middleware: files are written through `storage` and screened by
// `imageFileFilter` before being accepted.
const upload = multer({
  fileFilter: imageFileFilter,
  storage,
});

// Router that the upload routes are attached to; JSON request bodies are
// parsed before any route handler runs.
const uploadRouter = express.Router();
uploadRouter.use(bodyParser.json());
// Handlers for the router's root path. Every verb first runs
// autheticate.verifyUser and autheticate.verifyAdmin. Only POST performs an
// upload; GET, PUT and DELETE answer 403 with a message naming the verb that
// was actually used.
uploadRouter.route('/')
  .get(autheticate.verifyUser, autheticate.verifyAdmin, (req, res) => {
    res.statusCode = 403;
    res.end('GET operation not supported on /imageUpload');
  })
  .post(
    autheticate.verifyUser,
    autheticate.verifyAdmin,
    upload.single('imageFile'),
    (req, res) => {
      res.statusCode = 200;
      res.setHeader('Content-Type', 'application/json');
      // The response is declared as JSON, so send a JSON body: the metadata
      // that upload.single() attached to req.file for the stored upload.
      res.json(req.file);
    },
  )
  .put(autheticate.verifyUser, autheticate.verifyAdmin, (req, res) => {
    res.statusCode = 403;
    res.end('PUT operation not supported on /imageUpload');
  })
  .delete(autheticate.verifyUser, autheticate.verifyAdmin, (req, res) => {
    res.statusCode = 403;
    res.end('DELETE operation not supported on /imageUpload');
  });

module.exports = uploadRouter;
这个错误很奇怪……实际上,我想创建一个新列表,将其转换为 np.array,再把它拼接到 df 中。有没有更好的方法来实现这个目标?
答案 0 :(得分:3)
因为你问到了其他做法:与其在 Python 中显式循环并填充列表,不如直接使用 DataFrame(Series)自带的方法。就你的情况而言,可以这样写:
# Prefix '2018-' to entries that are exactly 5 characters long; leave all other entries unchanged.
# NOTE: the result of the expression is not assigned to anything here.
df['time'].apply(lambda x: x if len(x) != 5 else '2018-'+x)
对于某些数据集,这可能会运行得更快
修改: 我实际用一个随机生成的玩具数据集做了运行时间基准测试,其中完整日期和不完整日期各占约 50%。简而言之:对于小型数据集,简单的 for 循环似乎更快;对于大型数据集,两种方法的性能相近:
# 1M examples
# Benchmark on a Series of 1,000,000 strings mixing full dates and year-less dates.
# NOTE(review): `pd` is used below but no `import pandas as pd` is visible in this snippet;
# `random` is imported but not used in the lines shown.
import random
import numpy as np
# Random draws of 0 or 1, one per example.
y = pd.Series(np.random.randint(0,2,1000000))
# 0 -> a full date, 1 -> a 5-character month-day string without a year.
s = {0:'2015-07-08', 1:'05-11'}
y = y.map(s)
# Variant 1: Series.apply with a lambda (the line after the cell is its recorded timing).
%%timeit -n100
_ = y.apply(lambda x: x if len(x) != 5 else '2018-'+x)
>>> 275 ms ± 6.42 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Variant 2: explicit Python loop that fills a list, which is then converted to a NumPy array.
# (Indentation of the loop body was lost in this copy.)
%%timeit -n100
con_time = []
for i in y:
if len(i)==5:
time = '2018-'+i
con_time.append(time)
else:
con_time.append(i)
con_time_a = np.array(con_time)
>>> 289 ms ± 5.23 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
# 1K examples
# Same benchmark as the 1M case, on a Series of 1,000 strings.
# NOTE(review): `pd` is used below but no `import pandas as pd` is visible in this snippet;
# `random` is imported but not used in the lines shown.
import random
import numpy as np
# Random draws of 0 or 1, one per example.
y = pd.Series(np.random.randint(0,2,1000))
# 0 -> a full date, 1 -> a 5-character month-day string without a year.
s = {0:'2015-07-08', 1:'05-11'}
y = y.map(s)
# Variant 1: Series.apply with a lambda (the line after the cell is its recorded timing).
%%timeit -n100
_ = y.apply(lambda x: x if len(x) != 5 else '2018-'+x)
>>> 431 µs ± 70.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Variant 2: explicit Python loop that fills a list, which is then converted to a NumPy array.
# (Indentation of the loop body was lost in this copy.)
%%timeit -n100
con_time = []
for i in y:
if len(i)==5:
time = '2018-'+i
con_time.append(time)
else:
con_time.append(i)
con_time_a = np.array(con_time)
>>> 289 µs ± 40.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
答案 1 :(得分:2)
你有两个名为 `i` 的变量:执行 `i += 1` 时,用到的 `i` 来自 `for i in df['time']`,而不是来自 `i = 0`。请给 for 循环中的变量换一个名字;例如,如果你并不需要 for 循环语句中的这个变量,可以把它命名为 `_`(下划线)。