我想使用字典
替换Pandas数据帧中的值DataFrame = games-u-q-s.csv:
blue1 blue2 blue3 blue4 blue5 red1 red2 red3 red4 red5 winner
8 432 96 11 112 104 498 122 238 412 0
119 39 76 10 35 54 25 120 157 92 0
57 63 29 61 36 90 19 412 92 22 0
第1列 - 第10列包含带有winner
列的标签为
Dictionary = champNum.csv
champId champNum
266 1
103 2
84 3
12 4
32 5
34 6
1 7
. .
. .
143 138
并将其另存为dataset_feature_champion_number.csv
我想将champId
转换为champNum
并将预期输出转换为:
blue1 blue2 blue3 blue4 blue5 red1 red2 red3 red4 red5 winner
125 11 59 70 124 36 129 20 135 111 0
23 40 77 53 95 67 73 37 132 91 0
69 13 116 81 22 68 127 111 91 8 0
这是代码:
import csv
import os
import numpy as np
import pandas as pd
def createDictionary(csvfile):
with open(csvfile, mode='r') as data:
reader = csv.reader(data)
dict = {int(rows[0]):int(rows[1]) for rows in reader}
return dict
def convertDataframeToChampNum(csvfile,dictionary):
df = pd.read_csv(csvfile)
temp1 = df.iloc[:,1:11]
temp2 = df['winner']
temp3 = temp1.applymap(dictionary.get)
champNum = temp3.join(temp2)
return champNum
def saveAsCSV(dataframe):
dataframe.to_csv("dataset_feature_champion_number.csv")
def main():
diction = createDictionary("champNum.csv")
dataset = convertDataframeToChampNum("games-u-q-s.csv",diction)
saveAsCSV(dataset)
if __name__ =='__main__':
main()
我犯了很多错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-f86679fc49f9> in <module>()
27
28 if __name__ =='__main__':
---> 29 main()
<ipython-input-19-f86679fc49f9> in main()
22
23 def main():
---> 24 diction = createDictionary("champNum.csv")
25 dataset = convertDataframeToChampNum("games-u-q-s.csv",diction)
26 saveAsCSV(dataset)
<ipython-input-19-f86679fc49f9> in createDictionary(csvfile)
7 with open(csvfile, mode='r') as data:
8 reader = csv.reader(data)
----> 9 dict = {int(rows[0]):int(rows[1]) for rows in reader}
10 return dict
11
<ipython-input-19-f86679fc49f9> in <dictcomp>(.0)
7 with open(csvfile, mode='r') as data:
8 reader = csv.reader(data)
----> 9 dict = {int(rows[0]):int(rows[1]) for rows in reader}
10 return dict
11
ValueError: invalid literal for int() with base 10: 'champNum'
答案 0 :(得分:0)
我认为您正在寻找pandas.DataFrame.transform:
>>> a = pd.DataFrame([[1,2,3,4,5],[6,7,8,9,10]])
>>> a
0 1 2 3 4
0 1 2 3 4 5
1 6 7 8 9 10
>>> a.transform(lambda x: -x)
0 1 2 3 4
0 -1 -2 -3 -4 -5
1 -6 -7 -8 -9 -10
或,适用于您的问题
df = pd.DataFrame({'blue1': [8, 119, 57],
'blue2': [432, 39, 63],
'blue3': [96, 76, 29],
'blue4': [11, 10, 61],
'blue5': [112, 35, 36],
'red1': [104, 54, 90],
'red2': [498, 25, 19],
'red3': [122, 120, 412],
'red4': [238, 157, 92],
'red5': [412, 92, 22],
'winner': [0, 0, 0]})
transform_dict = {266: 1, 103: 2, ...}
df.transform(lambda x: transform_dict[x] if x in transform_dict else None)