目前我正在使用 split() 函数拆分待售汽车的标题。然后我遍历由 pandas 读取的 csv 文件,以查找匹配的字符串。这是我目前的脚本:
# Split a car-listing title into words and report every row of `df` whose
# make or model text contains one of those words.
#
# NOTE(review): the snippet's original indentation was lost; the nesting below
# is the most natural reading (both checks run once per row) -- confirm.
# `df` is expected to be a pandas DataFrame loaded elsewhere (e.g. from the CSV).
title = 'ABARTH Abarth 500 124 1.4l 22,200miles UK'
keywords = title.split()

for word in keywords:
    for index, row in df.iterrows():
        # Explicit positional access: plain `row[1]` on a label-indexed Series
        # relies on a deprecated positional fallback.
        # Presumably column 1 holds the make and column 3 the model -- verify
        # against the CSV layout.
        makes = row.iloc[1]
        models = row.iloc[3]

        # Case-sensitive substring test of the title word against the make.
        if word in makes:
            print('Found: ' + word + ' as a match with ' + makes + ' in pd data set.')
        else:
            print('no')

        # Independent check against the model column of the same row.
        if word in models:
            print('Found: ' + word + ' as a match with ' + models + ' in pd data set.')
成功匹配 'make'(品牌)之后,我该如何匹配相应的 'model'(车型)?
答案 0 :(得分:1)
str.contains 可以帮助您找到匹配的字符串,然后需要使用 DataFrame.loc 对数据框应用过滤条件。您只需要对所需的 make 和 model 分别重复使用这两个函数即可。由于您提供的数据不足,我创建了一份示例数据,代码如下。
avformat_find_stream_info()
输出
import pandas as pd
import numpy as np
import re
from io import StringIO
def _alternation(terms):
    """Return a regex alternation matching any of *terms* literally."""
    return "|".join(re.escape(term) for term in terms)


# Regex for the makes to keep (matched against the Model column).
words = ['amc', 'buick', 'volkswagen']
Models = _alternation(words)

# Regex for the origins to keep (matched against the Origin column).
words = ['European', 'American']
Origins = _alternation(words)

# Inline sample data standing in for the real CSV file.
_SAMPLE_CSV = """
Model,MPG,Cylinders,Engine Disp,Horsepower,Weight,Accelerate,Year,Origin
amc ambassador dpl,15.0,8,390,190,3850,8.5,70,American
amc gremlin,21.0,6,199,90,2648,15.0,70,American
amc hornet,18.0,6,199,97,2774,15.5,70,American
amc rebel sst,16.0,8,304,150,3433,12.0,70,American
buick estate wagon (sw),14.0,8,455,225,3086,10.0,70,American
buick skylark 320,15.0,8,350,165,3693,11.5,70,American
chevrolet chevelle malibu,18.0,8,307,130,3504,12.0,70,American
chevrolet impala,14.0,8,454,220,4354,9.0,70,American
volkswagen 1131 deluxe sedan,26.0,4,97,46,1835,20.5,70,European
fiat 124b,30.0,4,88,76,2065,14.5,71,European
opel 1900,28.0,4,116,90,2123,14.0,71,European
peugeot 304,30.0,4,79,70,2074,19.5,71,European
volkswagen model 111,27.0,4,97,60,1834,19.0,71,European """
TESTDATA = StringIO(_SAMPLE_CSV)

df = pd.read_csv(TESTDATA, sep=",")

# Keep only the rows whose Model AND Origin both match their patterns.
model_hit = df["Model"].str.contains(Models)
origin_hit = df["Origin"].str.contains(Origins)
df = df.loc[model_hit & origin_hit]

print(df)