编辑：我已将文件精简到只剩下出问题的部分。
from PySide import QtCore, QtGui
class Window(QtGui.QWidget):
    """Widget holding a single QTextEdit whose key presses it intercepts."""

    def __init__(self):
        """Create the text edit, install this widget as its event filter, and lay it out."""
        QtGui.QWidget.__init__(self)
        self.edit = QtGui.QTextEdit(self)
        # Events sent to the editor are passed to self.eventFilter() first.
        self.edit.installEventFilter(self)
        layout = QtGui.QVBoxLayout(self)
        layout.addWidget(self.edit)

    def eventFilter(self, widget, event):
        """Handle key presses aimed at the text edit.

        Escape prints 'escape' and then falls through to the base-class
        filter. Every other key press on the editor returns True (which,
        per the Qt event-filter contract, stops further handling of the
        event); Return and Enter additionally replace the editor's text.
        All other events are delegated to QWidget.eventFilter.
        """
        # NOTE(review): the original indentation was lost; `return True` is
        # placed inside the `else` branch so that Escape still reaches the
        # default handler -- confirm against the intended behaviour.
        if (event.type() == QtCore.QEvent.KeyPress and
                widget is self.edit):
            key = event.key()
            if key == QtCore.Qt.Key_Escape:
                print('escape')
            else:
                if key == QtCore.Qt.Key_Return:
                    self.edit.setText('return')
                elif key == QtCore.Qt.Key_Enter:
                    self.edit.setText('enter')
                return True
        return QtGui.QWidget.eventFilter(self, widget, event)
# Script entry point: show the demo window and run the Qt event loop.
if __name__ == '__main__':
    import sys

    app = QtGui.QApplication(sys.argv)
    window = Window()
    window.setGeometry(500, 300, 300, 300)
    window.show()
    # Propagate the event loop's return code as the process exit status.
    sys.exit(app.exec_())
对于第一个测试，我得到以下输出，表明这两个值是重复的：
import pandas as pd

# Sample of listing URLs. Several neighbouring entries differ only in the
# letter case of the ID suffix (e.g. "...ID43q4X" vs "...ID43q4x").
raw_data = {
    "link": [
        'https://www.otodom.pl/oferta/mieszkanie-w-spokojnej-okolicy-gdansk-lostowice-ID43FLJ.html#cda8700ef5',
        'https://www.otodom.pl/oferta/mieszkanie-w-spokojnej-okolicy-gdansk-lostowice-ID43FLH.html#cda8700ef5',
        'https://www.otodom.pl/oferta/mieszkanie-w-spokojnej-okolicy-gdansk-lostowice-ID43FLj.html#cda8700ef5',
        'https://www.otodom.pl/oferta/mieszkanie-w-spokojnej-okolicy-gdansk-lostowice-ID43FLh.html#cda8700ef5',
        'https://www.otodom.pl/oferta/zielony-widok-mieszkanie-3m04-ID43EWU.html#9dca9667c3',
        'https://www.otodom.pl/oferta/zielony-widok-mieszkanie-3m04-ID43EWu.html#9dca9667c3',
        'https://www.otodom.pl/oferta/nowoczesne-osiedle-gotowe-do-konca-roku-bazantow-ID43vQM.html#af24036d28',
        'https://www.otodom.pl/oferta/nowoczesne-osiedle-gotowe-do-konca-roku-bazantow-ID43vQJ.html#af24036d28',
        'https://www.otodom.pl/oferta/nowoczesne-osiedle-gotowe-do-konca-roku-bazantow-ID43vQm.html#af24036d28',
        'https://www.otodom.pl/oferta/nowoczesne-osiedle-gotowe-do-konca-roku-bazantow-ID43vQj.html#af24036d28',
        'https://www.otodom.pl/oferta/mieszkanie-56-m-warszawa-ID43sWY.html#2d0084b7ea',
        'https://www.otodom.pl/oferta/mieszkanie-56-m-warszawa-ID43sWy.html#2d0084b7ea',
        'https://www.otodom.pl/oferta/idealny-2pok-apartament-0-pcc-widok-na-park-ID43q4X.html#64f19d3152',
        'https://www.otodom.pl/oferta/idealny-2pok-apartament-0-pcc-widok-na-park-ID43q4x.html#64f19d3152',
    ]
}
df = pd.DataFrame(raw_data, columns=["link"])

# duplicate check #1
# Store the cell values themselves. print() returns None, so assigning its
# result made `a == b` compare None with None and always report "equal".
a = df.iloc[12, 0]
b = df.iloc[13, 0]
print(a)
print(b)
if a == b:
    print("equal")

# duplicate check #2
# Exact (case-sensitive) duplicates: True for a row whose "link" value
# already appeared in an earlier row.
exact_duplicates = df.duplicated()
print(exact_duplicates)

# Tools that compare text case-insensitively will treat links differing only
# in letter case as the same value; lower-casing first reproduces that count.
case_insensitive_duplicates = df["link"].str.lower().duplicated()
print(case_insensitive_duplicates.sum())
而第二个测试的结果却显示没有重复：
https://www.otodom.pl/oferta/idealny-2pok-apartament-0-pcc-widok-na-park-ID43q4X.html#64f19d3152
https://www.otodom.pl/oferta/idealny-2pok-apartament-0-pcc-widok-na-park-ID43q4x.html#64f19d3152
equal
原始帖子:
我正尝试从所附数据的“链接”（Link）列中找出重复的值：
0 False
1 False
2 False
3 False
4 False
5 False
6 False
7 False
8 False
9 False
10 False
11 False
12 False
13 False
dtype: bool
使用上面的代码时，我得到的结果始终是没有重复项。但我用 Excel 检查过，应该有七处重复。我甚至挑了几个具体的单元格做了快速检查（代码中用 # 标出的部分），这些值也被判定为相等。然而
import pandas as pd
# Load the consolidated export. read_csv already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapping is needed.
data = pd.read_csv(r"...\consolidated.csv", sep=",")
# Work on a frame without the 'Unnamed: 0' column
# (presumably a leftover index column from an earlier export -- confirm).
df = data.drop(columns=["Unnamed: 0"])
# Rows whose "Link" value already appeared in an earlier row. The comparison
# is exact, so values differing only in letter case are NOT duplicates.
duplicate_rows = df[df.duplicated(["Link"], keep="first")]
print(duplicate_rows)
# NOTE: the disabled check below assigns the result of print(), which is
# always None, so it would report "equal" for any two cells.
#a = print(df.iloc[42657][15])
#b = print(df.iloc[42676][15])
#if a == b:
# print("equal")
却没有把它们识别出来。
我已经琢磨了足足一个小时，仍然不知道自己漏掉了什么——非常感谢任何帮助！
答案 0 :(得分:0)
首先，您不需要 `df = pd.DataFrame(data)`，因为 `data = pd.read_csv(r"...\consolidated.csv", sep=",")` 已经返回了一个 DataFrame。
至于删除重复项，请查看官方文档中的 `drop_duplicates` 方法。
希望这会有所帮助。
答案 1 :(得分:0)
我遇到过同样的问题，把 DataFrame 的该列转换为 `str` 类型后就解决了。
例如：
# Cast the "link" column to str before looking for duplicates.
link_as_text = df['link'].astype(str)
df['link'] = link_as_text
# Select the rows whose "link" value already appeared in an earlier row.
repeated_mask = df.duplicated(["link"], keep="first")
duplicate_rows = df[repeated_mask]