我有一个庞大的数据集,其中包含Patient_details。我想删除唯一的(ID)行,并保留重复多次(重复)的ID。 我对python不熟悉。请帮助我在Spyder-Python中解决它。
1029.20384 # yes
102 # no, length of 3 < 8
预期结果为
Sub MergeToPub ()
Dim strWorkbookName As String
Dim pubSource As Object
Dim mrgMain As MailMerge
Dim appPub As New Publisher.Application
Dim FileLink As String
strWorkbookName = ThisWorkbook.Path & "\" & ThisWorkbook.Name
FileLink = [MailMergePub].Value
appPub.ActiveWindow.Visible = True
Set pubSource = appPub.Open(FileLink)
Set mrgMain = pubSource.MailMerge
'before i added this next line of code, for some reason
'it added the same data source twice and merged duplicate results
If pubSource.MailMerge.DataSource.Name = strWorkbookName Then GoTo ContinueCode
pubSource.MailMerge.OpenDataSource _
bstrDataSource:=strWorkbookName, _
bstrTable:="Sheet1$", _
fNeverPrompt:=True
ContinueCode:
'this adds two filters
With mrgMain.DataSource
.Filters.Add Column:="Column1", _
Comparison:=msoFilterComparisonEqual, _
Conjunction:=msoFilterConjunctionAnd, _
bstrCompareTo:="Name"
.Filters.Add Column:="Column2", _
Comparison:=msoFilterComparisonNotEqual, _
Conjunction:=msoFilterConjunctionAnd, _
bstrCompareTo:="yes"
.ApplyFilter
.FirstRecord = pbDefaultFirstRecord
.LastRecord = pbDefaultLastRecord
End With
mrgMain.Execute False, pbMergeToNewPublication
pubSource.Close
Set appPub = Nothing
Set pubSource = Nothing
End Sub
答案 0 :(得分:0)
检查ID
是否重复,如果为真,则保留它。
df = df[df['ID'].duplicated(keep=False)]