Python pandas:删除列值为“NaN”的重复行

时间:2018-11-21 13:26:17

标签: python pandas duplicates

需要删除既包含NaN值、又重复的行。例如,此表:

    A   B   C
0   foo 2   3
1   foo nan nan
2   foo 1   4
3   bar nan nan
4   foo nan nan

应该变成这个:

    A   B   C
0   foo 2   3
2   foo 1   4
3   bar nan nan

我该怎么做?

2 个答案:

答案 0 :(得分:2)

使用boolean indexing

import UIKit

/// View controller that lets the user drag out a straight blue line on
/// `drawingPlace`. While the finger moves, the previously committed image is
/// redrawn with a line from the touch-down point to the current point on top;
/// when the touch ends, the displayed result becomes the new committed image.
class ViewController2: UIViewController {

    /// Image view whose coordinate space the touches are read in and which
    /// displays the drawing.
    @IBOutlet weak var drawingPlace: UIImageView!

    /// Touch-down location of the current stroke, in `drawingPlace` coordinates.
    var startTouch : CGPoint?
    /// Latest touch location of the current stroke, in `drawingPlace` coordinates.
    var secondTouch : CGPoint?
    /// Image context of the stroke in progress; `nil` when no context is open.
    var currentContext : CGContext?
    /// Image committed by earlier strokes; redrawn underneath the live line.
    var prevImage : UIImage?


    override func viewDidLoad() {
        super.viewDidLoad()
    }


    /// Records where the stroke starts.
    override func touchesBegan(_ touches: Set<UITouch>, with event: UIEvent?) {
        startTouch = touches.first?.location(in: drawingPlace)
    }

    /// Redraws the committed image plus a line from `startTouch` to the
    /// current touch position and shows the result in `drawingPlace`.
    override func touchesMoved(_ touches: Set<UITouch>, with event: UIEvent?) {
        // Without a recorded start point there is no line to draw.
        guard let start = startTouch else { return }

        // Touch locations are expressed in the view's bounds, so the canvas is
        // sized from bounds as well (frame differs once a transform is applied).
        let canvasSize = drawingPlace.bounds.size
        // A zero-sized canvas cannot back a bitmap context.
        guard canvasSize.width > 0, canvasSize.height > 0 else { return }
        let canvas = CGRect(origin: .zero, size: canvasSize)

        for touch in touches {
            let end = touch.location(in: drawingPlace)
            secondTouch = end

            if currentContext == nil {
                // Opened once per stroke; closed again in finishStroke(commit:).
                UIGraphicsBeginImageContext(canvasSize)
                currentContext = UIGraphicsGetCurrentContext()
            } else {
                // Wipe the previous preview before drawing the new one.
                currentContext?.clear(canvas)
            }
            guard let context = currentContext else { return }

            prevImage?.draw(in: canvas)

            let bezier = UIBezierPath()
            bezier.move(to: start)
            bezier.addLine(to: end)
            bezier.close()

            UIColor.blue.set()

            context.setLineWidth(4)
            context.addPath(bezier.cgPath)
            context.strokePath()

            // Keep the previous preview on screen if no snapshot can be made.
            if let snapshot = context.makeImage() {
                drawingPlace.image = UIImage(cgImage: snapshot)
            }
        }
    }


    /// Commits the line that is currently displayed.
    override func touchesEnded(_ touches: Set<UITouch>, with event: UIEvent?) {
        finishStroke(commit: true)
    }

    /// Discards the line in progress when the system cancels the touch.
    /// Overridden because the other touch handlers do not call `super`.
    override func touchesCancelled(_ touches: Set<UITouch>, with event: UIEvent?) {
        finishStroke(commit: false)
    }

    /// Closes the stroke's image context, if one was opened, and either keeps
    /// the displayed image as the committed one or restores the last
    /// committed image.
    private func finishStroke(commit: Bool) {
        if currentContext != nil {
            // Balances the UIGraphicsBeginImageContext call made in touchesMoved.
            UIGraphicsEndImageContext()
            currentContext = nil
        }

        if commit {
            prevImage = drawingPlace.image
        } else {
            drawingPlace.image = prevImage
        }
    }

}

说明

完整代码如下:

df = df[~df['A'].duplicated(keep=False) | df[['B','C']].notnull().any(axis=1)]
print (df)
     A    B    C
0  foo  2.0  3.0
2  foo  1.0  4.0
3  bar  NaN  NaN

先测试列 A 中是否存在重复项——使用 duplicated,并用 ~ 对布尔掩码取反:

print (~df['A'].duplicated(keep=False))
0    False
1    False
2    False
3     True
4    False
Name: A, dtype: bool

检查列 B,C 中的非缺失值:

print (df[['B','C']].notnull())
       B      C
0   True   True
1  False  False
2   True   True
3  False  False
4  False  False

然后使用DataFrame.any判断每行是否至少有一个True:

print (df[['B','C']].notnull().any(axis=1))
0     True
1    False
2     True
3    False
4    False
dtype: bool

最后用按位 OR(|)把两个条件组合在一起。

答案 1 :(得分:1)

与jezrael的解决方案略有不同:

>>> df                                                                                                     
     A    B    C
0  foo  2.0  3.0
1  foo  NaN  NaN
2  foo  1.0  4.0
3  bar  NaN  NaN
4  foo  NaN  NaN
>>>
>>> df.drop(index=df[df.duplicated(keep=False)].isnull().any(1).index)                                  
     A    B    C
0  foo  2.0  3.0
2  foo  1.0  4.0
3  bar  NaN  NaN

步骤:

>>> df.duplicated(keep=False)                                                                            
0    False
1     True
2    False
3    False
4     True
dtype: bool
>>>
>>> df[df.duplicated(keep=False)]                                                                       
      A   B   C
1  foo NaN NaN
4  foo NaN NaN
>>>
>>> df[df.duplicated(keep=False)].isnull()                                                                 
       A     B     C
1  False  True  True
4  False  True  True
>>>
>>> df[df.duplicated(keep=False)].isnull().any(1).index                                                          
Int64Index([1, 4], dtype='int64')