Pandas 数据框:将具有相同列值的行放在一起

时间:2018-07-28 09:57:10

标签: python pandas

对于以下数据框,

var mongoose = require("mongoose");

// Schema for a campground document: three plain string fields plus a list of
// references to comment documents.
var campgroundSchema = new mongoose.Schema({
   name: String,
   image: String,
   description: String,
   // Each entry stores only the ObjectId of a document of the "Comment" model.
   comments: [
      {
         type: mongoose.Schema.Types.ObjectId,
         ref: "Comment"
      }
   ]
});


// Model compiled from the schema above and registered under the name "Campground".
var Campground = mongoose.model("Campground", campgroundSchema);

var mongoose = require("mongoose");

// Schema for a comment: the comment text and the author's name, both strings.
// Constructed with `new`, the same way campgroundSchema is built.
var commentSchema = new mongoose.Schema({
    text: String,
    author: String
});

// Model registered under the name "Comment"; campground documents reference
// it through `ref: "Comment"`.
var Comment = mongoose.model("Comment", commentSchema);

// Seed campgrounds inserted by seedDB(); each entry supplies the name, image
// URL and description fields of one campground.
var data = [
    {
        name: "Cloud's Rest", 
        image: "https://farm4.staticflickr.com/3795/10131087094_c1c0a1c859.jpg",
        description: "blah blah blah"
    },
    {
        name: "Desert Mesa", 
        image: "https://farm4.staticflickr.com/3859/15123592300_6eecab209b.jpg",
        description: "blah blah blah"
    },
    {
        name: "Canyon Floor", 
        image: "https://farm1.staticflickr.com/189/493046463_841a18169e.jpg",
        description: "blah blah blah"
    }
]

/**
 * Reset the campground collection and repopulate it from `data`.
 *
 * Removes every Campground document, then creates one campground per seed
 * entry and attaches one sample comment to each. All work is callback-driven;
 * the function returns nothing and reports progress and errors via
 * console.log.
 */
function seedDB(){
    // Remove all campgrounds
    Campground.remove({}, (err) => {
        if(err){
            // Stop here: seeding on top of a collection that failed to clear
            // would report a removal that never happened.
            console.log(err);
            return;
        }
        console.log("removed campgrounds!");
        // add a few campgrounds
        data.forEach((seed) => {
            Campground.create(seed, (err, campground) => {
                if(err){
                    console.log(err);
                    return;
                }
                console.log("added a campground");
                // create a comment
                Comment.create(
                    {
                        text: "This place is great, but I wish there was internet",
                        author: "Homer"
                    }, (err, comment) => {
                        if(err){
                            console.log(err);
                            return;
                        }
                        campground.comments.push(comment._id);
                        // Wait for the save to finish so a failed save is
                        // logged instead of being reported as a success.
                        campground.save((err) => {
                            if(err){
                                console.log(err);
                                return;
                            }
                            console.log("Created new comment");
                        });
                    });
            });
        });
    });
}

/**
 * Connect to MongoDB and seed the database once the connection succeeds.
 *
 * @param {string} [uri='mongodb://localhost:27017/test'] - connection string
 *   handed to mongoose.connect; defaults to the local `test` database.
 */
function dbInit(uri = 'mongodb://localhost:27017/test'){
    mongoose.connect(uri, (err) => {
        if(err){
            console.log('------- Not connecting to database --------');
            // Log the underlying error so the failure can be diagnosed.
            console.log(err);
            return;
        }
        console.log('------ Database connected -------');
        seedDB();
    });
}

// Script entry point: open the database connection, which seeds the data on success.
dbInit();

我想得到以下结果

由于第一行的名称为'a',因此我们会将所有名称为'a'的行移至第一行下方,并将所有其他行向下推。
然后,我们将对所有其他名称(b,c ..)进行相同的操作,以保持数据的初始顺序

    { "_id" : ObjectId("5b5c4d0d97529908f180ff35"), "comments" : [ ObjectId("5b5c4d0e97529908f180ff38") ], "name" : "Cloud's Rest", "image" : "https://farm4.staticflickr.com/3795/10131087094_c1c0a1c859.jpg", "description" : "blah blah blah", "__v" : 1 }
    { "_id" : ObjectId("5b5c4d0d97529908f180ff36"), "comments" : [ ObjectId("5b5c4d0e97529908f180ff39") ], "name" : "Desert Mesa", "image" : "https://farm4.staticflickr.com/3859/15123592300_6eecab209b.jpg", "description" : "blah blah blah", "__v" : 1 }
    { "_id" : ObjectId("5b5c4d0d97529908f180ff37"), "comments" : [ ObjectId("5b5c4d0e97529908f180ff3b") ], "name" : "Canyon Floor", "image" : "https://farm1.staticflickr.com/189/493046463_841a18169e.jpg", "description" : "blah blah blah", "__v" : 1 }

   id name
0   1    a
1   2    b
2   3    c
3   4    b
4   5    a

如何使用pandas数据框来做到这一点?

我可以考虑迭代行,然后删除具有相同名称的行,并将其添加到当前迭代行的下方。
我想知道是否有更好的方法。
而且我不确定是否可以在迭代时更改数据框。

  • 编辑

在某种意义上保持初始顺序..

如果行上方的行具有相同的name,则只能向上移动一行,否则不会更改任何顺序。
并且具有相同 name 的行的排序与初始排序相同(相对):

       id name
    0   1    a
    1   5    a
    2   2    b
    3   4    b
    4   3    c

2 个答案:

答案 0 :(得分:0)

这是另一种方法。为了保留名称最初出现的顺序,我使用了unique()函数,然后创建了另一个记录该顺序的列,再按该列排序。剩下的就是对 order 列进行排序;当您需要按顺序排列索引时,请使用reset_index()方法。reset_index()默认会额外添加一列索引,为避免这种情况,请将 drop 参数设置为True。

    func popUpLoading(){
        DispatchQueue.main.async {
            self.popalert = UIAlertController(title: "", message: "wait...", preferredStyle: .alert)
            let loadingIndicator = UIActivityIndicatorView(frame: CGRect(x: 10, y: 5, width: 50, height: 50))
            loadingIndicator.hidesWhenStopped = true
            loadingIndicator.activityIndicatorViewStyle = UIActivityIndicatorViewStyle.gray
            loadingIndicator.startAnimating();
            self.present(self.popalert, animated: true, completion: nil)
            self.popalert.view.addSubview(loadingIndicator)
        }
    }

order

输出:

    import pandas as pd
# Example frame: rows that share a name should end up adjacent, with the
# groups ordered by the first appearance of each name.
x = pd.DataFrame.from_dict({'id': [1,2,3,4,5],'name':['a','b','c','b','a']})
# factorize() numbers each distinct name in order of first appearance
# (a -> 0, b -> 1, c -> 2), replacing the per-row iterrows()/list.index() scan.
x['order'] = pd.factorize(x['name'])[0]
# A stable sort is required so that rows with the same name keep their
# original relative order; the default quicksort does not guarantee this.
x = x.sort_values(by=['order'], kind='mergesort')
# drop=True discards the old index instead of adding it back as a column.
x  = x.reset_index(drop=True)
print(x)
# The helper column is only needed for sorting; remove it from the result.
x = x.drop('order',axis=1)
print(x)

答案 1 :(得分:0)

这个想法是以 (x, idx) 的形式为每一行创建键,其中 x 是该行名称在 name_unique 中的索引(name_unique 保持原始 name 列的顺序,并且仅包含不同的值),idx 是该行在原始数据帧中的索引。

    Image<Gray, byte> image = img.Convert<Gray, byte>().Not().ThresholdBinary(new Gray(80), new Gray(240));
    image = img.Not().InRange(new Bgr(120, 1, 1), new Bgr(250, 100, 70));
    camProccessed.Image = image;

name

如果 b 在原始数据帧的第一行中,这也会将 b 保留在结果数据帧的顶部:

    In [26]: df = pd.DataFrame({'id': [1, 2, 3, 4, 5], 'name': ['a', 'b', 'c', 'b', 'a']})

    In [27]: name_ord = {}

    In [28]: ordering_list = []

    In [29]: for idx, value in enumerate(df['name']):
        ...:     if value not in name_ord:
        ...:         name_ord[value] = len(name_ord)
        ...:     ordering_list.append((name_ord[value], idx))
        ...:

    In [30]: df['ord'] = ordering_list

    In [31]: df.sort_values(by='ord')
    Out[31]:
       id name     ord
    0   1    a  (0, 0)
    4   5    a  (0, 4)
    1   2    b  (1, 1)
    3   4    b  (1, 3)
    2   3    c  (2, 2)

b