我有以下数据框:
# Sample frame: three rows for id 'a' followed by four rows for id 'b'.
df = pd.DataFrame({
    'id': ['a'] * 3 + ['b'] * 4,
    'name': ['al', 'qwer', 'l', 'ewdbd', 'op', 'qsde', 'knmkln'],
})
id name
a al
a qwer
a l
b ewdbd
b op
b qsde
b knmkln
我想按id分组,并获取每组name中最短的字符串(如果有多个长度最小的字符串,取其中任意一个即可)。最终的数据框应为:
id name
a l
b op
如何实现?
答案 0 :(得分:3)
使用assign和drop_duplicates,在大多数情况下应该比groupby快:
(df.assign(length=df['name'].str.len())
   .sort_values('length')
   .drop_duplicates('id')
   .drop(columns='length')
)
或者按照注释中Ben.T的建议,不使用相对较慢的drop操作:
df.loc[df['name'].str.len().sort_values().index].drop_duplicates(subset='id')
输出:
id name
2 a l
4 b op
#include <glad/glad.h>
#include <GLFW/glfw3.h>
#include <iostream>
int main() {
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
GLFWwindow* window = glfwCreateWindow(500, 500, "Test", NULL, NULL);
if (window == NULL) {
std::cout << "GLAD failed.";
}
glfwMakeContextCurrent(window);
void frameBufferSizeCallback(GLFWwindow* window,int width,int height);
glfwSetFramebufferSizeCallback(window, frameBufferSizeCallback);
glViewport(0, 0, 500, 500);//Error here
while (!glfwWindowShouldClose(window)) {
glfwSwapBuffers(window);
glfwPollEvents();
}
glfwTerminate();
return 0;
}
// Framebuffer-size callback: resets the OpenGL viewport to start at the
// origin and span the reported width and height. The window handle is not
// used.
void frameBufferSizeCallback(GLFWwindow* window, int width, int height) {
    static_cast<void>(window);  // required by the callback signature only
    const GLint originX = 0;
    const GLint originY = 0;
    glViewport(originX, originY, width, height);
}
答案 1 :(得分:2)
要找到每组中最短的字符串,需要先计算字符串的长度:
df.loc[df['name'].str.len().groupby(df['id']).idxmin()]
输出:
id name
2 a l
4 b op