假设我有一个如下所示的 pd.DataFrame:
id col1_a col1_b col2_a col2_b
1 x x 2 3
2 z d 4 5
3 y y 9 9
4 p p 8 1
这个数据框是两个数据框(df_a 和 df_b)逐列对比的结果。
我想得到一个新增了 diff 列的数据框,该列列出每一行中取值不一致的列对:
id col1_a col1_b col2_a col2_b diff
1 x x 2 3 col2
2 z d 4 5 col1,col2
3 y y 9 9 None
4 p p 8 1 col2
我怎样才能在不对列和行做双重遍历的情况下实现这一目标?
我知道我可以通过做类似的事情(未测试)来实现这一点:
for col_ptr1 in df.columns:
for col_ptr2 in df.columns:
for idx, row in df.iterrows():
if col_ptr1.strip('_a') == col_ptr2.strip('_b'):
blah blah blah...
这太丑了。我想知道是否有更符合 pandas 风格的方法来解决这个问题。
答案 0 :(得分:6)
先用 filter 选出列名以 _a 或 _b 结尾的列子集,然后通过 str 访问器的 split 方法按分隔符 _ 拆分这些列名,并取拆分结果的第一个部分作为前缀(例如 col1、col2)。
接着,按上一步得到的前缀对列进行分组,并用 nunique 统计每一行在每个分组内的唯一(unique)值个数。如果某个分组的唯一值个数不等于 1,说明该列对存在差异;最后用 DataFrame.dot 把这些布尔标记与列名前缀拼接成以逗号分隔的字符串:
# Keep only the paired columns, i.e. those whose name ends in _a or _b.
c = df.filter(regex=r'_(a|b)$')
# Prefix shared by each pair ("col1_a" -> "col1").
prefixes = c.columns.str.split('_').str[0]
# Group the columns by prefix and count the distinct values per row in each
# group. The frame is transposed so the grouping runs over rows of c.T;
# passing axis=1 to DataFrame.groupby is deprecated as of pandas 2.1.
# m is True where a pair does not hold exactly one distinct value.
m = c.T.groupby(prefixes).nunique().T.ne(1)
# Join the prefixes of the differing pairs with commas; rows without any
# difference end up with an empty string.
df['diff'] = m.dot(m.columns + ',').str[:-1]
id col1_a col1_b col2_a col2_b diff
0 1 x x 2 3 col2
1 2 z d 4 5 col1,col2
2 3 y y 9 9
3 4 p p 8 1 col2
答案 1 :(得分:4)
这是按列名前缀对列分组的另一种方法:将每个组中的各列与该组的最后一列进行比较,并在不匹配时获取对应的列名:
# Use id as the index so that only the paired columns remain as columns.
u = df.set_index("id")
# Prefix of every column ("col1_a" -> "col1").
cols = u.columns.str.split("_").str[0]
# Split the columns into one sub-frame per prefix. The frame is transposed
# before grouping (and each group transposed back) because passing axis=1 to
# DataFrame.groupby is deprecated as of pandas 2.1.
groups = (g.T for _, g in u.T.groupby(cols))
# Within each group, flag the cells that differ from the group's last column.
l = (g.ne(g.iloc[:, -1], axis=0) for g in groups)
# NOTE(review): the dot product pairs the concatenated columns with `cols` by
# position, so this presumably relies on the columns already being ordered by
# prefix - confirm for other inputs.
df['diff_'] = df['id'].map(pd.concat(l, axis=1).dot(cols + ',').str[:-1])
print(df)
id col1_a col1_b col2_a col2_b diff_
0 1 x x 2 3 col2
1 2 z d 4 5 col1,col2
2 3 y y 9 9
3 4 p p 8 1 col2
答案 2 :(得分:0)
我想答案可能是这样的:
import * as THREE from 'three';
import * as PNLTRI from 'pnltri';
import * as BAS from 'three-bas';
import {
OrbitControls
} from 'three/examples/jsm/controls/OrbitControls';
import {
TextGeometry
} from 'three/src/geometries/TextGeometry';
import {
TweenMax,
Power1
} from 'gsap';
import {
Matrix4
} from 'three/src/math/Matrix4';
// Run init() when the window's load event fires.
window.onload = init;
/** Small helper namespace used by the demo. */
const utils = {
  /**
   * Copies every property that `for...in` visits on `src` (enumerable own and
   * inherited keys) onto `dst`, overwriting keys that already exist.
   *
   * @param {Object} dst - Receiver; mutated in place.
   * @param {Object} [src] - Provider; `null`/`undefined` copies nothing.
   * @returns {Object} The same `dst` reference.
   */
  extend(dst, src) {
    for (const key in src) {
      dst[key] = src[key];
    }
    return dst;
  },
};
/**
 * Entry point of the demo: builds the renderer/camera/scene wrapper, adds the
 * animated text mesh to the scene, starts an endlessly yoyo-ing tween of the
 * mesh's `animationProgress` from 0 to 1 over 4 seconds, and hooks that tween
 * up to the drag scrubber.
 */
function init() {
  const rootOptions = { createCameraControls: false, fov: 10 };
  const root = new THREERoot(rootOptions);
  const { renderer, camera, scene } = root;

  renderer.setClearColor(0xffffff);
  // Fall back to 1 when the browser does not report a device pixel ratio.
  renderer.setPixelRatio(window.devicePixelRatio || 1);
  camera.position.set(0, 0, 1400);

  const textAnimation = createTextAnimation();
  scene.add(textAnimation);

  const fromVars = { animationProgress: 0 };
  const toVars = {
    animationProgress: 1,
    ease: Power1.easeInOut,
    repeat: -1,
    yoyo: true
  };
  const tween = TweenMax.fromTo(textAnimation, 4, fromVars, toVars);

  createTweenScrubber(tween);
}
/**
 * Lets the user scrub a tween by dragging horizontally with the mouse or a
 * finger. While a drag is in progress the tween's timeScale is eased to 0
 * (paused); on release it is eased back to 1. Every horizontal pixel moved
 * shifts the tween's progress by `seekSpeed`, clamped to the range [0, 1].
 *
 * Side effects: registers mouse and touch listeners on `window` (they are
 * never removed) and changes `document.body.style.cursor`.
 *
 * @param {Object} tween - Tween exposing `progress()` as getter/setter.
 * @param {number} [seekSpeed=0.001] - Progress change per pixel dragged; any
 *   falsy value (including 0) selects the default.
 */
function createTweenScrubber(tween, seekSpeed) {
  const speed = seekSpeed || 0.001;

  // The touch handlers call preventDefault(). Browsers treat window-level
  // touchstart/touchmove listeners as passive by default, which turns
  // preventDefault() into a no-op (plus a console warning), so the listeners
  // are registered as explicitly non-passive.
  const nonPassive = { passive: false };

  // Pointer x-coordinate seen by the previous event of the current drag.
  let lastX = 0;
  let mouseDown = false;

  function stop() {
    TweenMax.to(tween, 2, {
      timeScale: 0
    });
  }

  function resume() {
    TweenMax.to(tween, 2, {
      timeScale: 1
    });
  }

  function seek(dx) {
    const progress = tween.progress();
    tween.progress(THREE.Math.clamp(progress + dx * speed, 0, 1));
  }

  // Converts an absolute pointer position into a delta since the last event.
  function dragTo(x) {
    const dx = x - lastX;
    lastX = x;
    seek(dx);
  }

  // desktop
  document.body.style.cursor = 'pointer';

  window.addEventListener('mousedown', function(e) {
    mouseDown = true;
    document.body.style.cursor = 'ew-resize';
    lastX = e.clientX;
    stop();
  });
  window.addEventListener('mouseup', function() {
    mouseDown = false;
    document.body.style.cursor = 'pointer';
    resume();
  });
  window.addEventListener('mousemove', function(e) {
    if (mouseDown === true) {
      dragTo(e.clientX);
    }
  });

  // mobile
  window.addEventListener('touchstart', function(e) {
    lastX = e.touches[0].clientX;
    stop();
    e.preventDefault();
  }, nonPassive);
  window.addEventListener('touchend', function(e) {
    resume();
    e.preventDefault();
  }, nonPassive);
  window.addEventListener('touchmove', function(e) {
    dragTo(e.touches[0].clientX);
    e.preventDefault();
  }, nonPassive);
}
/**
 * Builds the animated mesh for the text 'Evan Jancy': generates the text
 * geometry anchored at its centre, passes it through
 * BAS.Utils.separateFaces, and wraps it in a TextAnimation.
 *
 * @returns {TextAnimation} The mesh to add to the scene.
 */
function createTextAnimation() {
  const textOptions = {
    size: 14,
    height: 0,
    font: 'droid sans',
    weight: 'bold',
    style: 'normal',
    bevelSize: 0.75,
    bevelThickness: 0.50,
    bevelEnabled: true,
    // Fractions of the bounding box used as the geometry's origin.
    anchor: { x: 0.5, y: 0.5, z: 0.5 }
  };
  const textGeometry = generateTextGeometry('Evan Jancy', textOptions);

  BAS.Utils.separateFaces(textGeometry);

  return new TextAnimation(textGeometry);
}
/**
 * Creates a TextGeometry, records its bounding-box extents in
 * `geometry.userData.size`, and translates the geometry by
 * `-anchor * size` along each axis.
 *
 * @param {string} text - Text to build the geometry from.
 * @param {Object} params - Options forwarded to TextGeometry; must contain
 *   `anchor: {x, y, z}`, each a multiplier applied to the matching extent.
 * @returns {Object} The translated geometry, with
 *   `userData.size = {width, height, depth}`.
 */
function generateTextGeometry(text, params) {
  const geometry = new TextGeometry(text, params);
  geometry.computeBoundingBox();

  const { min, max } = geometry.boundingBox;
  const size = {
    width: max.x - min.x,
    height: max.y - min.y,
    depth: max.z - min.z
  };
  geometry.userData = { size };

  const { anchor } = params;
  const offset = new Matrix4().makeTranslation(
    size.width * -anchor.x,
    size.height * -anchor.y,
    size.depth * -anchor.z
  );
  geometry.applyMatrix(offset);

  return geometry;
}
////////////////////
// CLASSES
////////////////////
/**
 * Constructor for the animated text mesh (ES5 style: it invokes THREE.Mesh
 * on `this`; the prototype chain is set up separately in this file).
 *
 * For every face of `textGeometry` it writes, identically for the face's
 * three vertices, two random bezier control points (aControl0/aControl1) and
 * a duration, plus a per-vertex randomised delay (aAnimation). The vertex
 * shader maps the `uTime` uniform to a per-vertex progress from those values.
 *
 * @param {Object} textGeometry - Geometry exposing `faces` and
 *   `userData.size = {width, height, depth}`.
 */
function TextAnimation(textGeometry) {
  var bufferGeometry = new BAS.ModelBufferGeometry(textGeometry);

  var aAnimation = bufferGeometry.createAttribute('aAnimation', 2);
  var aControl0 = bufferGeometry.createAttribute('aControl0', 3);
  var aControl1 = bufferGeometry.createAttribute('aControl1', 3);
  // Declared because the shader reads it; this constructor never writes to
  // it, so it keeps whatever createAttribute initialises it with.
  bufferGeometry.createAttribute('aEndPosition', 3);

  var faceCount = bufferGeometry.faceCount;
  var i, i2, i3, v;

  var size = textGeometry.userData.size;
  // Length of the half-diagonal of the text's bounding box.
  var length = new THREE.Vector3(size.width, size.height, size.depth)
    .multiplyScalar(0.5)
    .length();
  var maxDelay = length * 0.06;

  // Largest scaled delay + longest duration (4) + largest extra delay (< 1).
  this.animationDuration = maxDelay + 4 + 1;
  this._animationProgress = 0;

  // i2 / i3 are the offsets of face i in the 2- and 3-component attribute
  // arrays (3 vertices per face).
  for (i = 0, i2 = 0, i3 = 0; i < faceCount; i++, i2 += 6, i3 += 9) {
    var face = textGeometry.faces[i];
    var centroid = BAS.Utils.computeCentroid(textGeometry, face);
    // The control points are mirrored according to the centroid's quadrant.
    var dirX = centroid.x > 0 ? 1 : -1;
    var dirY = centroid.y > 0 ? 1 : -1;

    // animation
    var delay = centroid.length() * THREE.Math.randFloat(0.03, 0.06);
    var duration = THREE.Math.randFloat(2, 4);

    for (v = 0; v < 6; v += 2) {
      // Each vertex gets its own extra random delay in [0, 1).
      aAnimation.array[i2 + v] = delay + Math.random();
      aAnimation.array[i2 + v + 1] = duration;
    }

    // ctrl
    var c0x = THREE.Math.randFloat(0, 30) * dirX;
    var c0y = THREE.Math.randFloat(60, 120) * dirY;
    var c0z = THREE.Math.randFloat(-20, 20);

    var c1x = THREE.Math.randFloat(30, 60) * dirX;
    var c1y = THREE.Math.randFloat(0, 60) * dirY;
    var c1z = THREE.Math.randFloat(-20, 20);

    for (v = 0; v < 9; v += 3) {
      aControl0.array[i3 + v] = c0x;
      aControl0.array[i3 + v + 1] = c0y;
      aControl0.array[i3 + v + 2] = c0z;

      aControl1.array[i3 + v] = c1x;
      aControl1.array[i3 + v + 1] = c1y;
      aControl1.array[i3 + v + 2] = c1z;
    }
  }

  var material = new BAS.BasicAnimationMaterial({
    shading: THREE.FlatShading,
    side: THREE.DoubleSide,
    uniforms: {
      uTime: {
        type: 'f',
        value: 0
      }
    },
    shaderFunctions: [
      BAS.ShaderChunk['cubic_bezier']
    ],
    shaderParameters: [
      'uniform float uTime;',
      'attribute vec2 aAnimation;',
      'attribute vec3 aControl0;',
      'attribute vec3 aControl1;',
      'attribute vec3 aEndPosition;'
    ],
    shaderVertexInit: [
      'float tDelay = aAnimation.x;',
      'float tDuration = aAnimation.y;',
      'float tTime = clamp(uTime - tDelay, 0.0, tDuration);',
      'float tProgress = tTime / tDuration;'
    ],
    shaderTransformPosition: [
      'vec3 tPosition = transformed;',
      'tPosition *= 1.0 - tProgress;',
      // Must stay on one line: a quoted string literal cannot contain a raw
      // line break.
      'tPosition += cubicBezier(transformed, aControl0, aControl1, aEndPosition, tProgress);',
      'transformed = tPosition;'
    ]
  }, {
    diffuse: 0x000000
  });

  THREE.Mesh.call(this, bufferGeometry, material);

  this.frustumCulled = false;
}
// Make TextAnimation instances inherit from THREE.Mesh, then restore the
// `constructor` reference that replacing the prototype object discards.
TextAnimation.prototype = Object.create(THREE.Mesh.prototype);
TextAnimation.prototype.constructor = TextAnimation;

// `animationProgress` is the property the tween drives: reading returns the
// stored progress; writing stores it and sets the shader's uTime uniform to
// animationDuration * progress.
Object.defineProperty(TextAnimation.prototype, 'animationProgress', {
  get() {
    return this._animationProgress;
  },
  set(progress) {
    this._animationProgress = progress;
    this.material.uniforms.uTime.value = this.animationDuration * progress;
  }
});
/**
 * Bundles a WebGL renderer, a perspective camera and a scene, and drives the
 * render loop. Constructing an instance appends the renderer's canvas to the
 * element with id 'rootr', sizes everything to the window, starts the
 * requestAnimationFrame loop and re-sizes on every window 'resize' event.
 *
 * NOTE(review): the container id 'rootr' is kept as-is - confirm it matches
 * the page markup.
 *
 * @param {Object} [params] - Overrides for the defaults below.
 * @param {boolean} [params.antialias=false] - Passed to THREE.WebGLRenderer.
 * @param {number} [params.fov=60] - Camera field of view.
 * @param {number} [params.zNear=1] - Camera near plane.
 * @param {number} [params.zFar=10000] - Camera far plane.
 * @param {boolean} [params.createCameraControls=true] - Create OrbitControls
 *   for the camera.
 */
function THREERoot(params) {
  const options = utils.extend({
    antialias: false,

    fov: 60,
    zNear: 1,
    zFar: 10000,

    createCameraControls: true
  }, params);

  this.renderer = new THREE.WebGLRenderer({
    antialias: options.antialias
  });
  document.getElementById('rootr').appendChild(this.renderer.domElement);

  this.camera = new THREE.PerspectiveCamera(
    options.fov,
    window.innerWidth / window.innerHeight,
    options.zNear,
    // Was `params.zfar` (lower-case f), which is always undefined and left
    // the camera without a far plane.
    options.zFar
  );

  this.scene = new THREE.Scene();

  if (options.createCameraControls) {
    this.controls = new OrbitControls(this.camera, this.renderer.domElement);
  }

  // Both methods are handed to browser APIs as bare callbacks, so they must
  // be bound; an unbound resize() runs with the wrong `this` and throws.
  this.resize = this.resize.bind(this);
  this.tick = this.tick.bind(this);

  this.resize();
  this.tick();

  window.addEventListener('resize', this.resize, false);
}

THREERoot.prototype = {
  /** Runs one frame (update, then render) and schedules the next one. */
  tick: function() {
    this.update();
    this.render();
    requestAnimationFrame(this.tick);
  },
  /** Updates the camera controls, when they were created. */
  update: function() {
    this.controls && this.controls.update();
  },
  /** Draws the scene from the camera's point of view. */
  render: function() {
    this.renderer.render(this.scene, this.camera);
  },
  /** Matches the camera aspect ratio and renderer size to the window. */
  resize: function() {
    this.camera.aspect = window.innerWidth / window.innerHeight;
    this.camera.updateProjectionMatrix();

    this.renderer.setSize(window.innerWidth, window.innerHeight);
  }
};
答案 3 :(得分:0)
您可以使用 apply 函数,它也提供比直接迭代更高效的计算。这段代码可以很容易地推广到许多列对的情况。
def find_diff(x):
    """Return the comma-separated prefixes of the column pairs that differ.

    A pair is any two labels ``<prefix>_a`` and ``<prefix>_b`` that are both
    present in ``x``; pairs are reported in the order their ``_a`` label
    appears. Labels without a partner (such as ``id``) are ignored.

    Args:
        x: One row - any mapping-like object offering ``keys()`` and item
            access by label (a ``pandas.Series`` row or a plain ``dict``).

    Returns:
        A string such as ``'col1,col2'``, or ``None`` when no pair differs.
    """
    labels = list(x.keys())
    present = set(labels)
    changed = []
    for label in labels:
        if not (isinstance(label, str) and label.endswith('_a')):
            continue
        prefix = label[:-2]
        partner = prefix + '_b'
        if partner in present and x[label] != x[partner]:
            changed.append(prefix)
    return ','.join(changed) if changed else None
# Call find_diff once per row (axis=1) and store the results in a 'diff' column.
df['diff'] = df.apply(find_diff, axis=1)
有关 apply 函数的更多信息,请参见 pandas 官方文档中的 DataFrame.apply。
答案 4 :(得分:0)
您可以使用pandas.DataFrame.apply:
import pandas as pd
def get_diff(x):
    """Name the column pairs of one row whose ``_a`` and ``_b`` values differ.

    Args:
        x: A row exposing ``col1_a``, ``col1_b``, ``col2_a`` and ``col2_b`` as
            attributes (for example a ``pandas.Series`` row).

    Returns:
        ``'col1'``, ``'col2'`` or ``'col1,col2'``; ``None`` when both pairs
        are equal.
    """
    # One independent check per pair instead of enumerating every combination.
    changed = []
    if x.col1_a != x.col1_b:
        changed.append('col1')
    if x.col2_a != x.col2_b:
        changed.append('col2')
    return ','.join(changed) if changed else None
# Call get_diff once per row (axis=1) and store the results in a 'diff' column.
df['diff'] = df.apply(get_diff, axis=1)