我需要按组对数据进行采样,并且每个组使用不同的采样比例。我有100多个组,但为了简单起见,这里的示例只有3个组。假设我有如下数据框:
df2 = pd.DataFrame({'group_id': np.repeat(['A', 'B', 'C'], (40, 60, 20)),
'vals': np.random.randn(120)})
N = len(df2)
df2.groupby('group_id').count()
# vals
#group_id
#A 40
#B 60
#C 20
我想按照下面数据框中给出的比例,分别对A、B、C三组进行采样:
prop = pd.DataFrame({'A': {0.45},
'B': {0.55},
'C': {0.62}})
当我尝试采样时,出现错误:
grouped = df2.groupby('group_id')
x = grouped.apply(lambda x: x.sample(frac=props))
错误:NameError:未定义全局名称'props'
非常感谢任何帮助! 谢谢
答案 0 :(得分:0)
我认为需要一个由标量组成的DataFrame,然后按
import { Component, Input, OnInit, ElementRef } from '@angular/core';
import * as d3 from 'd3';
/**
 * Angular component that renders a three-node force-directed graph with d3
 * and periodically removes and re-adds node "c" (with its two links) to
 * exercise d3's general update pattern.
 *
 * The <svg>, the node/link selections and the force simulation are created
 * once and stored on the instance. `buildViz('update')` then only re-joins the
 * current `nodes`/`links` against the existing elements instead of appending
 * another <svg> and dereferencing selections that were never initialised.
 */
@Component({
  selector: 'd3-viz',
  templateUrl: './d3-viz.html'
})
export class D3Viz {
  // d3 handles are deliberately loosely typed; they can be tightened to
  // d3.Selection / d3.Simulation types if stricter checking is wanted.
  private host: any;
  private svg: any;
  private node: any;
  private link: any;
  private simulation: any;
  private color: any;
  // Timers returned by d3.interval, kept so they can be stopped on destroy.
  private timers: any[] = [];

  width: number = 750;
  height: number = 500;
  a = { id: "a" };
  b = { id: "b" };
  c = { id: "c" };
  links = [
    { source: this.a, target: this.b },
    { source: this.b, target: this.c },
    { source: this.c, target: this.a }
  ];
  nodes = [this.a, this.b, this.c];

  constructor(private element: ElementRef) {
    this.host = d3.select(this.element.nativeElement);
  }

  /**
   * Builds the visualization and starts two interleaved 2000 ms timers:
   * the first removes node "c" and its links, the second (offset by 1000 ms
   * through the `time` argument) adds them back.
   */
  ngOnInit() {
    this.buildViz();
    this.timers.push(d3.interval(() => {
      this.nodes.pop(); // Remove c.
      this.links.pop(); // Remove c-a.
      this.links.pop(); // Remove b-c.
      this.buildViz('update');
    }, 2000, d3.now()));
    this.timers.push(d3.interval(() => {
      this.nodes.push(this.c);
      this.links.push({ source: this.b, target: this.c });
      this.links.push({ source: this.c, target: this.a });
      this.buildViz('update');
    }, 2000, d3.now() + 1000));
  }

  /**
   * Stops the timers and the simulation and removes the <svg>, so nothing
   * keeps ticking against a destroyed component.
   */
  ngOnDestroy() {
    for (const timer of this.timers) {
      timer.stop();
    }
    this.timers = [];
    this.teardown();
  }

  /**
   * Renders the graph.
   *
   * @param update When falsy, the visualization is (re)built from scratch,
   *   replacing any previous <svg> and simulation. When truthy, the existing
   *   visualization is re-joined against the current `nodes` and `links`; if
   *   nothing has been built yet, it is built first.
   */
  buildViz(update?: unknown) {
    if (!update || !this.simulation) {
      this.createViz();
    }
    this.restart();
  }

  /** Creates the <svg>, the empty node/link selections and the simulation. */
  private createViz(): void {
    // Drop any previous build so a rebuild never leaves a second <svg> or a
    // second running simulation behind.
    this.teardown();

    this.color = d3.scaleOrdinal(d3.schemeCategory10);
    this.svg = this.host.append('svg')
      .attr('width', this.width)
      .attr('height', this.height);

    // Centre the graph: the x/y forces pull nodes towards (0, 0).
    const g = this.svg.append("g")
      .attr("transform", "translate(" + this.width / 2 + "," + this.height / 2 + ")");
    this.link = g.append("g").attr("stroke", "#000").attr("stroke-width", 1.5).selectAll(".link");
    this.node = g.append("g").attr("stroke", "#fff").attr("stroke-width", 1.5).selectAll(".node");

    this.simulation = d3.forceSimulation<any>(this.nodes)
      .force("charge", d3.forceManyBody().strength(-1000))
      .force("link", d3.forceLink(this.links).distance(200))
      .force("x", d3.forceX())
      .force("y", d3.forceY())
      .alphaTarget(1)
      .on("tick", () => this.ticked());
  }

  /** Stops the simulation and removes the <svg>, if they exist. */
  private teardown(): void {
    if (this.simulation) {
      this.simulation.stop();
      this.simulation = undefined;
    }
    if (this.svg) {
      this.svg.remove();
      this.svg = undefined;
    }
  }

  /** Applies the general update pattern and reheats the simulation. */
  private restart(): void {
    const color = this.color;

    // Nodes: join by id, drop removed ones, append circles for new ones.
    const nodeJoin = this.node.data(this.nodes, (d: any) => d.id);
    nodeJoin.exit().remove();
    this.node = nodeJoin.enter().append("circle")
      .attr("fill", (d: any) => color(d.id))
      .attr("r", 8)
      .merge(nodeJoin);

    // Links: join by "sourceId-targetId".
    const linkJoin = this.link.data(this.links, (d: any) => d.source.id + "-" + d.target.id);
    linkJoin.exit().remove();
    this.link = linkJoin.enter().append("line").merge(linkJoin);

    // Hand the current data to the simulation and restart it.
    this.simulation.nodes(this.nodes);
    this.simulation.force("link").links(this.links);
    this.simulation.alpha(1).restart();
  }

  /** Tick handler: copies simulated positions onto the SVG elements. */
  private ticked(): void {
    this.node
      .attr("cx", (d: any) => d.x)
      .attr("cy", (d: any) => d.y);
    this.link
      .attr("x1", (d: any) => d.source.x)
      .attr("y1", (d: any) => d.source.y)
      .attr("x2", (d: any) => d.target.x)
      .attr("y2", (d: any) => d.target.y);
  }
}
查找:
x.name