pandas - 按不同比例对多个分组进行抽样

时间:2018-03-08 15:10:12

标签: python pandas sample

我需要为每个组使用不同的比例按组对数据进行采样。我有100多个小组,但为了简单起见,我的例子只有3组。 我们假设我有这个数据帧:

# Build a 120-row example frame: group_id repeats 'A', 'B', 'C' 40, 60 and 20
# times respectively, and vals holds 120 draws from np.random.randn.
df2 = pd.DataFrame({'group_id': np.repeat(['A', 'B', 'C'], (40, 60, 20)),
               'vals': np.random.randn(120)})
# Total number of rows in df2.
N = len(df2)
# Count the rows in each group; the expected output is shown below.
df2.groupby('group_id').count()

#           vals
#group_id   
#A         40
#B         60
#C         20

我想按照下面数据框中给出的比例,分别对 A、B、C 组进行采样:

 # One column per group label; each value is written as a one-element set
 # literal ({0.45}) rather than a bare float.
 prop = pd.DataFrame({'A': {0.45},
                      'B': {0.55},
                      'C': {0.62}})

当我尝试采样时,出现错误:

# Split df2 by group_id, then call sample() on each group's sub-frame.
grouped = df2.groupby('group_id')
# NOTE(review): `props` is never assigned in this snippet; the frame defined
# earlier is named `prop`.
x = grouped.apply(lambda x: x.sample(frac=props))

错误:NameError: global name 'props' is not defined

非常感谢任何帮助! 谢谢

1 个答案:

答案 0 :(得分:0)

我认为需要标量的DataFrame,然后按
import { Component, Input, OnInit, ElementRef } from '@angular/core';
import * as d3 from 'd3';

/**
 * Renders a small force-directed graph (nodes a, b, c and the links between
 * them) into the component's host element using D3.
 *
 * After the first render, two repeating timers alternately remove node `c`
 * with its two links and add them back, re-running the D3 general update
 * pattern each time.
 *
 * The SVG, the simulation and the node/link selections are created once and
 * kept on the instance, so that 'update' renders reuse them instead of
 * appending a new <svg> and dereferencing selections that were never created.
 */
@Component({
  selector: 'd3-viz',
  templateUrl: './d3-viz.html'
})
export class D3Viz implements OnInit {
  private host;

  width: number = 750;
  height: number = 500;

  a = { id: "a" };
  b = { id: "b" };
  c = { id: "c" };

  links = [
    { source: this.a, target: this.b },
    { source: this.b, target: this.c },
    { source: this.c, target: this.a }
  ];

  nodes = [this.a, this.b, this.c];

  /** Ordinal colour scale keyed by node id. */
  private readonly color = d3.scaleOrdinal(d3.schemeCategory10);

  // D3 objects that must survive between buildViz() calls. They are typed
  // loosely, matching the <any> generics already used with the D3 API here.
  private svg: any;
  private simulation: any;
  private link: any;
  private node: any;

  /** Handles of the repeating timers started in ngOnInit, kept for cleanup. */
  private timers: Array<{ stop(): void }> = [];

  constructor(private element: ElementRef) {
    this.host = d3.select(this.element.nativeElement);
  }

  /** Renders the graph once, then starts the two alternating demo timers. */
  ngOnInit() {
    this.buildViz();

    this.timers.push(
      d3.interval(() => {
        this.nodes.pop(); // Remove c.
        this.links.pop(); // Remove c-a.
        this.links.pop(); // Remove b-c.
        this.buildViz('update');
      }, 2000, d3.now()),

      // Same period, offset by one second, so removal and re-insertion alternate.
      d3.interval(() => {
        this.nodes.push(this.c);
        this.links.push({ source: this.b, target: this.c });
        this.links.push({ source: this.c, target: this.a });
        this.buildViz('update');
      }, 2000, d3.now() + 1000)
    );
  }

  /** Stops the timers and the simulation so nothing keeps ticking after teardown. */
  ngOnDestroy() {
    for (const timer of this.timers) {
      timer.stop();
    }
    this.timers = [];

    if (this.simulation) {
      this.simulation.stop();
    }
  }

  /**
   * Draws or refreshes the graph from the current `nodes` and `links`.
   *
   * @param update When falsy, the SVG and simulation are (re)built from
   *   scratch. When truthy, the existing ones are reused and only the data
   *   join is re-applied. If nothing has been built yet, a full build is done
   *   regardless.
   */
  buildViz(update?) {
    if (!update || !this.simulation) {
      this.initViz();
    }
    this.restart();
  }

  /** Creates the SVG scaffold, the force simulation and the empty selections. */
  private initViz() {
    // A full rebuild replaces any previous render instead of stacking SVGs.
    if (this.simulation) {
      this.simulation.stop();
    }
    if (this.svg) {
      this.svg.remove();
    }

    this.svg = this.host.append('svg')
      .attr('width', this.width)
      .attr('height', this.height);

    this.simulation = d3.forceSimulation<any>(this.nodes)
      .force("charge", d3.forceManyBody().strength(-1000))
      .force("link", d3.forceLink(this.links).distance(200))
      .force("x", d3.forceX())
      .force("y", d3.forceY())
      .alphaTarget(1)
      .on("tick", () => this.ticked());

    // Move the origin to the centre of the SVG.
    const g = this.svg.append("g")
      .attr("transform", "translate(" + this.width / 2 + "," + this.height / 2 + ")");

    this.link = g.append("g")
      .attr("stroke", "#000")
      .attr("stroke-width", 1.5)
      .selectAll(".link");

    this.node = g.append("g")
      .attr("stroke", "#fff")
      .attr("stroke-width", 1.5)
      .selectAll(".node");
  }

  /** Re-joins nodes and links to the DOM and reheats the simulation. */
  private restart() {
    // Apply the general update pattern to the nodes.
    this.node = this.node.data(this.nodes, (d: any) => d.id);
    this.node.exit().remove();
    this.node = this.node.enter().append("circle")
      .attr("fill", (d: any) => this.color(d.id))
      .attr("r", 8)
      .merge(this.node);

    // Apply the general update pattern to the links.
    this.link = this.link.data(this.links, (d: any) => d.source.id + "-" + d.target.id);
    this.link.exit().remove();
    this.link = this.link.enter().append("line").merge(this.link);

    // Update and restart the simulation.
    this.simulation.nodes(this.nodes);
    this.simulation.force("link").links(this.links);
    this.simulation.alpha(1).restart();
  }

  /** Copies the simulated positions onto the circles and lines on every tick. */
  private ticked() {
    this.node
      .attr("cx", (d: any) => d.x)
      .attr("cy", (d: any) => d.y);

    this.link
      .attr("x1", (d: any) => d.source.x)
      .attr("y1", (d: any) => d.source.y)
      .attr("x2", (d: any) => d.target.x)
      .attr("y2", (d: any) => d.target.y);
  }
}
查找:

x.name