我有一个包含1000列的数据框。它是来自不同品种的动物的数据集。但是其中一些品种的动物数量更多。所以我想对动物较多的品种进行随机抽样,使所有品种具有相同的观测数量。 详情:我有400只荷斯坦牛,300只泽西牛,100只赫里福德牛,150只尼洛尔牛和50只Canchim牛。我想做的是从每个品种中随机选择50只动物,这样最后我总共会有250只动物。我知道如何使用runif进行随机选择,但我不知道如何将它应用到我的情况中。 我的数据如下:
Breed ID Trait1 Trait2 Trait3
Holstein 1 11 22 44
Jersey 2 22 33 55
Nelore 3 33 44 66
Nelore 4 44 55 77
Canchim 5 55 66 88
我试过了:
Data = data[!!ave(seq_along(data$Breed), unique(data$Breed), FUN=function(x) sample(x, 50) == x),]
但是它不起作用,而且我使用的服务器不允许我安装dplyr软件包。 提前谢谢。
答案 0 :(得分:1)
您可以在品种上
<?xml version="1.0" encoding="utf-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:annotation>
<xs:documentation xml:lang="en">
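XML Schema for harvested property rental adverts: a property_adverts
document carries a header (harvest run info and property agent)
followed by the list of property_advert entries.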
</xs:documentation>
</xs:annotation>
<!-- Harvest run metadata: harvest time, harvest date, harvester version and a harvester path string. -->
<xs:element name="harvest_run_info" type="harvest_run_info_type"/>
<xs:complexType name="harvest_run_info_type">
  <!-- xs:all: each child must appear exactly once; the order is free. -->
  <xs:all>
    <xs:element name="harvest_time" type="xs:time" minOccurs="1" maxOccurs="1"/>
    <xs:element name="harvest_date" type="xs:date" minOccurs="1" maxOccurs="1"/>
    <xs:element name="harvester_version" type="harvester_version_type" minOccurs="1" maxOccurs="1"/>
    <xs:element name="harvester_path" type="xs:string" minOccurs="1" maxOccurs="1"/>
  </xs:all>
</xs:complexType>
<!-- A single property advert. Child elements may appear in any order (xs:all). -->
<xs:element name="property_advert" type="property_advert_type"/>
<xs:complexType name="property_advert_type">
  <xs:all>
    <!-- Children with minOccurs="1" are mandatory; those with minOccurs="0" may be omitted. -->
    <xs:element name="rent_cost" type="xs:positiveInteger" minOccurs="1" maxOccurs="1"/>
    <xs:element name="rent_frequency" type="rent_frequency_type" minOccurs="1" maxOccurs="1"/>
    <xs:element name="number_of_bedrooms" type="xs:positiveInteger" minOccurs="1" maxOccurs="1"/>
    <xs:element name="available_date" type="available_date_type" minOccurs="1" maxOccurs="1"/>
    <xs:element name="property_description" type="xs:string" minOccurs="1" maxOccurs="1"/>
    <xs:element name="main_photo_url" type="xs:anyURI" minOccurs="0" maxOccurs="1"/>
    <xs:element name="branch_name" type="xs:string" minOccurs="1" maxOccurs="1"/>
    <xs:element name="property_type" type="property_type_type" minOccurs="1" maxOccurs="1"/>
    <xs:element name="building_name" type="xs:string" minOccurs="0" maxOccurs="1"/>
    <xs:element name="street_name" type="xs:string" minOccurs="0" maxOccurs="1"/>
    <xs:element name="postcode" type="ukPostCode" minOccurs="0" maxOccurs="1"/>
    <xs:element name="furnished" type="xs:boolean" minOccurs="1" maxOccurs="1"/>
    <xs:element name="location_area" type="xs:string" minOccurs="0" maxOccurs="1"/>
  </xs:all>
  <!-- Only original_html_file is required; the other two attributes are optional. -->
  <xs:attribute name="agent_advert_reference" type="xs:string"/>
  <xs:attribute name="original_html_file" type="xs:string" use="required"/>
  <xs:attribute name="static_advert_url" type="xs:string"/>
</xs:complexType>
<!-- Rent payment period. The value must match one of the listed spellings exactly; any other capitalisation (for example "per Month") is rejected. -->
<xs:element name="rent_frequency" type="rent_frequency_type"/>
<xs:simpleType name="rent_frequency_type">
  <xs:restriction base="xs:string">
    <xs:enumeration value="per month"/>
    <xs:enumeration value="per week"/>
    <xs:enumeration value="Per Month"/>
    <xs:enumeration value="Per Week"/>
    <xs:enumeration value="Per month"/>
    <xs:enumeration value="Per week"/>
  </xs:restriction>
</xs:simpleType>
<!-- Kind of property. Only the capitalised and the all-lowercase spellings of Room, House and Flat are accepted. -->
<xs:element name="property_type" type="property_type_type"/>
<xs:simpleType name="property_type_type">
  <xs:restriction base="xs:string">
    <xs:enumeration value="Room"/>
    <xs:enumeration value="House"/>
    <xs:enumeration value="Flat"/>
    <xs:enumeration value="room"/>
    <xs:enumeration value="house"/>
    <xs:enumeration value="flat"/>
  </xs:restriction>
</xs:simpleType>
<!-- The property agent, described by a single mandatory company_name string. -->
<xs:element name="property_agent" type="property_agent_type"/>
<xs:complexType name="property_agent_type">
  <xs:all>
    <xs:element name="company_name" type="xs:string" minOccurs="1" maxOccurs="1"/>
  </xs:all>
</xs:complexType>
<!-- Availability: either an xs:date or one of the literal tokens Now, now, NOW. -->
<xs:simpleType name="available_date_type">
  <!-- Member order is unchanged: xs:date (named via memberTypes) is tried first, then the anonymous token enumeration. -->
  <xs:union memberTypes="xs:date">
    <xs:simpleType>
      <xs:restriction base="xs:NMTOKEN">
        <xs:enumeration value="Now"/>
        <xs:enumeration value="now"/>
        <xs:enumeration value="NOW"/>
      </xs:restriction>
    </xs:simpleType>
  </xs:union>
</xs:simpleType>
<!-- Harvester version: one to three dot-separated groups of digits, e.g. 7, 1.2 or 1.2.3. -->
<xs:simpleType name="harvester_version_type">
  <!-- xs:token collapses surrounding whitespace before the pattern is applied. -->
  <xs:restriction base="xs:token">
    <xs:pattern value="([0-9]+\.)?([0-9]+\.)?([0-9]+)"/>
  </xs:restriction>
</xs:simpleType>
<!--
  Upper-case UK postcode. Accepts the literal "GIR 0AA", or an outward code,
  a single space, and an inward code made of one digit followed by two
  letters from A-Z excluding C, I, K, M, O and V. The first outward letter
  excludes Q, V and X; a second outward letter, when present, excludes I, J and Z.
-->
<xs:simpleType name="ukPostCode">
  <xs:restriction base="xs:token">
    <xs:pattern value="(GIR 0AA)|((([A-Z-[QVX]][0-9][0-9]?)|(([A-Z-[QVX]][A-Z-[IJZ]][0-9][0-9]?)|(([A-Z-[QVX]][0-9][A-HJKPSTUW])|([A-Z-[QVX]][A-Z-[IJZ]][0-9][ABEHMNPRVWXY])))) [0-9][A-Z-[CIKMOV]]{2})"/>
  </xs:restriction>
</xs:simpleType>
<!-- Document header: harvest_run_info followed by property_agent, in that fixed order, each exactly once. -->
<xs:element name="header" type="header_type"/>
<xs:complexType name="header_type">
  <xs:sequence>
    <xs:element name="harvest_run_info" type="harvest_run_info_type" minOccurs="1" maxOccurs="1"/>
    <xs:element name="property_agent" type="property_agent_type" minOccurs="1" maxOccurs="1"/>
  </xs:sequence>
</xs:complexType>
<!-- Top-level document: one header followed by one adverts list, in that order. -->
<xs:element name="property_adverts" type="property_adverts_type"/>
<xs:complexType name="property_adverts_type">
  <xs:sequence>
    <xs:element name="header" type="header_type" minOccurs="1" maxOccurs="1"/>
    <!--
      Reference the global adverts element instead of declaring a local one.
      A local declaration without a type attribute defaults to xs:anyType, so
      neither adverts_type nor the xs:unique constraints attached to the
      global adverts element would be enforced inside property_adverts.
    -->
    <xs:element ref="adverts" minOccurs="1" maxOccurs="1"/>
  </xs:sequence>
</xs:complexType>
<!--
  List of adverts. Each xs:unique constraint below requires that no two
  property_advert children share the same value for the named attribute;
  children that lack the attribute are not compared by that constraint.
-->
<xs:element name="adverts" type="adverts_type">
  <xs:unique name="unique_original_html_file">
    <xs:selector xpath="property_advert"/>
    <xs:field xpath="@original_html_file"/>
  </xs:unique>
  <xs:unique name="unique_agent_advert_reference">
    <xs:selector xpath="property_advert"/>
    <xs:field xpath="@agent_advert_reference"/>
  </xs:unique>
  <xs:unique name="unique_static_advert_url">
    <xs:selector xpath="property_advert"/>
    <xs:field xpath="@static_advert_url"/>
  </xs:unique>
</xs:element>
<!-- Content of the adverts list: one or more property_advert elements. -->
<xs:complexType name="adverts_type">
  <xs:sequence>
    <xs:element name="property_advert" type="property_advert_type" minOccurs="1" maxOccurs="unbounded"/>
  </xs:sequence>
</xs:complexType>
</xs:schema>
您的动物数据框,然后对每个块应用一个自定义函数,从中随机提取50行:
split