我想从数据帧from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
link = "https://in.finance.yahoo.com/quote/AAPL?p=AAPL"
def get_information(driver, url):
driver.get(url)
driver.find_element_by_tag_name("body").send_keys(Keys.END) # scroll page
time.sleep(1) # small pause between
driver.find_element_by_tag_name("body").send_keys(Keys.END) # one more time
item = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "[id$='-QuoteModule'] p[class^='businessSummary']")))
driver.execute_script("arguments[0].scrollIntoView();", item)
print(item.text)
if __name__ == "__main__":
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
try:
get_information(driver,link)
finally:
driver.quit()
中抽取许多连续的行。
df
我正在尝试获取类似于以下内容的内容,该内容使我可以对3个随机行进行采样并重复此过程100次。
df <- data.frame(C1 = c(1, 2, 4, 7, 9), C2 = c(2, 4, 6, 8, 10))
通过连续,结果应类似于:
test <- replicate(100, df[sample(1:nrow(df), 3, replace=T),], simplify=F)
我该如何实现?
答案 0 :(得分:6)
我们只需要采样一个块的起始行索引即可。
sample.block <- function (DF, chunk.size) {
if (chunk.size > nrow(DF)) return(NULL)
start <- sample.int(nrow(DF) - chunk.size + 1, 1)
DF[start:(start + chunk.size - 1), ]
}
replicate(100, sample.block(df, 3), simplify = FALSE)