Click to view data sample
我正在尝试将
bool working = true;
// Visits each URL in turn: creates a browser for it, lets the FrameLoadEnd handler
// (scrape) do the extraction, and waits here until `working` has been cleared.
for (int i = 0; i < urls.Count(); ++i)
{
    // Re-armed for every URL; addDataToHashSet sets it back to false once a value is stored.
    working = true;

    // Release each browser as soon as its URL is finished. Previously one
    // ChromiumWebBrowser per URL was created and never disposed.
    using (ChromiumWebBrowser scraper = new ChromiumWebBrowser(urls[i]))
    {
        scraper.FrameLoadEnd += scrape;
        try
        {
            // Keep pumping pending window messages while the handler is still working.
            while (working)
            {
                Application.DoEvents();
                System.Threading.Thread.Sleep(50);
            }
        }
        finally
        {
            // Detach the handler so it is not invoked for a browser that is being torn down.
            scraper.FrameLoadEnd -= scrape;
        }
    }
}
// FrameLoadEnd handler. When the main frame has finished loading and can run JavaScript,
// executes the embedded script and then polls the page source until the text that follows
// the "data" marker (up to the next '<') is available, and hands it to addDataToHashSet.
// For any other frame, or when JavaScript cannot be executed, it only logs "Error".
//
// sender: the ChromiumWebBrowser that raised the event.
// args:   event data describing the frame that finished loading.
async void scrape(object sender, FrameLoadEndEventArgs args)
{
    ChromiumWebBrowser chrome = (ChromiumWebBrowser)sender;
    if (!args.Frame.IsMain || !chrome.CanExecuteJavascriptInMainFrame)
    {
        Debug.WriteLine("Error");
        return;
    }

    chrome.ExecuteScriptAsync(Properties.Resources.script);

    const string dataField = "data";
    // A value of exactly three characters means "poll again" (same rule as before).
    // NOTE(review): presumably the length of a placeholder shown by the page - confirm.
    const int pendingLength = 3;

    while (true)
    {
        string html = await chrome.GetSourceAsync();

        // Search for the marker that is actually declared here (the old code searched
        // for an unrelated identifier, phoneField).
        int markerIndex = html.IndexOf(dataField, System.StringComparison.Ordinal);
        if (markerIndex >= 0)
        {
            int valueStart = markerIndex + dataField.Length;
            int valueEnd = html.IndexOf('<', valueStart);

            // A missing marker or terminator is treated as "not rendered yet" instead of
            // feeding negative arguments to Substring, which would throw out of this
            // async void handler.
            if (valueEnd >= 0)
            {
                string data = html.Substring(valueStart, valueEnd - valueStart);
                if (data.Length != pendingLength)
                {
                    addDataToHashSet(data);
                    return;
                }
            }
        }

        // Wait without blocking the thread before polling the source again.
        await System.Threading.Tasks.Task.Delay(50);
    }
}
// Strips every "-" and " " from the scraped value, stores the result in dataHashSet,
// and clears the `working` flag that the per-URL wait loop is polling.
private void addDataToHashSet(string data)
{
    string normalized = data.Replace("-", "").Replace(" ", "");
    dataHashSet.Add(normalized);
    working = false;
}
(每个项目类型)的
CefSettings cefSettings = new CefSettings();
// Adds the "allow-running-insecure-content" switch with value "1" to the CEF command-line arguments.
cefSettings.CefCommandLineArgs.Add("allow-running-insecure-content", "1");
// NOTE(review): certificate errors are ignored for all requests - confirm this is intended.
cefSettings.IgnoreCertificateErrors = true;
// Proxy configuration; the "..." values are presumably placeholders for a real ip and port.
CefSharpSettings.Proxy = new ProxyOptions(ip: "...", port: "...");
// Initializes CEF with the settings assembled above.
// NOTE(review): the settings and proxy are assumed to be required before this call - confirm.
Cef.Initialize(cefSettings);
值替换为Item_Visibility
的值。但这会引发错误:
Item_Visibility
Item_Identifier
答案 0 :(得分:1)
据我所知,您想将Item_Identifier
的每个唯一值对应的Item_Visibility
的平均值放入Item_Visibility
列中。我猜,Item_Identifier
在这种情况下是指一组项目,因为以其他方式将它们分组然后取均值是没有意义的。
回答您的问题:
# Sample data creation: 4000 rows with a two-valued group label and a random visibility.
# The label column is built with its final string values up front, instead of first
# creating a float column and overwriting it with strings through .loc (a
# dtype-incompatible assignment that recent pandas versions deprecate).
values = np.random.rand(4000, 2)
data = pd.DataFrame({
    'Item_Identifier': ['id1' if x > 0.4 else 'id2' for x in values[:, 0]],
    'Item_Visibility': values[:, 1],
})
# Per-group means, indexed by Item_Identifier, used as the lookup table below.
map_table = data.groupby('Item_Identifier').mean()
# Replace every Item_Visibility with the mean of its Item_Identifier group.
data['Item_Visibility'] = data['Item_Identifier'].map(map_table['Item_Visibility'])
希望有帮助!
答案 1 :(得分:0)
使用数据透视表-
# Mean Item_Visibility per Item_Identifier (pivot_table aggregates with the mean by default).
visibility_avg = data.pivot_table(values='Item_Visibility', index='Item_Identifier')
# Impute missing visibilities with the mean visibility of the same product.
# Zeros are treated as missing too, as the original comment ("imputing 0 values") intended;
# the previous mask only covered nulls.
miss_bool = data['Item_Visibility'].isnull() | (data['Item_Visibility'] == 0)
# Look the mean up by identifier (the DataFrame is not callable) and write it back into
# the correctly spelled 'Item_Visibility' column rather than creating 'Item_Visibilty'.
data.loc[miss_bool, 'Item_Visibility'] = data.loc[miss_bool, 'Item_Identifier'].map(
    visibility_avg['Item_Visibility'])