在 C# 中使用 Selenium 进行网页抓取

时间:2017-04-20 06:53:25

标签: c# selenium

我正在使用 Selenium DLL 进行网页抓取,但在抓取记录列表时遇到了问题:如果在调试器中单步运行,它能提取到所有记录;但如果不使用调试器直接运行应用程序,它有时只得到较少的记录,有时一条记录也没有。我想知道是否有办法判断页面是否已经完全加载。

  

driver.FindElementsByClassName("search-result-gridview-item").ToList()

每次运行时,上面这个调用返回的记录数都会变化。

                            // Scrape every product tile that is present in the DOM at the moment of the call.
                            // NOTE(review): nothing here waits for the result grid to be populated; if the tiles
                            // are filled in asynchronously, the caller must wait before this statement runs,
                            // otherwise fewer (or zero) tiles are seen.
                            driver.FindElementsByClassName("search-result-gridview-item").ToList().ForEach(x =>
                            {
                                objUPCProcess = new UPCProcessingModel();

                                // Each field is looked up exactly once and independently of the other.
                                // A missing element is recorded as that lookup's error message instead of
                                // aborting the whole tile, so partially rendered tiles are still reported.
                                string descriptionLookupError = null;
                                string priceLookupError = null;

                                try
                                {
                                    objUPCProcess.Description = x.FindElement(By.ClassName("prod-ProductTitle")).Text;
                                }
                                catch (OpenQA.Selenium.NoSuchElementException ex)
                                {
                                    descriptionLookupError = ex.Message;
                                    objUPCProcess.Description = descriptionLookupError;
                                }

                                try
                                {
                                    objUPCProcess.Price = x.FindElement(By.ClassName("Price")).Text;
                                }
                                catch (OpenQA.Selenium.NoSuchElementException ex)
                                {
                                    priceLookupError = ex.Message;
                                    objUPCProcess.Price = priceLookupError;
                                }

                                if (descriptionLookupError != null && priceLookupError != null)
                                {
                                    // Neither field could be read: flag the run as failed in the log.
                                    log.Message = descriptionLookupError;
                                    log.Status = "Error";
                                }

                                // The tile is always recorded, even when one or both lookups failed.
                                listOfProductDetails.Add(objUPCProcess);

                                // The first fully scraped tile becomes the log's representative item.
                                if (descriptionLookupError == null && priceLookupError == null && i == 0)
                                {
                                    log.Item = objUPCProcess.Description;
                                    i++;
                                }
                            });
                        }

2 个答案:

答案 0 :(得分:0)

看看这种情况,我很确定这是由同步问题(webdriver和页面以不同的速度工作)和实际填充记录的AJAX请求引起的。这就是为什么

  

如果禁用调试器并运行应用程序,它有时会显示较少的记录或有时显示无记录

我在你的代码中也没有看到任何显式等待(explicit waits)。不过,你可以实现一个辅助工具方法来检查

  

页面是否已完全加载

我是借助 IJavaScriptExecutor 这样实现的:

/// <summary>
/// Blocks until the document's readyState is "complete" (or "loaded") and jQuery,
/// if the page uses it, reports no active AJAX requests, or until
/// <paramref name="maxWaitTimeInSeconds"/> has elapsed.
/// </summary>
/// <param name="maxWaitTimeInSeconds">Upper bound for the wait, in seconds.</param>
/// <remarks>
/// On timeout the test is failed unless the page is at least "interactive"; a page
/// stuck in "interactive" is tolerated so the caller can still try to use its controls.
/// On any other WebDriver error the readyState is re-read once and the test is failed
/// unless the document is loaded.
/// </remarks>
public void WaitSecondsForNewPageToLoad(int maxWaitTimeInSeconds)
{
    string state = string.Empty;
    // True when jQuery has no request in flight (or the page does not use jQuery).
    bool jQueryIdle = true;
    try
    {
        WebDriverWait wait = new WebDriverWait(TestCaseContext.Driver,
            TimeSpan.FromSeconds(maxWaitTimeInSeconds));
        // Until() re-evaluates the predicate (every 500 ms by default) until it returns true.
        wait.Until(d =>
        {
            try
            {
                IJavaScriptExecutor js = (IJavaScriptExecutor) TestCaseContext.Driver;
                state = js.ExecuteScript(@"return document.readyState").ToString();
                // Guard against pages that do not load jQuery at all: an undefined
                // jQuery would otherwise raise a script error on every poll.
                jQueryIdle = (bool) js.ExecuteScript(
                    @"return typeof jQuery === 'undefined' || jQuery.active == 0");
                WindowsWhenSteps.WhenIFocusTheCurrentBrowserWindow();
            }
            catch (InvalidOperationException)
            {
                // Ignore: keep the last observed values and poll again.
            }
            return IsDocumentLoaded(state) && jQueryIdle;
        });
    }
    catch (WebDriverTimeoutException)
    {
        // WebDriverWait.Until signals a timeout with WebDriverTimeoutException, which derives
        // from WebDriverException; it must be caught first or the generic handler takes it.
        FailUnlessInteractive(state);
    }
    catch (TimeoutException)
    {
        // Kept for callers/environments that surface a System.TimeoutException.
        FailUnlessInteractive(state);
    }
    catch (NullReferenceException)
    {
        // ExecuteScript returned null, so .ToString() threw.
        FailUnlessInteractive(state);
    }
    catch (WebDriverException)
    {
        // With a single remaining window, re-select it and re-check the readyState once.
        if (TestCaseContext.Driver.WindowHandles.Count == 1)
        {
            TestCaseContext.Driver.SwitchTo().Window(TestCaseContext.Driver.WindowHandles[0]);
        }
        state =
            ((IJavaScriptExecutor) TestCaseContext.Driver).ExecuteScript(
                @"return document.readyState").ToString();
        if (!IsDocumentLoaded(state))
        {
            Assert.Fail("Page did not finish loading; document.readyState was '" + state + "'.");
        }
    }
}

// Returns true when the given readyState value denotes a fully loaded document.
private static bool IsDocumentLoaded(string readyState)
{
    return readyState.Equals("complete", StringComparison.InvariantCultureIgnoreCase) ||
           readyState.Equals("loaded", StringComparison.InvariantCultureIgnoreCase);
}

// Fails the test unless the page reached the "interactive" state. Sometimes a page stays
// in "interactive" and never becomes "complete"; its controls can then still be accessed.
private static void FailUnlessInteractive(string readyState)
{
    if (!readyState.Equals("interactive", StringComparison.InvariantCultureIgnoreCase))
    {
        Assert.Fail("Timed out waiting for the page to load; document.readyState was '" +
                    readyState + "'.");
    }
}

注意:如果你觉得这样开销太大,可以去掉其中一部分异常处理;我之所以写得这么完整,是为了给以后的读者一个全面的答案。

答案 1 :(得分:-1)

使用以下代码检查 Selenium 是否已被触发(triggered):

/* * Created by SharpDevelop. 
   * User: sravanth 
   * Date: 2/2/2018 
   * Time: 1:33 AM * 
   * To change this template use Tools | Options | Coding | Edit Standard Headers. 
*/ 

using System; 
using OpenQA.Selenium; 
using OpenQA.Selenium.Chrome; 
using OpenQA.Selenium.IE; 
using OpenQA.Selenium.Support.UI; 
using System.Diagnostics; 
using Microsoft.VisualBasic; 
using System.Windows.Forms; 
using System.Runtime.InteropServices; 
//using System.Collections; 
using System.Collections.Generic; 

namespace sele
{
    /// <summary>
    /// Console demo: opens a local test page in Internet Explorer through Selenium,
    /// clicks the element with id "btn", then uses the Win32 SetForegroundWindow call
    /// to bring the first IE process window and a Notepad window to the foreground.
    /// </summary>
    class Program
    {
        // Directory that contains the IEDriverServer executable.
        private const string DriverDirectory = @"C:\\Users/sravanth/Downloads/IEDriverServer_x64_3.8.0";

        // Local page under test; also used as the browser's initial URL.
        private const string TestPageUrl = "file:///C:/Users/sravanth/Desktop/a.html";

        // Upper bound, in milliseconds, for Notepad to create its main window.
        private const int NotepadStartupTimeoutMs = 5000;

        /// <summary>Entry point of the demo.</summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args)
        {
            var service = InternetExplorerDriverService.CreateDefaultService(DriverDirectory);
            // properties on the service can be used to e.g. hide the command prompt
            var options = new InternetExplorerOptions { IgnoreZoomLevel = true, InitialBrowserUrl = TestPageUrl, IntroduceInstabilityByIgnoringProtectedModeSettings = true };
            // NOTE(review): the driver is never quit, so the browser and the driver server
            // outlive this program. Left as is because closing the browser would change what
            // the demo shows; confirm whether driver.Quit() is wanted at the end.
            IWebDriver driver = new InternetExplorerDriver(service, options);
            driver.Url = TestPageUrl;

            // Wait (up to 10 s) for the button to exist instead of looking it up immediately:
            // an immediate lookup races against page rendering.
            WebDriverWait wait = new WebDriverWait(driver, TimeSpan.FromSeconds(10));
            IWebElement btn = wait.Until(d => d.FindElement(By.Id("btn")));
            btn.Click();

            Process[] processes = Process.GetProcessesByName("iexplore");
            Console.WriteLine(processes.Length);
            int i = 0;
            foreach (Process p in processes)
            {
                i = i + 1;
                Console.WriteLine(i);
                IntPtr windowHandle = p.MainWindowHandle;
                Console.Write("iexplore");
                Console.WriteLine(windowHandle.ToString());
                // Only the first enumerated IE process is brought to the foreground.
                if (i == 1)
                {
                    SetForegroundWindow(windowHandle);
                }
            }
            Console.WriteLine(processes.Length);

            using (Process startedNotepad = Process.Start("notepad.exe"))
            {
                // Give the new process time to create its main window; reading
                // MainWindowHandle right after Start() usually yields IntPtr.Zero.
                if (startedNotepad != null)
                {
                    try
                    {
                        startedNotepad.WaitForInputIdle(NotepadStartupTimeoutMs);
                    }
                    catch (InvalidOperationException)
                    {
                        // The process already exited or has no message loop; fall through
                        // to the by-name lookup below, as the original code did.
                    }
                }
            }

            var prc = Process.GetProcessesByName("notepad");
            if (prc.Length > 0)
            {
                SetForegroundWindow(prc[0].MainWindowHandle);
            }
        }

        // Win32: brings the window identified by hWnd to the foreground and activates it.
        [DllImport("user32.dll")]
        private static extern bool SetForegroundWindow(IntPtr hWnd);
    }
}