我正试图从亚马逊中榨取一些东西,但无法通过登录。 HtmlAgilityPack返回一个空节点的子节点。这是表单的HTML看起来像(为了便于阅读而编辑)
<form name="signIn" method="post" novalidate action="https://www.amazon.com/ap/signin/..." class="auth-validate-form auth-real-time-validation a-spacing-none">
<input type="hidden" name="appActionToken" value="..." />
<input type="hidden" name="appAction" value="SIGNIN" />
<input type="hidden" name="openid.pape.max_auth_age" value="ape:MA==">
<input type="hidden" name="openid.return_to" value="...">
<input type="hidden" name="prevRID" value="...">
这是我的代码
class Program
{
static void Main(string[] args)
{
var amazonHomeUri = new Uri("https://www.amazon.com");
var amazonHomeDoc = GetHtml(amazonHomeUri);
var signInNode = amazonHomeDoc.DocumentNode.CssSelect("#nav-link-yourAccount").First();
var signInUrl = signInNode.Attributes["href"].Value;
var amazonSignInDoc = GetHtml(new Uri(amazonHomeUri, signInUrl));
var loginForm = amazonSignInDoc.DocumentNode.CssSelect("form[name='signIn']").First();
var postUrl = loginForm.Attributes["action"];
//THIS IS WHERE THE ERROR HAPPENS
//THERE ARE CERTAINLY INPUTS INSIDE THE FORM
foreach(var input in loginForm.SelectNodes("input"))
{
System.Console.WriteLine(input.Attributes["name"]);
}
}
private static HtmlAgilityPack.HtmlDocument GetHtml(Uri uri)
{
string html;
WebRequest request = WebRequest.Create(uri);
using (WebResponse response = request.GetResponse())
{
Stream dataStream = response.GetResponseStream();
using (var reader = new StreamReader(dataStream))
{
html = reader.ReadToEnd();
}
}
var doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(html);
return doc;
}
}