我有一个问题,我一直试图解决,但似乎无法找到答案,所以希望有人在这里可以帮我找到答案。
问题是我正在尝试从动态生成下载文件的网站创建网页抓取。该网站需要输入各种参数才能对您的数据请求进行分类。点击提交按钮后,网站会根据您的参数动态生成数据请求,并将结果下载到文件中。这在浏览器中工作正常。
当尝试使用 C# 执行此操作时,我的代码能够成功提交带有所需参数的请求并捕获 HTML 响应,但似乎无法捕获文件下载响应。远程服务器上文件的实际下载地址是被隐藏的。下载文件名按“Inquiry_currentdate_currenttime.xml”的格式动态生成,文件大小会因提交的数据请求而异。
也就是说,远程 URL 是主动把数据文件“推送”给我,而不是由我使用 C# 的 DownloadFile 从某个 URL 拉取数据。我该如何捕获这个被“推送”的文件?
我到目前为止尝试的代码如下。此代码成功提交请求并获得Html响应。
// Submit one data-file request for every (funding year, state) combination.
foreach (string fundingYear in lstFY)
{
    foreach (string state in lstState)
    {
        // A new handler is created for each combination and a GET is issued
        // against the target page before the form values are filled in.
        var handler = new HtmlHandler();
        handler.Get(gWebTgtName);

        // Query parameters for this iteration.
        handler.FormElements["ctl00$ContentPlaceHolder$ddlFundingYear"] = fundingYear;
        handler.FormElements["ctl00$ContentPlaceHolder$ddlState"] = state;

        // Reporting format radio button: XML.
        handler.FormElements["ctl00$ContentPlaceHolder$rblReportFormat"] = "xml";

        // "Select data points" and "select all data points" check boxes.
        handler.FormElements["ctl00$ContentPlaceHolder$cbSelectDatapoints"] = "on";
        handler.FormElements["ctl00$ContentPlaceHolder$cbAll"] = "on";

        // Value of the submit button.
        handler.FormElements["ctl00$ContentPlaceHolder$bSearch"] = "Build Data File!";

        // Submit the form (build data file); the returned markup is currently unused.
        string postResponse = handler.Post(gWebTgtName);

        // TODO: handle the response
        // TODO: save the XML file
        // TODO: process the XML file
    }
}
/// <summary>
/// Flags the handler as being in POST mode, issues a POST request to
/// <paramref name="url"/> through a newly configured <c>HtmlWeb</c>, and returns
/// the inner HTML of the document node held in <c>_htmlDoc</c>.
/// </summary>
/// <param name="url">Address the POST request is sent to.</param>
/// <returns>The inner HTML of <c>_htmlDoc.DocumentNode</c>.</returns>
public string Post(string url)
{
    _isPost = true;

    HtmlWeb web = CreateWebRequestObject();
    web.Load(url, "POST");

    // NOTE(review): _htmlDoc is not assigned in this method; presumably one of the
    // HtmlWeb callbacks updates it during Load — confirm.
    return _htmlDoc.DocumentNode.InnerHtml;
}
/// <summary>
/// Builds an <c>HtmlWeb</c> instance with cookies enabled and this class's
/// pre-request, post-response and pre-handle-document callbacks attached.
/// </summary>
/// <returns>The configured <c>HtmlWeb</c> instance.</returns>
private HtmlWeb CreateWebRequestObject()
{
    return new HtmlWeb
    {
        UseCookies = true,
        PreRequest = OnPreRequest,
        PostResponse = OnAfterResponse,
        PreHandleDocument = OnPreHandleDocument
    };
}
/// <summary>
/// Pre-request callback: attaches the stored cookies to the outgoing request and,
/// when the handler is in POST mode, writes the form payload to it.
/// </summary>
/// <param name="rqst">The request that is about to be sent.</param>
/// <returns>Always <c>true</c>.</returns>
protected bool OnPreRequest(HttpWebRequest rqst)
{
    // Cookies saved from previous responses go out with every request.
    AddCookiesTo(rqst);

    // Post data is only added for POST requests.
    if (_isPost)
    {
        AddPostDataTo(rqst);
    }

    return true;
}
/// <summary>
/// Post-response callback: stores the cookies from the response, calls
/// <c>GetDownload</c> for the collector's target, then reads the whole response
/// body as text and parses it as XML.
/// </summary>
/// <param name="rqst">The request that produced the response (unused here).</param>
/// <param name="resp">The response whose cookies and body are read.</param>
/// <exception cref="XmlException">The response body is not well-formed XML.</exception>
protected void OnAfterResponse(HttpWebRequest rqst, HttpWebResponse resp)
{
    SaveCookiesFrom(resp); // Save cookies for subsequent requests
    GetDownload(DRTCollector.gWebTgtName);

    // Use the charset announced by the server when it is usable. The header may be
    // missing, empty, quoted or unknown (common for file downloads), in which case
    // Encoding.GetEncoding would throw, so fall back to UTF-8.
    Encoding encoding = Encoding.UTF8;
    string sCharset = resp.CharacterSet;
    if (!string.IsNullOrWhiteSpace(sCharset))
    {
        try
        {
            encoding = Encoding.GetEncoding(sCharset.Trim().Trim('"', '\''));
        }
        catch (ArgumentException)
        {
            // Unrecognized charset name: keep the UTF-8 fallback.
        }
    }

    // Read the full response body; the using block disposes the reader and the stream.
    string sWebPage;
    using (StreamReader sr = new StreamReader(resp.GetResponseStream(), encoding))
    {
        sWebPage = sr.ReadToEnd();
    }

    // NOTE(review): LoadXml throws XmlException for any body that is not well-formed
    // XML (e.g. an HTML page), and the parsed document is not stored or returned —
    // confirm whether this callback should save/process the XML instead.
    XmlDocument xdoc = new XmlDocument();
    xdoc.LoadXml(sWebPage);
}
/// <summary>
/// Adds the cookies stored in <c>Cookies</c> to the request's cookie container.
/// Does nothing when there are no stored cookies.
/// </summary>
/// <param name="rqst">The request that should carry the stored cookies.</param>
private void AddCookiesTo(HttpWebRequest rqst)
{
    if (Cookies == null || Cookies.Count == 0)
    {
        return;
    }

    // HttpWebRequest.CookieContainer is null unless something assigned it; create
    // one on demand so the Add call below cannot hit a null reference.
    if (rqst.CookieContainer == null)
    {
        rqst.CookieContainer = new CookieContainer();
    }

    rqst.CookieContainer.Add(Cookies);
}
/// <summary>
/// Writes the assembled form payload to the request as a UTF-8 encoded,
/// URL-encoded form body and sets the matching request headers.
/// </summary>
/// <param name="rqst">The POST request that receives the payload.</param>
private void AddPostDataTo(HttpWebRequest rqst)
{
    string payload = FormElements.AssemblePostPayload();
    byte[] buff = Encoding.UTF8.GetBytes(payload);

    // Headers must be set before the request stream is obtained.
    rqst.ContentLength = buff.Length;
    rqst.ContentType = "application/x-www-form-urlencoded";
    // NOTE(review): this Accept header prefers images; confirm it is what the
    // server expects for an XML report request.
    rqst.Accept = "image/png,image/*;q=0.8,*/*;q=0.5";

    // Dispose the request stream once the body is written so the connection is
    // released; leaving it open leaks the stream.
    using (System.IO.Stream reqStream = rqst.GetRequestStream())
    {
        reqStream.Write(buff, 0, buff.Length);
    }
}
答案 0 :(得分:0)
由于没有更多代码来说明您具体在做什么,我建议尝试使用 Selenium 或其他 Web UI 测试工具来生成测试代码并检查其行为。结合 Fiddler 捕获的会话数据,应该能够对这个过程进行逆向分析。Selenium 的优点在于它可以生成多种语言(包括 C#)的测试代码,而且是免费的。希望对您有帮助。