我知道这个问题处于本论坛“好问题”标准的边缘,但我认为它相当有趣,应该能引出一些有价值的见解。基本上,我遇到了一个性能问题。我这里有一段代码片段。
我尽量给它加了注释。基本上它所做的只是向一个 URL 发出 HEAD 请求——该 URL 最终会重定向到 YouTube(事实上,起点就是 youtube.com 自身的重定向器)。拿到最终到达的网址后,它会提取视频 ID,并使用 YouTube API 获取一些数据。目前它甚至不解析这些数据——只是发出 HttpClient 请求而已。
在我看来这很简单——整个过程应该耗时不到 1 秒。后续的请求通常确实如此。但是,如果你每分钟执行一次,就会发现整个过程经常需要 3 秒以上,有时还会失败。更糟糕的是,仅对 YouTube 主站发出一个简单的 HEAD 请求通常就需要 2–3 秒。我已经在两个互不相关的网络上测试过,结果相同——欢迎自行验证。
我在这里遗漏了什么?我对性能的期望是否不合理?HttpClient 在建立连接时是否特别慢?还是我配置错了?非常感谢任何见解。
答案 0 :(得分:0)
可以尝试设置一些请求头,例如 User-Agent、Connection: keep-alive 或 Access-Control-Allow-Headers: X-Requested-With(Ajax 请求会发送此信息)。
您可以尝试不同的组合,看看是否有任何差异。
答案 1 :(得分:0)
不设置 User-Agent 或具体的 Accept 标头会让某些服务器陷入两难:服务器无法知道您希望响应使用哪种语言、偏好哪种格式,也无法获知某些网站用来决定是否呈现移动版页面的用户代理。
您提供的信息越多,服务器需要做的工作就越少。另一个问题(我们无法控制)是请求被路由到哪里:如果它被路由到离您很远的数据中心,请求自然需要更长的时间。
我们可以通过在代码中设置超时和重试逻辑来克服这个问题。在下面的代码中,我把请求配置为 450 毫秒后超时,并且最多重试 3 次。
我还弃用了 HttpClient……没错,它提供了简单的查询接口,但一旦需要优化,其缺点就会超过好处。
为了进一步优化请求,我们需要横向扩展,也就是使用多线程。我在 1 分 22 秒内跑完了 2,000 次请求,只有 7 次超时!
// Script entry point: fills a queue with copies of a test URI and drains it from
// several worker threads. For each URI a worker resolves the final redirect target,
// checks that it looks like a YouTube watch page, extracts the video id and queries
// the YouTube data API, printing how long each of the two HTTP calls took.
void Main()
{
    const string Youtube = "youtube.com";
    const string UriRegexPattern = @"v=([^\&]*)";
    const string UriPath = "/watch";
    const string UriQuery = "v=";
    const string TestUri = "https://www.youtube.com/watch/HYE9H_ZUuOI";
    // NOTE(review): an API key is hard-coded in source here; it should be loaded from
    // configuration or the environment instead, and this key rotated.
    const string ApiUri = "https://www.googleapis.com/youtube/v3/videos?id={0}&key=AIzaSyDWaA2OoArAjQTHqmN6r9XrpHYNkpKGyGw&part=snippet,contentDetails,statistics,status";
    const int TotalThreads = 10;
    const int TotalUris = 1000;

    ConcurrentQueue<string> UriQueue = new ConcurrentQueue<string>();
    for (int i = 0; i < TotalUris; i++)
    {
        UriQueue.Enqueue(TestUri);
    }

    Thread[] threads = new Thread[TotalThreads];
    for (int i = 0; i < TotalThreads; i++)
    {
        // Copy the loop variable so each lambda captures its own thread index.
        int iCopy = i;
        threads[iCopy] = new Thread(() =>
        {
            Stopwatch sw = new Stopwatch();
            string uri;
            while (UriQueue.TryDequeue(out uri))
            {
                // Restart on every iteration. Previously the stopwatch was left stopped
                // after the first pass, so every later "locating redirect" line printed
                // the previous API timing instead of the redirect lookup time.
                sw.Restart();

                // Locate the final redirect Uri
                Uri finalUri = Http.GetRedirectDestination(new Uri(uri));
                Console.WriteLine("THREAD[{0}] >>> Time taken locating redirect: {1}", iCopy, sw.Elapsed);

                // Ensure that the host is youtube, and the page contains a video.
                // NOTE(review): `return` ends this whole worker thread rather than just
                // skipping the current URI - confirm that this is intended.
                if (!finalUri.ContainsHost(Youtube) || !finalUri.ContainsPath(UriPath) || !finalUri.ContainsQuery(UriQuery)) return;

                // Extract the youtubeId using a regular expression.
                string youtubeId = finalUri.ExtractQuery(UriRegexPattern);

                // The uri of api to query including the youtubeId extracted
                string apiUri = string.Format(ApiUri, youtubeId);

                // Time the api query on its own; the response body is not parsed yet.
                sw.Restart();
                Http.Get(new Uri(apiUri));
                Console.WriteLine("THREAD[{0}] >>> Time taken querying api: {1}", iCopy, sw.Elapsed);

                // Also lets try not to get blacklisted by youtube
                Thread.Sleep(500);
            }
        });
        threads[iCopy].Start();
    }

    // Wait for every worker to finish before the script exits.
    foreach (var thread in threads)
    {
        thread.Join();
    }
}
/// <summary>
/// Blocking HTTP helpers built on <see cref="HttpWebRequest"/>. A request that fails
/// with a <see cref="WebException"/> is re-issued up to <c>MaxRetries</c> more times
/// before the exception is rethrown to the caller.
/// </summary>
public static class Http
{
    // Retries attempted after the first failure (so at most MaxRetries + 1 requests).
    private const int MaxRetries = 3;

    // Timeout applied to the HEAD request, in milliseconds.
    private const int HeadTimeoutMilliseconds = 450;

    /// <summary>
    /// Issues a HEAD request for <paramref name="uri"/> and returns the URI that finally
    /// answered. If the response is itself a redirect, its Location header is followed.
    /// </summary>
    /// <param name="uri">The URI to resolve.</param>
    /// <param name="retries">Number of retries already consumed; callers normally omit it.</param>
    /// <returns>The final destination URI.</returns>
    /// <exception cref="WebException">The request still fails after all retries.</exception>
    public static Uri GetRedirectDestination(Uri uri, int retries = 0)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
        request.Method = "HEAD";
        request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
        request.Headers["Accept-Language"] = "en-GB,en-US;q=0.8,en;q=0.6";
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
        request.AutomaticDecompression = DecompressionMethods.GZip;
        request.Timeout = HeadTimeoutMilliseconds;

        Uri nextHop;
        try
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // NOTE(review): HttpWebRequest follows redirects itself unless
                // AllowAutoRedirect is turned off, so a redirect status is presumably
                // rare here and ResponseUri is normally already the final destination.
                string location = response.Headers["Location"];
                if (!IsRedirect(response.StatusCode) || string.IsNullOrEmpty(location))
                {
                    return response.ResponseUri;
                }

                // Location may be relative; resolve it against the URI that answered.
                nextHop = new Uri(response.ResponseUri, location);
            }
        }
        catch (WebException exception)
        {
            Console.WriteLine("WebException Uri: {0}", uri);
            Console.WriteLine(">> Message: {0}", exception.Message);
            Console.WriteLine(">> Status: {0}", exception.Status);
            if (retries >= MaxRetries)
            {
                throw;
            }
            return GetRedirectDestination(uri, retries + 1);
        }

        // Follow the redirect after this response has been released, and return what it
        // resolves to. The previous code computed this value and then discarded it.
        return GetRedirectDestination(nextHop);
    }

    /// <summary>
    /// Issues a GET request for <paramref name="uri"/> asking for JSON and returns the
    /// response body as text.
    /// </summary>
    /// <param name="uri">The URI to fetch.</param>
    /// <param name="retries">Number of retries already consumed; callers normally omit it.</param>
    /// <returns>The complete response body.</returns>
    /// <exception cref="WebException">The request still fails after all retries.</exception>
    public static string Get(Uri uri, int retries = 0)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
        request.Accept = "application/json";
        request.AutomaticDecompression = DecompressionMethods.GZip;
        try
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        }
        catch (WebException exception)
        {
            if (retries >= MaxRetries)
            {
                // Leave exception.Response untouched so the caller can still inspect it.
                throw;
            }

            // Response is null when no HTTP response was received (timeout, DNS or
            // connect failure); dereferencing it unconditionally used to replace the
            // WebException with a NullReferenceException.
            if (exception.Response != null)
            {
                exception.Response.Close();
            }
            return Get(uri, retries + 1);
        }
    }

    // True for the status codes treated as "follow the Location header": 301, 302, 303, 307.
    private static bool IsRedirect(HttpStatusCode status)
    {
        return status == HttpStatusCode.MovedPermanently ||
               status == HttpStatusCode.Redirect ||
               status == HttpStatusCode.RedirectKeepVerb ||
               status == HttpStatusCode.RedirectMethod;
    }
}
/// <summary>
/// Extension helpers for strings and for testing or extracting parts of a <see cref="Uri"/>.
/// All substring checks are ordinal and ignore case.
/// </summary>
public static class Extensions
{
    /// <summary>Fluent wrapper around <see cref="string.IsNullOrWhiteSpace(string)"/>.</summary>
    /// <param name="text">The text to test; may be null.</param>
    /// <returns>True when <paramref name="text"/> is null, empty or only white space.</returns>
    public static bool IsNullOrWhiteSpace(this string text)
    {
        return string.IsNullOrWhiteSpace(text);
    }

    /// <summary>Tests whether the host of <paramref name="uri"/> contains <paramref name="host"/>.</summary>
    /// <param name="uri">The URI whose host is inspected.</param>
    /// <param name="host">The host fragment to look for, e.g. "youtube.com".</param>
    /// <returns>True when the fragment occurs anywhere in the host, ignoring case.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="uri"/> is null, or <paramref name="host"/> is null or white space.
    /// </exception>
    public static bool ContainsHost(this Uri uri, string host)
    {
        if (uri == null) throw new ArgumentNullException("uri");
        if (host.IsNullOrWhiteSpace()) throw new ArgumentNullException("host");

        // Ignore case like ContainsPath/ContainsQuery do, so "YouTube.com" also matches.
        return uri.Host.IndexOf(host, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Tests whether the path-and-query of <paramref name="uri"/> contains <paramref name="path"/>.
    /// </summary>
    /// <param name="uri">The URI to inspect.</param>
    /// <param name="path">The fragment to look for, e.g. "/watch".</param>
    /// <returns>True when the fragment occurs in <see cref="Uri.PathAndQuery"/>, ignoring case.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="uri"/> is null, or <paramref name="path"/> is null or white space.
    /// </exception>
    public static bool ContainsPath(this Uri uri, string path)
    {
        if (uri == null) throw new ArgumentNullException("uri");
        if (path.IsNullOrWhiteSpace()) throw new ArgumentNullException("path");

        // The search covers the query string too, because PathAndQuery is what is examined.
        string pathAndQuery = uri.PathAndQuery;
        if (pathAndQuery.IsNullOrWhiteSpace()) return false;
        return pathAndQuery.IndexOf(path, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>Tests whether the query string of <paramref name="uri"/> contains <paramref name="query"/>.</summary>
    /// <param name="uri">The URI to inspect.</param>
    /// <param name="query">The fragment to look for, e.g. "v=".</param>
    /// <returns>True when the fragment occurs in <see cref="Uri.Query"/>, ignoring case.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="uri"/> is null, or <paramref name="query"/> is null or white space.
    /// </exception>
    public static bool ContainsQuery(this Uri uri, string query)
    {
        if (uri == null) throw new ArgumentNullException("uri");
        if (query.IsNullOrWhiteSpace()) throw new ArgumentNullException("query");

        string uriQuery = uri.Query;
        if (uriQuery.IsNullOrWhiteSpace()) return false;
        return uriQuery.IndexOf(query, StringComparison.OrdinalIgnoreCase) >= 0;
    }

    /// <summary>
    /// Runs <paramref name="regexPattern"/> against the query string of <paramref name="uri"/>
    /// and returns the text captured by the first group.
    /// </summary>
    /// <param name="uri">The URI whose query string is searched.</param>
    /// <param name="regexPattern">A regular expression with at least one capture group.</param>
    /// <returns>
    /// The value of capture group 1; an empty string when the pattern does not match;
    /// null when the URI has no query string.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="uri"/> is null, or <paramref name="regexPattern"/> is null or white space.
    /// </exception>
    public static string ExtractQuery(this Uri uri, string regexPattern)
    {
        // Guard uri like the sibling methods do instead of failing with a NullReferenceException.
        if (uri == null) throw new ArgumentNullException("uri");
        if (regexPattern.IsNullOrWhiteSpace()) throw new ArgumentNullException("regexPattern");
        if (uri.Query.IsNullOrWhiteSpace()) return null;

        Match match = Regex.Match(uri.Query, regexPattern);
        return match.Groups[1].Value;
    }
}