我正在创建读取bbc提要的控制台应用程序,然后它必须创建带有新闻对象的json文件。它每小时运行一小时。我的问题是它复制了父对象,但我不明白为什么。奇怪的是,如果你在一小时内运行它就可以工作但如果你在一小时前5分钟运行它就会创建这个重复的父元素。
public static void Main(string[] args)
{
// get the starting time of app.
DateTime startingTime = DateTime.Now;
int minute = 1;
int hoursRun = 0;
bool folderCreated = false;
int n = startingTime.AddHours(hoursRun).Hour;
//this will be the folder path for feeds.
string feedsFolderPath = Environment.GetFolderPath(
System.Environment.SpecialFolder.Desktop) + "\\feeds";
// uri for feeds.
string bbcURI = "http://feeds.bbci.co.uk/news/uk/rss.xml";
while (true)
{
// check the hour and if it is more than 1 minutes past the hour wait for the next hour.
if (DateTime.Now.Hour == startingTime.AddHours(hoursRun).Hour && DateTime.Now.Minute < minute)
{
//get feeds
News bbcNewsFeed = ProcessFeedHelper.GetFeeds(bbcURI);
// if this is the first run go ahead and create a json file.
if (hoursRun == 0)
{
if (!folderCreated)
{
ProcessFeedHelper.CreateFolder(feedsFolderPath);
folderCreated = true;
}
ProcessFeedHelper.CreateJsonFile(bbcNewsFeed, feedsFolderPath);
}
else
{
//if it is the second time then we need to check for duplicates.
ProcessFeedHelper.RemoveDuplicatesFeeds(bbcNewsFeed, feedsFolderPath);
ProcessFeedHelper.CreateJsonFile(bbcNewsFeed, feedsFolderPath);
}
// if it is the 23rd hour then we need to reset the counter and detele all files in folder.
if (hoursRun == 23)
{
hoursRun = 0;
ProcessFeedHelper.DeleteFilesInDirectory(feedsFolderPath);
}
else
{
//others increment the hoursrun.
hoursRun++;
}
bbcNewsFeed = null;
}
}
}
}
助手类
public static News GetFeeds(String aURI)
{
News newsFeed;
//instantiate xmlreader and point to uri
using (System.Xml.XmlReader reader = System.Xml.XmlReader.Create(aURI))
{
//load the feed into SyndicationFeed Object
SyndicationFeed feed = SyndicationFeed.Load(reader);
newsFeed = new News();
List<NewsItem> newsItemList = new List<NewsItem>();
foreach (var item in feed.Items)
{
// BBC Feed parent element titles change throughout the day but I have not managed to get them all.
// Could potentially break however, the logic is correct.
// Here we create the parent element object.
if (item.Title.Text == "BBC News Channel" || item.Title.Text == "BBC News at 10")
{
newsFeed.title = item.Title.Text;
newsFeed.link = item.Id;
newsFeed.description = item.Summary.Text;
}
else
{
NewsItem newsItem = new NewsItem();
newsItem.title = item.Title.Text;
newsItem.link = item.Id;
newsItem.description = item.Summary.Text;
newsItem.publishDate = FormatDate(item.PublishDate.ToString());
//Add it to parent object.
newsItemList.Add(newsItem);
}
}
newsFeed.items = newsItemList;
//close reader once we have finished reading feed and return feed object.
reader.Close();
}
return newsFeed;
}
/// <summary>
/// Creates a folder at a specified path.
/// </summary>
/// <param name="aPath"></param>
public static void CreateFolder(string aPath)
{
System.IO.Directory.CreateDirectory(aPath);
}
/// <summary>
/// Creates a Json formatted file based on a news object passed through.
/// </summary>
/// <param name="aNews"></param>
/// <param name="aPath"></param>
public static void CreateJsonFile(News aNews, string aPath)
{
string filePath = aPath + "\\" + DateTime.Now.ToString("yyyy-MM-dd-HH") + ".json";
//serialises objects in news Object and appends a file.
string jsonFile = JsonConvert.SerializeObject(aNews, Newtonsoft.Json.Formatting.Indented);
aNews = JsonConvert.DeserializeObject<News>(jsonFile);
jsonFile = JsonConvert.SerializeObject(aNews, Newtonsoft.Json.Formatting.Indented);
File.AppendAllText(@filePath, jsonFile);
Console.WriteLine(jsonFile);
}
/// <summary>
/// Removes Duplicate news articles in new feeds if they are already stored in files.
/// </summary>
/// <param name="aNews"></param>
/// <param name="aPath"></param>
public static void RemoveDuplicatesFeeds(News aNews, string aPath)
{
try
{
//get paths to all files.
string[] filesInDirectory = Directory.GetFiles(aPath);
List<News> newsInFiles = new List<News>();
News newsInFile;
// loop through files in directory.
foreach (string aFile in filesInDirectory)
{
//Read files file and deserialise the news object putting it in a news collection.
StreamReader reader = new StreamReader(aFile);
string fileContent = reader.ReadToEnd();
newsInFile = Newtonsoft.Json.JsonConvert.DeserializeObject<News>(fileContent);
newsInFiles.Add(newsInFile);
reader.Close();
}
//only go in here if there is the recent feed has news items.
if (aNews.items.Count > 0)
{
foreach (News aNewsInFile in newsInFiles)
{
// put news list into new news list so the next loop doesn't crash.
List<NewsItem> tempNewsList = new List<NewsItem>(aNews.items);
foreach (NewsItem aNewsItemFromCurrentFeed in tempNewsList)
{
//check that the current news item is not already in files saved.
var newsItemAlreadyExists = from nItems in aNewsInFile.items
where nItems.title == aNewsItemFromCurrentFeed.title
where nItems.publishDate == aNewsItemFromCurrentFeed.publishDate
where nItems.link == aNewsItemFromCurrentFeed.link
where nItems.description == aNewsItemFromCurrentFeed.description
select nItems;
// if item already stored in file then we must remove it as we don't want it.
if (newsItemAlreadyExists.First() != null)
{
if (aNews.items.Contains(aNewsItemFromCurrentFeed))
{
aNews.items.Remove(aNewsItemFromCurrentFeed);
}
}
}
}
}
}
catch (Exception e)
{
Console.WriteLine("Unexpected Error");
}
}
/// <summary>
/// Deletes all the files in a directory(path specified in parameter).
/// </summary>
/// <param name="directoryPath"></param>
public static void DeleteFilesInDirectory(string directoryPath)
{
try
{
//create files collection and directory object.
List<FileInfo> importFiles = new List<FileInfo>();
DirectoryInfo tempDirectory = new DirectoryInfo(directoryPath);
//get all files in directory.
importFiles.AddRange(tempDirectory.GetFiles());
//if the number of files in the directory are greater than zero then delete them.
if (importFiles.Count > 0)
{
for (int i = 0; i < importFiles.Count; i++)
importFiles[i].Delete();
}
}
catch (Exception e)
{
Console.WriteLine("Unexpected Error");
}
}
/// <summary>
/// Formats a string to ddd, mm yyyy hh:ss gmt
/// </summary>
/// <param name="aDate"></param>
/// <returns></returns>
private static String FormatDate(String aDate)
{
try
{
//split string
char[] delimiters = { ' ', ',', ':', '/' };
string[] tokens = aDate.Split(delimiters);
int year = int.Parse(tokens[2]);
int month = int.Parse(tokens[1]);
int day = int.Parse(tokens[0]);
int hh = int.Parse(tokens[3]);
int mm = int.Parse(tokens[4]);
int ss = int.Parse(tokens[5]);
//create date time object. and add gmt to end of string.
DateTime date = new DateTime(year, month, day, hh, mm, ss);
return date.ToUniversalTime().ToString("r");
}
catch (Exception e)
{
Console.WriteLine("Unexpected Error");
}
return "";
}
}
父类
class News
{
public string title { get; set; }
public string link{ get; set; }
public string description{ get; set; }
public IList<NewsItem> items{ get; set; }
}
儿童班
class NewsItem
{
public string title { get; set; }
public string description { get; set; }
public string link { get; set; }
public string publishDate { get; set; }
}
文件示例(不假设有结束部分)
{
"title": "BBC News Channel",
"link": "http://www.bbc.co.uk/news/10318089",
"description": "Britain's most-watched news channel, delivering breaking news and analysis all day, every day.",
"items": [
{
"title": "Dover ferry port chaos leads to 14-hour traffic jams",
"description": "Delays at the Port of Dover have caused up to 14-hour tailbacks on the A20 /M20 with Kent Police warning disruption could last for another two days.",
"link": "http://www.bbc.co.uk/news/uk-england-kent-36873632",
"publishDate": "Sat, 23 Jul 2016 19:38:36 GMT"
}, ]
} {
"title": "BBC News Channel",
"link": "http://www.bbc.co.uk/news/10318089",
"description": "Britain's most-watched news channel, delivering breaking news and analysis all day, every day.",
"items": []
}
答案 0 :(得分:1)
我认为问题可能是竞争条件:
if (DateTime.Now.Hour == startingTime.AddHours(hoursRun).Hour && DateTime.Now.Minute < minute)
假设你在8:59启动程序,正如我在上面的评论中指出的那样,它正在寻找小时为8分钟和分钟为0.你认为这不会发生在23小时或者......,但是......
想象一下,在8:59:59.9999,检查此条件DateTime.Now.Hour == startingTime.AddHours(hoursRun).Hour
并返回true,因为小时当前为8.因此执行继续检查下一个条件:DateTime.Now.Minute < minute
。时间已过,所以在检查条件时是9点。因此两个条件都是正确的,并且代码被执行。 (创建一个名为2016-07-23-09.json
的文件。)
现在hoursRun
会增加,所以它现在是9。
循环的下一次迭代,时间就像是9:00:05。两个条件都为真(小时为9,分钟为0),因此代码再次运行,附加到同一文件(2016-07-23-09.json
)。
如果我的预感是正确的,可能最小的修复是这样做,这可以确保你同时检查小时和分钟组件:
while (true)
{
var now = DateTime.Now;
if (now.Hour == startingTime.AddHours(hoursRun).Hour && now.Minute < minute)
{
我还建议在while
循环中放入一个sleep语句......你可能在这个紧凑的繁忙循环中烧掉了大量的CPU。
修改强>
哦,也许,你可能并不是说要先等待23个小时。 :-)你可以在任何地方使用+1
作为一个简单的修复(虽然这意味着如果你在8:00运行程序,它将等到9:00才能写出第一个文件)。
编辑2
如果您不关心“按小时运行”,这可能是构建循环的更简单方法:
DateTime lastRun = DateTime.MinValue;
while (true)
{
// sleep for 10 minutes at a time until an hour has passed
while ((DateTime.UtcNow - lastRun) < TimeSpan.FromHours(1))
{
Thread.Sleep(TimeSpan.FromMinutes(10));
}
// do work in here
// remember the last time we did work
lastRun = DateTime.UtcNow;
}