上下文:我正在读取如下所示的日志文件数据:
{"action": "tweet", id: 1234, user: "user1", timestamp: 3120}
{"action": "retweet", target_id: 1234, user: "user2", timestamp: 4020}
{"action": "tweet", id: 1235, user: "user3", timestamp: 5320}
{"action": "retweet", target_id: 1235, user: "user4", timestamp: 5820}
{"action": "retweet", target_id: 1235, user: "user2", timestamp: 6540}
日志文件中有更多数据,但这些是最重要的字段。
我创建了以下函数来读取日志文件的每一行,并检测用户是否是垃圾邮件发送者。垃圾邮件发送者的标准是:
该函数应该返回一个垃圾邮件发送者数组,并且同一个用户即使被标记多次,也只能在数组中出现一次。
我相信这段代码还可以精简很多:
/**
 * Reads a newline-delimited JSON log and returns the users flagged as spammers.
 *
 * A user is flagged when one of their entries targets the most recently
 * tracked tweet and arrives less than 1001 ms after it. A user is excluded
 * from the result if they were ever recorded as the author of a tracked tweet
 * or, once flagged, produced an entry 1001 ms or more after the tracked tweet.
 *
 * @param {string} file_path - Log location. A leading "/" is prepended before
 *   reading, so pass the path without its leading slash.
 * @returns {string[]} Flagged user names, each at most once, in the order in
 *   which they were first flagged.
 * @throws {Error} If the file cannot be read.
 * @throws {SyntaxError} If a non-blank line is not valid JSON.
 */
const identifySpammers = (file_path) => {
  // Gaps of this many milliseconds or more are no longer considered spam-fast.
  const SLOW_GAP_MS = 1001;

  // Sets give O(1) membership checks and de-duplicate users for free.
  const flagged = new Set();
  const cleared = new Set();
  let previousId;
  let previousTimeStamp;

  // Blank lines are dropped so an empty file yields [] instead of a parse error.
  const lines = fs
    .readFileSync(`/${file_path}`, 'utf8')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line !== '');

  for (const line of lines) {
    const entry = JSON.parse(line);
    // NaN until the first tweet has been tracked, so both comparisons are false.
    const gap = entry.timestamp - previousTimeStamp;

    if (gap >= SLOW_GAP_MS && flagged.has(entry.user)) {
      cleared.add(entry.user);
    } else if (
      !flagged.has(entry.user) &&
      entry.target_id === previousId &&
      gap < SLOW_GAP_MS
    ) {
      flagged.add(entry.user);
    } else if (entry.action === 'tweet') {
      previousId = entry.id;
      previousTimeStamp = entry.timestamp;
      cleared.add(entry.user);
    }
  }

  // Filtering (rather than splicing while iterating) never skips an element.
  return [...flagged].filter((user) => !cleared.has(user));
};
我真正应该做的是沿用类似的条件,但把结果写入一个对象:其中每个键是用户名,对应的值表示该用户是否是垃圾邮件发送者,类似于:
// Tri-state spam status keyed by user name:
//   null  -> initial value, user not classified yet
//   false -> once set, the user can never be considered a spammer again
//   true  -> currently flagged, but may still end up false later on
const spammers = {
  user1: null,
  user2: false,
  user3: true,
};
我尝试用与双数组方案类似的逻辑来实现这一点,但没有成功。
基于上面的代码,如果有人能就如何实现一个更高效的、基于对象的解决方案给我一些指导,我将不胜感激。不需要完整的答案,只需要一些处理思路上的建议,以及你可能有的任何其他想法。
谢谢!
编辑1:修正了一些错误,并明确了判定用户为垃圾邮件发送者的条件。 编辑2:扩展了日志文件示例,以演示用户可能先被标记为垃圾邮件发送者,但之后又满足了不再被视为垃圾邮件发送者的条件。
答案 0 :(得分:1)
我用更简洁的方式重构了代码,并采用了你建议的数据结构:
// Sample log entries, already parsed; each is either a tweet or a retweet.
const data = [
  { action: "tweet", id: 1234, user: "user1", timestamp: 1341414 },
  { action: "retweet", target_id: 1234, user: "user2", timestamp: 1341415 },
  { action: "tweet", id: 1235, user: "user3", timestamp: 2341414 },
  { action: "retweet", target_id: 1235, user: "user1", timestamp: 2341415 },
];

// Per-user status: true = flagged, false = cleared for good, absent = unseen.
const isSpammerByUser = {};
// Id and timestamp of the most recent tweet; undefined until one is seen.
let previousId;
let previousTimeStamp;

// for...of with a declared binding: for...in over an array with an undeclared
// loop variable creates an implicit global and throws in strict mode.
for (const entry of data) {
  const { user } = entry;
  const isUserASpammer = isSpammerByUser[user] === true;
  const hasUserProvenToNotBeASpammer = isSpammerByUser[user] === false;
  // Compares against NaN (hence false) before the first tweet is recorded.
  const isFastAction = entry.timestamp - previousTimeStamp < 1001;
  const isRetweetFromPrevious = entry.target_id === previousId;

  if (entry.action === "tweet") {
    // Authoring a tweet clears the user and becomes the new reference tweet.
    isSpammerByUser[user] = false;
    previousId = entry.id;
    previousTimeStamp = entry.timestamp;
  } else if (isUserASpammer && !isFastAction) {
    // A flagged user acting slowly is cleared.
    isSpammerByUser[user] = false;
  } else if (!hasUserProvenToNotBeASpammer && isRetweetFromPrevious && isFastAction) {
    // A fast retweet of the latest tweet flags anyone not already cleared.
    isSpammerByUser[user] = true;
  }
}

// Keep only the users whose final status is "flagged".
const spammers = Object.entries(isSpammerByUser)
  .filter(([, isSpammer]) => isSpammer)
  .map(([name]) => name);

console.log(`spammers: ${spammers.join(",")}`);