我参加了一次模拟面试,并被问到这个问题。我开始着手解决,但现在陷入了困境。如能提供任何解决方案和解释,我将不胜感激。
您有一个数据文件,每个对象看起来类似于:
{"timestamp": 1487722870, "user": "Una", "action": "navigate"}
时间戳记是unix时间,以秒为单位。
编写
function bot_detection(input_file_path)
来找出所有漫游器(机器人)用户:即在任意4分钟的时间段内至少执行了10次操作,并且其中至少5次是同一种操作(例如 navigate)的用户。输出找到的机器人名称的列表。
例如,如果您确定用户Tran是系统中唯一的漫游器,则该函数应生成
["Tran"]
我目前的做法:
// the data looks like this. first i read it into my file
{"timestamp": 1487184625, "user": "Eric", "action": "navigate"}
{"timestamp": 1487184655, "user": "Bill", "action": "browse"}
{"timestamp": 1487184685, "user": "Eric", "action": "key press"}
{"timestamp": 1487184715, "user": "John", "action": "idle"}
{"timestamp": 1487184755, "user": "Tran", "action": "search"}
{"timestamp": 1487098049, "user": "Tran", "action": "click"}
{"timestamp": 1487098079, "user": "Eric", "action": "click"}
{"timestamp": 1487098109, "user": "Tran", "action": "click"}
{"timestamp": 1487098139, "user": "Bill", "action": "navigate"}
{"timestamp": 1487098169, "user": "Tran", "action": "search"}
{"timestamp": 1487184716, "user": "Tran", "action": "search"}
{"timestamp": 1487298169, "user": "Tran", "action": "search"}
{"timestamp": 1487271407, "user": "Bill", "action": "search"}
{"timestamp": 1487271467, "user": "John", "action": "navigate"}
{"timestamp": 1487271527, "user": "Dave", "action": "browse"}
let data = fs.readFileSync('user_file.txt');
let startTime = data[0].timestamp
let timelaps = startTime + 8 min
let users = {}
for(let i= 0, let startTimeIdx = 0; i < arr.length; i++){
// check if user exist in users
// push into user action
// check timelaps is > timestamp of [i]
// update timelaps
// remove startTimeIdx from user till you find one that exist in the timelaps window
// check is user you just added to has > 20 actions
// check if user has > 10 of the same actions
// mark user as bot and ignore all other cases if this user shows up again
}
example of users : {
{ Eric: {action: {navigation: 0, keypress: 1} , bot: true},
{ Bill: action: {browse: 1}},
{ John: action: {idle: 1}},
{ Tran: action: {search: 1}, bot: true},
}
最佳答案
好的,这比乍一看要难,但是有很多方法可以做到。
就目前所提供的数据而言,其中并没有漫游器。
对于这样的问题,我原本期望数据已经按时间戳排好序,但实际上并没有,因此我最终不得不首先对数据进行排序。
代码在下面,请参见代码中的注释以获取解释
const fs = require('fs');
// Read the whole log file and split it into lines; each line holds one JSON
// object of the form {"timestamp": <unix seconds>, "user": ..., "action": ...}.
// Blank lines are dropped: a file that ends with a newline leaves an empty
// string after the split, and JSON.parse('') throws a SyntaxError.
// The lines are then sorted by ascending timestamp. Each line is parsed once
// up front (instead of twice per comparison inside the comparator) and the
// result is mapped back to the raw strings, so `data` remains an array of
// JSON text lines.
const data = fs.readFileSync('user_file.txt','utf8')
  .split(/\r?\n/)
  .filter(function(line){
    return line.trim() !== '';
  })
  .map(function(line){
    return {line: line, timestamp: JSON.parse(line).timestamp};
  })
  .sort(function(a,b){
    return a.timestamp - b.timestamp;
  })
  .map(function(item){
    return item.line;
  });
// Per-user state, keyed by user name. A prototype-less object is used rather
// than an array so that user names such as "length" or "push" cannot collide
// with built-in properties.
const users = Object.create(null);
//duration in seconds
const duration = 240;
//minimum number of actions that will consider user a bot
const minEntries = 10;
//minimum number of same actions that will consider user a bot
const minActions = 5;
// Returns the time span covered by a user's buffered log entries: the
// timestamp of the last entry minus the timestamp of the first entry.
// Timestamps are unix seconds, so the result is in seconds. The value is a
// meaningful duration only when the entries are ordered oldest-first.
// Throws a TypeError when user.entries is empty.
function checkduration(user)
{
  const log = user.entries;
  const oldest = log[0];
  const newest = log[log.length - 1];
  return newest.timestamp - oldest.timestamp;
}
// Names of the users identified as bots, in the order they were detected.
const bots = [];
// Walk the log lines in order and keep, per user, a sliding window of that
// user's most recent entries spanning at most `duration` seconds. A user is
// flagged as a bot as soon as the window holds at least `minEntries` entries
// of which at least `minActions` share the same action.
data.forEach(function(log){
  // Each line is the JSON text of one log entry.
  const jsonLog = JSON.parse(log);
  const name = jsonLog.user;
  // First time this user is seen: start with an empty window, not a bot.
  if(users[name] === undefined)
  {
    users[name] = {entries: [], bot: false};
  }
  const record = users[name];
  // Already flagged: nothing more to learn from this user's entries.
  if(record.bot === true)
  {
    return;
  }
  record.entries.push(jsonLog);
  // Drop entries from the front until the window spans at most `duration`
  // seconds; the entry just added always stays.
  while(record.entries.length > 1 && checkduration(record) > duration)
  {
    record.entries.shift();
  }
  // Not enough actions in the window yet. The comparison must be "fewer
  // than", not "different from": a window that grows past minEntries
  // without shrinking still has to be checked on every new entry.
  if(record.entries.length < minEntries)
  {
    return;
  }
  // Count the occurrences of each action in the window and track the highest
  // count. A Map is used so that action names cannot collide with built-in
  // properties.
  const counts = new Map();
  let maxActions = 0;
  record.entries.forEach(function(entry){
    const seen = (counts.get(entry.action) || 0) + 1;
    counts.set(entry.action, seen);
    if(seen > maxActions)
    {
      maxActions = seen;
    }
  });
  // Enough repetitions of a single action: flag the user and report it once.
  if(maxActions >= minActions)
  {
    record.bot = true;
    bots.push(name);
  }
});
console.log(bots);