我参加了一次模拟面试,被问到了下面这个问题。我开始着手解决,但现在卡住了。如果能给出解决方案和解释,将不胜感激。

您有一个数据文件,每个对象看起来类似于:

{"timestamp": 1487722870, "user": "Una", "action": "navigate"}


时间戳记是unix时间,以秒为单位。

编写函数 bot_detection(input_file_path),找出所有机器人用户:即在任意4分钟的时间段内至少执行了10次操作,并且其中至少5次是同一种操作(例如 navigate)的用户。

输出找到的机器人名称的列表。

例如,如果您确定用户Tran是系统中唯一的机器人,则该函数应返回["Tran"]

我目前的做法:

 // the data looks like this. first i read it into my file

    {"timestamp": 1487184625, "user": "Eric", "action": "navigate"}
    {"timestamp": 1487184655, "user": "Bill", "action": "browse"}
    {"timestamp": 1487184685, "user": "Eric", "action": "key press"}
    {"timestamp": 1487184715, "user": "John", "action": "idle"}
    {"timestamp": 1487184755, "user": "Tran", "action": "search"}
    {"timestamp": 1487098049, "user": "Tran", "action": "click"}
    {"timestamp": 1487098079, "user": "Eric", "action": "click"}
    {"timestamp": 1487098109, "user": "Tran", "action": "click"}
    {"timestamp": 1487098139, "user": "Bill", "action": "navigate"}
    {"timestamp": 1487098169, "user": "Tran", "action": "search"}
    {"timestamp": 1487184716, "user": "Tran", "action": "search"}
    {"timestamp": 1487298169, "user": "Tran", "action": "search"}
    {"timestamp": 1487271407, "user": "Bill", "action": "search"}
    {"timestamp": 1487271467, "user": "John", "action": "navigate"}
    {"timestamp": 1487271527, "user": "Dave", "action": "browse"}


// Draft outline from the question (pseudo-code, not runnable as written): walk the
// records once while keeping per-user action tallies for a moving time window.
// NOTE(review): readFileSync is called without an encoding, so `data` is a Buffer;
// data[0].timestamp is undefined until the contents are decoded, split into lines
// and JSON-parsed.
let data = fs.readFileSync('user_file.txt');
      let startTime = data[0].timestamp
      // NOTE(review): `8 min` is a placeholder, not valid JavaScript. The task statement
      // above gives timestamps in seconds and asks for a 4-minute window - confirm the
      // intended window length.
      let timelaps =  startTime + 8 min
      // Per-user bookkeeping, keyed by user name (see the "example of users" sketch below).
      let users = {}

// NOTE(review): `arr` is never defined here (presumably the parsed records - confirm),
// and a second `let` inside the for-initializer is a syntax error.
// NOTE(review): the thresholds in the comments below (> 20 actions, > 10 of the same
// action) differ from the task statement (at least 10 and at least 5) - confirm.
for(let i= 0, let startTimeIdx = 0; i < arr.length; i++){
// check if user exist in users
  // push into user action
  // check timelaps is > timestamp of [i]
     // update timelaps
     // remove startTimeIdx from user till you find one that exist in the timelaps window
  // check is user you just added to has > 20 actions
      // check if user has > 10 of the same actions
           // mark user as bot and ignore all other cases if this user shows up again
}
example of users : {
  { Eric: {action: {navigation: 0, keypress: 1} , bot: true},
  { Bill: action: {browse: 1}},
  { John: action: {idle: 1}},
  { Tran: action: {search: 1}, bot: true},
}

最佳答案

好的,这比乍一看要难,但是有很多方法可以做到。
就目前提供的数据而言,其中并没有机器人。
对于这类问题,我本以为数据会按时间戳排好序,但实际上并没有,因此我不得不先对数据进行排序。
代码在下面,请参见代码中的注释以获取解释

//length of the sliding window, in seconds (4 minutes)
const duration = 240;
//minimum number of actions inside one window that will consider user a bot
const minEntries = 10;
//minimum number of same actions inside one window that will consider user a bot
const minActions = 5;

/**
 * Returns the time span covered by a user's buffered entries.
 *
 * @param {{entries: Array<{timestamp: number}>}} user - per-user state; `entries`
 *   must be non-empty and ordered by ascending timestamp.
 * @returns {number} newest timestamp minus oldest timestamp.
 */
function checkduration(user)
{
    const entries = user.entries;
    return entries[entries.length - 1].timestamp - entries[0].timestamp;
}

/**
 * Returns how often the most frequent action occurs in `entries`.
 *
 * @param {Array<{action: string}>} entries - log records to tally.
 * @returns {number} highest per-action count, or 0 for an empty list.
 */
function maxSameActionCount(entries)
{
    //a Map keeps arbitrary action names from colliding with built-in properties
    const counts = new Map();
    let highest = 0;
    for (const entry of entries) {
        const count = (counts.get(entry.action) || 0) + 1;
        counts.set(entry.action, count);
        if (count > highest) {
            highest = count;
        }
    }
    return highest;
}

/**
 * Finds bot users in a list of JSON log lines.
 *
 * A user is reported as a bot when some window of `windowSeconds` holds at least
 * `minTotal` of their actions and at least `minSame` of those share one action.
 *
 * @param {string[]} lines - one JSON object per line with `timestamp` (seconds),
 *   `user` and `action`; blank lines are skipped, order does not matter.
 * @param {{windowSeconds?: number, minTotal?: number, minSame?: number}} [options]
 *   - thresholds; default to `duration`, `minEntries` and `minActions`.
 * @returns {string[]} bot user names, each once, in order of detection.
 * @throws {SyntaxError} if a non-blank line is not valid JSON.
 */
function detectBots(lines, options = {})
{
    const {
        windowSeconds = duration,
        minTotal = minEntries,
        minSame = minActions,
    } = options;

    //parse each non-blank line once (a trailing newline yields an empty last line),
    //then order by time so that every user's entries arrive chronologically
    const logs = lines
        .filter((line) => line.trim() !== '')
        .map((line) => JSON.parse(line))
        .sort((a, b) => a.timestamp - b.timestamp);

    //keyed by user name; a Map is safe for names such as "length" or "constructor"
    const users = new Map();
    const bots = [];

    for (const log of logs) {
        let user = users.get(log.user);
        if (user === undefined) {
            user = { entries: [], bot: false };
            users.set(log.user, user);
        }
        if (user.bot) {
            //already reported, nothing more to learn about this user
            continue;
        }

        user.entries.push(log);

        //discard entries that fall outside the window ending at the newest entry
        while (user.entries.length > 1 && checkduration(user) > windowSeconds) {
            user.entries.shift();
        }

        //">=" matters: a window may already hold more than minTotal entries when
        //the action that completes the minSame requirement finally arrives
        if (user.entries.length >= minTotal
            && maxSameActionCount(user.entries) >= minSame) {
            user.bot = true;
            bots.push(log.user);
        }
    }
    return bots;
}

/**
 * Reads a log file with one JSON object per line and returns the detected bots.
 *
 * @param {string} input_file_path - path of the log file to analyse.
 * @returns {string[]} bot user names, see detectBots.
 * @throws {Error} if the file cannot be read.
 */
function bot_detection(input_file_path)
{
    //loaded here so the detection logic above stays usable without the filesystem
    const fs = require('fs');
    const lines = fs.readFileSync(input_file_path, 'utf8').split('\n');
    return detectBots(lines);
}

//run against the sample file only when executed directly (node thisfile.js);
//the typeof guards keep the file loadable where CommonJS globals are absent
if (typeof require !== 'undefined' && typeof module !== 'undefined' && require.main === module) {
    console.log(bot_detection('user_file.txt'));
}

09-26 23:18