我当时正在为CS类开发Datalog解释器,但遇到一个奇怪的问题,即我的Rule评估花了太多时间才能完成。在查看了我的代码之后,我在下面进行了两项修改,以固定我的评估以正确的通过次数执行:

//original form
bool addedFacts = false;
for (X x: xs) {
    addedFacts = addedFacts || set.insert(x).second;
}
//modified form
bool addedFacts = false;
for (X x: xs) {
    if (set.insert(x).second) {
        addedFacts = true;
    }
}

在我看来,这两个代码结构在逻辑上是等效的。有一个原因导致一个人正确执行而一个人执行错误/效率低下吗?
这是一个正在发生的问题的可构建示例:
#include <iostream>
#include <set>
#include <vector>

using std::set;
using std::vector;
using std::cout;
using std::endl;

const int CAP = 100;

class Rule {
public:
    int factor;
    Rule(int factor) {
        this->factor = factor;
    }
    bool evaluateInefficient(set<int>& facts) {
        vector<int> data;
        bool addedFacts = false;
        for (int fact : facts) {
            data.push_back(fact);
        }
        for (int datum : data) {
            int newFact = datum * factor;
            if (newFact < CAP) {
                addedFacts = addedFacts || facts.insert(newFact).second;
            }
        }
        return addedFacts;
    }
    bool evaluate(set<int>& facts) {
        vector<int> data;
        bool addedFacts = false;
        for (int fact : facts) {
            data.push_back(fact);
        }
        for (int datum : data) {
            int newFact = datum * factor;
            if (newFact < CAP) {
                if (facts.insert(newFact).second) {
                    addedFacts = true;
                }
            }
        }
        return addedFacts;
    }
};

int doublyInefficient(vector<Rule>& rules) {
    set<int> facts;
    facts.insert(1);
    bool addedFacts = true;
    int passes = 0;
    while (addedFacts) {
        passes++;
        addedFacts = false;
        for (Rule rule : rules) {
            addedFacts = addedFacts || rule.evaluateInefficient(facts);
        }
    }
    return passes;
}

int singlyInefficient(vector<Rule>& rules) {
    set<int> facts;
    facts.insert(1);
    bool addedFacts = true;
    int passes = 0;
    while (addedFacts) {
        passes++;
        addedFacts = false;
        for (Rule rule : rules) {
            addedFacts = addedFacts || rule.evaluate(facts);
        }
    }
    return passes;
}

int efficient(vector<Rule>& rules) {
    set<int> facts;
    facts.insert(1);
    bool addedFacts = true;
    int passes = 0;
    while (addedFacts) {
        passes++;
        addedFacts = false;
        for (Rule rule : rules) {
            if (rule.evaluate(facts)) {
                addedFacts = true;
            }
        }
    }
    return passes;
}

int main(int argc, char* argv[]) {
    //build the rules
    vector<Rule> rules;
    rules.push_back(Rule(2));
    rules.push_back(Rule(3));
    rules.push_back(Rule(5));
    rules.push_back(Rule(7));
    rules.push_back(Rule(11));
    rules.push_back(Rule(13));
    //Show three different codes that should (in my mind) take the same amount of passes over the rules but don't
    cout << "Facts populated after " << doublyInefficient(rules) << " passes through the Rules." << endl;
    cout << "Facts populated after " << singlyInefficient(rules) << " passes through the Rules." << endl;
    cout << "Facts populated after " << efficient(rules) << " passes through the Rules." << endl;
    getchar();
}

在Visual Studio 2017上以调试和 Release模式(32位)运行时,我得到以下输出。据我所知,代码未进行优化。
Facts populated after 61 passes through the Rules.
Facts populated after 17 passes through the Rules.
Facts populated after 7 passes through the Rules.

最佳答案

由于短路评估而产生差异:
考虑形式为(expr1 || expr2)的表达式。短路意味着,如果expr1评估为true,表达式expr2根本不会得到评估(参见this online c++ standard draft,强调是我的):



因此,在表达式addedFacts || set.insert(x).second中,从首次将addedFacts变为true的点开始,将不再执行表达式set.insert(x).second。我想这是“错误的”行为,因为您的set那时将不包含相应的x

10-07 21:00