到目前为止,这是我设法做到的:

  • O(n ^ 2)中给定字符串的所有子字符串计算prefix function
  • 检查在 O(n ^ 3)中执行所有可能的操作组合的结果

  • 我的解决方案通过了n≤2000的所有测试,但是超过了2000 <n≤5000的时间限制。这是它的代码:
    #include <iostream>
    #include <string>
    
    using namespace std;
    
    const int MAX_N = 5000;
    
    int result; // 1 less than actual
    
    // [x][y] corresponds to substring that starts at position `x` and ends at position `x + y` =>
    // => corresponding substring length is `y + 1`
    int lps[MAX_N][MAX_N]; // prefix function for the substring s[x..x+y]
    bool checked[MAX_N][MAX_N]; // whether substring s[x..x+y] is processed by check function
    
    // length is 1 less than actual
    void check(int start, int length) {
        checked[start][length] = true;
        if (length < result) {
            if (length == 0) {
                cout << 1; // actual length = length + 1 = 0 + 1 = 1
                exit(0); // 1 is the minimum possible result
            }
            result = length;
        }
        // iteration over all proper prefixes that are also suffixes
        // i - current prefix length
        for (int i = lps[start][length]; i != 0; i = lps[start][i - 1]) {
            int newLength = length - i;
            int newStart = start + i;
            if (!checked[start][newLength])
                check(start, newLength);
            if (!checked[newStart][newLength])
                check(newStart, newLength);
        }
    }
    
    int main()
    {
        string str;
        cin >> str;
        int n = str.length();
        // lps calculation runs in O(n^2)
        for (int l = 0; l < n; l++) {
            int subLength = n - l;
            lps[l][0] = 0;
            checked[l][0] = false;
            for (int i = 1; i < subLength; ++i) {
                int j = lps[l][i - 1];
                while (j > 0 && str[i + l] != str[j + l])
                    j = lps[l][j - 1];
                if (str[i + l] == str[j + l])  j++;
                lps[l][i] = j;
                checked[l][i] = false;
            }
        }
        result = n - 1;
        // checking all possible operations combinations in O(n^3)
        check(0, n - 1);
        cout << result + 1;
    }
    

    问:还有更有效的解决方案吗?

    最佳答案

    这是获取对数因子的一种方法。如果可以到达子串dp[i][j],则让s[i..j]为true。然后:

    dp[0][length(s)-1] ->
      true
    
    dp[0][j] ->
      if s[0] != s[j+1]:
        false
      else:
        true if any dp[0][k]
          for j < k ≤ (j + longestMatchRight[0][j+1])
    
      (The longest match we can use is
       also bound by the current range.)
    
    (Initialise left side similarly.)
    

    现在从外面迭代:
    for i = 1 to length(s)-2:
      for j = length(s)-2 to i:
        dp[i][j] ->
          // We removed on the right
          if s[i] != s[j+1]:
            false
          else:
            true if any dp[i][k]
              for j < k ≤ (j + longestMatchRight[i][j+1])
    
          // We removed on the left
          if s[i-1] != s[j]:
            true if dp[i][j]
          else:
            true if any dp[k][j]
              for (i - longestMatchLeft[i-1][j]) ≤ k < i
    

    我们可以预先计算出(i, j)中每个起始对O(n^2)的最长匹配,
    longest(i, j) ->
      if s[i] == s[j]:
        return 1 + longest(i + 1, j + 1)
      else:
        return 0
    

    这将使我们能够检查从i中的索引jO(1)开始的子字符串匹配。 (我们需要左右方向。)

    如何获得对数因子

    我们可以想到一种提出数据结构的方法,该方法可以让我们确定是否
    any dp[i][k]
      for j < k ≤ (j + longestMatchRight[i][j+1])
    
    (And similarly for the left side.)
    

    O(log n)中,考虑到我们已经看到了这些值。

    这是带有段树的C++代码(用于左右查询,所以是O(n^2 * log n)),其中包括Bananon的测试生成器。对于5000个“a”字符,它以3.54s,420 MB(https://ideone.com/EIrhnR)运行。为了减少内存,在单个行上实现了一个段树(我仍然需要研究对左侧查询执行的操作,以进一步减少内存)。
    #include <iostream>
    #include <string>
    #include <ctime>
    #include <random>
    #include <algorithm>    // std::min
    
    using namespace std;
    
    const int MAX_N = 5000;
    
    int seg[2 * MAX_N];
    int segsL[MAX_N][2 * MAX_N];
    int m[MAX_N][MAX_N][2];
    int dp[MAX_N][MAX_N];
    int best;
    
    // Adapted from https://codeforces.com/blog/entry/18051
    void update(int n, int p, int value) { // set value at position p
      for (seg[p += n] = value; p > 1; p >>= 1)
        seg[p >> 1] = seg[p] + seg[p ^ 1];
    }
    // Adapted from https://codeforces.com/blog/entry/18051
    int query(int n, int l, int r) { // sum on interval [l, r)
      int res = 0;
      for (l += n, r += n; l < r; l >>= 1, r >>= 1) {
        if (l & 1) res += seg[l++];
        if (r & 1) res += seg[--r];
      }
      return res;
    }
    // Adapted from https://codeforces.com/blog/entry/18051
    void updateL(int n, int i, int p, int value) { // set value at position p
      for (segsL[i][p += n] = value; p > 1; p >>= 1)
        segsL[i][p >> 1] = segsL[i][p] + segsL[i][p ^ 1];
    }
    // Adapted from https://codeforces.com/blog/entry/18051
    int queryL(int n, int i, int l, int r) { // sum on interval [l, r)
      int res = 0;
      for (l += n, r += n; l < r; l >>= 1, r >>= 1) {
        if (l & 1) res += segsL[i][l++];
        if (r & 1) res += segsL[i][--r];
      }
      return res;
    }
    
    // Code by גלעד ברקן
    void precalc(int n, string & s) {
      int i, j;
      for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
          // [longest match left, longest match right]
          m[i][j][0] = (s[i] == s[j]) & 1;
          m[i][j][1] = (s[i] == s[j]) & 1;
        }
      }
    
      for (i = n - 2; i >= 0; i--)
        for (j = n - 2; j >= 0; j--)
          m[i][j][1] = s[i] == s[j] ? 1 + m[i + 1][j + 1][1] : 0;
    
      for (i = 1; i < n; i++)
        for (j = 1; j < n; j++)
          m[i][j][0] = s[i] == s[j] ? 1 + m[i - 1][j - 1][0] : 0;
    }
    
    // Code by גלעד ברקן
    void f(int n, string & s) {
      int i, j, k, longest;
    
      dp[0][n - 1] = 1;
      update(n, n - 1, 1);
      updateL(n, n - 1, 0, 1);
    
      // Right side initialisation
      for (j = n - 2; j >= 0; j--) {
        if (s[0] == s[j + 1]) {
          longest = std::min(j + 1, m[0][j + 1][1]);
          for (k = j + 1; k <= j + longest; k++)
            dp[0][j] |= dp[0][k];
          if (dp[0][j]) {
            update(n, j, 1);
            updateL(n, j, 0, 1);
            best = std::min(best, j + 1);
          }
        }
      }
    
      // Left side initialisation
      for (i = 1; i < n; i++) {
        if (s[i - 1] == s[n - 1]) {
          // We are bound by the current range
          longest = std::min(n - i, m[i - 1][n - 1][0]);
          for (k = i - 1; k >= i - longest; k--)
            dp[i][n - 1] |= dp[k][n - 1];
          if (dp[i][n - 1]) {
            updateL(n, n - 1, i, 1);
            best = std::min(best, n - i);
          }
        }
      }
    
      for (i = 1; i <= n - 2; i++) {
        for (int ii = 0; ii < MAX_N; ii++) {
          seg[ii * 2] = 0;
          seg[ii * 2 + 1] = 0;
        }
        update(n, n - 1, dp[i][n - 1]);
        for (j = n - 2; j >= i; j--) {
          // We removed on the right
          if (s[i] == s[j + 1]) {
            // We are bound by half the current range
            longest = std::min(j - i + 1, m[i][j + 1][1]);
            //for (k=j+1; k<=j+longest; k++)
            //dp[i][j] |= dp[i][k];
            if (query(n, j + 1, j + longest + 1)) {
              dp[i][j] = 1;
              update(n, j, 1);
              updateL(n, j, i, 1);
            }
          }
          // We removed on the left
          if (s[i - 1] == s[j]) {
            // We are bound by half the current range
            longest = std::min(j - i + 1, m[i - 1][j][0]);
            //for (k=i-1; k>=i-longest; k--)
            //dp[i][j] |= dp[k][j];
            if (queryL(n, j, i - longest, i)) {
              dp[i][j] = 1;
              updateL(n, j, i, 1);
              update(n, j, 1);
            }
          }
          if (dp[i][j])
            best = std::min(best, j - i + 1);
        }
      }
    }
    
    int so(string s) {
      for (int i = 0; i < MAX_N; i++) {
        seg[i * 2] = 0;
        seg[i * 2 + 1] = 0;
        for (int j = 0; j < MAX_N; j++) {
          segsL[i][j * 2] = 0;
          segsL[i][j * 2 + 1] = 0;
          m[i][j][0] = 0;
          m[i][j][1] = 0;
          dp[i][j] = 0;
        }
      }
      int n = s.length();
      best = n;
      precalc(n, s);
      f(n, s);
      return best;
    }
    // End code by גלעד ברקן
    
    // Code by Bananon  =======================================================================
    
    int result;
    
    int lps[MAX_N][MAX_N];
    bool checked[MAX_N][MAX_N];
    
    void check(int start, int length) {
      checked[start][length] = true;
      if (length < result) {
        result = length;
      }
      for (int i = lps[start][length]; i != 0; i = lps[start][i - 1]) {
        int newLength = length - i;
        if (!checked[start][newLength])
          check(start, newLength);
        int newStart = start + i;
        if (!checked[newStart][newLength])
          check(newStart, newLength);
      }
    }
    
    int my(string str) {
      int n = str.length();
      for (int l = 0; l < n; l++) {
        int subLength = n - l;
        lps[l][0] = 0;
        checked[l][0] = false;
        for (int i = 1; i < subLength; ++i) {
          int j = lps[l][i - 1];
          while (j > 0 && str[i + l] != str[j + l])
            j = lps[l][j - 1];
          if (str[i + l] == str[j + l]) j++;
          lps[l][i] = j;
          checked[l][i] = false;
        }
      }
      result = n - 1;
      check(0, n - 1);
      return result + 1;
    }
    
    // generate =================================================================
    
    bool rndBool() {
      return rand() % 2 == 0;
    }
    
    int rnd(int bound) {
      return rand() % bound;
    }
    
    void untrim(string & str) {
      int length = rnd(str.length());
      int prefixLength = rnd(str.length()) + 1;
      if (rndBool())
        str.append(str.substr(0, prefixLength));
      else {
        string newStr = str.substr(str.length() - prefixLength, prefixLength);
        newStr.append(str);
        str = newStr;
      }
    }
    
    void rndTest(int minTestLength, string s) {
      while (s.length() < minTestLength)
        untrim(s);
      int myAns = my(s);
      int soAns = so(s);
      cout << myAns << " " << soAns << '\n';
      if (soAns != myAns) {
        cout << s;
        exit(0);
      }
    }
    
    int main() {
      int minTestLength;
      cin >> minTestLength;
      string seed;
      cin >> seed;
      while (true)
        rndTest(minTestLength, seed);
    }
    

    这是JavaScript代码(未改进对数因子),以表明重现有效。 (为了获得对数因子,我们将内部k循环替换为单个范围查询。)

    debug = 1
    
    function precalc(s){
      let m = new Array(s.length)
      for (let i=0; i<s.length; i++){
        m[i] = new Array(s.length)
        for (let j=0; j<s.length; j++){
          // [longest match left, longest match right]
          m[i][j] = [(s[i] == s[j]) & 1, (s[i] == s[j]) & 1]
        }
      }
    
      for (let i=s.length-2; i>=0; i--)
        for (let j=s.length-2; j>=0; j--)
          m[i][j][1] = s[i] == s[j] ? 1 + m[i+1][j+1][1] : 0
    
      for (let i=1; i<s.length; i++)
        for (let j=1; j<s.length; j++)
          m[i][j][0] = s[i] == s[j] ? 1 + m[i-1][j-1][0] : 0
    
      return m
    }
    
    function f(s){
      m = precalc(s)
      let n = s.length
      let min = s.length
      let dp = new Array(s.length)
    
      for (let i=0; i<s.length; i++)
        dp[i] = new Array(s.length).fill(0)
    
      dp[0][s.length-1] = 1
    
      // Right side initialisation
      for (let j=s.length-2; j>=0; j--){
        if (s[0] == s[j+1]){
          let longest = Math.min(j + 1, m[0][j+1][1])
          for (let k=j+1; k<=j+longest; k++)
            dp[0][j] |= dp[0][k]
          if (dp[0][j])
            min = Math.min(min, j + 1)
        }
      }
    
      // Left side initialisation
      for (let i=1; i<s.length; i++){
        if (s[i-1] == s[s.length-1]){
          let longest = Math.min(s.length - i, m[i-1][s.length-1][0])
          for (let k=i-1; k>=i-longest; k--)
            dp[i][s.length-1] |= dp[k][s.length-1]
          if (dp[i][s.length-1])
            min = Math.min(min, s.length - i)
        }
      }
    
      for (let i=1; i<=s.length-2; i++){
        for (let j=s.length-2; j>=i; j--){
          // We removed on the right
          if (s[i] == s[j+1]){
            // We are bound by half the current range
            let longest = Math.min(j - i + 1, m[i][j+1][1])
            for (let k=j+1; k<=j+longest; k++)
              dp[i][j] |= dp[i][k]
          }
          // We removed on the left
          if (s[i-1] == s[j]){
            // We are bound by half the current range
            let longest = Math.min(j - i + 1, m[i-1][j][0])
            for (let k=i-1; k>=i-longest; k--)
              dp[i][j] |= dp[k][j]
          }
          if (dp[i][j])
            min = Math.min(min, j - i + 1)
        }
      }
    
      if (debug){
        let str = ""
        for (let row of dp)
          str += row + "\n"
        console.log(str)
      }
    
      return min
    }
    
    function main(s){
      var strs = [
        "caaca",
        "bbabbbba",
        "baabbabaa",
        "bbabbba",
        "bbbabbbbba",
        "abbabaabbab",
        "abbabaabbaba",
        "aabaabaaabaab",
        "bbabbabbb"
      ]
    
      for (let s of strs){
        let t = new Date
        console.log(s)
        console.log(f(s))
        //console.log((new Date - t)/1000)
        console.log("")
      }
    }
    
    main()

    关于c++ - 高效的字符串截断算法,顺序删除相等的前缀和后缀,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/59687292/

    10-11 22:34
    查看更多