POJ 3693 Maximum repetition substring

Reading time ~4 minutes

Description

给出一个字符串,求最大重复子串(重复次数最多,如果存在多个,求字典序最小的那一个)。

Solution

后缀数组的应用。直接下手不好解决,不妨枚举一下循环节的长度。我们发现,任何一个循环节为重复子串总会包含至少两个 字符。那么考虑枚举两个相邻的上述字符,可以通过后缀数组求出的长度,但是最长公共子串的开头并不一定是我们枚举的字符,所以还需要求出最长向前能匹配多少。这可以通过倒过来做一次后缀数组得到。那么我们现在有了一个极长区间,可以求得这个区间的循环节个数,也就可以求出一个区间满足开头落在这个区间内部的最大重复子串的循环节个数都为。只需要找字典序最小的一个。那么用表查一下这个区间内最小的的后缀就好了。时间复杂度。有一个优化,当求得一个极长重复子串之后,落在子串内的都不用再枚举了。

#include <map>
#include <cmath>
#include <vector>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>

using namespace std;

const int N = 100000 + 5;
const int LOG = 17;

int n;
int m;

char s[N];

struct suffix_array {
  char s[N];
  
  int c[N];
  int t0[N];
  int t1[N];
  int sa[N];

  int rk[N];
  int ht[N];

  inline void build() {
    int *x = t0;
    int *y = t1;
    m = 256;
    s[n++] = 0;
    for (int i = 0; i < m; ++i) {
      c[i] = 0;
    }
    for (int i = 0; i < n; ++i) {
      x[i] = s[i];
      ++c[x[i]];
    }
    for (int i = 1; i < m; ++i) {
      c[i] += c[i - 1];
    }
    for (int i = n - 1; i >= 0; --i) {
      int t = --c[x[i]];
      sa[t] = i;
    }
    for (int k = 1; k <= n; k <<= 1) {
      int p = 0;
      for (int i = n - k; i < n; ++i) {
        y[p++] = i;
      }
      for (int i = 0; i < n; ++i) {
        if (sa[i] >= k) {
          y[p++] = sa[i] - k;
        }
      }
      for (int i = 0; i < m; ++i) {
        c[i] = 0;
      }
      for (int i = 0; i < n; ++i) {
        ++c[x[y[i]]];
      }
      for (int i = 1; i < m; ++i) {
        c[i] += c[i - 1];
      }
      for (int i = n - 1; i >= 0; --i) {
        int t = --c[x[y[i]]];
        sa[t] = y[i];
      }
      p = 1;
      swap(x, y);
      x[sa[0]] = 0;
      for (int i = 1; i < n; ++i) {
        if (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) {
          x[sa[i]] = p - 1;
        } else {
          x[sa[i]] = p++;
        }
      }
      if (p >= n) {
        break;
      }
      m = p;
    }
    --n;
    for (int i = 0; i < n; ++i) {
      sa[i] = sa[i + 1];
    }
    for (int i = 0; i < n; ++i) {
      rk[sa[i]] = i;
    }
    int k = 0;
    for (int i = 0; i < n; ++i) {
      if (k) {
        --k;
      }
      if (rk[i] == 0) {
        k = 0;
        continue;
      }
      int j = sa[rk[i] - 1];

      while (s[i + k] == s[j + k]) {
        ++k;
      }
      ht[rk[i]] = k;
    }
  }

  int mn[N][LOG];

  inline void init_st() {
    for (int i = 0; i < n; ++i) {
      mn[i][0] = ht[i];
    }
    for (int k = 1; k < LOG; ++k) {
      for (int i = 0; i < n; ++i) {
        mn[i][k] = mn[i][k - 1];
        if (i + (1 << (k - 1)) < n) {
          mn[i][k] = min(mn[i][k], mn[i + (1 << (k - 1))][k - 1]);
        }
      }
    }
  }

  inline int query(int l, int r) {
    if (l > r) {
      swap(l, r);
    }
    ++l;
    int k = log(r - l + 1) / log(2);
    return min(mn[l][k], mn[r - (1 << k) + 1][k]);
  }

};

suffix_array pre;
suffix_array suf;

int mx = 0;
int cur;

int mn[N][LOG];

inline void init_rk() {
  for (int i = 0; i < n; ++i) {
    mn[i][0] = suf.rk[i];
  }
  for (int k = 1; k < LOG; ++k) {
    for (int i = 0; i < n; ++i) {
      mn[i][k] = mn[i][k - 1];
      if (i + (1 << (k - 1)) < n) {
        mn[i][k] = min(mn[i][k], mn[i + (1 << (k - 1))][k - 1]);
      }
    }
  }
}

inline int query_rk(int l, int r) {
  int k = log(r - l + 1) / log(2);
  return min(mn[l][k], mn[r - (1 << k) + 1][k]);
}

int main() {
  int kase = 0;
  while (true) {
    scanf("%s", s);
    if (s[0] == '#') {
      return 0;
    }
    n = strlen(s);
    
    memcpy(suf.s, s, sizeof(s));
    suf.build();
    suf.init_st();
    reverse(s, s + n);
    memcpy(pre.s, s, sizeof(s));
    pre.build();
    pre.init_st();
    reverse(s, s + n);

    init_rk();

    mx = 0;
    int ans_len = 0;
    for (int l = 1; l <= n; ++l) {
      for (int i = 0; (i + 1) * l < n; ++i) {
        int p = i * l;
        int q = (i + 1) * l;
        if (s[p] != s[q]) {
          continue;
        }
        int suf_max = suf.query(suf.rk[p], suf.rk[q]);
        int pre_max = pre.query(pre.rk[n - p - 1], pre.rk[n - q - 1]);
        int low = p - pre_max + 1;
        int t = (suf_max + pre_max - 1) / l + 1;
        int t0 = l * t - l - 1;
        int high = p + suf_max - 1 - t0;
        if (high < low) {
          continue;
        }
        int mn_rk = suf.sa[query_rk(low, high)];

        if (t == mx) {
          if (suf.rk[cur] > suf.rk[mn_rk]) {
            cur = mn_rk;
            ans_len = t * l;
          }
        } else if (t > mx) {
          mx = t;
          cur = mn_rk;
          ans_len = t * l;
        }
        i = (p + pre_max - 1) / l;
      }
     
    }
    printf("Case %d: ", ++kase);
    for (int i = cur; i < cur + ans_len; ++i) {
      putchar(s[i]);
    }
    puts("");
  }
  return 0;
}

51Nod 1238 最小公倍数之和 V3

## Description求:\\[\sum_{i=1}^n \sum_{j=1}^n lcm(i,j)\\]$n \leq 10^{10}$## Solution我们要求的就是下面这个式子:\\[\sum_{i=1}^n \sum_{j=1}^n \frac{ij}{\...… Continue reading

虚树学习笔记

Published on November 24, 2017

NOIP2017 爆炸记

Published on November 24, 2017