This approach leverages binary search in conjunction with the Rabin-Karp (rolling hash) algorithm to find the longest duplicate substring within a given string.
We perform binary search on the length of the possible substring, starting from 1 to length of s
-1. For each mid-length obtained from the binary search, we use a rolling hash function to hash each substring of length mid
. This hash is used to quickly identify duplicates due to its constant time complexity for fixed-length substrings.
Time Complexity: O(n log n), where n is the length of the string. The binary search takes O(log n), and for each midpoint, hashing takes O(n).
Space Complexity: O(n), primarily for storing hash values and powers of base.
1#include <stdio.h>
2#include <string.h>
3#include <stdlib.h>
4#include <stdbool.h>
5
6#define MOD 10000007
7#define BASE 26
8
9unsigned long long rolling_hash(char *s, int start, int len, unsigned long long last_hash, unsigned long long power) {
10 if (start == 0) {
11 unsigned long long hash = 0;
12 for (int i = 0; i < len; ++i) {
13 hash = (hash * BASE + (s[i] - 'a')) % MOD;
14 }
15 return hash;
16 } else {
17 last_hash = (BASE * (last_hash - power * (s[start - 1] - 'a')) + (s[start + len - 1] - 'a')) % MOD;
18 if (last_hash < 0) last_hash += MOD;
19 return last_hash;
20 }
21}
22
23char *longestDupSubstring(char *s) {
24 int len = strlen(s);
25 int left = 0, right = len - 1, start = -1, max_len = 0;
26 unsigned long long power[len];
27 power[0] = 1;
28 for (int i = 1; i < len; ++i) power[i] = (power[i - 1] * BASE) % MOD;
29
30 while (left < right) {
31 int mid = left + (right - left) / 2;
32 int m = mid + 1;
33 unsigned long long curr_hash = 0;
34 unsigned long long seen_hashes[MOD] = {0};
35 bool found_dup = false;
36 for (int i = 0; i <= len - m; ++i) {
37 curr_hash = rolling_hash(s, i, m, curr_hash, power[m - 1]);
38 if (seen_hashes[curr_hash % MOD] != 0) {
39 found_dup = true;
40 start = seen_hashes[curr_hash % MOD] - 1;
41 max_len = m;
42 break;
43 } else {
44 seen_hashes[curr_hash % MOD] = i + 1;
45 }
46 }
47 if (found_dup) left = mid + 1;
48 else right = mid;
49 }
50 if (max_len > 0) {
51 char *res = (char *)malloc(sizeof(char) * (max_len + 1));
52 strncpy(res, s + start, max_len);
53 res[max_len] = '\0';
54 return res;
55 }
56 return "";
57}
58
59int main() {
60 char *s = "banana";
61 printf("%s\n", longestDupSubstring(s));
62 return 0;
63}
64
This C code uses binary search and rolling hashing to find the longest duplicated substring. It iteratively searches for the length and identifies duplicates using a hash table with modular arithmetic to minimize collision.
This method involves constructing a suffix array from the input string and then performing binary search on the suffixes to find the longest duplicate substring.
Using suffix arrays, we can efficiently sort and group starting indices of the given string. Then, by employing binary search, we determine the largest-length substring that repeats. The Longest Common Prefix (LCP) array helps in assessing the similarity of suffixes at each binary search step.
Time Complexity: O(n^2 log n), primarily due to the sorting step where n is the length of the input string.
Space Complexity: O(n^2), largely for storing pointers to suffixes.
1using System;
2using System.Collections.Generic;
3
4public class Solution {
5 public string LongestDupSubstring(string s) {
6 int n = s.Length;
7 var suffixes = new List<string>();
8 for (int i = 0; i < n; i++) {
9 suffixes.Add(s.Substring(i));
10 }
11 suffixes.Sort();
12
13 string result = "";
14 for (int i = 1; i < n; i++) {
15 int len = CommonPrefixLength(suffixes[i - 1], suffixes[i]);
16 if (len > result.Length) {
17 result = suffixes[i].Substring(0, len);
18 }
19 }
20 return result;
21 }
22
23 private int CommonPrefixLength(string s1, string s2) {
24 int length = 0;
25 while (length < s1.Length && length < s2.Length && s1[length] == s2[length]) {
26 length++;
27 }
28 return length;
29 }
30
31 public static void Main() {
32 Solution solution = new Solution();
33 Console.WriteLine(solution.LongestDupSubstring("banana"));
34 }
35}
36
This C# implementation constructs a suffix array, sorts it, and evaluates longest common prefixes between sorted suffixes to discern the longest duplicate substring.