Category Archives: Suffix Array

UVA – 11512 – GATTACA – [Suffix Array]

#include <cstdio>
#include <cstring>
#include <utility>
#include <iostream>
#include <algorithm>

using namespace std;

const int MAXN = 100100;

struct SuffixArray {
	pair<int, int> sf[MAXN];
	int sa[MAXN], r[MAXN], height[MAXN], h[MAXN];

	struct mycmp {
		pair<int, int> * suffix;
		mycmp(pair<int, int> * suffix_) :
			suffix(suffix_) {
		}
		bool operator()(int lhs, int rhs) {
			return suffix[lhs] < suffix[rhs];
		}
	};

	void init(int n, const char a[MAXN]) {
		sa[0] = 0; // n = 1
		for (int i = 0; i < n; ++i)
			r[i] = a[i];

		for (int i, m = 1; m < n; m <<= 1) {
			for (i = 0; i < n; sa[i] = i++)
				sf[i] = make_pair(r[i], i + m < n ? r[i + m] : -1);
			sort(sa, sa + n, mycmp(sf));
			for (i = 1, r[sa[0]] = 0; i < n; ++i)
				r[sa[i]] = sf[sa[i]] != sf[sa[i - 1]] ? i : r[sa[i - 1]];
		}

		for (int i = 0; i < n; ++i) {
			if (r[i] == 0) {
				h[i] = 0;
			} else {
				int x = i, y = sa[r[i] - 1], z = max(0, h[i - 1] - 1);
				while (x + z < n && y + z < n && a[x + z] == a[y + z]) {
					++z;
				}
				h[i] = z;
			}
		}
		for (int i = 0; i < n; ++i) {
			height[i] = h[sa[i]];
		}
	}
} sa;

int main() {
	char s[1005];
	int T;
	scanf("%d", &T);
	while (T--) {
		scanf("%s", s);
		int len = strlen(s);
		sa.init(len, s);
		int maxlen = 0, maxi = 0, cnt = 0;
		for (int i = 1; i < len; i++) {
			if (sa.height[i] && sa.height[i] > maxlen) {
				maxlen = sa.height[i], maxi = i, cnt = 2;
			} else if (sa.height[i] && sa.height[i] == maxlen) {
				if (sa.height[i] == sa.height[i - 1])
					cnt++;
			}
		}
		if (cnt == 0)
			printf("%s\n", "No repetitions found!");
		else {
			for (int i = sa.sa[maxi], c = 0; c < maxlen; i++, c++)
				putchar(s[i]);
			printf(" %d\n", cnt);
		}

	}
}
</pre>

UVA – 10679 – I Love Strings!! – [Suffix Array nlgn]

#include <vector>
#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
 
using namespace std;
 
//  O(n log n) - Manber and Myers 
 
#define N 1000005
char str[N]; //input
int rank[N], pos[N]; //output

int cnt[N], next[N]; 
bool bh[N], b2h[N];
 

bool smaller_first_char(int a, int b) {
	return str[a] < str[b];
}
 
void suffixSort(int n) {
	for (int i = 0; i < n; ++i) {
		pos[i] = i;
	}
	sort(pos, pos + n, smaller_first_char);

 
	for (int i = 0; i < n; ++i) {
		bh[i] = i == 0 || str[pos[i]] != str[pos[i - 1]];
		b2h[i] = false;
	}
 
	for (int h = 1; h < n; h <<= 1) {
		int buckets = 0;
		for (int i = 0, j; i < n; i = j) {
			j = i + 1;
			while (j < n && !bh[j])
				j++;
			next[i] = j;
			buckets++;
		}
		if (buckets == n)
			break; 

 
		for (int i = 0; i < n; i = next[i]) {
			cnt[i] = 0;
			for (int j = i; j < next[i]; ++j) {
				rank[pos[j]] = i;
			}
		}
 
		cnt[rank[n - h]]++;
		b2h[rank[n - h]] = true;
		for (int i = 0; i < n; i = next[i]) {
			for (int j = i; j < next[i]; ++j) {
				int s = pos[j] - h;
				if (s >= 0) {
					int head = rank[s];
					rank[s] = head + cnt[head]++;
					b2h[rank[s]] = true;
				}
			}
			for (int j = i; j < next[i]; ++j) {
				int s = pos[j] - h;
				if (s >= 0 && b2h[rank[s]]) {
					for (int k = rank[s] + 1; !bh[k] && b2h[k]; k++)
						b2h[k] = false;
				}
			}
		}
		for (int i = 0; i < n; ++i) {
			pos[rank[i]] = i;
			bh[i] |= b2h[i];
		}
	}
	for (int i = 0; i < n; ++i) {
		rank[pos[i]] = i;
	}
}
 
 
// O(n) - lcp - "Arrays and Its Applications" by Toru Kasai, Gunho Lee

int height[N];
// height[i] = lcp of suffix pos[i] and suffix pos[i-1]
// height[0] = 0
void getHeight(int n) {
	for (int i = 0; i < n; ++i)
		rank[pos[i]] = i;
	height[0] = 0;
	for (int i = 0, h = 0; i < n; ++i) {
		if (rank[i] > 0) {
			int j = pos[rank[i] - 1];
			while (i + h < n && j + h < n && str[i + h] == str[j + h])
				h++;
			height[rank[i]] = h;
			if (h > 0)
				h--;
		}
	}
}

 
int find(char * p) {
	int l = 0, sz = strlen(p), h = strlen(str) - 1;

	while (l <= h) {
		int mid = l + (h - l) / 2;
		char * tmp = (char*)malloc(sz+1);
		strncpy(tmp, str + pos[mid], sz);
		tmp[sz] = 0;
		int cmp = strcmp(p, tmp);
		if (cmp < 0)
			h = mid - 1;
		if (cmp > 0)
			l = mid + 1;
		if (!cmp)
			return mid;
	}
	return -1;
}

int main() {
	freopen("input.txt", "r", stdin);
	string s;
	int T, q;
	char p[2000], tmp[200];
	scanf("%d", &T);
	gets(str);
	while (T--) {
		gets(str);
		scanf("%d", &q);
		gets(tmp);
		long long len = 0LL + strlen(str);
		suffixSort(len);
		while (q--) {
			gets(p);
			if (find(p) != -1)
				puts("y");
			else
				puts("n");
		}

	}
}

SPOJ – 705 – New Distinct Substrings – [Suffix Arrays nlgn]

#include <vector>
#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
 
using namespace std;
 
//  O(n log n) - Manber and Myers 
 
#define N 1000005
char str[N]; //input
int rank[N], pos[N]; //output

int cnt[N], next[N]; 
bool bh[N], b2h[N];
 

bool smaller_first_char(int a, int b) {
	return str[a] < str[b];
}
 
void suffixSort(int n) {
	for (int i = 0; i < n; ++i) {
		pos[i] = i;
	}
	sort(pos, pos + n, smaller_first_char);

 
	for (int i = 0; i < n; ++i) {
		bh[i] = i == 0 || str[pos[i]] != str[pos[i - 1]];
		b2h[i] = false;
	}
 
	for (int h = 1; h < n; h <<= 1) {
		int buckets = 0;
		for (int i = 0, j; i < n; i = j) {
			j = i + 1;
			while (j < n && !bh[j])
				j++;
			next[i] = j;
			buckets++;
		}
		if (buckets == n)
			break; 

 
		for (int i = 0; i < n; i = next[i]) {
			cnt[i] = 0;
			for (int j = i; j < next[i]; ++j) {
				rank[pos[j]] = i;
			}
		}
 
		cnt[rank[n - h]]++;
		b2h[rank[n - h]] = true;
		for (int i = 0; i < n; i = next[i]) {
			for (int j = i; j < next[i]; ++j) {
				int s = pos[j] - h;
				if (s >= 0) {
					int head = rank[s];
					rank[s] = head + cnt[head]++;
					b2h[rank[s]] = true;
				}
			}
			for (int j = i; j < next[i]; ++j) {
				int s = pos[j] - h;
				if (s >= 0 && b2h[rank[s]]) {
					for (int k = rank[s] + 1; !bh[k] && b2h[k]; k++)
						b2h[k] = false;
				}
			}
		}
		for (int i = 0; i < n; ++i) {
			pos[rank[i]] = i;
			bh[i] |= b2h[i];
		}
	}
	for (int i = 0; i < n; ++i) {
		rank[pos[i]] = i;
	}
}
 
 
// O(n) - lcp - "Arrays and Its Applications" by Toru Kasai, Gunho Lee

int height[N];
// height[i] = lcp of suffix pos[i] and suffix pos[i-1]
// height[0] = 0
void getHeight(int n) {
	for (int i = 0; i < n; ++i)
		rank[pos[i]] = i;
	height[0] = 0;
	for (int i = 0, h = 0; i < n; ++i) {
		if (rank[i] > 0) {
			int j = pos[rank[i] - 1];
			while (i + h < n && j + h < n && str[i + h] == str[j + h])
				h++;
			height[rank[i]] = h;
			if (h > 0)
				h--;
		}
	}
}

 
int main() {
	string s;
	int T;
	scanf("%d", &T);
	gets(str);
	while (T--) {
		gets(str);
		long long len = 0LL + strlen(str);
		long long total = len*(len+1)/2;
		suffixSort(len);
		getHeight(len);
 
		for (int i = 0; i < len; i++) 
			total -=   height[i];
		
		cout << total << endl;
	}
}

SPOJ – 649 – Distinct Substrings – [Suffix Arrays (nlgn)]

#include <vector>
#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
 
using namespace std;
 
//  O(n log n) - Manber and Myers 
 
#define N 1000005
char str[N]; //input
int rank[N], pos[N]; //output

int cnt[N], next[N]; 
bool bh[N], b2h[N];
 

bool smaller_first_char(int a, int b) {
	return str[a] < str[b];
}
 
void suffixSort(int n) {
	for (int i = 0; i < n; ++i) {
		pos[i] = i;
	}
	sort(pos, pos + n, smaller_first_char);

 
	for (int i = 0; i < n; ++i) {
		bh[i] = i == 0 || str[pos[i]] != str[pos[i - 1]];
		b2h[i] = false;
	}
 
	for (int h = 1; h < n; h <<= 1) {
		int buckets = 0;
		for (int i = 0, j; i < n; i = j) {
			j = i + 1;
			while (j < n && !bh[j])
				j++;
			next[i] = j;
			buckets++;
		}
		if (buckets == n)
			break; 

 
		for (int i = 0; i < n; i = next[i]) {
			cnt[i] = 0;
			for (int j = i; j < next[i]; ++j) {
				rank[pos[j]] = i;
			}
		}
 
		cnt[rank[n - h]]++;
		b2h[rank[n - h]] = true;
		for (int i = 0; i < n; i = next[i]) {
			for (int j = i; j < next[i]; ++j) {
				int s = pos[j] - h;
				if (s >= 0) {
					int head = rank[s];
					rank[s] = head + cnt[head]++;
					b2h[rank[s]] = true;
				}
			}
			for (int j = i; j < next[i]; ++j) {
				int s = pos[j] - h;
				if (s >= 0 && b2h[rank[s]]) {
					for (int k = rank[s] + 1; !bh[k] && b2h[k]; k++)
						b2h[k] = false;
				}
			}
		}
		for (int i = 0; i < n; ++i) {
			pos[rank[i]] = i;
			bh[i] |= b2h[i];
		}
	}
	for (int i = 0; i < n; ++i) {
		rank[pos[i]] = i;
	}
}
 
 
// O(n) - lcp - "Arrays and Its Applications" by Toru Kasai, Gunho Lee

int height[N];
// height[i] = lcp of suffix pos[i] and suffix pos[i-1]
// height[0] = 0
void getHeight(int n) {
	for (int i = 0; i < n; ++i)
		rank[pos[i]] = i;
	height[0] = 0;
	for (int i = 0, h = 0; i < n; ++i) {
		if (rank[i] > 0) {
			int j = pos[rank[i] - 1];
			while (i + h < n && j + h < n && str[i + h] == str[j + h])
				h++;
			height[rank[i]] = h;
			if (h > 0)
				h--;
		}
	}
}

 
int main() {
	string s;
	int T;
	scanf("%d", &T);
	gets(str);
	while (T--) {
		gets(str);
		long long len = 0LL + strlen(str);
		long long total = len*(len+1)/2;
		suffixSort(len);
		getHeight(len);
 
		for (int i = 0; i < len; i++) 
			total -=   height[i];
		
		cout << total << endl;
	}
}