Suffix Array Algorithm
A suffix array is a data structure that stores all suffixes of a given string in the lexicographic order. It is useful for various string processing problems, such as pattern matching, searching, finding longest common prefix and so on. This array can quickly find the location of a pattern within a larger text.
How Suffix Array works?
Suppose the text is "Carpet". To construct its suffix array, follow the below steps −
Generate all the suffixes of the given text. In this case, possible suffixes could be "carpet", "arpet", "rpet", "pet", "et", "t".
Sort all the suffixes. All suffixes in sorted order are "arpet", "carpet", "et", "pet", "rpet", and "t".
Hence, the suffix array is as follows: [1, 0, 4, 3, 2, 5].

To use this suffix array for pattern matching, we can perform a binary search on the sorted suffixes to find the range of suffixes that start with the pattern. For instance, let's take the above string "Carpet" and we want to find the pattern "ar", we can compare it with the middle suffix in the suffix array, which is "pet".
Since "ar" is smaller than this suffix, we can discard the right half of the suffix array and continue the binary search on the left half. Eventually, we will find that the position of suffix that starts with "ar" is "1" in the original string.
Let's see the input-output scenario for Suffix Array −
Input: string: "AABABCEDBABCDEB" Output: Pattern found at index: 3 Pattern found at index: 9 Pattern found at index: 1
Example
Following is the example illustrating the working of suffix array in pattern matching.
#include <stdio.h> #include <stdlib.h> #include <string.h> // Structure of suffixes struct Suffix { int index; char suff[100]; }; // Function to compare two suffixes for sorting int strCompare(const void* a, const void* b) { struct Suffix* s1 = (struct Suffix*)a; struct Suffix* s2 = (struct Suffix*)b; return strcmp(s1->suff, s2->suff); } // Function to fill the suffix array int* fillSuffixArray(char* txt, int n) { struct Suffix* suffixes = (struct Suffix*) malloc(n * sizeof(struct Suffix)); // Store suffixes and their indexes in an array for (int i = 0; i < n; i++) { suffixes[i].index = i; strncpy(suffixes[i].suff, &(txt[i]), n - i); suffixes[i].suff[n-i] = '\0'; } // Sort the suffixes qsort(suffixes, n, sizeof(struct Suffix), strCompare); // Store indexes of all sorted suffixes in the suffix array int* suffixArr = (int*) malloc(n * sizeof(int)); for (int i = 0; i < n; i++) suffixArr[i] = suffixes[i].index; // Deallocate the dynamic memory free(suffixes); return suffixArr; } // binary search on suffix array and find all occurrences of pattern void suffixArraySearch(char* pat, char* txt, int* suffixArr, int n) { int m = strlen(pat); // binary search for pattern in text using the built suffix array int l = 0, r = n - 1; while (l <= r) { int mid = l + (r - l) / 2; char substr[100]; strncpy(substr, &(txt[suffixArr[mid]]), m); substr[m] = '\0'; int res = strncmp(pat, substr, m); if (res == 0) { printf("Pattern found at index: %d ", suffixArr[mid]); // Move to the left of mid int temp = mid - 1; while (temp >= 0 && strncmp(pat, &(txt[suffixArr[temp]]), m) == 0) { printf("Pattern found at index: %d ", suffixArr[temp]); temp--; } // Move to the right of mid temp = mid + 1; while (temp < n && strncmp(pat, &(txt[suffixArr[temp]]), m) == 0) { printf("Pattern found at index: %d ", suffixArr[temp]); temp++; } return; } if (res < 0) r = mid - 1; else l = mid + 1; } printf("Pattern not found "); } int main() { char txt[] = "AAAABCAEAAABCBDDAAAABC"; // pattern to be searched char pat[] = "AAABC"; int n = strlen(txt); int* suffixArr = fillSuffixArray(txt, n); suffixArraySearch(pat, txt, suffixArr, n); free(suffixArr); return 0; }
#include <iostream> #include <algorithm> #include <cstring> using namespace std; // Structure of suffixes struct Suffix { int index; string suff; }; // Function to compare two suffixes for sorting bool strCompare(Suffix a, Suffix b) { return a.suff < b.suff; } // Function to fill the suffix array int* fillSuffixArray(string txt, int n) { Suffix* suffixes = new Suffix[n]; // Storing suffixes and indexes in an array for (int i = 0; i < n; i++) { suffixes[i].index = i; suffixes[i].suff = txt.substr(i, n - i); } // Sorting the suffixes sort(suffixes, suffixes+n, strCompare); // Store indexes of all sorted suffixes in suffix array int* suffixArr = new int[n]; for (int i = 0; i < n; i++) suffixArr[i] = suffixes[i].index; // Deallocate the dynamic memory delete[] suffixes; return suffixArr; } // binary search on the suffix array and find all occurrences of pattern void suffixArraySearch(string pat, string txt, int* suffixArr, int n) { int m = pat.length(); // binary search for the pattern int l = 0, r = n - 1; while (l <= r) { int mid = l + (r - l) / 2; string substr = txt.substr(suffixArr[mid], m); if (pat == substr) { cout << "Pattern found at index: " << suffixArr[mid] << endl; // Move to the left of mid int temp = mid - 1; while (temp >= 0 && txt.substr(suffixArr[temp], m) == pat) { cout << "Pattern found at index: " << suffixArr[temp] << endl; temp--; } // Move to the right of mid temp = mid + 1; while (temp < n && txt.substr(suffixArr[temp], m) == pat) { cout << "Pattern found at index: " << suffixArr[temp] << endl; temp++; } return; } if (pat < substr) r = mid - 1; else l = mid + 1; } cout << "Pattern not found" << endl; } int main() { string txt = "AAAABCAEAAABCBDDAAAABC"; // pattern to be searched string pat = "AAABC"; int n = txt.length(); int* suffixArr = fillSuffixArray(txt, n); suffixArraySearch(pat, txt, suffixArr, n); delete[] suffixArr; return 0; }
import java.util.Arrays; public class Main { // Structure of suffixes static class SuffixCmpr implements Comparable<SuffixCmpr> { int index; String suff; // Constructor public SuffixCmpr(int index, String suff) { this.index = index; this.suff = suff; } // to sort suffixes alphabetically public int compareTo(SuffixCmpr other) { return this.suff.compareTo(other.suff); } } // method to build a suffix array public static int[] fillsuffixArray(String s) { int n = s.length(); SuffixCmpr[] suffixes = new SuffixCmpr[n]; // Create and sort suffixes for (int i = 0; i < n; i++) { suffixes[i] = new SuffixCmpr(i, s.substring(i)); } Arrays.sort(suffixes); // Store indexes of all sorted suffixes int[] fillsuffixArray = new int[n]; for (int i = 0; i < n; i++) { fillsuffixArray[i] = suffixes[i].index; } return fillsuffixArray; } // method to search a pattern in a text using suffix array public static void suffixArraySearch(String pattern, String txt, int[] suffArr) { int n = txt.length(); int m = pattern.length(); // binary search for the pattern in the text using suffix array int l = 0, r = n - 1; while (l <= r) { int mid = l + (r - l) / 2; int res = pattern.compareTo(txt.substring(suffArr[mid], Math.min(suffArr[mid] + m, n))); if (res == 0) { System.out.println("Pattern found at index: " + suffArr[mid]); // Move to previous suffix in the sorted array int temp = mid - 1; while (temp >= 0 && txt.substring(suffArr[temp], Math.min(suffArr[temp] + m, n)).equals(pattern)) { System.out.println("Pattern found at index: " + suffArr[temp]); temp--; } // Move to next suffix in the sorted array temp = mid + 1; while (temp < n && txt.substring(suffArr[temp], Math.min(suffArr[temp] + m, n)).equals(pattern)) { System.out.println("Pattern found at index: " + suffArr[temp]); temp++; } return; } if (res < 0) r = mid - 1; else l = mid + 1; } System.out.println("Pattern not found"); } public static void main(String[] args) { String txt = "AAAABCAEAAABCBDDAAAABC"; String pattern = "AAABC"; // Filling the suffix array int[] suffArr = fillsuffixArray(txt); // Calling method to Search pattern in suffix array suffixArraySearch(pattern, txt, suffArr); } }
def fill_suffix_array(txt): # Array of tuples, each tuple stores the index and suffix suffixes = [(i, txt[i:]) for i in range(len(txt))] # Sort the suffixes suffixes.sort(key=lambda x: x[1]) # Return the list of indices after sorting return [suff[0] for suff in suffixes] def suffixArraySearch(pat, txt, suffix_arr): n = len(txt) m = len(pat) # Iterate over the suffix array for i in range(n): if txt[suffix_arr[i]:min(suffix_arr[i] + m, n)] == pat: print(f"Pattern found at index: {suffix_arr[i]}") def main(): txt = "AAAABCAEAAABCBDDAAAABC" pat = "AAABC" suffix_arr = fill_suffix_array(txt) suffixArraySearch(pat, txt, suffix_arr) if __name__ == "__main__": main()
Output
Pattern found at index: 8 Pattern found at index: 1 Pattern found at index: 17
Complexity of Suffix Array
The advantage of using a suffix array for pattern matching is that it requires only O(m log n) time, where m is the length of the pattern and n is the length of the string. The disadvantage is that it requires O(n log n) time and O(n) space to construct the suffix array in the first place.