数据结构和算法

DSA - 主页 DSA - 概述 DSA - 环境设置 DSA - 算法基础 DSA - 渐近分析

数据结构

DSA - 数据结构基础 DSA - 数据结构和类型 DSA - 数组数据结构

链接列表

DSA - 链接列表数据结构 DSA - 双向链接列表数据结构 DSA - 循环链表数据结构

堆栈 &队列

DSA - 堆栈数据结构 DSA - 表达式解析 DSA - 队列数据结构

搜索算法

DSA - 搜索算法 DSA - 线性搜索算法 DSA - 二分搜索算法 DSA - 插值搜索 DSA - 跳跃搜索算法 DSA - 指数搜索 DSA - 斐波那契搜索 DSA - 子列表搜索 DSA - 哈希表

排序算法

DSA - 排序算法 DSA - 冒泡排序算法 DSA - 插入排序算法 DSA - 选择排序算法 DSA - 归并排序算法 DSA - 希尔排序算法 DSA - 堆排序 DSA - 桶排序算法 DSA - 计数排序算法 DSA - 基数排序算法 DSA - 快速排序算法

图形数据结构

DSA - 图形数据结构 DSA - 深度优先遍历 DSA - 广度优先遍历 DSA - 生成树

树数据结构

DSA - 树数据结构 DSA - 树遍历 DSA - 二叉搜索树 DSA - AVL 树 DSA - 红黑树 DSA - B树 DSA - B+ 树 DSA - 伸展树 DSA - 尝试 DSA - 堆数据结构

递归

DSA - 递归算法 DSA - 使用递归的汉诺塔 DSA - 使用递归的斐波那契数列

分而治之

DSA - 分而治之 DSA - 最大最小问题 DSA - 施特拉森矩阵乘法 DSA - Karatsuba 算法

贪婪算法

DSA - 贪婪算法 DSA - 旅行商问题(贪婪方法) DSA - Prim 最小生成树 DSA - Kruskal 最小生成树 DSA - Dijkstra 最短路径算法 DSA - 地图着色算法 DSA - 分数背包问题 DSA - 作业排序截止日期 DSA - 最佳合并模式算法

动态规划

DSA - 动态规划 DSA - 矩阵链乘法 DSA - Floyd Warshall 算法 DSA - 0-1 背包问题 DSA - 最长公共子序列算法 DSA - 旅行商问题(动态方法)

近似算法

DSA - 近似算法 DSA - 顶点覆盖算法 DSA - 集合覆盖问题 DSA - 旅行商问题(近似方法)

随机算法

DSA - 随机算法 DSA - 随机快速排序算法 DSA - Karger 最小割算法 DSA - Fisher-Yates 洗牌算法

DSA 有用资源

DSA - 问答 DSA - 快速指南 DSA - 有用资源 DSA - 讨论


Suffix Array Algorithm

A suffix array is a data structure that stores all suffixes of a given string in the lexicographic order. It is useful for various string processing problems, such as pattern matching, searching, finding longest common prefix and so on. This array can quickly find the location of a pattern within a larger text.

How Suffix Array works?

Suppose the text is "Carpet". To construct its suffix array, follow the below steps −

  • Generate all the suffixes of the given text. In this case, possible suffixes could be "carpet", "arpet", "rpet", "pet", "et", "t".

  • Sort all the suffixes. All suffixes in sorted order are "arpet", "carpet", "et", "pet", "rpet", and "t".

  • Hence, the suffix array is as follows: [1, 0, 4, 3, 2, 5].

Suffix Array

To use this suffix array for pattern matching, we can perform a binary search on the sorted suffixes to find the range of suffixes that start with the pattern. For instance, let's take the above string "Carpet" and we want to find the pattern "ar", we can compare it with the middle suffix in the suffix array, which is "pet".

Since "ar" is smaller than this suffix, we can discard the right half of the suffix array and continue the binary search on the left half. Eventually, we will find that the position of suffix that starts with "ar" is "1" in the original string.

Let's see the input-output scenario for Suffix Array −

Input:
string: "AABABCEDBABCDEB" 
Output:
Pattern found at index: 3
Pattern found at index: 9
Pattern found at index: 1 

Example

Following is the example illustrating the working of suffix array in pattern matching.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Structure of suffixes
struct Suffix {
   int index;
   char suff[100];
};
// Function to compare two suffixes for sorting
int strCompare(const void* a, const void* b) {
   struct Suffix* s1 = (struct Suffix*)a;
   struct Suffix* s2 = (struct Suffix*)b;
   return strcmp(s1->suff, s2->suff);
}
// Function to fill the suffix array
int* fillSuffixArray(char* txt, int n) {
   struct Suffix* suffixes = (struct Suffix*) malloc(n * sizeof(struct Suffix));
   // Store suffixes and their indexes in an array 
   for (int i = 0; i < n; i++) {
      suffixes[i].index = i;
      strncpy(suffixes[i].suff, &(txt[i]), n - i);
      suffixes[i].suff[n-i] = '\0';
   }
   // Sort the suffixes 
   qsort(suffixes, n, sizeof(struct Suffix), strCompare);
   // Store indexes of all sorted suffixes in the suffix array
   int* suffixArr = (int*) malloc(n * sizeof(int));
   for (int i = 0; i < n; i++)
      suffixArr[i] = suffixes[i].index;

   // Deallocate the dynamic memory
   free(suffixes);
   return suffixArr;
}
// binary search on suffix array and find all occurrences of pattern
void suffixArraySearch(char* pat, char* txt, int* suffixArr, int n) {
   int m = strlen(pat);
   // binary search for pattern in text using the built suffix array
   int l = 0, r = n - 1;
   while (l <= r) {
      int mid = l + (r - l) / 2;
      char substr[100];
      strncpy(substr, &(txt[suffixArr[mid]]), m);
      substr[m] = '\0';
      int res = strncmp(pat, substr, m);
      if (res == 0) {
         printf("Pattern found at index: %d
", suffixArr[mid]);
         // Move to the left of mid
         int temp = mid - 1;
         while (temp >= 0 && strncmp(pat, &(txt[suffixArr[temp]]), m) == 0) {
            printf("Pattern found at index: %d
", suffixArr[temp]);
            temp--;
         }
         // Move to the right of mid
         temp = mid + 1;
         while (temp < n && strncmp(pat, &(txt[suffixArr[temp]]), m) == 0) {
            printf("Pattern found at index: %d
", suffixArr[temp]);
            temp++;
         }
         return;
      }
      if (res < 0) r = mid - 1;
      else l = mid + 1;
    }
    printf("Pattern not found
");
}
int main() {
   char txt[] = "AAAABCAEAAABCBDDAAAABC";
   // pattern to be searched
   char pat[] = "AAABC";
   int n = strlen(txt);
   int* suffixArr = fillSuffixArray(txt, n);
   suffixArraySearch(pat, txt, suffixArr, n);
   free(suffixArr);  
   return 0;
}
#include <iostream>
#include <algorithm>
#include <cstring>
using namespace std;
// Structure of suffixes
struct Suffix {
   int index;
   string suff;
};
// Function to compare two suffixes for sorting
bool strCompare(Suffix a, Suffix b) {
   return a.suff < b.suff;
}
// Function to fill the suffix array
int* fillSuffixArray(string txt, int n) {
   Suffix* suffixes = new Suffix[n];
   // Storing suffixes and indexes in an array 
   for (int i = 0; i < n; i++) {
      suffixes[i].index = i;
      suffixes[i].suff = txt.substr(i, n - i);
   }
   // Sorting the suffixes 
   sort(suffixes, suffixes+n, strCompare);
   // Store indexes of all sorted suffixes in suffix array
   int* suffixArr = new int[n];
   for (int i = 0; i < n; i++)
      suffixArr[i] = suffixes[i].index;
   // Deallocate the dynamic memory
   delete[] suffixes;
   return suffixArr;
}
// binary search on the suffix array and find all occurrences of pattern
void suffixArraySearch(string pat, string txt, int* suffixArr, int n) {
   int m = pat.length();
   // binary search for the pattern 
   int l = 0, r = n - 1;
   while (l <= r) {
      int mid = l + (r - l) / 2;
      string substr = txt.substr(suffixArr[mid], m);
      if (pat == substr) {
         cout << "Pattern found at index: " << suffixArr[mid] << endl;
         // Move to the left of mid
         int temp = mid - 1;
         while (temp >= 0 && txt.substr(suffixArr[temp], m) == pat) {
            cout << "Pattern found at index: " << suffixArr[temp] << endl;
            temp--;
         }
         // Move to the right of mid
         temp = mid + 1;
         while (temp < n && txt.substr(suffixArr[temp], m) == pat) {
            cout << "Pattern found at index: " << suffixArr[temp] << endl;
            temp++;
         }
         return;
      }
      if (pat < substr) r = mid - 1;
      else l = mid + 1;
   }
   cout << "Pattern not found" << endl;
}
int main() {
   string txt = "AAAABCAEAAABCBDDAAAABC";
   // pattern to be searched
   string pat = "AAABC";
   int n = txt.length();
   int* suffixArr = fillSuffixArray(txt, n);
   suffixArraySearch(pat, txt, suffixArr, n);
   delete[] suffixArr;  
   return 0;
} 
import java.util.Arrays;
public class Main {
   // Structure of suffixes
   static class SuffixCmpr implements Comparable<SuffixCmpr> {
      int index;
      String suff;
      // Constructor
      public SuffixCmpr(int index, String suff) {
         this.index = index;
         this.suff = suff;
      }
      // to sort suffixes alphabetically
      public int compareTo(SuffixCmpr other) {
         return this.suff.compareTo(other.suff);
      }
   }
   // method to build a suffix array
   public static int[] fillsuffixArray(String s) {
      int n = s.length();
      SuffixCmpr[] suffixes = new SuffixCmpr[n];
      // Create and sort suffixes
      for (int i = 0; i < n; i++) {
         suffixes[i] = new SuffixCmpr(i, s.substring(i));
      }
      Arrays.sort(suffixes);
      // Store indexes of all sorted suffixes
      int[] fillsuffixArray = new int[n];
      for (int i = 0; i < n; i++) {
         fillsuffixArray[i] = suffixes[i].index;
      }
      return fillsuffixArray;
   }
   // method to search a pattern in a text using suffix array
   public static void suffixArraySearch(String pattern, String txt, int[] suffArr) {
      int n = txt.length();
      int m = pattern.length();
      // binary search for the pattern in the text using suffix array
      int l = 0, r = n - 1;
      while (l <= r) {
         int mid = l + (r - l) / 2;
         int res = pattern.compareTo(txt.substring(suffArr[mid], Math.min(suffArr[mid] + m, n)));
         if (res == 0) {
            System.out.println("Pattern found at index: " + suffArr[mid]);
            // Move to previous suffix in the sorted array
            int temp = mid - 1;
            while (temp >= 0 && txt.substring(suffArr[temp], Math.min(suffArr[temp] + m, n)).equals(pattern)) {
                System.out.println("Pattern found at index: " + suffArr[temp]);
                temp--;
            }
            // Move to next suffix in the sorted array
            temp = mid + 1;
            while (temp < n && txt.substring(suffArr[temp], Math.min(suffArr[temp] + m, n)).equals(pattern)) {
                System.out.println("Pattern found at index: " + suffArr[temp]);
                temp++;
            }
            return;
         }
         if (res < 0) r = mid - 1;
         else l = mid + 1;
      }
      System.out.println("Pattern not found");
   }
   public static void main(String[] args) {
      String txt = "AAAABCAEAAABCBDDAAAABC";
      String pattern = "AAABC";
      // Filling the suffix array
      int[] suffArr = fillsuffixArray(txt);
      // Calling method to Search pattern in suffix array
      suffixArraySearch(pattern, txt, suffArr);
   }
}
def fill_suffix_array(txt):
    # Array of tuples, each tuple stores the index and suffix
    suffixes = [(i, txt[i:]) for i in range(len(txt))]
    # Sort the suffixes
    suffixes.sort(key=lambda x: x[1])
    # Return the list of indices after sorting
    return [suff[0] for suff in suffixes]

def suffixArraySearch(pat, txt, suffix_arr):
    n = len(txt)
    m = len(pat)
    # Iterate over the suffix array
    for i in range(n):
        if txt[suffix_arr[i]:min(suffix_arr[i] + m, n)] == pat:
            print(f"Pattern found at index: {suffix_arr[i]}")

def main():
    txt = "AAAABCAEAAABCBDDAAAABC"
    pat = "AAABC"
    suffix_arr = fill_suffix_array(txt)
    suffixArraySearch(pat, txt, suffix_arr)

if __name__ == "__main__":
    main()

Output

Pattern found at index: 8
Pattern found at index: 1
Pattern found at index: 17

Complexity of Suffix Array

The advantage of using a suffix array for pattern matching is that it requires only O(m log n) time, where m is the length of the pattern and n is the length of the string. The disadvantage is that it requires O(n log n) time and O(n) space to construct the suffix array in the first place.