Efficient Construction of Finite Automata
What is Finite Automata?
Finite Automata is a mathematical model of computation used for recognizing patterns within input texts. It consists of a set of states and transitions between them. This machine can either accept or reject an input string which indicates it has only two states.
Finite Automata for Pattern Matching
To use finite automata for pattern matching, we need to construct an FA machine that only accepts the strings that match the given pattern. Suppose, we have created a finite automaton that has four states namely q0, q1, q2, and q3. The initial state would be 'q0', and the final state 'q3'. The transitions are labelled with characters of the pattern.
Now, we begin matching from the start state and read the string from left to right, following the transitions according to the input symbols. If we reach the final state after reading the whole string, then the string matches the pattern. Otherwise, the string does not match the pattern.
Following is the input-output scenario to enhance our understanding of the problem −
Input: main String: "ABAAABCDBBABCDDEBCABC" pattern: "ABC" Output: Pattern found at position: 4 Pattern found at position: 10 Pattern found at position: 18
Given an input string "ABAAABCDBBABCDDEBCABC" and the pattern "ABC," we use a finite automaton program to search for occurrences of the pattern in the string. The program identifies the pattern at index positions 4, 10, and 18.
Example
Now, let us implement the finite automata approach to solve pattern matching problem in different programming languages −
#include <stdio.h> #include <string.h> #define MAXCHAR 256 // to fill Finite Automata transition table based on given pattern void fillTransitionTable(const char *pattern, int transTable[][MAXCHAR]) { int longPS = 0; // initializing the first state with 0 for all characters for (int i = 0; i < MAXCHAR; i++) { transTable[0][i] = 0; } // For the first character of pattern, move to the first state transTable[0][(int)pattern[0]] = 1; // For rest of the pattern's characters, create states using prefix and suffix for (int i = 1; i <= strlen(pattern); i++) { // Copy values from LPS length to the current state for (int j = 0; j < MAXCHAR; j++) transTable[i][j] = transTable[longPS][j]; // For the current pattern character, move to the next state transTable[i][(int)pattern[i]] = i + 1; // Updating LPS length for the next state if (i < strlen(pattern)) longPS = transTable[longPS][(int)pattern[i]]; } } // to search for the pattern in main string using Finite Automata approach void FAPatternSearch(const char *mainString, const char *pattern, int array[], int *index) { int patLen = strlen(pattern); int strLen = strlen(mainString); // Creating transition table for the pattern int transTable[patLen + 1][MAXCHAR]; fillTransitionTable(pattern, transTable); int presentState = 0; // iterating over the main string for (int i = 0; i <= strLen; i++) { // Move to the next state if the transition is possible presentState = transTable[presentState][(int)mainString[i]]; // If the present state is the final state, the pattern is found if (presentState == patLen) { (*index)++; array[(*index)] = i - patLen + 1; } } } int main() { const char *mainString = "ABAAABCDBBABCDDEBCABC"; // pattern to be searched const char *pattern = "ABC"; int locArray[strlen(mainString)]; int index = -1; // Calling the pattern search function FAPatternSearch(mainString, pattern, locArray, &index); // to print the result for (int i = 0; i <= index; i++) { printf("Pattern found at position: %d ", locArray[i]); } return 0; }
#include<iostream> #define MAXCHAR 256 using namespace std; // to fill Finite Automata transition table based on given pattern void fillTransitionTable(string pattern, int transTable[][MAXCHAR]) { int longPS = 0; // initializing the first state with 0 for all characters for (int i = 0; i < MAXCHAR; i++) { transTable[0][i] = 0; } // For the first character of pattern, move to the first state transTable[0][pattern[0]] = 1; // For rest of the characters, create states using prefix and suffix for (int i = 1; i<= pattern.size(); i++) { // Copy values from LPS length to the current state for (int j = 0; j < MAXCHAR ; j++) transTable[i][j] = transTable[longPS][j]; // For the current pattern character, move to the next state transTable[i][pattern[i]] = i + 1; // Updating LPS length for the next state if (i < pattern.size()) longPS = transTable[longPS][pattern[i]]; } } // to search for the pattern in main string using Finite Automata approach void FAPatternSearch(string mainString, string pattern, int array[], int *index) { int patLen = pattern.size(); int strLen = mainString.size(); // Creating transition table for the pattern int transTable[patLen+1][MAXCHAR]; fillTransitionTable(pattern, transTable); int presentState = 0; // iterating over the main string for(int i = 0; i<=strLen; i++) { // Move to the next state if the transition is possible presentState = transTable[presentState][mainString[i]]; // If the present state is the final state, the pattern is found if(presentState == patLen) { (*index)++; array[(*index)] = i - patLen + 1 ; } } } int main() { string mainString = "ABAAABCDBBABCDDEBCABC"; // pattern to be searched string pattern = "ABC"; int locArray[mainString.size()]; int index = -1; // Calling the pattern search function FAPatternSearch(mainString, pattern, locArray, &index); // to print the result for(int i = 0; i <= index; i++) { cout << "Pattern found at position: " << locArray[i]<<endl; } }
public class Main { static final int MAXCHAR = 256; // to fill Finite Automata transition table based on given pattern public static void fillTransitionTable(String pattern, int[][] transTable) { int longPS = 0; // initializing the first state with 0 for all characters for (int i = 0; i < MAXCHAR; i++) { transTable[0][i] = 0; } // For the first character of pattern, move to the first state transTable[0][pattern.charAt(0)] = 1; // For rest of the characters, create states using prefix and suffix for (int i = 1; i < pattern.length(); i++) { // Copy values from LPS length to the current state for (int j = 0; j < MAXCHAR; j++) transTable[i][j] = transTable[longPS][j]; // For the current pattern character, move to the next state transTable[i][pattern.charAt(i)] = i + 1; // Updating LPS length for the next state longPS = transTable[longPS][pattern.charAt(i)]; } } // to search for the pattern in main string using Finite Automata approach public static void FAPatternSearch(String mainString, String pattern, int[] array, int[] index) { int patLen = pattern.length(); int strLen = mainString.length(); // Creating transition table for the pattern int[][] transTable = new int[patLen + 1][MAXCHAR]; fillTransitionTable(pattern, transTable); int presentState = 0; // iterating over the main string for (int i = 0; i < strLen; i++) { // Move to the next state if the transition is possible presentState = transTable[presentState][mainString.charAt(i)]; // If the present state is the final state, the pattern is found if (presentState == patLen) { index[0]++; array[index[0]] = i - patLen + 1; } } } public static void main(String[] args) { String mainString = "ABAAABCDBBABCDDEBCABC"; // pattern to be searched String pattern = "ABC"; int[] locArray = new int[mainString.length()]; int[] index = { -1 }; // Calling the pattern search method FAPatternSearch(mainString, pattern, locArray, index); // to print the result for (int i = 0; i <= index[0]; i++) { System.out.println("Pattern found at position: " + locArray[i]); } } }
MAXCHAR = 256 # to fill Finite Automata transition table based on given pattern def fillTransitionTable(pattern, transTable): longPS = 0 # initializing the first state with 0 for all characters for i in range(MAXCHAR): transTable[0][i] = 0 # For the first character of pattern, move to the first state transTable[0][ord( pattern[0])] = 1 # For rest of the pattern's characters, create states using prefix and suffix for i in range(1, len(pattern)): # Copy values from LPS length to the current state for j in range(MAXCHAR): transTable[i][j] = transTable[longPS][j] # For the current pattern character, move to the next state transTable[i][ord(pattern[i])] = i + 1 # Updating LPS length for the next state longPS = transTable[longPS][ord( pattern[i] )] # to search for the pattern in main string using Finite Automata approach def FAPatternSearch(mainString, pattern, array, index): patLen = len(pattern) strLen = len(mainString) # create a transition table for each pattern transTable = [[0] * MAXCHAR for _ in range(patLen + 1) ] fillTransitionTable(pattern, transTable) presentState = 0 # iterating over the main string for i in range(strLen): # Move to the next state if the transition is possible presentState = transTable[presentState][ord( mainString[i] )] # If the present state is the final state, the pattern is found if presentState == patLen: index[0] += 1 array[index[0]] = i - patLen + 1 mainString = "ABAAABCDBBABCDDEBCABC" pattern = "ABC" locArray = [0] * len(mainString) index = [-1] #Calling the pattern search function FAPatternSearch(mainString, pattern, locArray, index) for i in range(index[0] + 1): print("Pattern found at position:", locArray[i])
Output
Pattern found at position: 4 Pattern found at position: 10 Pattern found at position: 18