当前位置:网站首页>[ZOJ] P3228 Searching the String

[ZOJ] P3228 Searching the String

2022-06-23 01:23:00 Lupin123123

ac Automata exercises , Can be taken as ac Automata do not overlap matching templates . Topic link

Since there are multiple pattern strings to match , So if you use kmp The complexity is O ( q n ) O(qn) O(qn), This is obviously unbearable . Then consider the sharp weapon of multi pattern string matching ——ac automata . Time complexity is close to O ( n ) O(n) O(n).

The first problem to be solved for this problem is how to realize non overlapping matching . It is not difficult to find after thinking , Just record trie The last position of the word represented by the node in the figure in the text string . The specific expression is the following code :

	void match2(char *s)  // Do not overlap  
	{
    
		int len=strlen(s);
		int now=0;
		for (int i=0; i<len; i++)
		{
    
			int ch=s[i]-'a';
			for (int j=trie[now][ch]; j; j=fail[j])
			{
    
				if (!mark[j]) continue;
				if (i-l[j]+1>pre[j] || !pre[j]) ans2[j]++,pre[j]=i;
			}
			now=trie[now][ch];
		}
	}

At first I thought about adding all the words to the dictionary tree , And then run a lap match, Run again without overlapping match, Finally, output as required . But I overlooked a problem , That is, it is possible to add repeated words to the dictionary tree . How to solve ? At first I wanted to use unique duplicate removal ? Later, I found a more ingenious method , That's using pos[] Record where the word appears in the dictionary tree , Then you can guarantee trie The words represented by the nodes of the tree are unique . This technique can effectively avoid adding words repeatedly .
Another thing to note is the size of the array .trie Trees have at most n ∗ l e n n*len nlen Nodes ,trie Array first dimension open 6e5 That's it , Otherwise MLE.

Complete code :

#include<bits/stdc++.h>
#define FAST ios::sync_with_stdio(false),cin.tie(0),cout.tie(0)
#define INF 0x3f3f3f3f
typedef long long ll;
const int maxn = 2e5+5;

using namespace std;

struct ac_automaton
{
    
	int trie[maxn][26];
	int fail[maxn];
	int ans1[maxn],ans2[maxn];
	 int l[maxn],pre[maxn];
	bool mark[maxn];
	queue<int> q;
	int cnt;
	
	void init()
	{
    
		memset(trie,0,sizeof(trie));
		memset(fail,0,sizeof(fail));
		memset(mark,0,sizeof(mark));
		memset(ans1,0,sizeof(ans1));
		memset(ans2,0,sizeof(ans2));
		memset(pre,0,sizeof(pre));
		memset(l,0,sizeof(l));
		while(!q.empty()) q.pop();
		cnt=0;
	}
	
	int insert(string s, int id)
	{
    
		int now=0;
		for (int i=0; i<s.size(); i++)
		{
    
			int ch=s[i];
			if (!trie[now][ch-'a']) trie[now][ch-'a']=++cnt;
			now=trie[now][ch-'a'];
		}
		mark[now]=1;
		l[now]=s.size();
		return now;
	}
	
	void build()  // establish fial The pointer  
	{
    
		for (int i=0; i<26; i++)
		{
    
			if (trie[0][i])
			{
    
				fail[trie[0][i]]=0;
				q.push(trie[0][i]);	
			}	
		}
		while(!q.empty())
		{
    
			int u=q.front();
			q.pop();
			for (int i=0; i<26; i++)
			{
    
				if (trie[u][i])
				{
    
					fail[trie[u][i]]=trie[fail[u]][i];
					q.push(trie[u][i]);	
				}
				else trie[u][i]=trie[fail[u]][i];	
			}	
		} 
	}
	
	void match1(char *s)  // Can overlap  
	{
    
		int len=strlen(s);
		int now=0;
		for (int i=0; i<len; i++)
		{
    
			int ch=s[i]-'a';
			for (int j=trie[now][ch]; j; j=fail[j])
			{
    
				if (mark[j]) ans1[j]++;
			}
			now=trie[now][ch];
		}
	}
	
	void match2(char *s)  // Do not overlap  
	{
    
		int len=strlen(s);
		int now=0;
		for (int i=0; i<len; i++)
		{
    
			int ch=s[i]-'a';
			for (int j=trie[now][ch]; j; j=fail[j])
			{
    
				if (!mark[j]) continue;
				if (i-l[j]+1>pre[j] || !pre[j]) ans2[j]++,pre[j]=i;
			}
			now=trie[now][ch];
		}
	}
}ac;

int n,kase;
char a[maxn];
string s[maxn];
int q[maxn],pos[maxn];

int main()
{
    
   	FAST; 
   	while(cin>>a)
   	{
    
   		++kase;
     	cin>>n;
     	ac.init();

		for (int i=1; i<=n; i++)
		{
    
		  	cin>>q[i]>>s[i]; 
			pos[i]=ac.insert(s[i],i);
		}	
		 
		 ac.build();
		 ac.match1(a);
		 ac.match2(a);
		 
		 cout<<"Case "<<kase<<endl;
		 for (int i=1; i<=n; i++)
		 {
    
		 	if (q[i]==0) cout<<ac.ans1[pos[i]]<<endl;
		 	else cout<<ac.ans2[pos[i]]<<endl;
		 }
		 cout<<endl;
	}
	
return 0;
}



From this question, we can learn the following points :
1. Record the position where the pattern string matches successfully in the text string
2. understand ac The principle of automata
3. Techniques to avoid repetition

原网站

版权声明
本文为[Lupin123123]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/173/202206220917475976.html