OpenNet
RegEx.h
1 
2 // Product OpenNet
3 
8 
9 #pragma once
10 
11 // Constants
13 
// Special values stored in the mC member of a state. They occupy the
// contiguous range 0xef - 0xff, which RegEx_IsCharValid never accepts as an
// input character, so they cannot collide with a literal character to match.
#define REG_EX_DIGIT     (char)(0xef)
#define REG_EX_DIGIT_NOT (char)(0xf0)
#define REG_EX_DOT       (char)(0xf1)
#define REG_EX_END       (char)(0xf2)
#define REG_EX_GROUP     (char)(0xf3)
#define REG_EX_OK        (char)(0xf4)
#define REG_EX_OR        (char)(0xf5)
#define REG_EX_OR_END    (char)(0xf6)
#define REG_EX_OR_FAST   (char)(0xf7)
#define REG_EX_OR_NOT    (char)(0xf8)
#define REG_EX_RANGE     (char)(0xf9)
// Was 0xda, the only value outside the contiguous sequence; 0xfa is the
// slot left free between REG_EX_RANGE and REG_EX_SPACE.
#define REG_EX_RETURN    (char)(0xfa)
#define REG_EX_SPACE     (char)(0xfb)
#define REG_EX_SPACE_NOT (char)(0xfc)
#define REG_EX_START     (char)(0xfd)
#define REG_EX_WORD      (char)(0xfe)
#define REG_EX_WORD_NOT  (char)(0xff)

// Layout of the mFlagAndLink member of a state: the top bit is the flag
// tested by RegEx_Or_Handle, the low 15 bits are the state index returned by
// RegEx_Link_Get.
#define REG_EX_FLAG_OR   0x8000
#define REG_EX_LINK_MASK 0x7fff
34 
35 // Macros
37 
// Initialize the RegEx instance T with the state array S and the counter
// array C. The state count is computed with sizeof, so S must be an actual
// array (not a pointer to its first element).
#define REG_EX_CREATE(T,S,C) RegEx_Create( (T), (S), (C), sizeof(S) / sizeof((S)[0]) )

// State initializers, in member order { mC, mMin, mMax, mFlagAndLink }.
//  C  Literal character to match
//  I  Value stored in mMin
//  A  Value stored in mMax
//  B  First character of a range (stored in mMin)
//  E  Last character of a range (stored in mMax)
//  L  Value stored in mFlagAndLink
#define REG_EX_STATE(C,I,A)           { (C)             , (I), (A), 0   }
#define REG_EX_STATE_DIGIT(I,A)       { REG_EX_DIGIT    , (I), (A), 0   }
#define REG_EX_STATE_DIGIT_NOT(I,A)   { REG_EX_DIGIT_NOT, (I), (A), 0   }
#define REG_EX_STATE_DOT(I,A)         { REG_EX_DOT      , (I), (A), 0   }
#define REG_EX_STATE_END              { REG_EX_END      , 0  , 0  , 0   }
#define REG_EX_STATE_GROUP(I,A,L)     { REG_EX_GROUP    , (I), (A), (L) }
#define REG_EX_STATE_OK               { REG_EX_OK       , 0  , 0  , 0   }
#define REG_EX_STATE_OR(I,A,L)        { REG_EX_OR       , (I), (A), (L) }
#define REG_EX_STATE_OR_END           { REG_EX_OR_END   , 0  , 0  , 0   }
#define REG_EX_STATE_OR_FAST(I,A,L)   { REG_EX_OR_FAST  , (I), (A), (L) }
#define REG_EX_STATE_OR_NOT(I,A,L)    { REG_EX_OR_NOT   , (I), (A), (L) }
#define REG_EX_STATE_RANGE(B,E)       { REG_EX_RANGE    , (B), (E), 0   }
#define REG_EX_STATE_RETURN(L)        { REG_EX_RETURN   , 0  , 0  , (L) }
#define REG_EX_STATE_SPACE(I,A)       { REG_EX_SPACE    , (I), (A), 0   }
#define REG_EX_STATE_SPACE_NOT(I,A)   { REG_EX_SPACE_NOT, (I), (A), 0   }
#define REG_EX_STATE_START            { REG_EX_START    , 0  , 0  , 0   }
#define REG_EX_STATE_WORD(I,A)        { REG_EX_WORD     , (I), (A), 0   }
#define REG_EX_STATE_WORD_NOT(I,A)    { REG_EX_WORD_NOT , (I), (A), 0   }
58 
59 
60 // Data types
62 
// State of the state machine for a regular expression
typedef struct
{
    // Literal character to match, or one of the REG_EX_... special values
    char mC;

    // Minimum and maximum repeat count of the state. For a REG_EX_RANGE
    // state, they hold the first and the last character of the range instead.
    unsigned char mMin;
    unsigned char mMax;

    // REG_EX_FLAG_OR bit combined with a state index (see REG_EX_LINK_MASK)
    unsigned short mFlagAndLink;
}
RegEx_State;
82 
90 typedef struct
91 {
92 
93 // Internal
94 
95  unsigned char * mCounters;
96 
97  OPEN_NET_CONSTANT RegEx_State * mStates;
98  unsigned short mStateCount;
99 
100  unsigned short mThreads [15];
101  unsigned char mThreadCount ;
102  unsigned char mThreadCurrent;
103 
104  unsigned char mRunning;
105 
106 }
107 RegEx;
108 
109 #ifndef _OPEN_NET_NO_FUNCTION_
110 
111 // Internal
113 
// Tell whether a character can be fed to the state machine.
//
// aInput  The character to test
//
// Return  1 for the codes 9, 10, 13 and 32 to 126 (inclusive), 0 otherwise
int RegEx_IsCharValid(char aInput)
{
    // Accepted control codes
    if ((9 == aInput) || (10 == aInput) || (13 == aInput))
    {
        return 1;
    }

    // Accepted contiguous range
    return (32 <= aInput) && (126 >= aInput);
}
120 
121 unsigned short RegEx_StateIndex_Get(RegEx * aThis)
122 {
123  return aThis->mThreads[aThis->mThreadCurrent];
124 }
125 
126 void RegEx_StateIndex_Set(RegEx * aThis, unsigned short aState)
127 {
128  aThis->mThreads[aThis->mThreadCurrent] = aState;
129 }
130 
131 // --------------------------------------------------------------------------
132 
133 unsigned short RegEx_Link_Get(RegEx * aThis)
134 {
135  return (aThis->mStates[RegEx_StateIndex_Get(aThis)].mFlagAndLink & REG_EX_LINK_MASK);
136 }
137 
138 void RegEx_Thread_Create(RegEx * aThis, unsigned short aState)
139 {
140  aThis->mThreads[aThis->mThreadCount] = aState;
141  aThis->mThreadCount++;
142 }
143 
144 void RegEx_Thread_Delete(RegEx * aThis)
145 {
146  aThis->mCounters[RegEx_StateIndex_Get(aThis)] = 0;
147  aThis->mThreadCount--;
148 
149  for (unsigned int i = aThis->mThreadCurrent; i < aThis->mThreadCount; i++)
150  {
151  aThis->mThreads[i] = aThis->mThreads[i + 1];
152  }
153 
154  aThis->mThreadCurrent--;
155 }
156 
157 // --------------------------------------------------------------------------
158 
159 void RegEx_Or_Handle(RegEx * aThis)
160 {
161  while (0 != (aThis->mStates[aThis->mThreads[aThis->mThreadCurrent]].mFlagAndLink & REG_EX_FLAG_OR))
162  {
163  RegEx_Thread_Create(aThis, aThis->mThreads[aThis->mThreadCurrent]);
164  aThis->mThreads[aThis->mThreadCurrent]++;
165  }
166 }
167 
168 void RegEx_Reset(RegEx * aThis)
169 {
170  aThis->mThreadCount = 0;
171  aThis->mThreadCurrent = 0;
172 
173  for (unsigned int i = 0; i < aThis->mStateCount; i++)
174  {
175  aThis->mCounters[i] = 0;
176  }
177 
178  RegEx_Thread_Create(aThis, 0);
179 
180  RegEx_Or_Handle(aThis);
181 }
182 
183 void RegEx_StateIndex_Next(RegEx * aThis)
184 {
185  aThis->mCounters[RegEx_StateIndex_Get(aThis)] = 0;
186 
187  if (0 != (aThis->mStates[aThis->mThreads[aThis->mThreadCurrent]].mFlagAndLink & REG_EX_FLAG_OR))
188  {
189  aThis->mThreads[aThis->mThreadCurrent]++;
190  }
191 
192  aThis->mThreads[aThis->mThreadCurrent]++;
193 
194  RegEx_Or_Handle(aThis);
195 }
196 
197 // --------------------------------------------------------------------------
198 
199 void RegEx_Counter_Inc(RegEx * aThis)
200 {
201  unsigned short lState = RegEx_StateIndex_Get(aThis);
202 
203  aThis->mCounters[lState]++;
204  if (aThis->mStates[lState].mMax <= aThis->mCounters[lState])
205  {
206  RegEx_StateIndex_Next(aThis);
207  }
208 }
209 
210 int RegEx_Repeat_Min(RegEx * aThis)
211 {
212  unsigned short lState = RegEx_StateIndex_Get(aThis);
213 
214  if (aThis->mStates[lState].mMin <= aThis->mCounters[lState])
215  {
216  RegEx_StateIndex_Next(aThis);
217  return 1;
218  }
219 
220  RegEx_Thread_Delete(aThis);
221  return 0;
222 }
223 
224 void RegEx_Start(RegEx * aThis)
225 {
226  RegEx_Reset(aThis);
227 
228  if (REG_EX_START == aThis->mStates[RegEx_StateIndex_Get(aThis)].mC)
229  {
230  RegEx_StateIndex_Next(aThis);
231  }
232 
233  aThis->mRunning = 0;
234 }
235 
236 // --------------------------------------------------------------------------
237 
238 void RegEx_OK(RegEx * aThis)
239 {
240  while (0 < aThis->mThreadCount)
241  {
242  aThis->mThreadCurrent = 0;
243 
244  RegEx_Thread_Delete(aThis);
245  }
246 
247  RegEx_Start(aThis);
248 }
249 
250 // ===== RegEx_Execute_... ==================================================
251 
252 int RegEx_Execute_C(RegEx * aThis, char aInput)
253 {
254  if (aThis->mStates[RegEx_StateIndex_Get(aThis)].mC == aInput)
255  {
256  RegEx_Counter_Inc(aThis);
257  return 0;
258  }
259 
260  return RegEx_Repeat_Min(aThis);
261 }
262 
263 int RegEx_Execute_Digit(RegEx * aThis, char aInput)
264 {
265  if (('0' <= aInput) && ('9' >= aInput))
266  {
267  RegEx_Counter_Inc(aThis);
268  return 0;
269  }
270 
271  return RegEx_Repeat_Min(aThis);
272 }
273 
274 int RegEx_Execute_Digit_Not(RegEx * aThis, char aInput)
275 {
276  if (('0' > aInput) || ('9' < aInput))
277  {
278  RegEx_Counter_Inc(aThis);
279  return 0;
280  }
281 
282  return RegEx_Repeat_Min(aThis);
283 }
284 
285 void RegEx_Execute_Group(RegEx * aThis)
286 {
287  unsigned short lState = RegEx_StateIndex_Get(aThis);
288 
289  if (aThis->mStates[lState].mMin <= aThis->mCounters[lState])
290  {
291  RegEx_Thread_Create(aThis, lState + 1);
292  }
293 
294  RegEx_StateIndex_Set(aThis, RegEx_Link_Get(aThis));
295 
296  RegEx_Or_Handle(aThis);
297 }
298 
299 void RegEx_Execute_Or(RegEx * aThis)
300 {
301  unsigned short lState = RegEx_StateIndex_Get(aThis);
302 
303  if (aThis->mStates[lState].mMin <= aThis->mCounters[lState])
304  {
305  RegEx_Thread_Create(aThis, lState + 1);
306  }
307 
308  unsigned short lLink = RegEx_Link_Get(aThis);
309 
310  RegEx_StateIndex_Set(aThis, lLink);
311 
312  for (;;)
313  {
314  lLink += 2;
315 
316  if (REG_EX_OR_END == aThis->mStates[lLink].mC)
317  {
318  break;
319  }
320 
321  RegEx_Thread_Create(aThis, lLink);
322  }
323 }
324 
325 int RegEx_Execute_Or_Fast(RegEx * aThis, char aInput)
326 {
327  unsigned short lLink = RegEx_Link_Get(aThis);
328 
329  for (;;)
330  {
331  switch (aThis->mStates[lLink].mC)
332  {
333  case REG_EX_OR_END:
334  return RegEx_Repeat_Min(aThis);
335 
336  case REG_EX_RANGE:
337  if ((aThis->mStates[lLink].mMin <= aInput) && (aThis->mStates[lLink].mMax >= aInput))
338  {
339  RegEx_Counter_Inc(aThis);
340  return 0;
341  }
342  break;
343 
344  default:
345  if (aThis->mStates[lLink].mC == aInput)
346  {
347  RegEx_Counter_Inc(aThis);
348  return 0;
349  }
350  }
351 
352  lLink++;
353  }
354 }
355 
356 int RegEx_Execute_Or_Not(RegEx * aThis, char aInput)
357 {
358  unsigned short lLink = RegEx_Link_Get(aThis);
359 
360  for (;;)
361  {
362  switch (aThis->mStates[lLink].mC)
363  {
364  case REG_EX_OR_END:
365  RegEx_Counter_Inc(aThis);
366  return 0;
367 
368  case REG_EX_RANGE:
369  if ((aThis->mStates[lLink].mMin <= aInput) && (aThis->mStates[lLink].mMax >= aInput))
370  {
371  return RegEx_Repeat_Min(aThis);
372  }
373  break;
374 
375  default :
376  if (aThis->mStates[lLink].mC == aInput)
377  {
378  return RegEx_Repeat_Min(aThis);
379  }
380  }
381 
382  lLink++;
383  }
384 }
385 
386 void RegEx_Execute_Range(RegEx * aThis, char aInput)
387 {
388  unsigned short lState = RegEx_StateIndex_Get(aThis);
389 
390  if ((aThis->mStates[lState].mMin <= aInput) && (aThis->mStates[lState].mMax >= aInput))
391  {
392  RegEx_StateIndex_Next(aThis);
393  }
394  else
395  {
396  RegEx_Thread_Delete(aThis);
397  }
398 }
399 
400 void RegEx_Execute_Return(RegEx * aThis)
401 {
402  unsigned short lLink = RegEx_Link_Get(aThis);
403 
404  RegEx_StateIndex_Set(aThis, lLink);
405 
406  RegEx_Counter_Inc(aThis);
407 }
408 
409 int RegEx_Execute_Space(RegEx * aThis, char aInput)
410 {
411  switch (aInput)
412  {
413  case ' ' :
414  case '\n':
415  case '\r':
416  case '\t':
417  RegEx_Counter_Inc(aThis);
418  return 0;
419  }
420 
421  return RegEx_Repeat_Min(aThis);
422 }
423 
424 int RegEx_Execute_Space_Not(RegEx * aThis, char aInput)
425 {
426  switch (aInput)
427  {
428  case ' ':
429  case '\n':
430  case '\r':
431  case '\t':
432  return RegEx_Repeat_Min(aThis);
433  }
434 
435  RegEx_Counter_Inc(aThis);
436  return 0;
437 }
438 
439 int RegEx_Execute_Word(RegEx * aThis, char aInput)
440 {
441  if ( (('0' <= aInput) && ('9' >= aInput))
442  || (('a' <= aInput) && ('z' >= aInput))
443  || (('A' <= aInput) && ('Z' >= aInput))
444  || ( '_' == aInput))
445  {
446  RegEx_Counter_Inc(aThis);
447  return 0;
448  }
449 
450  return RegEx_Repeat_Min(aThis);
451 }
452 
453 int RegEx_Execute_Word_Not(RegEx * aThis, char aInput)
454 {
455  if ( (('0' <= aInput) && ('9' >= aInput))
456  || (('a' <= aInput) && ('z' >= aInput))
457  || (('A' <= aInput) && ('Z' >= aInput))
458  || ( '_' == aInput))
459  {
460  return RegEx_Repeat_Min(aThis);
461  }
462 
463  RegEx_Counter_Inc(aThis);
464  return 0;
465 }
466 
467 // Functions
469 
470 void RegEx_Create(RegEx * aThis, OPEN_NET_CONSTANT RegEx_State * aStates, unsigned char * aCounters, unsigned int aCount)
471 {
472  aThis->mCounters = aCounters;
473  aThis->mStateCount = aCount ;
474  aThis->mStates = aStates ;
475 
476  RegEx_Start(aThis);
477 }
478 
479 int RegEx_End(RegEx * aThis)
480 {
481  while (0 < aThis->mThreadCount)
482  {
483  aThis->mThreadCurrent = 0;
484 
485  switch (aThis->mStates[RegEx_StateIndex_Get(aThis)].mC)
486  {
487  case REG_EX_DIGIT_NOT:
488  case REG_EX_DOT :
489  case REG_EX_OR_NOT :
490  case REG_EX_SPACE_NOT:
491  case REG_EX_WORD_NOT :
492  RegEx_Repeat_Min(aThis);
493  break;
494 
495  case REG_EX_END:
496  case REG_EX_OK :
497  RegEx_OK(aThis);
498  return 1;
499 
500  case REG_EX_GROUP : RegEx_Execute_Group (aThis); break;
501  case REG_EX_OR : RegEx_Execute_Or (aThis); break;
502  case REG_EX_RETURN: RegEx_Execute_Return(aThis); break;
503 
504  case REG_EX_DIGIT : RegEx_Execute_Digit (aThis, REG_EX_END); break;
505  case REG_EX_OR_FAST: RegEx_Execute_Or_Fast(aThis, REG_EX_END); break;
506  case REG_EX_SPACE : RegEx_Execute_Space (aThis, REG_EX_END); break;
507  case REG_EX_WORD : RegEx_Execute_Word (aThis, REG_EX_END); break;
508  default : RegEx_Execute_C (aThis, REG_EX_END); break;
509  }
510  }
511 
512  RegEx_Start(aThis);
513  return 0;
514 }
515 
516 int RegEx_Execute(RegEx * aThis, char aInput)
517 {
518  if (RegEx_IsCharValid(aInput))
519  {
520  aThis->mRunning = 1;
521  aThis->mThreadCurrent = 0;
522 
523  while (aThis->mThreadCurrent < aThis->mThreadCount)
524  {
525  int lContinue = 1;
526 
527  do
528  {
529  switch (aThis->mStates[RegEx_StateIndex_Get(aThis)].mC)
530  {
531  case REG_EX_DIGIT : lContinue = RegEx_Execute_Digit (aThis, aInput); break;
532  case REG_EX_DIGIT_NOT: lContinue = RegEx_Execute_Digit_Not(aThis, aInput); break;
533  case REG_EX_OR_FAST : lContinue = RegEx_Execute_Or_Fast (aThis, aInput); break;
534  case REG_EX_OR_NOT : lContinue = RegEx_Execute_Or_Not (aThis, aInput); break;
535  case REG_EX_SPACE : lContinue = RegEx_Execute_Space (aThis, aInput); break;
536  case REG_EX_SPACE_NOT: lContinue = RegEx_Execute_Space_Not(aThis, aInput); break;
537  case REG_EX_WORD : lContinue = RegEx_Execute_Word (aThis, aInput); break;
538  case REG_EX_WORD_NOT : lContinue = RegEx_Execute_Word_Not (aThis, aInput); break;
539 
540  case REG_EX_DOT : lContinue = 0; RegEx_Counter_Inc (aThis); break;
541  case REG_EX_END : lContinue = 0; RegEx_Thread_Delete (aThis); break;
542  case REG_EX_GROUP : lContinue = 1; RegEx_Execute_Group (aThis); break;
543  case REG_EX_OR : lContinue = 1; RegEx_Execute_Or (aThis); break;
544  case REG_EX_RETURN: lContinue = 1; RegEx_Execute_Return(aThis); break;
545 
546  case REG_EX_OK: RegEx_OK(aThis); return 1;
547 
548  case REG_EX_RANGE: lContinue = 0; RegEx_Execute_Range(aThis, aInput); break;
549 
550  default: lContinue = RegEx_Execute_C(aThis, aInput); break;
551  }
552  }
553  while (lContinue);
554 
555  aThis->mThreadCurrent++;
556  }
557 
558  if (0 == aThis->mThreadCount)
559  {
560  RegEx_Reset(aThis);
561  }
562  }
563  else
564  {
565  if (aThis->mRunning)
566  {
567  return RegEx_End(aThis);
568  }
569  }
570 
571  return 0;
572 }
573 
574 #endif // ! _OPEN_NET_NO_FUNCTION_
État de la machine à états pour une expression régulière
Definition: RegEx.h:72
Contexte de la machine à états pour une expression régulière.
Definition: RegEx.h:90