Ruby 3.2.5p208 (2024-07-26 revision 31d0f1a2e7dbfb60731d1f05b868e1d578cda493)
regexec.c
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
/* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is switched off for
 * Ruby builds and switched on for every other build. */
#ifdef RUBY
# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#else
# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#endif

/* Unless the build already picked a value, USE_TOKEN_THREADED_VM defaults
 * to 1 when __GNUC__ is defined and to 0 otherwise. */
#ifndef USE_TOKEN_THREADED_VM
# ifdef __GNUC__
#  define USE_TOKEN_THREADED_VM 1
# else
#  define USE_TOKEN_THREADED_VM 0
# endif
#endif
46
47#ifdef RUBY
48# define ENC_DUMMY_FLAG (1<<24)
49static inline int
50rb_enc_asciicompat(OnigEncoding enc)
51{
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53}
/*
 * Ruby-specific replacement for ONIGENC_IS_MBC_ASCII_WORD.
 *
 * For encodings accepted by rb_enc_asciicompat() the first byte at `s` is
 * tested directly (alphanumeric or '_'); every other encoding decodes the
 * code point and asks onigenc_ascii_is_code_ctype() for ONIGENC_CTYPE_WORD.
 *
 * The `s` argument is parenthesised before being dereferenced so that an
 * expression argument such as `p + 1` is handled correctly.
 */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
    (rb_enc_asciicompat(enc) ? (ISALNUM(*(s)) || *(s)=='_') : \
   onigenc_ascii_is_code_ctype( \
        ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59#endif /* RUBY */
60
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p decodes to 13 (CR) and the character that
 * follows it decodes to 10 (LF). */
# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
/* Newline test that takes the match options into account; simply forwards
 * every argument to is_mbc_newline_ex(). */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67static int
68is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
70{
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
79 }
80 else
81 return 1;
82 }
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
92 }
93 }
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96 }
97}
98#else /* USE_CRNL_AS_LINE_TERMINATOR */
/* Without USE_CRNL_AS_LINE_TERMINATOR the extended newline test is plain
 * ONIGENC_IS_MBC_NEWLINE; start, option and check_prev are ignored. */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
/*
 * Free a whole capture-history subtree: history_tree_clear() releases the
 * children and the child array, then the node itself is freed.
 */
static void
history_tree_free(OnigCaptureTreeNode* node)
{
  history_tree_clear(node);
  xfree(node);
}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
/*
 * Return the capture-history root stored in `region` (NULL when none has
 * been attached).  The pointer is handed out as is; nothing is copied.
 */
extern OnigCaptureTreeNode*
onig_get_capture_tree(OnigRegion* region)
{
  return region->history_root;
}
232#endif /* USE_CAPTURE_HISTORY */
233
234#ifdef USE_CACHE_MATCH_OPT
235
/*
 * Count the jump-like opcodes of `reg` so that match-cache memory can be
 * sized.
 *
 * The compiled bytecode is walked once from reg->p to reg->p + reg->used.
 * On return:
 *   *num        - number of cache points; points between an OP_REPEAT and
 *                 its OP_REPEAT_INC are multiplied out by the repeat range
 *   *table_size - number of opcodes that get a cache point of their own
 *
 * If the pattern contains an opcode this pass does not handle (the
 * OP_BACKREF*, OP_*_POS*, OP_*STOP_BT, OP_*LOOK_BEHIND*, OP_*ABSENT*,
 * OP_CALL/OP_RETURN, OP_CONDITION and OP_STATE_CHECK* families, the
 * OP_REPEAT_INC*_SG opcodes, or a nested / unmatched repeat), *num is set
 * to NUM_CACHE_OPCODE_FAIL and 0 is returned.
 *
 * Side effect: base_num and inner_num of the visited entries of
 * reg->repeat_range[] are overwritten.
 *
 * Returns 0, or ONIGERR_UNDEFINED_BYTECODE when an unknown opcode is met.
 */
static OnigPosition
count_num_cache_opcode(regex_t* reg, long* num, long* table_size)
{
  UChar* p = reg->p;
  UChar* pend = p + reg->used;
  LengthType len;
  MemNumType mem;
  /* id of the OP_REPEAT that is currently open, -1 when none is */
  MemNumType current_mem = -1;
  /* value of *num right after that OP_REPEAT was processed */
  long current_mem_num = 0;
  OnigEncoding enc = reg->enc;

  *num = 0;
  *table_size = 0;

  while (p < pend) {
    /* each case skips the operand bytes of its opcode */
    switch (*p++) {
      case OP_FINISH:
      case OP_END:
        break;

      case OP_EXACT1: p++; break;
      case OP_EXACT2: p += 2; break;
      case OP_EXACT3: p += 3; break;
      case OP_EXACT4: p += 4; break;
      case OP_EXACT5: p += 5; break;
      case OP_EXACTN:
        GET_LENGTH_INC(len, p); p += len; break;
      case OP_EXACTMB2N1: p += 2; break;
      case OP_EXACTMB2N2: p += 4; break;
      case OP_EXACTMB2N3: p += 6; break;
      case OP_EXACTMB2N:
        GET_LENGTH_INC(len, p); p += len * 2; break;
      case OP_EXACTMB3N:
        GET_LENGTH_INC(len, p); p += len * 3; break;
      case OP_EXACTMBN:
        {
          int mb_len;
          GET_LENGTH_INC(mb_len, p);
          GET_LENGTH_INC(len, p);
          p += mb_len * len;
        }
        break;

      case OP_EXACT1_IC:
        len = enclen(enc, p, pend); p += len; break;
      case OP_EXACTN_IC:
        GET_LENGTH_INC(len, p); p += len; break;

      case OP_CCLASS:
      case OP_CCLASS_NOT:
        p += SIZE_BITSET; break;
      case OP_CCLASS_MB:
      case OP_CCLASS_MB_NOT:
        GET_LENGTH_INC(len, p); p += len; break;
      case OP_CCLASS_MIX:
      case OP_CCLASS_MIX_NOT:
        p += SIZE_BITSET;
        GET_LENGTH_INC(len, p);
        p += len;
        break;

      case OP_ANYCHAR:
      case OP_ANYCHAR_ML:
        break;
      /* the *_STAR opcodes each count as one cache point */
      case OP_ANYCHAR_STAR:
      case OP_ANYCHAR_ML_STAR:
        *num += 1; *table_size += 1; break;
      case OP_ANYCHAR_STAR_PEEK_NEXT:
      case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
        p++; *num += 1; *table_size += 1; break;

      case OP_WORD:
      case OP_NOT_WORD:
      case OP_WORD_BOUND:
      case OP_NOT_WORD_BOUND:
      case OP_WORD_BEGIN:
      case OP_WORD_END:
        break;

      case OP_ASCII_WORD:
      case OP_NOT_ASCII_WORD:
      case OP_ASCII_WORD_BOUND:
      case OP_NOT_ASCII_WORD_BOUND:
      case OP_ASCII_WORD_BEGIN:
      case OP_ASCII_WORD_END:
        break;

      case OP_BEGIN_BUF:
      case OP_END_BUF:
      case OP_BEGIN_LINE:
      case OP_END_LINE:
      case OP_SEMI_END_BUF:
      case OP_BEGIN_POSITION:
        break;

      case OP_BACKREF1:
      case OP_BACKREF2:
      case OP_BACKREFN:
      case OP_BACKREFN_IC:
      case OP_BACKREF_MULTI:
      case OP_BACKREF_MULTI_IC:
      case OP_BACKREF_WITH_LEVEL:
        goto fail;

      case OP_MEMORY_START:
      case OP_MEMORY_START_PUSH:
      case OP_MEMORY_END_PUSH:
      case OP_MEMORY_END_PUSH_REC:
      case OP_MEMORY_END:
      case OP_MEMORY_END_REC:
        p += SIZE_MEMNUM; break;

      case OP_KEEP:
        break;

      case OP_FAIL:
        break;
      case OP_JUMP:
        p += SIZE_RELADDR;
        break;
      case OP_PUSH:
        p += SIZE_RELADDR;
        *num += 1;
        *table_size += 1;
        break;
      case OP_POP:
        break;
      case OP_PUSH_OR_JUMP_EXACT1:
      case OP_PUSH_IF_PEEK_NEXT:
        p += SIZE_RELADDR + 1; *num += 1; *table_size += 1; break;
      case OP_REPEAT:
      case OP_REPEAT_NG:
        if (current_mem != -1) {
          // A nested OP_REPEAT is not yet supported.
          goto fail;
        }
        GET_MEMNUM_INC(mem, p);
        p += SIZE_RELADDR;
        /* a repeat whose lower bound is 0 is itself a cache point */
        if (reg->repeat_range[mem].lower == 0) {
          *num += 1;
          *table_size += 1;
        }
        reg->repeat_range[mem].base_num = *num;
        current_mem = mem;
        current_mem_num = *num;
        break;
      case OP_REPEAT_INC:
      case OP_REPEAT_INC_NG:
        GET_MEMNUM_INC(mem, p);
        if (mem != current_mem) {
          // A lone or invalid OP_REPEAT_INC is found.
          goto fail;
        }
        {
          /* cache points found between OP_REPEAT and this opcode */
          long inner_num = *num - current_mem_num;
          OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
          repeat_range->inner_num = inner_num;
          /* Replace the single inner count by one copy per mandatory
           * iteration plus (inner_num + 1) per optional iteration; an upper
           * bound of 0x7fffffff counts as one optional iteration
           * (presumably the "infinite" marker -- confirm against regint.h). */
          *num -= inner_num;
          *num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
          if (repeat_range->lower < repeat_range->upper) {
            *table_size += 1;
          }
          current_mem = -1;
          current_mem_num = 0;
        }
        break;
      case OP_REPEAT_INC_SG:
      case OP_REPEAT_INC_NG_SG:
        // TODO: Support nested OP_REPEAT.
        goto fail;
      case OP_NULL_CHECK_START:
      case OP_NULL_CHECK_END:
      case OP_NULL_CHECK_END_MEMST:
      case OP_NULL_CHECK_END_MEMST_PUSH:
        p += SIZE_MEMNUM; break;

      case OP_PUSH_POS:
      case OP_POP_POS:
      case OP_PUSH_POS_NOT:
      case OP_FAIL_POS:
      case OP_PUSH_STOP_BT:
      case OP_POP_STOP_BT:
      case OP_LOOK_BEHIND:
      case OP_PUSH_LOOK_BEHIND_NOT:
      case OP_FAIL_LOOK_BEHIND_NOT:
      case OP_PUSH_ABSENT_POS:
      case OP_ABSENT_END:
      case OP_ABSENT:
        goto fail;

      case OP_CALL:
      case OP_RETURN:
        goto fail;

      case OP_CONDITION:
        goto fail;

      case OP_STATE_CHECK_PUSH:
      case OP_STATE_CHECK_PUSH_OR_JUMP:
      case OP_STATE_CHECK:
      case OP_STATE_CHECK_ANYCHAR_STAR:
      case OP_STATE_CHECK_ANYCHAR_ML_STAR:
        goto fail;

      case OP_SET_OPTION_PUSH:
      case OP_SET_OPTION:
        p += SIZE_OPTION;
        break;

      default:
        goto bytecode_error;
    }
  }

  return 0;

  /* unsupported pattern: not an error, the cache is simply not usable */
fail:
  *num = NUM_CACHE_OPCODE_FAIL;
  return 0;

bytecode_error:
  return ONIGERR_UNDEFINED_BYTECODE;
}
460
/*
 * Fill `table` with one OnigCacheIndex entry per cache-point opcode of
 * `reg`, in the order the opcodes appear in the bytecode (so entry
 * addresses are increasing).
 *
 * Each entry records:
 *   addr         - address of the opcode
 *   num          - running cache-point number, relative to the value it had
 *                  when the enclosing OP_REPEAT was processed
 *   outer_repeat - id of the enclosing repeat, or -1 outside of a repeat
 *
 * The walk mirrors count_num_cache_opcode(); the opcodes that function
 * rejects are reported here as ONIGERR_UNEXPECTED_BYTECODE.  The caller
 * must supply a table that is large enough (presumably sized from the
 * table_size reported by count_num_cache_opcode() -- confirm at the call
 * site).
 *
 * Returns 0 on success, ONIGERR_UNEXPECTED_BYTECODE for an unsupported
 * opcode and ONIGERR_UNDEFINED_BYTECODE for an unknown one.
 */
static OnigPosition
init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
{
  UChar* pbegin;
  UChar* p = reg->p;
  UChar* pend = p + reg->used;
  LengthType len;
  MemNumType mem;
  /* id of the OP_REPEAT that is currently open, -1 when none is */
  MemNumType current_mem = -1;
  long num = 0;
  /* value of num right after that OP_REPEAT was processed */
  long current_mem_num = 0;
  OnigEncoding enc = reg->enc;

  while (p < pend) {
    /* remember where the opcode starts; that address keys the table */
    pbegin = p;
    switch (*p++) {
      case OP_FINISH:
      case OP_END:
        break;

      case OP_EXACT1: p++; break;
      case OP_EXACT2: p += 2; break;
      case OP_EXACT3: p += 3; break;
      case OP_EXACT4: p += 4; break;
      case OP_EXACT5: p += 5; break;
      case OP_EXACTN:
        GET_LENGTH_INC(len, p); p += len; break;
      case OP_EXACTMB2N1: p += 2; break;
      case OP_EXACTMB2N2: p += 4; break;
      case OP_EXACTMB2N3: p += 6; break;
      case OP_EXACTMB2N:
        GET_LENGTH_INC(len, p); p += len * 2; break;
      case OP_EXACTMB3N:
        GET_LENGTH_INC(len, p); p += len * 3; break;
      case OP_EXACTMBN:
        {
          int mb_len;
          GET_LENGTH_INC(mb_len, p);
          GET_LENGTH_INC(len, p);
          p += mb_len * len;
        }
        break;

      case OP_EXACT1_IC:
        len = enclen(enc, p, pend); p += len; break;
      case OP_EXACTN_IC:
        GET_LENGTH_INC(len, p); p += len; break;

      case OP_CCLASS:
      case OP_CCLASS_NOT:
        p += SIZE_BITSET; break;
      case OP_CCLASS_MB:
      case OP_CCLASS_MB_NOT:
        GET_LENGTH_INC(len, p); p += len; break;
      case OP_CCLASS_MIX:
      case OP_CCLASS_MIX_NOT:
        p += SIZE_BITSET;
        GET_LENGTH_INC(len, p);
        p += len;
        break;

      case OP_ANYCHAR:
      case OP_ANYCHAR_ML:
        break;
      case OP_ANYCHAR_STAR:
      case OP_ANYCHAR_ML_STAR:
        table->addr = pbegin;
        table->num = num - current_mem_num;
        table->outer_repeat = current_mem;
        num++;
        table++;
        break;
      case OP_ANYCHAR_STAR_PEEK_NEXT:
      case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
        p++;
        table->addr = pbegin;
        table->num = num - current_mem_num;
        table->outer_repeat = current_mem;
        num++;
        table++;
        break;

      case OP_WORD:
      case OP_NOT_WORD:
      case OP_WORD_BOUND:
      case OP_NOT_WORD_BOUND:
      case OP_WORD_BEGIN:
      case OP_WORD_END:
        break;

      case OP_ASCII_WORD:
      case OP_NOT_ASCII_WORD:
      case OP_ASCII_WORD_BOUND:
      case OP_NOT_ASCII_WORD_BOUND:
      case OP_ASCII_WORD_BEGIN:
      case OP_ASCII_WORD_END:
        break;

      case OP_BEGIN_BUF:
      case OP_END_BUF:
      case OP_BEGIN_LINE:
      case OP_END_LINE:
      case OP_SEMI_END_BUF:
      case OP_BEGIN_POSITION:
        break;

      case OP_BACKREF1:
      case OP_BACKREF2:
      case OP_BACKREFN:
      case OP_BACKREFN_IC:
      case OP_BACKREF_MULTI:
      case OP_BACKREF_MULTI_IC:
      case OP_BACKREF_WITH_LEVEL:
        goto unexpected_bytecode_error;

      case OP_MEMORY_START:
      case OP_MEMORY_START_PUSH:
      case OP_MEMORY_END_PUSH:
      case OP_MEMORY_END_PUSH_REC:
      case OP_MEMORY_END:
      case OP_MEMORY_END_REC:
        p += SIZE_MEMNUM; break;

      case OP_KEEP:
        break;

      case OP_FAIL:
        break;
      case OP_JUMP:
        p += SIZE_RELADDR;
        break;
      case OP_PUSH:
        p += SIZE_RELADDR;
        table->addr = pbegin;
        table->num = num - current_mem_num;
        table->outer_repeat = current_mem;
        num++;
        table++;
        break;
      case OP_POP:
        break;
      case OP_PUSH_OR_JUMP_EXACT1:
      case OP_PUSH_IF_PEEK_NEXT:
        p += SIZE_RELADDR + 1;
        table->addr = pbegin;
        table->num = num - current_mem_num;
        table->outer_repeat = current_mem;
        num++;
        table++;
        break;
      case OP_REPEAT:
      case OP_REPEAT_NG:
        GET_MEMNUM_INC(mem, p);
        p += SIZE_RELADDR;
        /* a repeat with lower bound 0 gets an entry of its own, recorded
         * as lying outside of any repeat */
        if (reg->repeat_range[mem].lower == 0) {
          table->addr = pbegin;
          table->num = num - current_mem_num;
          table->outer_repeat = -1;
          num++;
          table++;
        }
        current_mem = mem;
        current_mem_num = num;
        break;
      case OP_REPEAT_INC:
      case OP_REPEAT_INC_NG:
        GET_MEMNUM_INC(mem, p);
        {
          /* cache points found between OP_REPEAT and this opcode */
          long inner_num = num - current_mem_num;
          OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
          if (repeat_range->lower < repeat_range->upper) {
            table->addr = pbegin;
            table->num = num - current_mem_num;
            table->outer_repeat = mem;
            table++;
          }
          /* same expansion of the inner count as in count_num_cache_opcode() */
          num -= inner_num;
          num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
          current_mem = -1;
          current_mem_num = 0;
        }
        break;
      case OP_REPEAT_INC_SG:
      case OP_REPEAT_INC_NG_SG:
        // TODO: support OP_REPEAT opcodes.
        goto unexpected_bytecode_error;
      case OP_NULL_CHECK_START:
      case OP_NULL_CHECK_END:
      case OP_NULL_CHECK_END_MEMST:
      case OP_NULL_CHECK_END_MEMST_PUSH:
        p += SIZE_MEMNUM; break;

      case OP_PUSH_POS:
      case OP_POP_POS:
      case OP_PUSH_POS_NOT:
      case OP_FAIL_POS:
      case OP_PUSH_STOP_BT:
      case OP_POP_STOP_BT:
      case OP_LOOK_BEHIND:
      case OP_PUSH_LOOK_BEHIND_NOT:
      case OP_FAIL_LOOK_BEHIND_NOT:
      case OP_PUSH_ABSENT_POS:
      case OP_ABSENT_END:
      case OP_ABSENT:
        goto unexpected_bytecode_error;

      case OP_CALL:
      case OP_RETURN:
        goto unexpected_bytecode_error;

      case OP_CONDITION:
        goto unexpected_bytecode_error;

      case OP_STATE_CHECK_PUSH:
      case OP_STATE_CHECK_PUSH_OR_JUMP:
      case OP_STATE_CHECK:
      case OP_STATE_CHECK_ANYCHAR_STAR:
      case OP_STATE_CHECK_ANYCHAR_ML_STAR:
        goto unexpected_bytecode_error;

      case OP_SET_OPTION_PUSH:
      case OP_SET_OPTION:
        p += SIZE_OPTION;
        break;

      default:
        goto bytecode_error;
    }
  }

  return 0;

unexpected_bytecode_error:
  return ONIGERR_UNEXPECTED_BYTECODE;

bytecode_error:
  return ONIGERR_UNDEFINED_BYTECODE;
}
#else /* USE_CACHE_MATCH_OPT */
/*
 * Fallback used when the match-cache optimisation is compiled out: always
 * reports NUM_CACHE_OPCODE_FAIL through *num and returns 0.  `reg` is not
 * examined and *table_size is left untouched.
 */
static OnigPosition
count_num_cache_opcode(regex_t* reg, long* num, long* table_size)
{
  *num = NUM_CACHE_OPCODE_FAIL;
  return 0;
}
706#endif
707
708extern int
709onig_check_linear_time(OnigRegexType* reg)
710{
711 long num = 0, table_size = 0;
712 count_num_cache_opcode(reg, &num, &table_size);
713 return num != NUM_CACHE_OPCODE_FAIL;
714}
715
716extern void
717onig_region_clear(OnigRegion* region)
718{
719 int i;
720
721 for (i = 0; i < region->num_regs; i++) {
722 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
723 }
724#ifdef USE_CAPTURE_HISTORY
725 history_root_free(region);
726#endif
727}
728
729extern int
730onig_region_resize(OnigRegion* region, int n)
731{
732 region->num_regs = n;
733
734 if (n < ONIG_NREGION)
735 n = ONIG_NREGION;
736
737 if (region->allocated == 0) {
738 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
739 if (region->beg == 0)
740 return ONIGERR_MEMORY;
741
742 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
743 if (region->end == 0) {
744 xfree(region->beg);
745 return ONIGERR_MEMORY;
746 }
747
748 region->allocated = n;
749 }
750 else if (region->allocated < n) {
751 OnigPosition *tmp;
752
753 region->allocated = 0;
754 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
755 if (tmp == 0) {
756 xfree(region->beg);
757 xfree(region->end);
758 return ONIGERR_MEMORY;
759 }
760 region->beg = tmp;
761 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
762 if (tmp == 0) {
763 xfree(region->beg);
764 xfree(region->end);
765 return ONIGERR_MEMORY;
766 }
767 region->end = tmp;
768
769 region->allocated = n;
770 }
771
772 return 0;
773}
774
775static int
776onig_region_resize_clear(OnigRegion* region, int n)
777{
778 int r;
779
780 r = onig_region_resize(region, n);
781 if (r != 0) return r;
782 onig_region_clear(region);
783 return 0;
784}
785
786extern int
787onig_region_set(OnigRegion* region, int at, int beg, int end)
788{
789 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
790
791 if (at >= region->allocated) {
792 int r = onig_region_resize(region, at + 1);
793 if (r < 0) return r;
794 }
795
796 region->beg[at] = beg;
797 region->end[at] = end;
798 return 0;
799}
800
801extern void
802onig_region_init(OnigRegion* region)
803{
804 region->num_regs = 0;
805 region->allocated = 0;
806 region->beg = (OnigPosition* )0;
807 region->end = (OnigPosition* )0;
808#ifdef USE_CAPTURE_HISTORY
809 region->history_root = (OnigCaptureTreeNode* )0;
810#endif
811}
812
813extern OnigRegion*
814onig_region_new(void)
815{
816 OnigRegion* r;
817
818 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
819 if (r)
820 onig_region_init(r);
821 return r;
822}
823
824extern void
825onig_region_free(OnigRegion* r, int free_self)
826{
827 if (r) {
828 if (r->allocated > 0) {
829 if (r->beg) xfree(r->beg);
830 if (r->end) xfree(r->end);
831 r->allocated = 0;
832 }
833#ifdef USE_CAPTURE_HISTORY
834 history_root_free(r);
835#endif
836 if (free_self) xfree(r);
837 }
838}
839
840extern void
841onig_region_copy(OnigRegion* to, const OnigRegion* from)
842{
843#define RREGC_SIZE (sizeof(int) * from->num_regs)
844 int i, r;
845
846 if (to == from) return;
847
848 r = onig_region_resize(to, from->num_regs);
849 if (r) return;
850
851 for (i = 0; i < from->num_regs; i++) {
852 to->beg[i] = from->beg[i];
853 to->end[i] = from->end[i];
854 }
855 to->num_regs = from->num_regs;
856
857#ifdef USE_CAPTURE_HISTORY
858 history_root_free(to);
859
860 if (IS_NOT_NULL(from->history_root)) {
861 to->history_root = history_tree_clone(from->history_root);
862 }
863#endif
864}
865
866
/* index value meaning "no stack entry" */
#define INVALID_STACK_INDEX   -1

/* stack type */
/* used by normal-POP (these are the only types with a non-zero low byte) */
#define STK_ALT                    0x0001
#define STK_LOOK_BEHIND_NOT        0x0002
#define STK_POS_NOT                0x0003
/* handled by normal-POP */
#define STK_MEM_START              0x0100
#define STK_MEM_END                0x8200
#define STK_REPEAT_INC             0x0300
#define STK_STATE_CHECK_MARK       0x1000
/* avoided by normal-POP */
#define STK_NULL_CHECK_START       0x3000
#define STK_NULL_CHECK_END         0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8400
#define STK_POS                    0x0500  /* used when POP-POS */
#define STK_STOP_BT                0x0600  /* mark for "(?>...)" */
#define STK_REPEAT                 0x0700
#define STK_CALL_FRAME             0x0800
#define STK_RETURN                 0x0900
#define STK_VOID                   0x0a00  /* for fill a blank */
#define STK_ABSENT_POS             0x0b00  /* for absent */
#define STK_ABSENT                 0x0c00  /* absent inner loop marker */

/* stack type check mask */
/* low byte: selects the "used by normal-POP" types above */
#define STK_MASK_POP_USED          0x00ff
/* low byte plus bit 0x1000; tested by IS_TO_VOID_TARGET() */
#define STK_MASK_TO_VOID_TARGET    0x10ff
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
897
#ifdef USE_CACHE_MATCH_OPT
/* Reset the match-cache fields of a match argument: cache disabled, no
 * failures counted, opcode count not yet computed, no tables allocated. */
#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\
  (msa).enable_cache_match_opt = 0;\
  (msa).num_fail = 0;\
  (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\
  (msa).num_cache_table = 0;\
  (msa).cache_index_table = (OnigCacheIndex *)0;\
  (msa).match_cache = (uint8_t *)0;\
} while(0)
/* Free the cache index table and the cache bitmap if they were allocated. */
#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\
  if ((msa).cache_index_table) xfree((msa).cache_index_table);\
  if ((msa).match_cache) xfree((msa).match_cache);\
} while(0)
#else
/* match cache compiled out: both helpers expand to nothing */
#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa)
#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa)
#endif

/* Initialise a match argument from the search parameters.  The two variants
 * differ only in that the first one also resets best_len to ONIG_MISMATCH. */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
} while(0)
#else
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
} while(0)
#endif
940
#ifdef USE_COMBINATION_EXPLOSION_CHECK

/* buffers of at least this many bytes come from xmalloc, smaller ones
 * from xalloca */
# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16

/*
 * Set up (msa).state_check_buff, a bitmap with one bit per
 * (string position, state) pair: ((str_len + 1) * state_num) bits, rounded
 * up to whole bytes.
 *
 * The buffer is only created when there are states to check, the string is
 * at least STATE_CHECK_STRING_THRESHOLD_LEN long and the size stays below
 * STATE_CHECK_BUFF_MAX_SIZE; otherwise the buffer pointer is NULL and its
 * size 0.  `offset` is rewritten to a byte offset and only the bytes from
 * there on are zeroed.  A failed xmalloc leaves the enclosing function
 * through CHECK_NULL_RETURN_MEMERR.
 */
# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
  if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
    offset = ((offset) * (state_num)) >> 3;\
    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
        (msa).state_check_buff = (void* )xmalloc(size);\
        CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
      }\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
              (size_t )(size - (offset))); \
      (msa).state_check_buff_size = size;\
    }\
    else {\
      (msa).state_check_buff = (void* )0;\
      (msa).state_check_buff_size = 0;\
    }\
  }\
  else {\
    (msa).state_check_buff = (void* )0;\
    (msa).state_check_buff_size = 0;\
  }\
  } while(0)

/* Release everything a match argument owns.  The state-check buffer is
 * freed only when its size shows that it came from xmalloc. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    if ((msa).state_check_buff) xfree((msa).state_check_buff);\
  }\
  MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
} while(0)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
/* Release the saved match stack and the match-cache tables. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
} while (0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
984
985
986
/* above this many OnigStackIndex slots the index area is heap allocated */
#define MAX_PTR_NUM 100

/*
 * Set up the index area (alloc_addr) and the match stack (stk_alloc,
 * stk_base, stk, stk_end) of the enclosing function.  It expects `msa` and
 * the stk_* variables to be in scope.
 *
 *  - ptr_num > MAX_PTR_NUM: the index area comes from xmalloc and heap_addr
 *    remembers it; the stack is msa->stack_p when one was saved, otherwise
 *    an xalloca block of stack_num entries.
 *  - msa->stack_p set: the index area comes from xalloca and the saved
 *    stack (msa->stack_n entries) is reused.
 *  - otherwise: one xalloca block holds the index area followed by a stack
 *    of stack_num entries.
 *
 * NOTE(review): the xmalloc result in the first case is not checked here --
 * confirm that xmalloc cannot return NULL in this build or that the caller
 * checks it.
 */
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)  do {\
  if (ptr_num > MAX_PTR_NUM) {\
    alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = alloc_addr;\
    if (msa->stack_p) {\
      stk_alloc = (OnigStackType* )(msa->stack_p);\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + msa->stack_n;\
    } else {\
      stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + (stack_num);\
    }\
  } else if (msa->stack_p) {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(msa->stack_p);\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
                   + sizeof(OnigStackType) * (stack_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0)

/* If the stack no longer lives in its initial block (stk_base differs from
 * stk_alloc), record it and its size in msa so it can be reused or freed
 * later. */
#define STACK_SAVE do{\
  if (stk_base != stk_alloc) {\
    msa->stack_p = stk_base;\
    msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
  };\
} while(0)
1029
/* Upper bound, in stack entries, applied by stack_double(); 0 means that
 * no limit is enforced there. */
static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;

/* Return the current match stack limit. */
extern unsigned int
onig_get_match_stack_limit_size(void)
{
  return MatchStackLimitSize;
}

/* Replace the match stack limit with `size`.  Always returns 0. */
extern int
onig_set_match_stack_limit_size(unsigned int size)
{
  MatchStackLimitSize = size;
  return 0;
}
1044
1045static int
1046stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1047 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1048{
1049 size_t n;
1050 OnigStackType *x, *stk_base, *stk_end, *stk;
1051
1052 stk_base = *arg_stk_base;
1053 stk_end = *arg_stk_end;
1054 stk = *arg_stk;
1055
1056 n = stk_end - stk_base;
1057 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1058 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1059 if (IS_NULL(x)) {
1060 STACK_SAVE;
1061 return ONIGERR_MEMORY;
1062 }
1063 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1064 n *= 2;
1065 }
1066 else {
1067 unsigned int limit_size = MatchStackLimitSize;
1068 n *= 2;
1069 if (limit_size != 0 && n > limit_size) {
1070 if ((unsigned int )(stk_end - stk_base) == limit_size)
1071 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1072 else
1073 n = limit_size;
1074 }
1075 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1076 if (IS_NULL(x)) {
1077 STACK_SAVE;
1078 return ONIGERR_MEMORY;
1079 }
1080 }
1081 *arg_stk = x + (stk - stk_base);
1082 *arg_stk_base = x;
1083 *arg_stk_end = x + n;
1084 return 0;
1085}
1086
/* Make sure at least n free entries remain on the match stack, growing it
 * with stack_double() when needed.  On failure the stack is saved into msa,
 * xmalloc_base is freed when set, and the enclosing function returns the
 * error code. */
#define STACK_ENSURE(n) do {\
  if (stk_end - stk < (n)) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) {\
      STACK_SAVE;\
      if (xmalloc_base) xfree(xmalloc_base);\
      return r;\
    }\
  }\
} while(0)

/* conversions between stack entry pointers and indexes relative to stk_base */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)

/* Push an entry that carries only a type.  null_check is inherited from the
 * entry below, or 0 at the bottom of the stack. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  STACK_INC;\
} while(0)

/* true when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1109
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* bit index in state_check_buff for string position s and state snum;
 * expects `str` and `num_comb_exp_check` to be in scope */
# define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))
/* Read the state-check bit for the current position `s` (taken from the
 * enclosing scope, not from an argument) into v; v is 0 without a buffer. */
# define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff != NULL) {\
    ptrdiff_t x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
  else (v) = 0;\
} while(0)


/* else-branch fragment for pop loops: when the popped entry is a state-check
 * mark, set the bit it refers to in state_check_buff */
# define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8));          \
  }

/* Push a state entry (bytecode address, string position, previous position,
 * keep position) with state_check cleared.
 * NOTE(review): unlike the #else variant below, this one does not set
 * stk->null_check -- confirm that this is intended. */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Push a type/pcode entry without checking capacity; the caller must have
 * made room already. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Push an STK_ALT entry that remembers state number snum (0 when there is
 * no state-check buffer). */
# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Push a state-check mark for (s, snum); does nothing without a buffer. */
# define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)

#else /* USE_COMBINATION_EXPLOSION_CHECK */

/* no state checking: the pop-loop fragment expands to nothing */
# define ELSE_IF_STATE_CHECK_MARK(stk)

/* Push a state entry (bytecode address, string position, previous position,
 * keep position); null_check is inherited from the entry below. */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Push a type/pcode entry without checking capacity; the caller must have
 * made room already. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1189
/* Shorthands that fix the entry type handed to STACK_PUSH / STACK_PUSH_TYPE. */
#define STACK_PUSH_ALT(code,pos,prev,kp) \
  STACK_PUSH(STK_ALT,code,pos,prev,kp)
#define STACK_PUSH_POS(pos,prev,kp) \
  STACK_PUSH(STK_POS,NULL_UCHARP,pos,prev,kp)
#define STACK_PUSH_POS_NOT(code,pos,prev,kp) \
  STACK_PUSH(STK_POS_NOT,code,pos,prev,kp)
#define STACK_PUSH_LOOK_BEHIND_NOT(code,pos,prev,kp) \
  STACK_PUSH(STK_LOOK_BEHIND_NOT,code,pos,prev,kp)
#define STACK_PUSH_ABSENT   STACK_PUSH_TYPE(STK_ABSENT)
#define STACK_PUSH_STOP_BT  STACK_PUSH_TYPE(STK_STOP_BT)
1197
1198#ifdef USE_CACHE_MATCH_OPT
1199
/*
 * Match-cache probe.  When `enable` is true, look up the cache index for
 * pattern address `p`; if one exists, test the bit for (pos, index) in the
 * `match_cache` bitmap.  A bit that is already set jumps to `fail`; otherwise
 * the bit is set and execution continues.
 */
#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\
  if (enable) {\
    long cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\
    if (cache_index >= 0) {\
      long key   = (num_cache_size) * (long)(pos) + cache_index;\
      long index = key >> 3;\
      long mask  = 1 << (key & 7);\
      if (((match_cache)[index] & mask) != 0) goto fail;\
      (match_cache)[index] |= mask;\
    }\
  }\
} while (0)
1214
1215static long
1216find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, long num_cache_table, UChar* p)
1217{
1218 long l = 0, r = num_cache_table - 1, m = 0;
1219 OnigCacheIndex* item;
1220 OnigRepeatRange* range;
1221 OnigStackType *stkp;
1222 int count = 0;
1223 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
1224
1225 while (l <= r) {
1226 m = (l + r) / 2;
1227 if (table[m].addr == p) break;
1228 if (table[m].addr < p) l = m + 1;
1229 else r = m - 1;
1230 }
1231
1232 if (!(0 <= m && m < num_cache_table && table[m].addr == p)) {
1233 return -1;
1234 }
1235
1236 item = &table[m];
1237 if (item->outer_repeat == -1) {
1238 return item->num;
1239 }
1240
1241 range = &reg->repeat_range[item->outer_repeat];
1242
1243 stkp = &stk[repeat_stk[item->outer_repeat]];
1244 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
1245
1246 if (count < range->lower) {
1247 return range->base_num + range->inner_num * count + item->num;
1248 }
1249
1250 if (range->upper == 0x7fffffff) {
1251 return range->base_num + range->inner_num * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) + item->num;
1252 }
1253
1254 return range->base_num + range->inner_num * (range->lower - 1) + (range->inner_num + 1) * (count - range->lower + 1) + item->num;
1255}
1256
1257static void
1258reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, long pos, uint8_t* match_cache, OnigCacheIndex *table, long num_cache_size, long num_cache_table)
1259{
1260 long l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0;
1261 int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG;
1262 OnigCacheIndex *item1, *item2;
1263 long k1, k2, base;
1264
1265 while (l <= r) {
1266 m1 = (l + r) / 2;
1267 if (table[m1].addr == pbegin) break;
1268 if (table[m1].addr < pbegin) l = m1 + 1;
1269 else r = m1 - 1;
1270 }
1271
1272 l = 0, r = num_cache_table - 1;
1273 while (l <= r) {
1274 m2 = (l + r) / 2;
1275 if (table[m2].addr == pend) break;
1276 if (table[m2].addr < pend) l = m2 + 1;
1277 else r = m2 - 1;
1278 }
1279
1280 if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++;
1281 if (table[m2].addr > pend && m2 - 1 > 0) m2--;
1282
1283 item1 = &table[m1];
1284 item2 = &table[m2];
1285
1286 if (item1->outer_repeat < 0) k1 = item1->num;
1287 else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num;
1288
1289 if (item2->outer_repeat < 0) k2 = item2->num;
1290 else {
1291 OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat];
1292 if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num;
1293 else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num;
1294 }
1295
1296 base = pos * num_cache_size;
1297 k1 += base;
1298 k2 += base;
1299
1300 if ((k1 >> 3) == (k2 >> 3)) {
1301 match_cache[k1 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1);
1302 } else {
1303 long i = k1 >> 3;
1304 if (k1 & 7) {
1305 match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1;
1306 i++;
1307 }
1308 if (i < (k2 >> 3)) {
1309 xmemset(&match_cache[i], 0, (k2 >> 3) - i);
1310 if (k2 & 7) {
1311 match_cache[k2 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1));
1312 }
1313 }
1314 }
1315}
1316
1317#else
1318#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache)
1319#endif /* USE_CACHE_MATCH_OPT */
1320
/* Push a STK_REPEAT entry for repeat number `id` whose body starts at `pat`;
   its iteration count starts at 0. */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_REPEAT;\
  stk->u.repeat.num   = (id);\
  stk->u.repeat.pcode = (pat);\
  stk->u.repeat.count = 0;\
  STACK_INC;\
} while(0)

/* Push a STK_REPEAT_INC entry that refers to the stack index `sindex`. */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_REPEAT_INC;\
  stk->u.repeat_inc.si = (sindex);\
  STACK_INC;\
} while(0)
1338
/* Push a STK_MEM_START entry for group `mnum` at position `s`.  The previous
   mem_start_stk/mem_end_stk values are saved in the entry; afterwards the
   group's start refers to this entry and its end is marked invalid. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_MEM_START;\
  stk->u.mem.num   = (mnum);\
  stk->u.mem.pstr  = (s);\
  stk->u.mem.start = mem_start_stk[mnum];\
  stk->u.mem.end   = mem_end_stk[mnum];\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Push a STK_MEM_END entry for group `mnum` at position `s`.  The previous
   start/end values are saved in the entry and the group's end is redirected
   to this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_MEM_END;\
  stk->u.mem.num   = (mnum);\
  stk->u.mem.pstr  = (s);\
  stk->u.mem.start = mem_start_stk[mnum];\
  stk->u.mem.end   = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Push a STK_MEM_END_MARK entry that carries only the group number. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_MEM_END_MARK;\
  stk->u.mem.num = (mnum);\
  STACK_INC;\
} while(0)
1371
/* Walk down from the stack top and leave `k` at the STK_MEM_START entry of
   group `mnum` that is not paired with an end/mark entry seen above it.  If
   none is found, `k` ends at stk_base. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  k = stk;\
  while (k > stk_base) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      level++;\
      continue;\
    }\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)

/* Walk up from `k` towards the stack top.  `start` receives the position of
   the outermost STK_MEM_START of group `mnum`, and `end` the position of the
   STK_MEM_END that closes it; the scan stops at that end entry. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  for (; k < stk; k++) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
  }\
} while(0)
1405
/* Push a STK_NULL_CHECK_START entry for check number `cnum` at position `s`.
   Unlike the other push macros, null_check is set to this entry's own index
   rather than being inherited from the entry below. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->null_check = (OnigStackIndex)(stk - stk_base);\
  stk->type = STK_NULL_CHECK_START;\
  stk->u.null_check.num  = (cnum);\
  stk->u.null_check.pstr = (s);\
  STACK_INC;\
} while(0)

/* Push a STK_NULL_CHECK_END entry for check number `cnum`; null_check is
   likewise set to this entry's own index. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->null_check = (OnigStackIndex)(stk - stk_base);\
  stk->type = STK_NULL_CHECK_END;\
  stk->u.null_check.num = (cnum);\
  STACK_INC;\
} while(0)
1422
/* Push a STK_CALL_FRAME entry recording `pat` as the return address. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Push a STK_RETURN entry (no payload). */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)

/* Push a STK_ABSENT_POS entry holding the `start` and `end` positions. */
#define STACK_PUSH_ABSENT_POS(start, end) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->type = STK_ABSENT_POS;\
  stk->u.absent_pos.abs_pstr = (start);\
  stk->u.absent_pos.end_pstr = (end);\
  STACK_INC;\
} while(0)
1446
1447
/*
 * Debug-only underflow guard: when ONIG_DEBUG is defined and `p` has moved
 * below stk_base, print the location string `at` to stderr and jump to the
 * `stack_error` label.  Otherwise the macro expands to nothing.
 *
 * The debug form is wrapped in do { } while(0) so that it behaves as a single
 * statement and cannot capture a following `else`.  Every use must be
 * followed by a semicolon.
 */
#ifdef ONIG_DEBUG
# define STACK_BASE_CHECK(p, at) do {\
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", (at));\
    goto stack_error;\
  }\
} while(0)
#else
# define STACK_BASE_CHECK(p, at)
#endif
1457
/* Drop the top entry without looking at it. */
#define STACK_POP_ONE do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)

/*
 * Pop entries until the top one has a STK_MASK_POP_USED bit set; stk is left
 * pointing at that entry.  What is restored from the skipped entries depends
 * on pop_level:
 *   STACK_POP_LEVEL_FREE       nothing,
 *   STACK_POP_LEVEL_MEM_START  group start/end saved in STK_MEM_START,
 *   anything else              the above, plus STK_MEM_END entries and the
 *                              repeat count for STK_REPEAT_INC.
 */
#define STACK_POP do {\
  if (pop_level == STACK_POP_LEVEL_FREE) {\
    for (;;) {\
      --stk;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
  }\
  else if (pop_level == STACK_POP_LEVEL_MEM_START) {\
    for (;;) {\
      --stk;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
  }\
  else {\
    for (;;) {\
      --stk;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break;\
      else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
  }\
} while(0)
1506
/*
 * The three macros below pop entries until an entry of one specific type is
 * on top (stk is left pointing at it).  While skipping entries they restore
 * the group start/end saved in STK_MEM_START / STK_MEM_END entries and undo
 * the repeat count for STK_REPEAT_INC entries.
 */

/* Pop down to the nearest STK_POS_NOT entry. */
#define STACK_POP_TIL_POS_NOT do {\
  for (;;) {\
    --stk;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/* Pop down to the nearest STK_LOOK_BEHIND_NOT entry. */
#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
  for (;;) {\
    --stk;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/* Pop down to the nearest STK_ABSENT entry. */
#define STACK_POP_TIL_ABSENT do {\
  for (;;) {\
    --stk;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
    if (stk->type == STK_ABSENT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
1566
/* Pop the top entry and read back the two positions stored by
   STACK_PUSH_ABSENT_POS into `start` and `end`. */
#define STACK_POP_ABSENT_POS(start, end) do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (start) = stk->u.absent_pos.abs_pstr;\
  (end)   = stk->u.absent_pos.end_pstr;\
} while(0)

/* Walk down from the stack top, turning every IS_TO_VOID_TARGET entry into
   STK_VOID, until the nearest STK_POS entry; that entry is voided as well
   and `k` is left pointing at it. */
#define STACK_POS_END(k) do {\
  for (k = stk - 1; ; k--) {\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)

/* Same walk as STACK_POS_END, but it stops at the nearest STK_STOP_BT entry
   and uses a private cursor. */
#define STACK_STOP_BT_END do {\
  OnigStackType *k;\
  for (k = stk - 1; ; k--) {\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
1603
/* Starting at the entry named by the top entry's null_check index, walk down
   to the STK_NULL_CHECK_START entry with number `id` and set `isnull` to
   whether the position stored there equals `s`. */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check);\
  for (;; k--) {\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      (isnull) = (k->u.null_check.pstr == (s));\
      break;\
    }\
  }\
} while(0)

/* Nesting-aware variant: every STK_NULL_CHECK_END entry passed on the way
   down raises the nesting level, and a STK_NULL_CHECK_START with number `id`
   is used only when the level is back at 0 (otherwise it lowers the level). */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check);\
  for (;; k--) {\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_END) {\
      level++;\
    }\
    else if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      if (level != 0) level--;\
      else {\
        (isnull) = (k->u.null_check.pstr == (s));\
        break;\
      }\
    }\
  }\
} while(0)
1638
/*
 * Null-loop check that also looks at capture groups.
 *
 * Walks down from the entry named by the top entry's null_check index to the
 * STK_NULL_CHECK_START entry with number `id`.
 *   - If the position stored there differs from `s`, `isnull` is set to 0.
 *   - Otherwise `isnull` starts at 1 and every STK_MEM_START entry between
 *     that entry and the stack top is examined:
 *       * a group whose saved end is INVALID_STACK_INDEX, or whose saved
 *         start position differs from its saved end position, sets
 *         `isnull` to 0 and `ischange` to 1 and stops the scan;
 *       * a group that is empty but whose end position differs from `s`
 *         sets `isnull` to -1 and the scan continues.
 * The saved end is read through the stack when the group's bit is set in
 * reg->bt_mem_end, and taken as a direct pointer otherwise.
 * `ischange` is only ever set to 1 here; it is never cleared.
 * The inner `break` leaves the upward scan; the `break` after it leaves the
 * outer downward walk.
 */
#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (k->u.null_check.pstr != (s)) {\
          (isnull) = 0;\
          break;\
        }\
        else {\
          UChar* endp;\
          (isnull) = 1;\
          while (k < stk) {\
            if (k->type == STK_MEM_START) {\
              if (k->u.mem.end == INVALID_STACK_INDEX) {\
                (isnull) = 0; (ischange) = 1; break;\
              }\
              if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
                endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
              else\
                endp = (UChar* )k->u.mem.end;\
              if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                (isnull) = 0; (ischange) = 1; break;\
              }\
              else if (endp != s) {\
                (isnull) = -1; /* empty, but position changed */ \
              }\
            }\
            k++;\
          }\
          break;\
        }\
      }\
    }\
  }\
} while(0)

/*
 * Nesting-aware variant of STACK_NULL_CHECK_MEMST.
 *
 * A STK_NULL_CHECK_END entry with number `id` passed on the way down raises
 * the nesting level; a STK_NULL_CHECK_START entry with number `id` is used
 * only when the level is 0 and lowers the level otherwise.  The capture-group
 * examination is the same as in STACK_NULL_CHECK_MEMST, except that this
 * macro has no `ischange` output.
 */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          if (k->u.null_check.pstr != (s)) {\
            (isnull) = 0;\
            break;\
          }\
          else {\
            UChar* endp;\
            (isnull) = 1;\
            while (k < stk) {\
              if (k->type == STK_MEM_START) {\
                if (k->u.mem.end == INVALID_STACK_INDEX) {\
                  (isnull) = 0; break;\
                }\
                if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
                  endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
                else\
                  endp = (UChar* )k->u.mem.end;\
                if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
                  (isnull) = 0; break;\
                }\
                else if (endp != s) {\
                  (isnull) = -1; /* empty, but position changed */ \
                }\
              }\
              k++;\
            }\
            break;\
          }\
        }\
        else {\
          level--;\
        }\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
1725
/* Walk down from the stack top and leave `k` at the STK_REPEAT entry with
   number `id` found while the call nesting level is 0.  STK_RETURN entries
   raise the level and STK_CALL_FRAME entries lower it. */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  for (k = stk - 1; ; k--) {\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) {\
      if (level == 0 && k->u.repeat.num == (id)) break;\
    }\
    else if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN)     level++;\
  }\
} while(0)

/* Walk down from the stack top to the STK_CALL_FRAME entry that is not
   paired with a STK_RETURN entry seen above it, and store its return address
   in `addr`. */
#define STACK_RETURN(addr) do {\
  int level = 0;\
  OnigStackType* k;\
  for (k = stk - 1; ; k--) {\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level != 0) level--;\
      else {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
    }\
  }\
} while(0)
1761
1762
/* Compare `len` bytes at s1 and s2, advancing both pointers and counting
   `len` down as it goes; jumps to `fail` at the first difference.  All three
   arguments must be modifiable lvalues. */
#define STRING_CMP(s1,s2,len) do {\
  for (; len-- > 0; ) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)

/* Case-insensitive comparison through string_cmp_ic(); jumps to `fail` when
   it reports a mismatch (return value 0). */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (!string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end)) \
    goto fail; \
} while(0)
1773
1774static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1775 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1776{
1777 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1778 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1779 UChar *p1, *p2, *end1, *s2;
1780 int len1, len2;
1781
1782 s2 = *ps2;
1783 end1 = s1 + mblen;
1784 while (s1 < end1) {
1785 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1786 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1787 if (len1 != len2) return 0;
1788 p1 = buf1;
1789 p2 = buf2;
1790 while (len1-- > 0) {
1791 if (*p1 != *p2) return 0;
1792 p1++;
1793 p2++;
1794 }
1795 }
1796
1797 *ps2 = s2;
1798 return 1;
1799}
1800
/* Like STRING_CMP, but instead of jumping it reports the outcome in
   `is_fail` (1 on the first differing byte, 0 when all bytes match). */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  for (; len-- > 0; ) {\
    if (*s1++ != *s2++) {\
      is_fail = 1;\
      break;\
    }\
  }\
} while(0)

/* Like STRING_CMP_IC, but reports the outcome in `is_fail`. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0); \
} while(0)
1816
1817
/* Position predicates on the subject string [str, end). */
#define IS_EMPTY_STR           (end == str)
#define ON_STR_BEGIN(s)        (str == (s))
#define ON_STR_END(s)          (end == (s))

/* Bounds checks for reading `n` more bytes at the current position `s`.
   The limit is right_range when matches must stay inside the specified
   range, and `end` otherwise.  DATA_ENSURE jumps to `fail` and
   DATA_ENSURE_CONTINUE continues the enclosing loop when too little data
   is left. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define DATA_ENSURE_CHECK1          (s < right_range)
# define DATA_ENSURE_CHECK(n)        (s + (n) <= right_range)
# define DATA_ENSURE(n)              if (!DATA_ENSURE_CHECK(n)) goto fail
# define DATA_ENSURE_CONTINUE(n)     if (!DATA_ENSURE_CHECK(n)) continue
# define ABSENT_END_POS              right_range
#else
# define DATA_ENSURE_CHECK1          (s < end)
# define DATA_ENSURE_CHECK(n)        (s + (n) <= end)
# define DATA_ENSURE(n)              if (!DATA_ENSURE_CHECK(n)) goto fail
# define DATA_ENSURE_CONTINUE(n)     if (!DATA_ENSURE_CHECK(n)) continue
# define ABSENT_END_POS              end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1834
1835int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
1836
1837static inline int
1838enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
1839{
1840 if (enc->max_enc_len == enc->min_enc_len) {
1841 return (p < e ? enc->min_enc_len : 0);
1842 }
1843 else {
1844 return onigenc_mbclen_approximate(p, e, enc);
1845 }
1846}
1847
1848
1849#ifdef USE_CAPTURE_HISTORY
1850static int
1851make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1852 OnigStackType* stk_top, UChar* str, regex_t* reg)
1853{
1854 int n, r;
1855 OnigCaptureTreeNode* child;
1856 OnigStackType* k = *kp;
1857
1858 while (k < stk_top) {
1859 if (k->type == STK_MEM_START) {
1860 n = k->u.mem.num;
1861 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1862 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1863 child = history_node_new();
1864 CHECK_NULL_RETURN_MEMERR(child);
1865 child->group = n;
1866 child->beg = k->u.mem.pstr - str;
1867 r = history_tree_add_child(node, child);
1868 if (r != 0) {
1869 history_tree_free(child);
1870 return r;
1871 }
1872 *kp = (k + 1);
1873 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1874 if (r != 0) return r;
1875
1876 k = *kp;
1877 child->end = k->u.mem.pstr - str;
1878 }
1879 }
1880 else if (k->type == STK_MEM_END) {
1881 if (k->u.mem.num == node->group) {
1882 node->end = k->u.mem.pstr - str;
1883 *kp = k;
1884 return 0;
1885 }
1886 }
1887 k++;
1888 }
1889
1890 return 1; /* 1: root node ending. */
1891}
1892#endif /* USE_CAPTURE_HISTORY */
1893
1894#ifdef USE_BACKREF_WITH_LEVEL
1895static int
1896mem_is_in_memp(int mem, int num, UChar* memp)
1897{
1898 int i;
1899 MemNumType m;
1900
1901 for (i = 0; i < num; i++) {
1902 GET_MEMNUM_INC(m, memp);
1903 if (mem == (int )m) return 1;
1904 }
1905 return 0;
1906}
1907
1908static int backref_match_at_nested_level(regex_t* reg,
1909 OnigStackType* top, OnigStackType* stk_base,
1910 int ignore_case, int case_fold_flag,
1911 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1912{
1913 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1914 int level;
1915 OnigStackType* k;
1916
1917 level = 0;
1918 k = top;
1919 k--;
1920 while (k >= stk_base) {
1921 if (k->type == STK_CALL_FRAME) {
1922 level--;
1923 }
1924 else if (k->type == STK_RETURN) {
1925 level++;
1926 }
1927 else if (level == nest) {
1928 if (k->type == STK_MEM_START) {
1929 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1930 pstart = k->u.mem.pstr;
1931 if (pend != NULL_UCHARP) {
1932 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1933 p = pstart;
1934 ss = *s;
1935
1936 if (ignore_case != 0) {
1937 if (string_cmp_ic(reg->enc, case_fold_flag,
1938 pstart, &ss, pend - pstart, send) == 0)
1939 return 0; /* or goto next_mem; */
1940 }
1941 else {
1942 while (p < pend) {
1943 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1944 }
1945 }
1946
1947 *s = ss;
1948 return 1;
1949 }
1950 }
1951 }
1952 else if (k->type == STK_MEM_END) {
1953 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1954 pend = k->u.mem.pstr;
1955 }
1956 }
1957 }
1958 k--;
1959 }
1960
1961 return 0;
1962}
1963#endif /* USE_BACKREF_WITH_LEVEL */
1964
1965
1966#ifdef ONIG_DEBUG_STATISTICS
1967
/*
 * Timing backend for the opcode statistics.  Each variant declares the
 * timestamp variables `ts` / `te` and provides
 *   GETTIME(t)       - store the current time in t
 *   TIMEDIFF(te,ts)  - elapsed time between two timestamps
 */
# ifdef _WIN32
# include <windows.h>
static LARGE_INTEGER ts, te, freq;
# define GETTIME(t) QueryPerformanceCounter(&(t))
/* Counter difference scaled by 1000000 / freq; freq is filled in by
   onig_statistics_init(). */
# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
 * 1000000 / freq.QuadPart)
# else /* _WIN32 */

/* USE_TIMEOFDAY is defined unconditionally here, so the times() variant
   below is compiled only if this definition is removed. */
# define USE_TIMEOFDAY

# ifdef USE_TIMEOFDAY
# ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
# endif
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
static struct timeval ts, te;
# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
/* Difference in microseconds: tv_usec delta plus tv_sec delta * 1000000. */
# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
 (((te).tv_sec - (ts).tv_sec)*1000000))
# else /* USE_TIMEOFDAY */
# ifdef HAVE_SYS_TIMES_H
# include <sys/times.h>
# endif
static struct tms ts, te;
# define GETTIME(t) times(&(t))
/* Difference of the tms_utime fields. */
# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
# endif /* USE_TIMEOFDAY */

# endif /* _WIN32 */

/* Per-opcode statistics; MOP_IN and MOP_OUT index these arrays by opcode. */
static int OpCounter[256];        /* number of times each opcode was entered */
static int OpPrevCounter[256];    /* times an opcode directly preceded OpPrevTarget */
static unsigned long OpTime[256]; /* accumulated TIMEDIFF per opcode */
static int OpCurr = OP_FINISH;    /* opcode most recently entered */
static int OpPrevTarget = OP_FAIL; /* opcode whose predecessors are counted */
static int MaxStackDepth = 0;     /* largest stk - stk_base seen by STACK_INC */
2006
/* Statistics hook at the start of an opcode: if `opcode` is OpPrevTarget,
   credit the previously running opcode in OpPrevCounter; then make `opcode`
   current, count it and take the start timestamp. */
# define MOP_IN(opcode) do {\
  if (OpPrevTarget == opcode) OpPrevCounter[OpCurr]++;\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)

/* Statistics hook at the end of an opcode: take the end timestamp and add
   the elapsed time to the current opcode's total. */
# define MOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2018
2019extern void
2020onig_statistics_init(void)
2021{
2022 int i;
2023 for (i = 0; i < 256; i++) {
2024 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2025 }
2026 MaxStackDepth = 0;
2027# ifdef _WIN32
2028 QueryPerformanceFrequency(&freq);
2029# endif
2030}
2031
2032extern void
2033onig_print_statistics(FILE* f)
2034{
2035 int i;
2036 fprintf(f, " count prev time\n");
2037 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2038 fprintf(f, "%8d: %8d: %10lu: %s\n",
2039 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2040 }
2041 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2042}
2043
/* Advance the stack pointer and keep track of the deepest stack seen. */
# define STACK_INC do {\
  ++stk;\
  if (MaxStackDepth < stk - stk_base) \
    MaxStackDepth = stk - stk_base;\
} while(0)
2049
2050#else /* ONIG_DEBUG_STATISTICS */
2051# define STACK_INC stk++
2052
2053# define MOP_IN(opcode)
2054# define MOP_OUT
2055#endif /* ONIG_DEBUG_STATISTICS */
2056
2057
2058#ifdef ONIG_DEBUG_MATCH
2059static char *
2060stack_type_str(int stack_type)
2061{
2062 switch (stack_type) {
2063 case STK_ALT: return "Alt ";
2064 case STK_LOOK_BEHIND_NOT: return "LBNot ";
2065 case STK_POS_NOT: return "PosNot";
2066 case STK_MEM_START: return "MemS ";
2067 case STK_MEM_END: return "MemE ";
2068 case STK_REPEAT_INC: return "RepInc";
2069 case STK_STATE_CHECK_MARK: return "StChMk";
2070 case STK_NULL_CHECK_START: return "NulChS";
2071 case STK_NULL_CHECK_END: return "NulChE";
2072 case STK_MEM_END_MARK: return "MemEMk";
2073 case STK_POS: return "Pos ";
2074 case STK_STOP_BT: return "StopBt";
2075 case STK_REPEAT: return "Rep ";
2076 case STK_CALL_FRAME: return "Call ";
2077 case STK_RETURN: return "Ret ";
2078 case STK_VOID: return "Void ";
2079 case STK_ABSENT_POS: return "AbsPos";
2080 case STK_ABSENT: return "Absent";
2081 default: return " ";
2082 }
2083}
2084#endif
2085
2086/* match data(str - end) from position (sstart). */
2087/* if sstart == str then set sprev to NULL. */
2088static OnigPosition
2089match_at(regex_t* reg, const UChar* str, const UChar* end,
2090#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2091 const UChar* right_range,
2092#endif
2093 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2094{
2095 static const UChar FinishCode[] = { OP_FINISH };
2096
2097 int i, num_mem, pop_level;
2098 ptrdiff_t n, best_len;
2099 LengthType tlen, tlen2;
2100 MemNumType mem;
2101 RelAddrType addr;
2102 OnigOptionType option = reg->options;
2103 OnigEncoding encode = reg->enc;
2104 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2105 UChar *s, *q, *sbegin;
2106 UChar *p = reg->p;
2107 UChar *pbegin = p;
2108 UChar *pkeep;
2109 char *alloca_base;
2110 char *xmalloc_base = NULL;
2111 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
2112 OnigStackType *stkp; /* used as any purpose. */
2113 OnigStackIndex si;
2114 OnigStackIndex *repeat_stk;
2115 OnigStackIndex *mem_start_stk, *mem_end_stk;
2116#ifdef USE_COMBINATION_EXPLOSION_CHECK
2117 int scv;
2118 unsigned char* state_check_buff = msa->state_check_buff;
2119 int num_comb_exp_check = reg->num_comb_exp_check;
2120#endif
2121
2122#if USE_TOKEN_THREADED_VM
2123# define OP_OFFSET 1
2124# define VM_LOOP JUMP;
2125# define VM_LOOP_END
2126# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2127# define DEFAULT L_DEFAULT:
2128# define NEXT sprev = sbegin; JUMP
2129# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2130
2131 RB_GNUC_EXTENSION static const void *oplabels[] = {
2132 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
2133 &&L_OP_END, /* pattern code terminator (success end) */
2134
2135 &&L_OP_EXACT1, /* single byte, N = 1 */
2136 &&L_OP_EXACT2, /* single byte, N = 2 */
2137 &&L_OP_EXACT3, /* single byte, N = 3 */
2138 &&L_OP_EXACT4, /* single byte, N = 4 */
2139 &&L_OP_EXACT5, /* single byte, N = 5 */
2140 &&L_OP_EXACTN, /* single byte */
2141 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
2142 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
2143 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
2144 &&L_OP_EXACTMB2N, /* mb-length = 2 */
2145 &&L_OP_EXACTMB3N, /* mb-length = 3 */
2146 &&L_OP_EXACTMBN, /* other length */
2147
2148 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
2149 &&L_OP_EXACTN_IC, /* single byte, ignore case */
2150
2151 &&L_OP_CCLASS,
2152 &&L_OP_CCLASS_MB,
2153 &&L_OP_CCLASS_MIX,
2154 &&L_OP_CCLASS_NOT,
2155 &&L_OP_CCLASS_MB_NOT,
2156 &&L_OP_CCLASS_MIX_NOT,
2157
2158 &&L_OP_ANYCHAR, /* "." */
2159 &&L_OP_ANYCHAR_ML, /* "." multi-line */
2160 &&L_OP_ANYCHAR_STAR, /* ".*" */
2161 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
2162 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2163 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2164
2165 &&L_OP_WORD,
2166 &&L_OP_NOT_WORD,
2167 &&L_OP_WORD_BOUND,
2168 &&L_OP_NOT_WORD_BOUND,
2169# ifdef USE_WORD_BEGIN_END
2170 &&L_OP_WORD_BEGIN,
2171 &&L_OP_WORD_END,
2172# else
2173 &&L_DEFAULT,
2174 &&L_DEFAULT,
2175# endif
2176 &&L_OP_ASCII_WORD,
2177 &&L_OP_NOT_ASCII_WORD,
2178 &&L_OP_ASCII_WORD_BOUND,
2179 &&L_OP_NOT_ASCII_WORD_BOUND,
2180# ifdef USE_WORD_BEGIN_END
2181 &&L_OP_ASCII_WORD_BEGIN,
2182 &&L_OP_ASCII_WORD_END,
2183# else
2184 &&L_DEFAULT,
2185 &&L_DEFAULT,
2186# endif
2187
2188 &&L_OP_BEGIN_BUF,
2189 &&L_OP_END_BUF,
2190 &&L_OP_BEGIN_LINE,
2191 &&L_OP_END_LINE,
2192 &&L_OP_SEMI_END_BUF,
2193 &&L_OP_BEGIN_POSITION,
2194
2195 &&L_OP_BACKREF1,
2196 &&L_OP_BACKREF2,
2197 &&L_OP_BACKREFN,
2198 &&L_OP_BACKREFN_IC,
2199 &&L_OP_BACKREF_MULTI,
2200 &&L_OP_BACKREF_MULTI_IC,
2201# ifdef USE_BACKREF_WITH_LEVEL
2202 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
2203# else
2204 &&L_DEFAULT,
2205# endif
2206 &&L_OP_MEMORY_START,
2207 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
2208 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
2209# ifdef USE_SUBEXP_CALL
2210 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
2211# else
2212 &&L_DEFAULT,
2213# endif
2214 &&L_OP_MEMORY_END,
2215# ifdef USE_SUBEXP_CALL
2216 &&L_OP_MEMORY_END_REC, /* push marker to stack */
2217# else
2218 &&L_DEFAULT,
2219# endif
2220
2221 &&L_OP_KEEP,
2222
2223 &&L_OP_FAIL, /* pop stack and move */
2224 &&L_OP_JUMP,
2225 &&L_OP_PUSH,
2226 &&L_OP_POP,
2227# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2228 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
2229# else
2230 &&L_DEFAULT,
2231# endif
2232 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
2233 &&L_OP_REPEAT, /* {n,m} */
2234 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
2235 &&L_OP_REPEAT_INC,
2236 &&L_OP_REPEAT_INC_NG, /* non greedy */
2237 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
2238 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
2239 &&L_OP_NULL_CHECK_START, /* null loop checker start */
2240 &&L_OP_NULL_CHECK_END, /* null loop checker end */
2241# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2242 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2243# else
2244 &&L_DEFAULT,
2245# endif
2246# ifdef USE_SUBEXP_CALL
2247 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2248# else
2249 &&L_DEFAULT,
2250# endif
2251
2252 &&L_OP_PUSH_POS, /* (?=...) start */
2253 &&L_OP_POP_POS, /* (?=...) end */
2254 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
2255 &&L_OP_FAIL_POS, /* (?!...) end */
2256 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
2257 &&L_OP_POP_STOP_BT, /* (?>...) end */
2258 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
2259 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2260 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
2261 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
2262 &&L_OP_ABSENT, /* (?~...) start of inner loop */
2263 &&L_OP_ABSENT_END, /* (?~...) end */
2264
2265# ifdef USE_SUBEXP_CALL
2266 &&L_OP_CALL, /* \g<name> */
2267 &&L_OP_RETURN,
2268# else
2269 &&L_DEFAULT,
2270 &&L_DEFAULT,
2271# endif
2272 &&L_OP_CONDITION,
2273
2274# ifdef USE_COMBINATION_EXPLOSION_CHECK
2275 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
2276 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
2277 &&L_OP_STATE_CHECK, /* check only */
2278# else
2279 &&L_DEFAULT,
2280 &&L_DEFAULT,
2281 &&L_DEFAULT,
2282# endif
2283# ifdef USE_COMBINATION_EXPLOSION_CHECK
2284 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2285 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2286# else
2287 &&L_DEFAULT,
2288 &&L_DEFAULT,
2289# endif
2290 /* no need: IS_DYNAMIC_OPTION() == 0 */
2291# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2292 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
2293 &&L_OP_SET_OPTION /* set option */
2294# else
2295 &&L_DEFAULT,
2296 &&L_DEFAULT
2297# endif
2298 };
2299#else /* USE_TOKEN_THREADED_VM */
2300
2301# define OP_OFFSET 0
2302# define VM_LOOP \
2303 while (1) { \
2304 OPCODE_EXEC_HOOK; \
2305 pbegin = p; \
2306 sbegin = s; \
2307 switch (*p++) {
2308# define VM_LOOP_END } sprev = sbegin; }
2309# define CASE(x) case x:
2310# define DEFAULT default:
2311# define NEXT break
2312# define JUMP continue; break
2313#endif /* USE_TOKEN_THREADED_VM */
2314
2315
2316#ifdef USE_SUBEXP_CALL
2317/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2318 etc. Additional space is required. */
2319# define ADD_NUMMEM 1
2320#else
2321/* Stack #0 is not used. */
2322# define ADD_NUMMEM 0
2323#endif
2324
2325 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2326
2327 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2328 pop_level = reg->stack_pop_level;
2329 num_mem = reg->num_mem;
2330 repeat_stk = (OnigStackIndex* )alloca_base;
2331
2332 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2333 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2334 {
2335 OnigStackIndex *pp = mem_start_stk;
2336 for (; pp < repeat_stk + n; pp += 2) {
2337 pp[0] = INVALID_STACK_INDEX;
2338 pp[1] = INVALID_STACK_INDEX;
2339 }
2340 }
2341#ifndef USE_SUBEXP_CALL
2342 mem_start_stk--; /* for index start from 1,
2343 mem_start_stk[1]..mem_start_stk[num_mem] */
2344 mem_end_stk--; /* for index start from 1,
2345 mem_end_stk[1]..mem_end_stk[num_mem] */
2346#endif
2347
2348#ifdef ONIG_DEBUG_MATCH
2349 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2350 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2351 fprintf(stderr, "size: %d, start offset: %d\n",
2352 (int )(end - str), (int )(sstart - str));
2353 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
2354#endif
2355
2356 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
2357 best_len = ONIG_MISMATCH;
2358 s = (UChar* )sstart;
2359 pkeep = (UChar* )sstart;
2360
2361
2362#ifdef ONIG_DEBUG_MATCH
2363# define OPCODE_EXEC_HOOK \
2364 if (s) { \
2365 UChar *op, *q, *bp, buf[50]; \
2366 int len; \
2367 op = p - OP_OFFSET; \
2368 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2369 bp = buf; \
2370 q = s; \
2371 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2372 for (i = 0; i < 7 && q < end; i++) { \
2373 len = enclen(encode, q, end); \
2374 while (len-- > 0) *bp++ = *q++; \
2375 } \
2376 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2377 } \
2378 xmemcpy(bp, "\"", 1); bp += 1; \
2379 *bp = 0; \
2380 fputs((char* )buf, stderr); \
2381 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2382 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2383 stk - stk_base - 1, \
2384 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2385 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2386 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2387 fprintf(stderr, "\n"); \
2388 }
2389#else
2390# define OPCODE_EXEC_HOOK ((void) 0)
2391#endif
2392
2393
2394 VM_LOOP {
2395 CASE(OP_END) MOP_IN(OP_END);
2396 n = s - sstart;
2397 if (n > best_len) {
2398 OnigRegion* region;
2399#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2400 if (IS_FIND_LONGEST(option)) {
2401 if (n > msa->best_len) {
2402 msa->best_len = n;
2403 msa->best_s = (UChar* )sstart;
2404 }
2405 else
2406 goto end_best_len;
2407 }
2408#endif
2409 best_len = n;
2410 region = msa->region;
2411 if (region) {
2412 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2413 region->end[0] = s - str;
2414 for (i = 1; i <= num_mem; i++) {
2415 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2416 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2417 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2418 else
2419 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2420
2421 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2422 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2423 : (UChar* )((void* )mem_end_stk[i])) - str;
2424 }
2425 else {
2426 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2427 }
2428 }
2429
2430#ifdef USE_CAPTURE_HISTORY
2431 if (reg->capture_history != 0) {
2432 int r;
2433 OnigCaptureTreeNode* node;
2434
2435 if (IS_NULL(region->history_root)) {
2436 region->history_root = node = history_node_new();
2437 CHECK_NULL_RETURN_MEMERR(node);
2438 }
2439 else {
2440 node = region->history_root;
2441 history_tree_clear(node);
2442 }
2443
2444 node->group = 0;
2445 node->beg = ((pkeep > s) ? s : pkeep) - str;
2446 node->end = s - str;
2447
2448 stkp = stk_base;
2449 r = make_capture_history_tree(region->history_root, &stkp,
2450 stk, (UChar* )str, reg);
2451 if (r < 0) {
2452 best_len = r; /* error code */
2453 goto finish;
2454 }
2455 }
2456#endif /* USE_CAPTURE_HISTORY */
2457 } /* if (region) */
2458 } /* n > best_len */
2459
2460#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2461 end_best_len:
2462#endif
2463 MOP_OUT;
2464
2465 if (IS_FIND_CONDITION(option)) {
2466 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2467 best_len = ONIG_MISMATCH;
2468 goto fail; /* for retry */
2469 }
2470 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2471 goto fail; /* for retry */
2472 }
2473 }
2474
2475 /* default behavior: return first-matching result. */
2476 goto finish;
2477 NEXT;
2478
2479 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2480 DATA_ENSURE(1);
2481 if (*p != *s) goto fail;
2482 p++; s++;
2483 MOP_OUT;
2484 NEXT;
2485
2486 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2487 {
2488 int len;
2489 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2490
2491 DATA_ENSURE(1);
2492 len = ONIGENC_MBC_CASE_FOLD(encode,
2493 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2494 case_fold_flag,
2495 &s, end, lowbuf);
2496 DATA_ENSURE(0);
2497 q = lowbuf;
2498 while (len-- > 0) {
2499 if (*p != *q) {
2500 goto fail;
2501 }
2502 p++; q++;
2503 }
2504 }
2505 MOP_OUT;
2506 NEXT;
2507
2508 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2509 DATA_ENSURE(2);
2510 if (*p != *s) goto fail;
2511 p++; s++;
2512 if (*p != *s) goto fail;
2513 sprev = s;
2514 p++; s++;
2515 MOP_OUT;
2516 JUMP;
2517
2518 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2519 DATA_ENSURE(3);
2520 if (*p != *s) goto fail;
2521 p++; s++;
2522 if (*p != *s) goto fail;
2523 p++; s++;
2524 if (*p != *s) goto fail;
2525 sprev = s;
2526 p++; s++;
2527 MOP_OUT;
2528 JUMP;
2529
2530 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2531 DATA_ENSURE(4);
2532 if (*p != *s) goto fail;
2533 p++; s++;
2534 if (*p != *s) goto fail;
2535 p++; s++;
2536 if (*p != *s) goto fail;
2537 p++; s++;
2538 if (*p != *s) goto fail;
2539 sprev = s;
2540 p++; s++;
2541 MOP_OUT;
2542 JUMP;
2543
2544 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2545 DATA_ENSURE(5);
2546 if (*p != *s) goto fail;
2547 p++; s++;
2548 if (*p != *s) goto fail;
2549 p++; s++;
2550 if (*p != *s) goto fail;
2551 p++; s++;
2552 if (*p != *s) goto fail;
2553 p++; s++;
2554 if (*p != *s) goto fail;
2555 sprev = s;
2556 p++; s++;
2557 MOP_OUT;
2558 JUMP;
2559
2560 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2561 GET_LENGTH_INC(tlen, p);
2562 DATA_ENSURE(tlen);
2563 while (tlen-- > 0) {
2564 if (*p++ != *s++) goto fail;
2565 }
2566 sprev = s - 1;
2567 MOP_OUT;
2568 JUMP;
2569
2570 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2571 {
2572 int len;
2573 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2574
2575 GET_LENGTH_INC(tlen, p);
2576 endp = p + tlen;
2577
2578 while (p < endp) {
2579 sprev = s;
2580 DATA_ENSURE(1);
2581 len = ONIGENC_MBC_CASE_FOLD(encode,
2582 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2583 case_fold_flag,
2584 &s, end, lowbuf);
2585 DATA_ENSURE(0);
2586 q = lowbuf;
2587 while (len-- > 0) {
2588 if (*p != *q) goto fail;
2589 p++; q++;
2590 }
2591 }
2592 }
2593
2594 MOP_OUT;
2595 JUMP;
2596
2597 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2598 DATA_ENSURE(2);
2599 if (*p != *s) goto fail;
2600 p++; s++;
2601 if (*p != *s) goto fail;
2602 p++; s++;
2603 MOP_OUT;
2604 NEXT;
2605
2606 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2607 DATA_ENSURE(4);
2608 if (*p != *s) goto fail;
2609 p++; s++;
2610 if (*p != *s) goto fail;
2611 p++; s++;
2612 sprev = s;
2613 if (*p != *s) goto fail;
2614 p++; s++;
2615 if (*p != *s) goto fail;
2616 p++; s++;
2617 MOP_OUT;
2618 JUMP;
2619
2620 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2621 DATA_ENSURE(6);
2622 if (*p != *s) goto fail;
2623 p++; s++;
2624 if (*p != *s) goto fail;
2625 p++; s++;
2626 if (*p != *s) goto fail;
2627 p++; s++;
2628 if (*p != *s) goto fail;
2629 p++; s++;
2630 sprev = s;
2631 if (*p != *s) goto fail;
2632 p++; s++;
2633 if (*p != *s) goto fail;
2634 p++; s++;
2635 MOP_OUT;
2636 JUMP;
2637
2638 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2639 GET_LENGTH_INC(tlen, p);
2640 DATA_ENSURE(tlen * 2);
2641 while (tlen-- > 0) {
2642 if (*p != *s) goto fail;
2643 p++; s++;
2644 if (*p != *s) goto fail;
2645 p++; s++;
2646 }
2647 sprev = s - 2;
2648 MOP_OUT;
2649 JUMP;
2650
2651 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2652 GET_LENGTH_INC(tlen, p);
2653 DATA_ENSURE(tlen * 3);
2654 while (tlen-- > 0) {
2655 if (*p != *s) goto fail;
2656 p++; s++;
2657 if (*p != *s) goto fail;
2658 p++; s++;
2659 if (*p != *s) goto fail;
2660 p++; s++;
2661 }
2662 sprev = s - 3;
2663 MOP_OUT;
2664 JUMP;
2665
2666 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2667 GET_LENGTH_INC(tlen, p); /* mb-len */
2668 GET_LENGTH_INC(tlen2, p); /* string len */
2669 tlen2 *= tlen;
2670 DATA_ENSURE(tlen2);
2671 while (tlen2-- > 0) {
2672 if (*p != *s) goto fail;
2673 p++; s++;
2674 }
2675 sprev = s - tlen;
2676 MOP_OUT;
2677 JUMP;
2678
2679 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2680 DATA_ENSURE(1);
2681 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2682 p += SIZE_BITSET;
2683 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2684 MOP_OUT;
2685 NEXT;
2686
2687 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2688 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2689
2690 cclass_mb:
2691 GET_LENGTH_INC(tlen, p);
2692 {
2693 OnigCodePoint code;
2694 UChar *ss;
2695 int mb_len;
2696
2697 DATA_ENSURE(1);
2698 mb_len = enclen_approx(encode, s, end);
2699 DATA_ENSURE(mb_len);
2700 ss = s;
2701 s += mb_len;
2702 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2703
2704#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2705 if (! onig_is_in_code_range(p, code)) goto fail;
2706#else
2707 q = p;
2708 ALIGNMENT_RIGHT(q);
2709 if (! onig_is_in_code_range(q, code)) goto fail;
2710#endif
2711 }
2712 p += tlen;
2713 MOP_OUT;
2714 NEXT;
2715
2716 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2717 DATA_ENSURE(1);
2718 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2719 p += SIZE_BITSET;
2720 goto cclass_mb;
2721 }
2722 else {
2723 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2724 goto fail;
2725
2726 p += SIZE_BITSET;
2727 GET_LENGTH_INC(tlen, p);
2728 p += tlen;
2729 s++;
2730 }
2731 MOP_OUT;
2732 NEXT;
2733
2734 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2735 DATA_ENSURE(1);
2736 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2737 p += SIZE_BITSET;
2738 s += enclen(encode, s, end);
2739 MOP_OUT;
2740 NEXT;
2741
2742 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2743 DATA_ENSURE(1);
2744 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2745 s++;
2746 GET_LENGTH_INC(tlen, p);
2747 p += tlen;
2748 goto cc_mb_not_success;
2749 }
2750
2751 cclass_mb_not:
2752 GET_LENGTH_INC(tlen, p);
2753 {
2754 OnigCodePoint code;
2755 UChar *ss;
2756 int mb_len = enclen(encode, s, end);
2757
2758 if (! DATA_ENSURE_CHECK(mb_len)) {
2759 DATA_ENSURE(1);
2760 s = (UChar* )end;
2761 p += tlen;
2762 goto cc_mb_not_success;
2763 }
2764
2765 ss = s;
2766 s += mb_len;
2767 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2768
2769#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2770 if (onig_is_in_code_range(p, code)) goto fail;
2771#else
2772 q = p;
2773 ALIGNMENT_RIGHT(q);
2774 if (onig_is_in_code_range(q, code)) goto fail;
2775#endif
2776 }
2777 p += tlen;
2778
2779 cc_mb_not_success:
2780 MOP_OUT;
2781 NEXT;
2782
2783 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2784 DATA_ENSURE(1);
2785 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2786 p += SIZE_BITSET;
2787 goto cclass_mb_not;
2788 }
2789 else {
2790 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2791 goto fail;
2792
2793 p += SIZE_BITSET;
2794 GET_LENGTH_INC(tlen, p);
2795 p += tlen;
2796 s++;
2797 }
2798 MOP_OUT;
2799 NEXT;
2800
2801 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2802 DATA_ENSURE(1);
2803 n = enclen_approx(encode, s, end);
2804 DATA_ENSURE(n);
2805 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2806 s += n;
2807 MOP_OUT;
2808 NEXT;
2809
2810 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2811 DATA_ENSURE(1);
2812 n = enclen_approx(encode, s, end);
2813 DATA_ENSURE(n);
2814 s += n;
2815 MOP_OUT;
2816 NEXT;
2817
2818 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2819 while (DATA_ENSURE_CHECK1) {
2820 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2821 STACK_PUSH_ALT(p, s, sprev, pkeep);
2822 n = enclen_approx(encode, s, end);
2823 DATA_ENSURE(n);
2824 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2825 sprev = s;
2826 s += n;
2827 }
2828 MOP_OUT;
2829 JUMP;
2830
2831 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2832 while (DATA_ENSURE_CHECK1) {
2833 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2834 STACK_PUSH_ALT(p, s, sprev, pkeep);
2835 n = enclen_approx(encode, s, end);
2836 if (n > 1) {
2837 DATA_ENSURE(n);
2838 sprev = s;
2839 s += n;
2840 }
2841 else {
2842 sprev = s;
2843 s++;
2844 }
2845 }
2846 MOP_OUT;
2847 JUMP;
2848
2849 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2850 while (DATA_ENSURE_CHECK1) {
2851 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
2852 if (*p == *s) {
2853 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2854 } else {
2855 /* We need to increment num_fail here so that the cache optimization is invoked correctly. */
2856 /* The match would simply fail at this point if plain `OP_ANYCHAR_STAR` were used instead. */
2857#ifdef USE_CACHE_MATCH_OPT
2858 msa->num_fail++;
2859#endif
2860 }
2861 n = enclen_approx(encode, s, end);
2862 DATA_ENSURE(n);
2863 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2864 sprev = s;
2865 s += n;
2866 }
2867 p++;
2868 MOP_OUT;
2869 NEXT;
2870
2871 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2872 while (DATA_ENSURE_CHECK1) {
2873 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2874 if (*p == *s) {
2875 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2876 } else {
2877 /* We need to increment num_fail here so that the cache optimization is invoked correctly. */
2878 /* The match would simply fail at this point if plain `OP_ANYCHAR_ML_STAR` were used instead. */
2879#ifdef USE_CACHE_MATCH_OPT
2880 msa->num_fail++;
2881#endif
2882 }
2883 n = enclen_approx(encode, s, end);
2884 if (n > 1) {
2885 DATA_ENSURE(n);
2886 sprev = s;
2887 s += n;
2888 }
2889 else {
2890 sprev = s;
2891 s++;
2892 }
2893 }
2894 p++;
2895 MOP_OUT;
2896 NEXT;
2897
2898#ifdef USE_COMBINATION_EXPLOSION_CHECK
2899 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2900 GET_STATE_CHECK_NUM_INC(mem, p);
2901 while (DATA_ENSURE_CHECK1) {
2902 STATE_CHECK_VAL(scv, mem);
2903 if (scv) goto fail;
2904
2905 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2906 n = enclen_approx(encode, s, end);
2907 DATA_ENSURE(n);
2908 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2909 sprev = s;
2910 s += n;
2911 }
2912 MOP_OUT;
2913 NEXT;
2914
2915 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2916 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2917
2918 GET_STATE_CHECK_NUM_INC(mem, p);
2919 while (DATA_ENSURE_CHECK1) {
2920 STATE_CHECK_VAL(scv, mem);
2921 if (scv) goto fail;
2922
2923 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2924 n = enclen_approx(encode, s, end);
2925 if (n > 1) {
2926 DATA_ENSURE(n);
2927 sprev = s;
2928 s += n;
2929 }
2930 else {
2931 sprev = s;
2932 s++;
2933 }
2934 }
2935 MOP_OUT;
2936 NEXT;
2937#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2938
2939 CASE(OP_WORD) MOP_IN(OP_WORD);
2940 DATA_ENSURE(1);
2941 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2942 goto fail;
2943
2944 s += enclen(encode, s, end);
2945 MOP_OUT;
2946 NEXT;
2947
2948 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2949 DATA_ENSURE(1);
2950 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2951 goto fail;
2952
2953 s += enclen(encode, s, end);
2954 MOP_OUT;
2955 NEXT;
2956
2957 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2958 DATA_ENSURE(1);
2959 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2960 goto fail;
2961
2962 s += enclen(encode, s, end);
2963 MOP_OUT;
2964 NEXT;
2965
2966 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2967 DATA_ENSURE(1);
2968 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2969 goto fail;
2970
2971 s += enclen(encode, s, end);
2972 MOP_OUT;
2973 NEXT;
2974
2975 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2976 if (ON_STR_BEGIN(s)) {
2977 DATA_ENSURE(1);
2978 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2979 goto fail;
2980 }
2981 else if (ON_STR_END(s)) {
2982 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2983 goto fail;
2984 }
2985 else {
2986 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2987 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2988 goto fail;
2989 }
2990 MOP_OUT;
2991 JUMP;
2992
2993 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2994 if (ON_STR_BEGIN(s)) {
2995 DATA_ENSURE(1);
2996 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2997 goto fail;
2998 }
2999 else if (ON_STR_END(s)) {
3000 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3001 goto fail;
3002 }
3003 else {
3004 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3005 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3006 goto fail;
3007 }
3008 MOP_OUT;
3009 JUMP;
3010
3011 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
3012 if (ON_STR_BEGIN(s)) {
3013 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3014 goto fail;
3015 }
3016 else if (ON_STR_END(s)) {
3017 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3018 goto fail;
3019 }
3020 else {
3021 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3022 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3023 goto fail;
3024 }
3025 MOP_OUT;
3026 JUMP;
3027
3028 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3029 if (ON_STR_BEGIN(s)) {
3030 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3031 goto fail;
3032 }
3033 else if (ON_STR_END(s)) {
3034 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3035 goto fail;
3036 }
3037 else {
3038 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3039 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3040 goto fail;
3041 }
3042 MOP_OUT;
3043 JUMP;
3044
3045#ifdef USE_WORD_BEGIN_END
3046 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3047 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3048 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3049 MOP_OUT;
3050 JUMP;
3051 }
3052 }
3053 goto fail;
3054 NEXT;
3055
3056 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3057 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3058 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3059 MOP_OUT;
3060 JUMP;
3061 }
3062 }
3063 goto fail;
3064 NEXT;
3065
3066 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3067 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3068 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3069 MOP_OUT;
3070 JUMP;
3071 }
3072 }
3073 goto fail;
3074 NEXT;
3075
3076 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3077 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3078 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3079 MOP_OUT;
3080 JUMP;
3081 }
3082 }
3083 goto fail;
3084 NEXT;
3085#endif
3086
3087 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3088 if (! ON_STR_BEGIN(s)) goto fail;
3089 if (IS_NOTBOS(msa->options)) goto fail;
3090
3091 MOP_OUT;
3092 JUMP;
3093
3094 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3095 if (! ON_STR_END(s)) goto fail;
3096 if (IS_NOTEOS(msa->options)) goto fail;
3097
3098 MOP_OUT;
3099 JUMP;
3100
3101 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3102 if (ON_STR_BEGIN(s)) {
3103 if (IS_NOTBOL(msa->options)) goto fail;
3104 MOP_OUT;
3105 JUMP;
3106 }
3107 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3108#ifdef USE_CRNL_AS_LINE_TERMINATOR
3109 && !(IS_NEWLINE_CRLF(option)
3110 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3111#endif
3112 && !ON_STR_END(s)) {
3113 MOP_OUT;
3114 JUMP;
3115 }
3116 goto fail;
3117 NEXT;
3118
3119 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3120 if (ON_STR_END(s)) {
3121#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3122 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3123#endif
3124 if (IS_NOTEOL(msa->options)) goto fail;
3125 MOP_OUT;
3126 JUMP;
3127#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3128 }
3129#endif
3130 }
3131 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3132 MOP_OUT;
3133 JUMP;
3134 }
3135 goto fail;
3136 NEXT;
3137
3138 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3139 if (ON_STR_END(s)) {
3140#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3141 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3142#endif
3143 if (IS_NOTEOL(msa->options)) goto fail;
3144 MOP_OUT;
3145 JUMP;
3146#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3147 }
3148#endif
3149 }
3150 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3151 UChar* ss = s + enclen(encode, s, end);
3152 if (ON_STR_END(ss)) {
3153 MOP_OUT;
3154 JUMP;
3155 }
3156#ifdef USE_CRNL_AS_LINE_TERMINATOR
3157 else if (IS_NEWLINE_CRLF(option)
3158 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3159 ss += enclen(encode, ss, end);
3160 if (ON_STR_END(ss)) {
3161 MOP_OUT;
3162 JUMP;
3163 }
3164 }
3165#endif
3166 }
3167 goto fail;
3168 NEXT;
3169
3170 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3171 if (s != msa->gpos)
3172 goto fail;
3173
3174 MOP_OUT;
3175 JUMP;
3176
3177 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3178 GET_MEMNUM_INC(mem, p);
3179 STACK_PUSH_MEM_START(mem, s);
3180 MOP_OUT;
3181 JUMP;
3182
3183 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3184 GET_MEMNUM_INC(mem, p);
3185 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3186 mem_end_stk[mem] = INVALID_STACK_INDEX;
3187 MOP_OUT;
3188 JUMP;
3189
3190 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3191 GET_MEMNUM_INC(mem, p);
3192 STACK_PUSH_MEM_END(mem, s);
3193 MOP_OUT;
3194 JUMP;
3195
3196 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3197 GET_MEMNUM_INC(mem, p);
3198 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3199 MOP_OUT;
3200 JUMP;
3201
3202 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3203 pkeep = s;
3204 MOP_OUT;
3205 JUMP;
3206
3207#ifdef USE_SUBEXP_CALL
3208 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3209 GET_MEMNUM_INC(mem, p);
3210 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3211 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3212 STACK_PUSH_MEM_END(mem, s);
3213 MOP_OUT;
3214 JUMP;
3215
3216 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3217 GET_MEMNUM_INC(mem, p);
3218 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3219 STACK_GET_MEM_START(mem, stkp);
3220
3221 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3222 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3223 else
3224 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3225
3226 STACK_PUSH_MEM_END_MARK(mem);
3227 MOP_OUT;
3228 JUMP;
3229#endif
3230
3231 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3232 mem = 1;
3233 goto backref;
3234 NEXT;
3235
3236 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3237 mem = 2;
3238 goto backref;
3239 NEXT;
3240
3241 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3242 GET_MEMNUM_INC(mem, p);
3243 backref:
3244 {
3245 int len;
3246 UChar *pstart, *pend;
3247
3248 /* If you want to remove the following line,
3249 you should perform this check at parse and compile time. */
3250 if (mem > num_mem) goto fail;
3251 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3252 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3253
3254 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3255 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3256 else
3257 pstart = (UChar* )((void* )mem_start_stk[mem]);
3258
3259 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3260 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3261 : (UChar* )((void* )mem_end_stk[mem]));
3262 n = pend - pstart;
3263 DATA_ENSURE(n);
3264 sprev = s;
3265 STRING_CMP(pstart, s, n);
3266 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3267 sprev += len;
3268
3269 MOP_OUT;
3270 JUMP;
3271 }
3272
3273 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3274 GET_MEMNUM_INC(mem, p);
3275 {
3276 int len;
3277 UChar *pstart, *pend;
3278
3279 /* If you want to remove the following line,
3280 you should perform this check at parse and compile time. */
3281 if (mem > num_mem) goto fail;
3282 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3283 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3284
3285 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3286 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3287 else
3288 pstart = (UChar* )((void* )mem_start_stk[mem]);
3289
3290 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3291 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3292 : (UChar* )((void* )mem_end_stk[mem]));
3293 n = pend - pstart;
3294 DATA_ENSURE(n);
3295 sprev = s;
3296 STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3297 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3298 sprev += len;
3299
3300 MOP_OUT;
3301 JUMP;
3302 }
3303 NEXT;
3304
3305 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3306 {
3307 int len, is_fail;
3308 UChar *pstart, *pend, *swork;
3309
3310 GET_LENGTH_INC(tlen, p);
3311 for (i = 0; i < tlen; i++) {
3312 GET_MEMNUM_INC(mem, p);
3313
3314 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3315 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3316
3317 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3318 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3319 else
3320 pstart = (UChar* )((void* )mem_start_stk[mem]);
3321
3322 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3323 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3324 : (UChar* )((void* )mem_end_stk[mem]));
3325 n = pend - pstart;
3326 DATA_ENSURE_CONTINUE(n);
3327 sprev = s;
3328 swork = s;
3329 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3330 if (is_fail) continue;
3331 s = swork;
3332 while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3333 sprev += len;
3334
3335 p += (SIZE_MEMNUM * (tlen - i - 1));
3336 break; /* success */
3337 }
3338 if (i == tlen) goto fail;
3339 MOP_OUT;
3340 JUMP;
3341 }
3342 NEXT;
3343
3344 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3345 {
3346 int len, is_fail;
3347 UChar *pstart, *pend, *swork;
3348
3349 GET_LENGTH_INC(tlen, p);
3350 for (i = 0; i < tlen; i++) {
3351 GET_MEMNUM_INC(mem, p);
3352
3353 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3354 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3355
3356 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3357 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3358 else
3359 pstart = (UChar* )((void* )mem_start_stk[mem]);
3360
3361 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3362 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3363 : (UChar* )((void* )mem_end_stk[mem]));
3364 n = pend - pstart;
3365 DATA_ENSURE_CONTINUE(n);
3366 sprev = s;
3367 swork = s;
3368 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3369 if (is_fail) continue;
3370 s = swork;
3371 while (sprev + (len = enclen(encode, sprev, end)) < s)
3372 sprev += len;
3373
3374 p += (SIZE_MEMNUM * (tlen - i - 1));
3375 break; /* success */
3376 }
3377 if (i == tlen) goto fail;
3378 MOP_OUT;
3379 JUMP;
3380 }
3381
3382#ifdef USE_BACKREF_WITH_LEVEL
3383 CASE(OP_BACKREF_WITH_LEVEL)
3384 {
3385 int len;
3386 OnigOptionType ic;
3387 LengthType level;
3388
3389 GET_OPTION_INC(ic, p);
3390 GET_LENGTH_INC(level, p);
3391 GET_LENGTH_INC(tlen, p);
3392
3393 sprev = s;
3394 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3395 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3396 while (sprev + (len = enclen(encode, sprev, end)) < s)
3397 sprev += len;
3398
3399 p += (SIZE_MEMNUM * tlen);
3400 }
3401 else
3402 goto fail;
3403
3404 MOP_OUT;
3405 JUMP;
3406 }
3407
3408#endif
3409
3410#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3411 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3412 GET_OPTION_INC(option, p);
3413 STACK_PUSH_ALT(p, s, sprev, pkeep);
3414 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3415 MOP_OUT;
3416 JUMP;
3417
3418 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3419 GET_OPTION_INC(option, p);
3420 MOP_OUT;
3421 JUMP;
3422#endif
3423
3424 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3425 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3426 STACK_PUSH_NULL_CHECK_START(mem, s);
3427 MOP_OUT;
3428 JUMP;
3429
3430 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3431 {
3432 int isnull;
3433
3434 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3435 STACK_NULL_CHECK(isnull, mem, s);
3436 if (isnull) {
3437#ifdef ONIG_DEBUG_MATCH
3438 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3439 (int )mem, (uintptr_t )s, s);
3440#endif
3441 null_check_found:
3442 /* empty loop found, skip next instruction */
3443 switch (*p++) {
3444 case OP_JUMP:
3445 case OP_PUSH:
3446 p += SIZE_RELADDR;
3447 break;
3448 case OP_REPEAT_INC:
3449 case OP_REPEAT_INC_NG:
3450 case OP_REPEAT_INC_SG:
3451 case OP_REPEAT_INC_NG_SG:
3452 p += SIZE_MEMNUM;
3453 break;
3454 default:
3455 goto unexpected_bytecode_error;
3456 break;
3457 }
3458 }
3459 }
3460 MOP_OUT;
3461 JUMP;
3462
3463#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3464 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3465 {
3466 int isnull;
3467 int ischanged = 0; // set 1 when a loop is empty but memory status is changed.
3468
3469 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3470 STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg);
3471 if (isnull) {
3472# ifdef ONIG_DEBUG_MATCH
3473 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3474 (int )mem, (uintptr_t )s, s);
3475# endif
3476 if (isnull == -1) goto fail;
3477 goto null_check_found;
3478 }
3479# ifdef USE_CACHE_MATCH_OPT
3480 if (ischanged && msa->enable_cache_match_opt) {
3481 RelAddrType rel;
3482 OnigUChar *addr;
3483 MemNumType mem;
3484 UChar* tmp = p;
3485 switch (*tmp++) {
3486 case OP_JUMP:
3487 case OP_PUSH:
3488 GET_RELADDR_INC(rel, tmp);
3489 addr = tmp + rel;
3490 break;
3491 case OP_REPEAT_INC:
3492 case OP_REPEAT_INC_NG:
3493 GET_MEMNUM_INC(mem, tmp);
3494 addr = STACK_AT(repeat_stk[mem])->u.repeat.pcode;
3495 break;
3496 default:
3497 goto unexpected_bytecode_error;
3498 }
3499 reset_match_cache(reg, addr, pbegin, (long)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_opcode, msa->num_cache_table);
3500 }
3501# endif
3502 }
3503 MOP_OUT;
3504 JUMP;
3505#endif
3506
3507#ifdef USE_SUBEXP_CALL
3508 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3509 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3510 {
3511 int isnull;
3512
3513 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3514# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3515 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3516# else
3517 STACK_NULL_CHECK_REC(isnull, mem, s);
3518# endif
3519 if (isnull) {
3520# ifdef ONIG_DEBUG_MATCH
3521 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3522 (int )mem, (uintptr_t )s, s);
3523# endif
3524 if (isnull == -1) goto fail;
3525 goto null_check_found;
3526 }
3527 else {
3528 STACK_PUSH_NULL_CHECK_END(mem);
3529 }
3530 }
3531 MOP_OUT;
3532 JUMP;
3533#endif
3534
3535 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3536 GET_RELADDR_INC(addr, p);
3537 p += addr;
3538 MOP_OUT;
3539 CHECK_INTERRUPT_IN_MATCH_AT;
3540 JUMP;
3541
3542 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3543 GET_RELADDR_INC(addr, p);
3544 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3545 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3546 MOP_OUT;
3547 JUMP;
3548
3549#ifdef USE_COMBINATION_EXPLOSION_CHECK
3550 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3551 GET_STATE_CHECK_NUM_INC(mem, p);
3552 STATE_CHECK_VAL(scv, mem);
3553 if (scv) goto fail;
3554
3555 GET_RELADDR_INC(addr, p);
3556 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3557 MOP_OUT;
3558 JUMP;
3559
3560 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3561 GET_STATE_CHECK_NUM_INC(mem, p);
3562 GET_RELADDR_INC(addr, p);
3563 STATE_CHECK_VAL(scv, mem);
3564 if (scv) {
3565 p += addr;
3566 }
3567 else {
3568 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3569 }
3570 MOP_OUT;
3571 JUMP;
3572
3573 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3574 GET_STATE_CHECK_NUM_INC(mem, p);
3575 STATE_CHECK_VAL(scv, mem);
3576 if (scv) goto fail;
3577
3578 STACK_PUSH_STATE_CHECK(s, mem);
3579 MOP_OUT;
3580 JUMP;
3581#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3582
3583 CASE(OP_POP) MOP_IN(OP_POP);
3584 STACK_POP_ONE;
3585 /* We need to increment num_fail here, for invoking a cache optimization correctly, */
3586 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3587#ifdef USE_CACHE_MATCH_OPT
3588 msa->num_fail++;
3589#endif
3590 MOP_OUT;
3591 JUMP;
3592
3593#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3594 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3595 GET_RELADDR_INC(addr, p);
3596 if (*p == *s && DATA_ENSURE_CHECK1) {
3597 p++;
3598 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3599 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3600 MOP_OUT;
3601 JUMP;
3602 }
3603 p += (addr + 1);
3604 MOP_OUT;
3605 JUMP;
3606#endif
3607
3608 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3609 GET_RELADDR_INC(addr, p);
3610 if (*p == *s) {
3611 p++;
3612 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3613 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3614 MOP_OUT;
3615 JUMP;
3616 }
3617 p++;
3618 MOP_OUT;
3619 JUMP;
3620
3621 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3622 {
3623 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3624 GET_RELADDR_INC(addr, p);
3625
3626 STACK_ENSURE(1);
3627 repeat_stk[mem] = GET_STACK_INDEX(stk);
3628 STACK_PUSH_REPEAT(mem, p);
3629
3630 if (reg->repeat_range[mem].lower == 0) {
3631 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
3632 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3633 }
3634 }
3635 MOP_OUT;
3636 JUMP;
3637
3638 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3639 {
3640 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3641 GET_RELADDR_INC(addr, p);
3642
3643 STACK_ENSURE(1);
3644 repeat_stk[mem] = GET_STACK_INDEX(stk);
3645 STACK_PUSH_REPEAT(mem, p);
3646
3647 if (reg->repeat_range[mem].lower == 0) {
3648 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3649 STACK_PUSH_ALT(p, s, sprev, pkeep);
3650 p += addr;
3651 }
3652 }
3653 MOP_OUT;
3654 JUMP;
3655
3656 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3657 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3658 si = repeat_stk[mem];
3659 stkp = STACK_AT(si);
3660
3661 repeat_inc:
3662 stkp->u.repeat.count++;
3663 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3664 /* end of repeat. Nothing to do. */
3665 }
3666 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3667 if (*pbegin == OP_REPEAT_INC) {
3668 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3669 }
3670 STACK_PUSH_ALT(p, s, sprev, pkeep);
3671 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3672 }
3673 else {
3674 p = stkp->u.repeat.pcode;
3675 }
3676 STACK_PUSH_REPEAT_INC(si);
3677 MOP_OUT;
3678 CHECK_INTERRUPT_IN_MATCH_AT;
3679 JUMP;
3680
3681 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3682 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3683 STACK_GET_REPEAT(mem, stkp);
3684 si = GET_STACK_INDEX(stkp);
3685 goto repeat_inc;
3686 NEXT;
3687
3688 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3689 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3690 si = repeat_stk[mem];
3691 stkp = STACK_AT(si);
3692
3693 repeat_inc_ng:
3694 stkp->u.repeat.count++;
3695 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3696 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3697 UChar* pcode = stkp->u.repeat.pcode;
3698
3699 STACK_PUSH_REPEAT_INC(si);
3700 if (*pbegin == OP_REPEAT_INC_NG) {
3701 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3702 }
3703 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3704 }
3705 else {
3706 p = stkp->u.repeat.pcode;
3707 STACK_PUSH_REPEAT_INC(si);
3708 }
3709 }
3710 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3711 STACK_PUSH_REPEAT_INC(si);
3712 }
3713 MOP_OUT;
3714 CHECK_INTERRUPT_IN_MATCH_AT;
3715 JUMP;
3716
3717 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3718 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3719 STACK_GET_REPEAT(mem, stkp);
3720 si = GET_STACK_INDEX(stkp);
3721 goto repeat_inc_ng;
3722 NEXT;
3723
3724 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3725 STACK_PUSH_POS(s, sprev, pkeep);
3726 MOP_OUT;
3727 JUMP;
3728
3729 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3730 {
3731 STACK_POS_END(stkp);
3732 s = stkp->u.state.pstr;
3733 sprev = stkp->u.state.pstr_prev;
3734 }
3735 MOP_OUT;
3736 JUMP;
3737
3738 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3739 GET_RELADDR_INC(addr, p);
3740 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3741 MOP_OUT;
3742 JUMP;
3743
3744 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3745 STACK_POP_TIL_POS_NOT;
3746 goto fail;
3747 NEXT;
3748
3749 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3750 STACK_PUSH_STOP_BT;
3751 MOP_OUT;
3752 JUMP;
3753
3754 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3755 STACK_STOP_BT_END;
3756 MOP_OUT;
3757 JUMP;
3758
3759 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3760 GET_LENGTH_INC(tlen, p);
3761 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3762 if (IS_NULL(s)) goto fail;
3763 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3764 MOP_OUT;
3765 JUMP;
3766
3767 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3768 GET_RELADDR_INC(addr, p);
3769 GET_LENGTH_INC(tlen, p);
3770 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3771 if (IS_NULL(q)) {
3772 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3773 If you want to change to fail, replace following line. */
3774 p += addr;
3775 /* goto fail; */
3776 }
3777 else {
3778 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3779 s = q;
3780 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3781 }
3782 MOP_OUT;
3783 JUMP;
3784
3785 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3786 STACK_POP_TIL_LOOK_BEHIND_NOT;
3787 goto fail;
3788 NEXT;
3789
3790 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3791 /* Save the absent-start-pos and the original end-pos. */
3792 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3793 MOP_OUT;
3794 JUMP;
3795
3796 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3797 {
3798 const UChar* aend = ABSENT_END_POS;
3799 UChar* absent;
3800 UChar* selfp = p - 1;
3801
3802 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3803 GET_RELADDR_INC(addr, p);
3804#ifdef ONIG_DEBUG_MATCH
3805 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3806#endif
3807 if ((absent > aend) && (s > absent)) {
3808 /* An empty match occurred in (?~...) at the start point.
3809 * Never match. */
3810 STACK_POP;
3811 goto fail;
3812 }
3813 else if ((s >= aend) && (s > absent)) {
3814 if (s > aend) {
3815 /* Only one (or less) character matched in the last iteration.
3816 * This is not a possible point. */
3817 goto fail;
3818 }
3819 /* All possible points were found. Try matching after (?~...). */
3820 DATA_ENSURE(0);
3821 p += addr;
3822 }
3823 else if (s == end) {
3824 /* At the end of the string, just match with it */
3825 DATA_ENSURE(0);
3826 p += addr;
3827 }
3828 else {
3829 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3830 n = enclen(encode, s, end);
3831 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3832 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3833 STACK_PUSH_ABSENT;
3834 ABSENT_END_POS = aend;
3835 }
3836 }
3837 MOP_OUT;
3838 JUMP;
3839
3840 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3841 /* The pattern inside (?~...) was matched.
3842 * Set the end-pos temporary and go to next iteration. */
3843 if (sprev < ABSENT_END_POS)
3844 ABSENT_END_POS = sprev;
3845#ifdef ONIG_DEBUG_MATCH
3846 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3847#endif
3848 STACK_POP_TIL_ABSENT;
3849 goto fail;
3850 NEXT;
3851
3852#ifdef USE_SUBEXP_CALL
3853 CASE(OP_CALL) MOP_IN(OP_CALL);
3854 GET_ABSADDR_INC(addr, p);
3855 STACK_PUSH_CALL_FRAME(p);
3856 p = reg->p + addr;
3857 MOP_OUT;
3858 JUMP;
3859
3860 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3861 STACK_RETURN(p);
3862 STACK_PUSH_RETURN;
3863 MOP_OUT;
3864 JUMP;
3865#endif
3866
3867 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3868 GET_MEMNUM_INC(mem, p);
3869 GET_RELADDR_INC(addr, p);
3870 if ((mem > num_mem) ||
3871 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3872 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3873 p += addr;
3874 }
3875 MOP_OUT;
3876 JUMP;
3877
3878 CASE(OP_FINISH)
3879 goto finish;
3880 NEXT;
3881
3882 CASE(OP_FAIL)
3883 if (0) {
3884 /* fall */
3885 fail:
3886 MOP_OUT;
3887 }
3888 MOP_IN(OP_FAIL);
3889 STACK_POP;
3890 p = stk->u.state.pcode;
3891 s = stk->u.state.pstr;
3892 sprev = stk->u.state.pstr_prev;
3893 pkeep = stk->u.state.pkeep;
3894
3895#ifdef USE_CACHE_MATCH_OPT
3896 if (++msa->num_fail >= (long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3897 msa->enable_cache_match_opt = 1;
3898 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3899 OnigPosition r = count_num_cache_opcode(reg, &msa->num_cache_opcode, &msa->num_cache_table);
3900 if (r < 0) goto bytecode_error;
3901 }
3902 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) {
3903 msa->enable_cache_match_opt = 0;
3904 goto fail_match_cache_opt;
3905 }
3906 if (msa->cache_index_table == NULL) {
3907 OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_table * sizeof(OnigCacheIndex));
3908 if (table == NULL) {
3909 return ONIGERR_MEMORY;
3910 }
3911 OnigPosition r = init_cache_index_table(reg, table);
3912 if (r < 0) {
3913 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
3914 else goto bytecode_error;
3915 }
3916 msa->cache_index_table = table;
3917 }
3918 size_t len = (end - str) + 1;
3919 size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len;
3920 /* overflow check */
3921 if (match_cache_size8 / len != (size_t)msa->num_cache_opcode) {
3922 return ONIGERR_MEMORY;
3923 }
3924 /* Currently, int is used for the key of match_cache */
3925 if (match_cache_size8 >= LONG_MAX_LIMIT) {
3926 return ONIGERR_MEMORY;
3927 }
3928 size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0);
3929 msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t));
3930 if (msa->match_cache == NULL) {
3931 return ONIGERR_MEMORY;
3932 }
3933 xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t));
3934 }
3935 fail_match_cache_opt:
3936#endif
3937
3938#ifdef USE_COMBINATION_EXPLOSION_CHECK
3939 if (stk->u.state.state_check != 0) {
3940 stk->type = STK_STATE_CHECK_MARK;
3941 stk++;
3942 }
3943#endif
3944
3945 MOP_OUT;
3946 CHECK_INTERRUPT_IN_MATCH_AT;
3947 JUMP;
3948
3949 DEFAULT
3950 goto bytecode_error;
3951 } VM_LOOP_END
3952
3953 finish:
3954 STACK_SAVE;
3955 if (xmalloc_base) xfree(xmalloc_base);
3956 return best_len;
3957
3958#ifdef ONIG_DEBUG
3959 stack_error:
3960 STACK_SAVE;
3961 if (xmalloc_base) xfree(xmalloc_base);
3962 return ONIGERR_STACK_BUG;
3963#endif
3964
3965 bytecode_error:
3966 STACK_SAVE;
3967 if (xmalloc_base) xfree(xmalloc_base);
3968 return ONIGERR_UNDEFINED_BYTECODE;
3969
3970 unexpected_bytecode_error:
3971 STACK_SAVE;
3972 if (xmalloc_base) xfree(xmalloc_base);
3973 return ONIGERR_UNEXPECTED_BYTECODE;
3974}
3975
3976
3977static UChar*
3978slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3979 const UChar* text, const UChar* text_end, UChar* text_range)
3980{
3981 UChar *t, *p, *s, *end;
3982
3983 end = (UChar* )text_end;
3984 end -= target_end - target - 1;
3985 if (end > text_range)
3986 end = text_range;
3987
3988 s = (UChar* )text;
3989
3990 if (enc->max_enc_len == enc->min_enc_len) {
3991 int n = enc->max_enc_len;
3992
3993 while (s < end) {
3994 if (*s == *target) {
3995 p = s + 1;
3996 t = target + 1;
3997 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3998 return s;
3999 }
4000 s += n;
4001 }
4002 return (UChar* )NULL;
4003 }
4004 while (s < end) {
4005 if (*s == *target) {
4006 p = s + 1;
4007 t = target + 1;
4008 if (target_end == t || memcmp(t, p, target_end - t) == 0)
4009 return s;
4010 }
4011 s += enclen(enc, s, text_end);
4012 }
4013
4014 return (UChar* )NULL;
4015}
4016
4017static int
4018str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4019 const UChar* t, const UChar* tend,
4020 const UChar* p, const UChar* end)
4021{
4022 int lowlen;
4023 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4024
4025 while (t < tend) {
4026 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4027 q = lowbuf;
4028 while (lowlen > 0) {
4029 if (*t++ != *q++) return 0;
4030 lowlen--;
4031 }
4032 }
4033
4034 return 1;
4035}
4036
4037static UChar*
4038slow_search_ic(OnigEncoding enc, int case_fold_flag,
4039 UChar* target, UChar* target_end,
4040 const UChar* text, const UChar* text_end, UChar* text_range)
4041{
4042 UChar *s, *end;
4043
4044 end = (UChar* )text_end;
4045 end -= target_end - target - 1;
4046 if (end > text_range)
4047 end = text_range;
4048
4049 s = (UChar* )text;
4050
4051 while (s < end) {
4052 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4053 s, text_end))
4054 return s;
4055
4056 s += enclen(enc, s, text_end);
4057 }
4058
4059 return (UChar* )NULL;
4060}
4061
4062static UChar*
4063slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4064 const UChar* text, const UChar* adjust_text,
4065 const UChar* text_end, const UChar* text_start)
4066{
4067 UChar *t, *p, *s;
4068
4069 s = (UChar* )text_end;
4070 s -= (target_end - target);
4071 if (s > text_start)
4072 s = (UChar* )text_start;
4073 else
4074 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4075
4076 while (s >= text) {
4077 if (*s == *target) {
4078 p = s + 1;
4079 t = target + 1;
4080 while (t < target_end) {
4081 if (*t != *p++)
4082 break;
4083 t++;
4084 }
4085 if (t == target_end)
4086 return s;
4087 }
4088 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4089 }
4090
4091 return (UChar* )NULL;
4092}
4093
4094static UChar*
4095slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4096 UChar* target, UChar* target_end,
4097 const UChar* text, const UChar* adjust_text,
4098 const UChar* text_end, const UChar* text_start)
4099{
4100 UChar *s;
4101
4102 s = (UChar* )text_end;
4103 s -= (target_end - target);
4104 if (s > text_start)
4105 s = (UChar* )text_start;
4106 else
4107 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4108
4109 while (s >= text) {
4110 if (str_lower_case_match(enc, case_fold_flag,
4111 target, target_end, s, text_end))
4112 return s;
4113
4114 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4115 }
4116
4117 return (UChar* )NULL;
4118}
4119
4120#ifndef USE_SUNDAY_QUICK_SEARCH
4121/* Boyer-Moore-Horspool search applied to a multibyte string */
4122static UChar*
4123bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4124 const UChar* text, const UChar* text_end,
4125 const UChar* text_range)
4126{
4127 const UChar *s, *se, *t, *p, *end;
4128 const UChar *tail;
4129 ptrdiff_t skip, tlen1;
4130
4131# ifdef ONIG_DEBUG_SEARCH
4132 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4133 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4134# endif
4135
4136 tail = target_end - 1;
4137 tlen1 = tail - target;
4138 end = text_range;
4139 if (end + tlen1 > text_end)
4140 end = text_end - tlen1;
4141
4142 s = text;
4143
4144 if (IS_NULL(reg->int_map)) {
4145 while (s < end) {
4146 p = se = s + tlen1;
4147 t = tail;
4148 while (*p == *t) {
4149 if (t == target) return (UChar* )s;
4150 p--; t--;
4151 }
4152 skip = reg->map[*se];
4153 t = s;
4154 do {
4155 s += enclen(reg->enc, s, end);
4156 } while ((s - t) < skip && s < end);
4157 }
4158 }
4159 else {
4160# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4161 while (s < end) {
4162 p = se = s + tlen1;
4163 t = tail;
4164 while (*p == *t) {
4165 if (t == target) return (UChar* )s;
4166 p--; t--;
4167 }
4168 skip = reg->int_map[*se];
4169 t = s;
4170 do {
4171 s += enclen(reg->enc, s, end);
4172 } while ((s - t) < skip && s < end);
4173 }
4174# endif
4175 }
4176
4177 return (UChar* )NULL;
4178}
4179
4180/* Boyer-Moore-Horspool search */
4181static UChar*
4182bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4183 const UChar* text, const UChar* text_end, const UChar* text_range)
4184{
4185 const UChar *s, *t, *p, *end;
4186 const UChar *tail;
4187
4188# ifdef ONIG_DEBUG_SEARCH
4189 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4190 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4191# endif
4192
4193 end = text_range + (target_end - target) - 1;
4194 if (end > text_end)
4195 end = text_end;
4196
4197 tail = target_end - 1;
4198 s = text + (target_end - target) - 1;
4199 if (IS_NULL(reg->int_map)) {
4200 while (s < end) {
4201 p = s;
4202 t = tail;
4203# ifdef ONIG_DEBUG_SEARCH
4204 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
4205 (intptr_t )(s - text), s);
4206# endif
4207 while (*p == *t) {
4208 if (t == target) return (UChar* )p;
4209 p--; t--;
4210 }
4211 s += reg->map[*s];
4212 }
4213 }
4214 else { /* see int_map[] */
4215# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4216 while (s < end) {
4217 p = s;
4218 t = tail;
4219 while (*p == *t) {
4220 if (t == target) return (UChar* )p;
4221 p--; t--;
4222 }
4223 s += reg->int_map[*s];
4224 }
4225# endif
4226 }
4227 return (UChar* )NULL;
4228}
4229
4230/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
4231static UChar*
4232bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4233 const UChar* text, const UChar* text_end,
4234 const UChar* text_range)
4235{
4236 const UChar *s, *se, *t, *end;
4237 const UChar *tail;
4238 ptrdiff_t skip, tlen1;
4239 OnigEncoding enc = reg->enc;
4240 int case_fold_flag = reg->case_fold_flag;
4241
4242# ifdef ONIG_DEBUG_SEARCH
4243 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4244 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4245# endif
4246
4247 tail = target_end - 1;
4248 tlen1 = tail - target;
4249 end = text_range;
4250 if (end + tlen1 > text_end)
4251 end = text_end - tlen1;
4252
4253 s = text;
4254
4255 if (IS_NULL(reg->int_map)) {
4256 while (s < end) {
4257 se = s + tlen1;
4258 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4259 s, se + 1))
4260 return (UChar* )s;
4261 skip = reg->map[*se];
4262 t = s;
4263 do {
4264 s += enclen(reg->enc, s, end);
4265 } while ((s - t) < skip && s < end);
4266 }
4267 }
4268 else {
4269# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4270 while (s < end) {
4271 se = s + tlen1;
4272 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4273 s, se + 1))
4274 return (UChar* )s;
4275 skip = reg->int_map[*se];
4276 t = s;
4277 do {
4278 s += enclen(reg->enc, s, end);
4279 } while ((s - t) < skip && s < end);
4280 }
4281# endif
4282 }
4283
4284 return (UChar* )NULL;
4285}
4286
4287/* Boyer-Moore-Horspool search (ignore case) */
4288static UChar*
4289bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4290 const UChar* text, const UChar* text_end, const UChar* text_range)
4291{
4292 const UChar *s, *p, *end;
4293 const UChar *tail;
4294 OnigEncoding enc = reg->enc;
4295 int case_fold_flag = reg->case_fold_flag;
4296
4297# ifdef ONIG_DEBUG_SEARCH
4298 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4299 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4300# endif
4301
4302 end = text_range + (target_end - target) - 1;
4303 if (end > text_end)
4304 end = text_end;
4305
4306 tail = target_end - 1;
4307 s = text + (target_end - target) - 1;
4308 if (IS_NULL(reg->int_map)) {
4309 while (s < end) {
4310 p = s - (target_end - target) + 1;
4311 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4312 p, s + 1))
4313 return (UChar* )p;
4314 s += reg->map[*s];
4315 }
4316 }
4317 else { /* see int_map[] */
4318# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4319 while (s < end) {
4320 p = s - (target_end - target) + 1;
4321 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4322 p, s + 1))
4323 return (UChar* )p;
4324 s += reg->int_map[*s];
4325 }
4326# endif
4327 }
4328 return (UChar* )NULL;
4329}
4330
4331#else /* USE_SUNDAY_QUICK_SEARCH */
4332
4333/* Sunday's quick search applied to a multibyte string */
4334static UChar*
4335bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4336 const UChar* text, const UChar* text_end,
4337 const UChar* text_range)
4338{
4339 const UChar *s, *se, *t, *p, *end;
4340 const UChar *tail;
4341 ptrdiff_t skip, tlen1;
4342 OnigEncoding enc = reg->enc;
4343
4344# ifdef ONIG_DEBUG_SEARCH
4345 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4346 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4347# endif
4348
4349 tail = target_end - 1;
4350 tlen1 = tail - target;
4351 end = text_range;
4352 if (end + tlen1 > text_end)
4353 end = text_end - tlen1;
4354
4355 s = text;
4356
4357 if (IS_NULL(reg->int_map)) {
4358 while (s < end) {
4359 p = se = s + tlen1;
4360 t = tail;
4361 while (*p == *t) {
4362 if (t == target) return (UChar* )s;
4363 p--; t--;
4364 }
4365 if (s + 1 >= end) break;
4366 skip = reg->map[se[1]];
4367 t = s;
4368 do {
4369 s += enclen(enc, s, end);
4370 } while ((s - t) < skip && s < end);
4371 }
4372 }
4373 else {
4374# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4375 while (s < end) {
4376 p = se = s + tlen1;
4377 t = tail;
4378 while (*p == *t) {
4379 if (t == target) return (UChar* )s;
4380 p--; t--;
4381 }
4382 if (s + 1 >= end) break;
4383 skip = reg->int_map[se[1]];
4384 t = s;
4385 do {
4386 s += enclen(enc, s, end);
4387 } while ((s - t) < skip && s < end);
4388 }
4389# endif
4390 }
4391
4392 return (UChar* )NULL;
4393}
4394
4395/* Sunday's quick search */
4396static UChar*
4397bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4398 const UChar* text, const UChar* text_end, const UChar* text_range)
4399{
4400 const UChar *s, *t, *p, *end;
4401 const UChar *tail;
4402 ptrdiff_t tlen1;
4403
4404# ifdef ONIG_DEBUG_SEARCH
4405 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4406 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4407# endif
4408
4409 tail = target_end - 1;
4410 tlen1 = tail - target;
4411 end = text_range + tlen1;
4412 if (end > text_end)
4413 end = text_end;
4414
4415 s = text + tlen1;
4416 if (IS_NULL(reg->int_map)) {
4417 while (s < end) {
4418 p = s;
4419 t = tail;
4420 while (*p == *t) {
4421 if (t == target) return (UChar* )p;
4422 p--; t--;
4423 }
4424 if (s + 1 >= end) break;
4425 s += reg->map[s[1]];
4426 }
4427 }
4428 else { /* see int_map[] */
4429# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4430 while (s < end) {
4431 p = s;
4432 t = tail;
4433 while (*p == *t) {
4434 if (t == target) return (UChar* )p;
4435 p--; t--;
4436 }
4437 if (s + 1 >= end) break;
4438 s += reg->int_map[s[1]];
4439 }
4440# endif
4441 }
4442 return (UChar* )NULL;
4443}
4444
4445/* Sunday's quick search applied to a multibyte string (ignore case) */
4446static UChar*
4447bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4448 const UChar* text, const UChar* text_end,
4449 const UChar* text_range)
4450{
4451 const UChar *s, *se, *t, *end;
4452 const UChar *tail;
4453 ptrdiff_t skip, tlen1;
4454 OnigEncoding enc = reg->enc;
4455 int case_fold_flag = reg->case_fold_flag;
4456
4457# ifdef ONIG_DEBUG_SEARCH
4458 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4459 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4460# endif
4461
4462 tail = target_end - 1;
4463 tlen1 = tail - target;
4464 end = text_range;
4465 if (end + tlen1 > text_end)
4466 end = text_end - tlen1;
4467
4468 s = text;
4469
4470 if (IS_NULL(reg->int_map)) {
4471 while (s < end) {
4472 se = s + tlen1;
4473 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4474 s, se + 1))
4475 return (UChar* )s;
4476 if (s + 1 >= end) break;
4477 skip = reg->map[se[1]];
4478 t = s;
4479 do {
4480 s += enclen(enc, s, end);
4481 } while ((s - t) < skip && s < end);
4482 }
4483 }
4484 else {
4485# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4486 while (s < end) {
4487 se = s + tlen1;
4488 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4489 s, se + 1))
4490 return (UChar* )s;
4491 if (s + 1 >= end) break;
4492 skip = reg->int_map[se[1]];
4493 t = s;
4494 do {
4495 s += enclen(enc, s, end);
4496 } while ((s - t) < skip && s < end);
4497 }
4498# endif
4499 }
4500
4501 return (UChar* )NULL;
4502}
4503
4504/* Sunday's quick search (ignore case) */
4505static UChar*
4506bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4507 const UChar* text, const UChar* text_end, const UChar* text_range)
4508{
4509 const UChar *s, *p, *end;
4510 const UChar *tail;
4511 ptrdiff_t tlen1;
4512 OnigEncoding enc = reg->enc;
4513 int case_fold_flag = reg->case_fold_flag;
4514
4515# ifdef ONIG_DEBUG_SEARCH
4516 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4517 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4518# endif
4519
4520 tail = target_end - 1;
4521 tlen1 = tail - target;
4522 end = text_range + tlen1;
4523 if (end > text_end)
4524 end = text_end;
4525
4526 s = text + tlen1;
4527 if (IS_NULL(reg->int_map)) {
4528 while (s < end) {
4529 p = s - tlen1;
4530 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4531 p, s + 1))
4532 return (UChar* )p;
4533 if (s + 1 >= end) break;
4534 s += reg->map[s[1]];
4535 }
4536 }
4537 else { /* see int_map[] */
4538# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4539 while (s < end) {
4540 p = s - tlen1;
4541 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4542 p, s + 1))
4543 return (UChar* )p;
4544 if (s + 1 >= end) break;
4545 s += reg->int_map[s[1]];
4546 }
4547# endif
4548 }
4549 return (UChar* )NULL;
4550}
4551#endif /* USE_SUNDAY_QUICK_SEARCH */
4552
4553#ifdef USE_INT_MAP_BACKWARD
4554static int
4555set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4556 int** skip)
4557{
4558 int i, len;
4559
4560 if (IS_NULL(*skip)) {
4561 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4562 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4563 }
4564
4565 len = (int )(end - s);
4566 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4567 (*skip)[i] = len;
4568
4569 for (i = len - 1; i > 0; i--)
4570 (*skip)[s[i]] = i;
4571
4572 return 0;
4573}
4574
4575static UChar*
4576bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4577 const UChar* text, const UChar* adjust_text,
4578 const UChar* text_end, const UChar* text_start)
4579{
4580 const UChar *s, *t, *p;
4581
4582 s = text_end - (target_end - target);
4583 if (text_start < s)
4584 s = text_start;
4585 else
4586 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4587
4588 while (s >= text) {
4589 p = s;
4590 t = target;
4591 while (t < target_end && *p == *t) {
4592 p++; t++;
4593 }
4594 if (t == target_end)
4595 return (UChar* )s;
4596
4597 s -= reg->int_map_backward[*s];
4598 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4599 }
4600
4601 return (UChar* )NULL;
4602}
4603#endif
4604
4605static UChar*
4606map_search(OnigEncoding enc, UChar map[],
4607 const UChar* text, const UChar* text_range, const UChar* text_end)
4608{
4609 const UChar *s = text;
4610
4611 while (s < text_range) {
4612 if (map[*s]) return (UChar* )s;
4613
4614 s += enclen(enc, s, text_end);
4615 }
4616 return (UChar* )NULL;
4617}
4618
4619static UChar*
4620map_search_backward(OnigEncoding enc, UChar map[],
4621 const UChar* text, const UChar* adjust_text,
4622 const UChar* text_start, const UChar* text_end)
4623{
4624 const UChar *s = text_start;
4625
4626 while (s >= text) {
4627 if (map[*s]) return (UChar* )s;
4628
4629 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4630 }
4631 return (UChar* )NULL;
4632}
4633
4634extern OnigPosition
4635onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4636 OnigOptionType option)
4637{
4638 ptrdiff_t r;
4639 UChar *prev;
4640 OnigMatchArg msa;
4641
4642 MATCH_ARG_INIT(msa, option, region, at, at);
4643#ifdef USE_COMBINATION_EXPLOSION_CHECK
4644 {
4645 ptrdiff_t offset = at - str;
4646 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4647 }
4648#endif
4649
4650 if (region) {
4651 r = onig_region_resize_clear(region, reg->num_mem + 1);
4652 }
4653 else
4654 r = 0;
4655
4656 if (r == 0) {
4657 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4658 r = match_at(reg, str, end,
4659#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4660 end,
4661#endif
4662 at, prev, &msa);
4663 }
4664
4665 MATCH_ARG_FREE(msa);
4666 return r;
4667}
4668
4669static int
4670forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4671 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4672{
4673 UChar *p, *pprev = (UChar* )NULL;
4674 size_t input_len = end - str;
4675
4676#ifdef ONIG_DEBUG_SEARCH
4677 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4678 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4679#endif
4680
4681 if (reg->dmin > input_len) {
4682 return 0;
4683 }
4684
4685 p = s;
4686 if (reg->dmin > 0) {
4687 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4688 p += reg->dmin;
4689 }
4690 else {
4691 UChar *q = p + reg->dmin;
4692
4693 if (q >= end) return 0; /* fail */
4694 while (p < q) p += enclen(reg->enc, p, end);
4695 }
4696 }
4697
4698 retry:
4699 switch (reg->optimize) {
4700 case ONIG_OPTIMIZE_EXACT:
4701 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4702 break;
4703 case ONIG_OPTIMIZE_EXACT_IC:
4704 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4705 reg->exact, reg->exact_end, p, end, range);
4706 break;
4707
4708 case ONIG_OPTIMIZE_EXACT_BM:
4709 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4710 break;
4711
4712 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4713 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4714 break;
4715
4716 case ONIG_OPTIMIZE_EXACT_BM_IC:
4717 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4718 break;
4719
4720 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4721 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4722 break;
4723
4724 case ONIG_OPTIMIZE_MAP:
4725 p = map_search(reg->enc, reg->map, p, range, end);
4726 break;
4727 }
4728
4729 if (p && p < range) {
4730 if (p - reg->dmin < s) {
4731 retry_gate:
4732 pprev = p;
4733 p += enclen(reg->enc, p, end);
4734 goto retry;
4735 }
4736
4737 if (reg->sub_anchor) {
4738 UChar* prev;
4739
4740 switch (reg->sub_anchor) {
4741 case ANCHOR_BEGIN_LINE:
4742 if (!ON_STR_BEGIN(p)) {
4743 prev = onigenc_get_prev_char_head(reg->enc,
4744 (pprev ? pprev : str), p, end);
4745 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4746 goto retry_gate;
4747 }
4748 break;
4749
4750 case ANCHOR_END_LINE:
4751 if (ON_STR_END(p)) {
4752#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4753 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4754 (pprev ? pprev : str), p);
4755 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
4756 goto retry_gate;
4757#endif
4758 }
4759 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
4760 goto retry_gate;
4761 break;
4762 }
4763 }
4764
4765 if (reg->dmax == 0) {
4766 *low = p;
4767 if (low_prev) {
4768 if (*low > s)
4769 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
4770 else
4771 *low_prev = onigenc_get_prev_char_head(reg->enc,
4772 (pprev ? pprev : str), p, end);
4773 }
4774 }
4775 else {
4776 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4777 if (p < str + reg->dmax) {
4778 *low = (UChar* )str;
4779 if (low_prev)
4780 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4781 }
4782 else {
4783 *low = p - reg->dmax;
4784 if (*low > s) {
4785 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4786 *low, end, (const UChar** )low_prev);
4787 if (low_prev && IS_NULL(*low_prev))
4788 *low_prev = onigenc_get_prev_char_head(reg->enc,
4789 (pprev ? pprev : s), *low, end);
4790 }
4791 else {
4792 if (low_prev)
4793 *low_prev = onigenc_get_prev_char_head(reg->enc,
4794 (pprev ? pprev : str), *low, end);
4795 }
4796 }
4797 }
4798 }
4799 /* no needs to adjust *high, *high is used as range check only */
4800 *high = p - reg->dmin;
4801
4802#ifdef ONIG_DEBUG_SEARCH
4803 fprintf(stderr,
4804 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4805 *low - str, *high - str, reg->dmin, reg->dmax);
4806#endif
4807 return 1; /* success */
4808 }
4809
4810 return 0; /* fail */
4811}
4812
4813#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4814
4815static int
4816backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4817 UChar* s, const UChar* range, UChar* adjrange,
4818 UChar** low, UChar** high)
4819{
4820 UChar *p;
4821 size_t input_len = end - str;
4822
4823 if (reg->dmin > input_len) {
4824 return 0;
4825 }
4826
4827 range += reg->dmin;
4828 p = s;
4829
4830 retry:
4831 switch (reg->optimize) {
4832 case ONIG_OPTIMIZE_EXACT:
4833 exact_method:
4834 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4835 range, adjrange, end, p);
4836 break;
4837
4838 case ONIG_OPTIMIZE_EXACT_IC:
4839 case ONIG_OPTIMIZE_EXACT_BM_IC:
4840 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4841 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4842 reg->exact, reg->exact_end,
4843 range, adjrange, end, p);
4844 break;
4845
4846 case ONIG_OPTIMIZE_EXACT_BM:
4847 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4848#ifdef USE_INT_MAP_BACKWARD
4849 if (IS_NULL(reg->int_map_backward)) {
4850 int r;
4851 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4852 goto exact_method;
4853
4854 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4855 &(reg->int_map_backward));
4856 if (r) return r;
4857 }
4858 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4859 end, p);
4860#else
4861 goto exact_method;
4862#endif
4863 break;
4864
4865 case ONIG_OPTIMIZE_MAP:
4866 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4867 break;
4868 }
4869
4870 if (p) {
4871 if (reg->sub_anchor) {
4872 UChar* prev;
4873
4874 switch (reg->sub_anchor) {
4875 case ANCHOR_BEGIN_LINE:
4876 if (!ON_STR_BEGIN(p)) {
4877 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4878 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4879 p = prev;
4880 goto retry;
4881 }
4882 }
4883 break;
4884
4885 case ANCHOR_END_LINE:
4886 if (ON_STR_END(p)) {
4887#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4888 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4889 if (IS_NULL(prev)) goto fail;
4890 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4891 p = prev;
4892 goto retry;
4893 }
4894#endif
4895 }
4896 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4897 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4898 if (IS_NULL(p)) goto fail;
4899 goto retry;
4900 }
4901 break;
4902 }
4903 }
4904
4905 /* no needs to adjust *high, *high is used as range check only */
4906 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4907 *low = p - reg->dmax;
4908 *high = p - reg->dmin;
4909 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4910 }
4911
4912#ifdef ONIG_DEBUG_SEARCH
4913 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4914 (int )(*low - str), (int )(*high - str));
4915#endif
4916 return 1; /* success */
4917 }
4918
4919 fail:
4920#ifdef ONIG_DEBUG_SEARCH
4921 fprintf(stderr, "backward_search_range: fail.\n");
4922#endif
4923 return 0; /* fail */
4924}
4925
4926
4927extern OnigPosition
4928onig_search(regex_t* reg, const UChar* str, const UChar* end,
4929 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4930{
4931 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4932}
4933
/*
 * onig_search_gpos - search the subject [str, end) for a position at which
 * reg matches, trying candidate positions from start toward range.
 *
 *   reg         compiled regex.
 *   str, end    bounds of the subject string.
 *   global_pos  handed to MATCH_ARG_INIT as the last argument and used to
 *               narrow range when the pattern has ANCHOR_BEGIN_POSITION.
 *   start       first position tried; must lie within [str, end].
 *   range       search limit. range > start selects a forward search,
 *               otherwise the search runs backward down to range.
 *   region      may be NULL; when given it is resized/cleared for
 *               reg->num_mem + 1 entries before searching.
 *   option      forwarded to MATCH_ARG_INIT.
 *
 * Returns the byte offset (s - str) of the matching position on success,
 * ONIG_MISMATCH when nothing matched, a nonzero result of
 * onig_region_resize_clear(), or a negative result of match_at() that is
 * not ONIG_MISMATCH (treated as an error).
 */
4934extern OnigPosition
4935onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4936 const UChar* global_pos,
4937 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4938{
4939 ptrdiff_t r;
4940 UChar *s, *prev;
4941 OnigMatchArg msa;
  /* start and range are narrowed below; the caller's values are kept here
     because MATCH_AND_RETURN_CHECK passes them on to match_at(). */
4942#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4943 const UChar *orig_start = start;
4944 const UChar *orig_range = range;
4945#endif
4946
4947#ifdef ONIG_DEBUG_SEARCH
4948 fprintf(stderr,
4949 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4950 (uintptr_t )str, str, end - str, start - str, range - str);
4951#endif
4952
  /* Prepare the region; a nonzero result is returned as-is. msa has not
     been initialized yet, hence the *_no_msa exit labels. */
4953 if (region) {
4954 r = onig_region_resize_clear(region, reg->num_mem + 1);
4955 if (r) goto finish_no_msa;
4956 }
4957
  /* A start position outside the subject can never match. */
4958 if (start > end || start < str) goto mismatch_no_msa;
4959
4960
  /* MATCH_AND_RETURN_CHECK: run match_at() at the current s. A result
     >= 0 jumps to `match` (with USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE only
     when ONIG_OPTION_FIND_LONGEST is off), a negative result other than
     ONIG_MISMATCH jumps to `finish`, and ONIG_MISMATCH falls through.
     The macro uses the locals r, s, prev and msa directly. Its argument is
     forwarded to match_at() only when
     USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined. */
4961#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4962# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4963# define MATCH_AND_RETURN_CHECK(upper_range) \
4964 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4965 if (r != ONIG_MISMATCH) {\
4966 if (r >= 0) {\
4967 if (! IS_FIND_LONGEST(reg->options)) {\
4968 goto match;\
4969 }\
4970 }\
4971 else goto finish; /* error */ \
4972 }
4973# else
4974# define MATCH_AND_RETURN_CHECK(upper_range) \
4975 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4976 if (r != ONIG_MISMATCH) {\
4977 if (r >= 0) {\
4978 goto match;\
4979 }\
4980 else goto finish; /* error */ \
4981 }
4982# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4983#else
4984# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4985# define MATCH_AND_RETURN_CHECK(none) \
4986 r = match_at(reg, str, end, s, prev, &msa);\
4987 if (r != ONIG_MISMATCH) {\
4988 if (r >= 0) {\
4989 if (! IS_FIND_LONGEST(reg->options)) {\
4990 goto match;\
4991 }\
4992 }\
4993 else goto finish; /* error */ \
4994 }
4995# else
4996# define MATCH_AND_RETURN_CHECK(none) \
4997 r = match_at(reg, str, end, s, prev, &msa);\
4998 if (r != ONIG_MISMATCH) {\
4999 if (r >= 0) {\
5000 goto match;\
5001 }\
5002 else goto finish; /* error */ \
5003 }
5004# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
5005#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
5006
5007
5008 /* anchor optimize: resume search range */
  /* Only start/range are adjusted in this section (or the search is
     abandoned); no matching happens here. */
5009 if (reg->anchor != 0 && str < end) {
5010 UChar *min_semi_end, *max_semi_end;
5011
5012 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
5013 /* search start-position only */
5014 begin_position:
5015 if (range > start)
5016 {
5017 if (global_pos > start)
5018 {
5019 if (global_pos < range)
5020 range = global_pos + 1;
5021 }
5022 else
5023 range = start + 1;
5024 }
5025 else
5026 range = start;
5027 }
5028 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
5029 /* search str-position only */
5030 if (range > start) {
5031 if (start != str) goto mismatch_no_msa;
5032 range = str + 1;
5033 }
5034 else {
5035 if (range <= str) {
5036 start = str;
5037 range = str;
5038 }
5039 else
5040 goto mismatch_no_msa;
5041 }
5042 }
5043 else if (reg->anchor & ANCHOR_END_BUF) {
5044 min_semi_end = max_semi_end = (UChar* )end;
5045
    /* Shared with ANCHOR_SEMI_END_BUF below, which jumps here after
       choosing min_semi_end / max_semi_end. start and range are clipped
       using reg->anchor_dmin / reg->anchor_dmax relative to those ends. */
5046 end_buf:
5047 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5048 goto mismatch_no_msa;
5049
5050 if (range > start) {
5051 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5052 start = min_semi_end - reg->anchor_dmax;
5053 if (start < end)
5054 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5055 }
5056 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5057 range = max_semi_end - reg->anchor_dmin + 1;
5058 }
5059
5060 if (start > range) goto mismatch_no_msa;
5061 /* If start == range, match with empty at end.
5062 Backward search is used. */
5063 }
5064 else {
5065 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5066 range = min_semi_end - reg->anchor_dmax;
5067 }
5068 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5069 start = max_semi_end - reg->anchor_dmin;
5070 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5071 }
5072 if (range > start) goto mismatch_no_msa;
5073 }
5074 }
5075 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5076 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5077
    /* If the subject ends with a newline, min_semi_end is moved back to
       that newline (and, with USE_CRNL_AS_LINE_TERMINATOR, possibly one
       step further); max_semi_end stays at end. */
5078 max_semi_end = (UChar* )end;
5079 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5080 min_semi_end = pre_end;
5081
5082#ifdef USE_CRNL_AS_LINE_TERMINATOR
5083 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5084 if (IS_NOT_NULL(pre_end) &&
5085 IS_NEWLINE_CRLF(reg->options) &&
5086 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5087 min_semi_end = pre_end;
5088 }
5089#endif
5090 if (min_semi_end > str && start <= min_semi_end) {
5091 goto end_buf;
5092 }
5093 }
5094 else {
5095 min_semi_end = (UChar* )end;
5096 goto end_buf;
5097 }
5098 }
5099 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5100 goto begin_position;
5101 }
5102 }
5103 else if (str == end) { /* empty string */
5104 static const UChar address_for_empty_string[] = "";
5105
5106#ifdef ONIG_DEBUG_SEARCH
5107 fprintf(stderr, "onig_search: empty string.\n");
5108#endif
5109
    /* Empty subject: a single attempt is made, against a static empty
       buffer, and only when reg->threshold_len is 0. */
5110 if (reg->threshold_len == 0) {
5111 start = end = str = address_for_empty_string;
5112 s = (UChar* )start;
5113 prev = (UChar* )NULL;
5114
5115 MATCH_ARG_INIT(msa, option, region, start, start);
5116#ifdef USE_COMBINATION_EXPLOSION_CHECK
5117 msa.state_check_buff = (void* )0;
5118 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
5119#endif
5120 MATCH_AND_RETURN_CHECK(end);
5121 goto mismatch;
5122 }
5123 goto mismatch_no_msa;
5124 }
5125
5126#ifdef ONIG_DEBUG_SEARCH
5127 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5128 (int )(end - str), (int )(start - str), (int )(range - str));
5129#endif
5130
  /* General case: set up the match state. From here on the exits are
     `match`, `mismatch` and `finish`, all of which release msa. */
5131 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5132#ifdef USE_COMBINATION_EXPLOSION_CHECK
5133 {
5134 ptrdiff_t offset = (MIN(start, range) - str);
5135 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5136 }
5137#endif
5138
5139 s = (UChar* )start;
5140 if (range > start) { /* forward search */
5141 if (s > str)
5142 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5143 else
5144 prev = (UChar* )NULL;
5145
5146 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5147 UChar *sch_range, *low, *high, *low_prev;
5148
      /* sch_range bounds the pre-filter search: range extended by
         reg->dmax and capped at end. */
5149 sch_range = (UChar* )range;
5150 if (reg->dmax != 0) {
5151 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5152 sch_range = (UChar* )end;
5153 else {
5154 sch_range += reg->dmax;
5155 if (sch_range > end) sch_range = (UChar* )end;
5156 }
5157 }
5158
      /* Fewer than threshold_len bytes remain after start. */
5159 if ((end - start) < reg->threshold_len)
5160 goto mismatch;
5161
      /* Finite dmax: forward_search_range() yields a [low, high] window of
         candidates; match_at() is tried at each character head in it, then
         the pre-filter is run again from where the window ended. */
5162 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5163 do {
5164 if (! forward_search_range(reg, str, end, s, sch_range,
5165 &low, &high, &low_prev)) goto mismatch;
5166 if (s < low) {
5167 s = low;
5168 prev = low_prev;
5169 }
5170 while (s <= high) {
5171 MATCH_AND_RETURN_CHECK(orig_range);
5172 prev = s;
5173 s += enclen(reg->enc, s, end);
5174 }
5175 } while (s < range);
5176 goto mismatch;
5177 }
5178 else { /* check only. */
        /* Infinite dmax: the pre-filter only decides whether searching is
           worthwhile; low/high are not used afterwards. */
5179 if (! forward_search_range(reg, str, end, s, sch_range,
5180 &low, &high, (UChar** )NULL)) goto mismatch;
5181
        /* ANCHOR_ANYCHAR_STAR: after a failed attempt, and unless the
           pattern has ANCHOR_LOOK_BEHIND or ANCHOR_PREC_READ_NOT, skip
           ahead to the position following the next newline. */
5182 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5183 do {
5184 MATCH_AND_RETURN_CHECK(orig_range);
5185 prev = s;
5186 s += enclen(reg->enc, s, end);
5187
5188 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5189 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5190 && s < range) {
5191 prev = s;
5192 s += enclen(reg->enc, s, end);
5193 }
5194 }
5195 } while (s < range);
5196 goto mismatch;
5197 }
5198 }
5199 }
5200
    /* Plain forward scan: try every character head from s up to range.
       Reached when no optimization applies, or after the "check only"
       pre-filter succeeded without ANCHOR_ANYCHAR_STAR. */
5201 do {
5202 MATCH_AND_RETURN_CHECK(orig_range);
5203 prev = s;
5204 s += enclen(reg->enc, s, end);
5205 } while (s < range);
5206
5207 if (s == range) { /* because empty match with /$/. */
5208 MATCH_AND_RETURN_CHECK(orig_range);
5209 }
5210 }
5211 else { /* backward search */
5212 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5213 UChar *low, *high, *adjrange, *sch_start;
5214
      /* adjrange: range moved left onto a character head (or end itself). */
5215 if (range < end)
5216 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5217 else
5218 adjrange = (UChar* )end;
5219
      /* Finite dmax: backward_search_range() yields a [low, high] window;
         match_at() is tried at each character head from high down to low. */
5220 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5221 (end - range) >= reg->threshold_len) {
5222 do {
5223 sch_start = s + reg->dmax;
5224 if (sch_start > end) sch_start = (UChar* )end;
5225 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5226 &low, &high) <= 0)
5227 goto mismatch;
5228
5229 if (s > high)
5230 s = high;
5231
5232 while (s >= low) {
5233 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5234 MATCH_AND_RETURN_CHECK(orig_start);
5235 s = prev;
5236 }
5237 } while (s >= range);
5238 goto mismatch;
5239 }
5240 else { /* check only. */
5241 if ((end - range) < reg->threshold_len) goto mismatch;
5242
5243 sch_start = s;
5244 if (reg->dmax != 0) {
5245 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5246 sch_start = (UChar* )end;
5247 else {
5248 sch_start += reg->dmax;
5249 if (sch_start > end) sch_start = (UChar* )end;
5250 else
5251 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5252 start, sch_start, end);
5253 }
5254 }
5255 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5256 &low, &high) <= 0) goto mismatch;
5257 }
5258 }
5259
    /* Plain backward scan: try every character head from s down to range. */
5260 do {
5261 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5262 MATCH_AND_RETURN_CHECK(orig_start);
5263 s = prev;
5264 } while (s >= range);
5265 }
5266
  /* No position matched. With FIND_LONGEST, a best match recorded in msa
     during the scan is reported instead. */
5267 mismatch:
5268#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5269 if (IS_FIND_LONGEST(reg->options)) {
5270 if (msa.best_len >= 0) {
5271 s = msa.best_s;
5272 goto match;
5273 }
5274 }
5275#endif
5276 r = ONIG_MISMATCH;
5277
  /* Common exit for mismatch and error once msa has been initialized. */
5278 finish:
5279 MATCH_ARG_FREE(msa);
5280
5281 /* If result is mismatch and no FIND_NOT_EMPTY option,
5282 then the region is not set in match_at(). */
5283 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5284 onig_region_clear(region);
5285 }
5286
5287#ifdef ONIG_DEBUG
5288 if (r != ONIG_MISMATCH)
5289 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5290#endif
5291 return r;
5292
  /* Exits taken before MATCH_ARG_INIT ran: there is no msa to release. */
5293 mismatch_no_msa:
5294 r = ONIG_MISMATCH;
5295 finish_no_msa:
5296#ifdef ONIG_DEBUG
5297 if (r != ONIG_MISMATCH)
5298 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5299#endif
5300 return r;
5301
  /* Success: release msa and return the byte offset of the match start. */
5302 match:
5303 MATCH_ARG_FREE(msa);
5304 return s - str;
5305}
5306
5307extern OnigPosition
5308onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5309 OnigRegion* region, OnigOptionType option,
5310 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5311 void* callback_arg)
5312{
5313 OnigPosition r;
5314 OnigPosition n;
5315 int rs;
5316 const UChar* start;
5317
5318 n = 0;
5319 start = str;
5320 while (1) {
5321 r = onig_search(reg, str, end, start, end, region, option);
5322 if (r >= 0) {
5323 rs = scan_callback(n, r, region, callback_arg);
5324 n++;
5325 if (rs != 0)
5326 return rs;
5327
5328 if (region->end[0] == start - str) {
5329 if (start >= end) break;
5330 start += enclen(reg->enc, start, end);
5331 }
5332 else
5333 start = str + region->end[0];
5334
5335 if (start > end)
5336 break;
5337 }
5338 else if (r == ONIG_MISMATCH) {
5339 break;
5340 }
5341 else { /* error */
5342 return r;
5343 }
5344 }
5345
5346 return n;
5347}
5348
5349extern OnigEncoding
5350onig_get_encoding(const regex_t* reg)
5351{
5352 return reg->enc;
5353}
5354
5355extern OnigOptionType
5356onig_get_options(const regex_t* reg)
5357{
5358 return reg->options;
5359}
5360
5361extern OnigCaseFoldType
5362onig_get_case_fold_flag(const regex_t* reg)
5363{
5364 return reg->case_fold_flag;
5365}
5366
5367extern const OnigSyntaxType*
5368onig_get_syntax(const regex_t* reg)
5369{
5370 return reg->syntax;
5371}
5372
5373extern int
5374onig_number_of_captures(const regex_t* reg)
5375{
5376 return reg->num_mem;
5377}
5378
5379extern int
5380onig_number_of_capture_histories(const regex_t* reg)
5381{
5382#ifdef USE_CAPTURE_HISTORY
5383 int i, n;
5384
5385 n = 0;
5386 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5387 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5388 n++;
5389 }
5390 return n;
5391#else
5392 return 0;
5393#endif
5394}
5395
5396extern void
5397onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5398{
5399 *to = *from;
5400}
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition defines.h:89
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
Definition win32.h:698