Skip to content

Commit 1ce0dab

Browse files
committed
Strings::matchAll(): added option 'lazy'
1 parent 1152372 commit 1ce0dab

File tree

2 files changed

+89
-6
lines changed

2 files changed

+89
-6
lines changed

Diff for: src/Utils/Strings.php

+26-6
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ public static function match(
589589
/**
590590
* Searches the string for all occurrences matching the regular expression and
591591
* returns an array of arrays containing the found expression and each subexpression (or, when $lazy is true, a Generator that yields those arrays one match at a time).
592+
* @return ($lazy is true ? \Generator<int, array> : array[])
592593
*/
593594
public static function matchAll(
594595
string $subject,
@@ -599,21 +600,41 @@ public static function matchAll(
599600
bool $unmatchedAsNull = false,
600601
bool $patternOrder = false,
601602
bool $utf8 = false,
602-
): array
603+
bool $lazy = false,
604+
): array|\Generator
603605
{
604-
$flags = is_int($captureOffset) // back compatibility
605-
? $captureOffset
606-
: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder ? PREG_PATTERN_ORDER : 0);
607-
608606
if ($utf8) {
609607
$offset = strlen(self::substring($subject, 0, $offset));
610608
$pattern .= 'u';
611609
}
612610

611+
if ($lazy) {
612+
$flags = PREG_OFFSET_CAPTURE | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
613+
return (function () use ($utf8, $captureOffset, $flags, $subject, $pattern, $offset) {
614+
$counter = 0;
615+
while (
616+
$offset <= strlen($subject)
617+
&& self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])
618+
) {
619+
$offset = $m[0][1] + strlen($m[0][0]);
620+
if (!$captureOffset) {
621+
$m = array_map(fn($item) => $item[0], $m);
622+
} elseif ($utf8) {
623+
$m = self::bytesToChars($subject, [$m])[0];
624+
}
625+
yield $counter++ => $m;
626+
}
627+
})();
628+
}
629+
613630
if ($offset > strlen($subject)) {
614631
return [];
615632
}
616633

634+
$flags = is_int($captureOffset) // back compatibility
635+
? $captureOffset
636+
: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder && !$lazy ? PREG_PATTERN_ORDER : 0);
637+
617638
self::pcre('preg_match_all', [
618639
$pattern, $subject, &$m,
619640
($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
@@ -622,7 +643,6 @@ public static function matchAll(
622643
return $utf8 && $captureOffset
623644
? self::bytesToChars($subject, $m)
624645
: $m;
625-
626646
}
627647

628648

Diff for: tests/Utils/Strings.matchAll.lazy().phpt

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<?php
2+
3+
/**
4+
* Test: Nette\Utils\Strings::matchAll() with the lazy option
5+
*/
6+
7+
declare(strict_types=1);
8+
9+
use Nette\Utils\Strings;
10+
use Tester\Assert;
11+
12+
13+
require __DIR__ . '/../bootstrap.php';
14+
15+
16+
Assert::type(Generator::class, Strings::matchAll('hello world!', '#([E-L])+#', lazy: true));
17+
Assert::type(Generator::class, Strings::matchAll('hello world!', '#([E-L])+#', lazy: true, offset: 50));
18+
19+
Assert::same(0, iterator_count(Strings::matchAll('hello world!', '#([E-L])+#', lazy: true)));
20+
21+
Assert::same([
22+
['hell', 'l'],
23+
['l', 'l'],
24+
], iterator_to_array(Strings::matchAll('hello world!', '#([e-l])+#', lazy: true)));
25+
26+
Assert::same([
27+
['hell'],
28+
['l'],
29+
], iterator_to_array(Strings::matchAll('hello world!', '#[e-l]+#', lazy: true)));
30+
31+
Assert::same([
32+
[['lu', 2], ['l', 2], ['u', 3]],
33+
[['ou', 6], ['o', 6], ['u', 7]],
34+
[['k', 10], ['k', 10], ['', 11]],
35+
[['k', 14], ['k', 14], ['', 15]],
36+
], iterator_to_array(Strings::matchAll('žluťoučký kůň!', '#([a-z])([a-z]*)#u', captureOffset: true, lazy: true)));
37+
38+
Assert::same([
39+
[['lu', 1], ['l', 1], ['u', 2]],
40+
[['ou', 4], ['o', 4], ['u', 5]],
41+
[['k', 7], ['k', 7], ['', 8]],
42+
[['k', 10], ['k', 10], ['', 11]],
43+
], iterator_to_array(Strings::matchAll('žluťoučký kůň!', '#([a-z])([a-z]*)#u', captureOffset: true, utf8: true, lazy: true)));
44+
45+
Assert::same(
46+
[['l'], ['k'], ['k']],
47+
iterator_to_array(Strings::matchAll('žluťoučký kůň', '#[e-l]+#u', offset: 2, lazy: true)),
48+
);
49+
50+
Assert::same(
51+
[['k'], ['k']],
52+
iterator_to_array(Strings::matchAll('žluťoučký kůň', '#[e-l]+#u', offset: 2, utf8: true, lazy: true)),
53+
);
54+
55+
Assert::same(
56+
[['e', null]],
57+
iterator_to_array(Strings::matchAll('hello world!', '#e(x)*#', unmatchedAsNull: true, lazy: true)),
58+
);
59+
60+
Assert::same(
61+
[],
62+
iterator_to_array(Strings::matchAll('hello world!', '', offset: 50, lazy: true)),
63+
);

0 commit comments

Comments
 (0)