-
Notifications
You must be signed in to change notification settings - Fork 506
Improve preg_split()
function ReturnType
#3757
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 2.1.x
Are you sure you want to change the base?
Changes from 18 commits
6f8c0c0
ca44a91
9c33a2a
48c714d
5a0b989
05ac909
97ed353
a95ed66
4031293
0a01610
043ed19
68da760
db052cc
a647277
b9c303a
319bcbb
9c1a389
8cb3030
37f9b3e
cb5925b
541b024
ba25f6b
b4f4885
fb30cd7
660195b
6487739
03319d4
e4a07b0
9388d23
c206ccc
fce5dfd
79623a4
307cf54
57bb73e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -7,17 +7,29 @@ | |||||
use PHPStan\Reflection\FunctionReflection; | ||||||
use PHPStan\TrinaryLogic; | ||||||
use PHPStan\Type\Accessory\AccessoryArrayListType; | ||||||
use PHPStan\Type\Accessory\AccessoryNonEmptyStringType; | ||||||
use PHPStan\Type\Accessory\NonEmptyArrayType; | ||||||
use PHPStan\Type\ArrayType; | ||||||
use PHPStan\Type\BitwiseFlagHelper; | ||||||
use PHPStan\Type\Constant\ConstantArrayType; | ||||||
use PHPStan\Type\Constant\ConstantArrayTypeBuilder; | ||||||
use PHPStan\Type\Constant\ConstantBooleanType; | ||||||
use PHPStan\Type\Constant\ConstantIntegerType; | ||||||
use PHPStan\Type\Constant\ConstantStringType; | ||||||
use PHPStan\Type\DynamicFunctionReturnTypeExtension; | ||||||
use PHPStan\Type\ErrorType; | ||||||
use PHPStan\Type\IntegerRangeType; | ||||||
use PHPStan\Type\IntegerType; | ||||||
use PHPStan\Type\MixedType; | ||||||
use PHPStan\Type\StringType; | ||||||
use PHPStan\Type\Type; | ||||||
use PHPStan\Type\TypeCombinator; | ||||||
use PHPStan\Type\TypeUtils; | ||||||
use function count; | ||||||
use function is_array; | ||||||
use function is_int; | ||||||
use function preg_match; | ||||||
use function preg_split; | ||||||
use function strtolower; | ||||||
|
||||||
final class PregSplitDynamicReturnTypeExtension implements DynamicFunctionReturnTypeExtension | ||||||
|
@@ -36,17 +48,132 @@ public function isFunctionSupported(FunctionReflection $functionReflection): boo | |||||
|
||||||
public function getTypeFromFunctionCall(FunctionReflection $functionReflection, FuncCall $functionCall, Scope $scope): ?Type | ||||||
{ | ||||||
$flagsArg = $functionCall->getArgs()[3] ?? null; | ||||||
$args = $functionCall->getArgs(); | ||||||
if (count($args) < 2) { | ||||||
return null; | ||||||
} | ||||||
$patternArg = $args[0]; | ||||||
$subjectArg = $args[1]; | ||||||
$limitArg = $args[2] ?? null; | ||||||
$flagArg = $args[3] ?? null; | ||||||
$patternType = $scope->getType($patternArg->value); | ||||||
$patternConstantTypes = $patternType->getConstantStrings(); | ||||||
$subjectType = $scope->getType($subjectArg->value); | ||||||
$subjectConstantTypes = $subjectType->getConstantStrings(); | ||||||
malsuke marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
if ( | ||||||
count($patternConstantTypes) > 0 | ||||||
&& @preg_match($patternConstantTypes[0]->getValue(), '') === false | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we usually us There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this needs to check all patterns not only the first |
||||||
) { | ||||||
return new ErrorType(); | ||||||
} | ||||||
|
||||||
$limits = []; | ||||||
if ($limitArg === null) { | ||||||
$limits = [-1]; | ||||||
} else { | ||||||
$limitType = $scope->getType($limitArg->value); | ||||||
foreach ($limitType->getConstantScalarValues() as $limit) { | ||||||
if (!is_int($limit)) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. numeric-string $limit is not an error |
||||||
return new ErrorType(); | ||||||
malsuke marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
$limits[] = $limit; | ||||||
} | ||||||
} | ||||||
|
||||||
$flags = []; | ||||||
if ($flagArg === null) { | ||||||
$flags = [0]; | ||||||
} else { | ||||||
$flagType = $scope->getType($flagArg->value); | ||||||
foreach ($flagType->getConstantScalarValues() as $flag) { | ||||||
if (!is_int($flag)) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. to be consistent with limit, this might also allow numeric-string |
||||||
return new ErrorType(); | ||||||
malsuke marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
$flags[] = $flag; | ||||||
} | ||||||
} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By replacing it as follows, type checking within multiple Constant loops will no longer be necessary. $flags = [];
$flagType = $scope->getType($flagArg->value);
foreach ($flagType->getConstantScalarValues() as $flag) {
if (!is_int($flag)) {
return new ErrorType();
}
$flags[] = $flag;
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. resolved 8cb3030 |
||||||
|
||||||
|
||||||
if (count($patternConstantTypes) === 0 || count($subjectConstantTypes) === 0) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this if-branch might be factored out into a private method for readability |
||||||
$returnNonEmptyStrings = $flagArg !== null && $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagArg->value, $scope, 'PREG_SPLIT_NO_EMPTY')->yes(); | ||||||
if ($returnNonEmptyStrings) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would inline this only once used variable to ease reading the code |
||||||
$returnStringType = TypeCombinator::intersect( | ||||||
new StringType(), | ||||||
new AccessoryNonEmptyStringType(), | ||||||
); | ||||||
} else { | ||||||
$returnStringType = new StringType(); | ||||||
} | ||||||
|
||||||
if ($flagsArg !== null && $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagsArg->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE')->yes()) { | ||||||
$type = new ArrayType( | ||||||
new IntegerType(), | ||||||
new ConstantArrayType([new ConstantIntegerType(0), new ConstantIntegerType(1)], [new StringType(), IntegerRangeType::fromInterval(0, null)], [2], [], TrinaryLogic::createYes()), | ||||||
$capturedArrayType = new ConstantArrayType( | ||||||
[new ConstantIntegerType(0), new ConstantIntegerType(1)], | ||||||
[$returnStringType, IntegerRangeType::fromInterval(0, null)], | ||||||
[2], | ||||||
[], | ||||||
TrinaryLogic::createYes(), | ||||||
); | ||||||
return TypeCombinator::union(TypeCombinator::intersect($type, new AccessoryArrayListType()), new ConstantBooleanType(false)); | ||||||
|
||||||
$returnInternalValueType = $returnStringType; | ||||||
if ($flagArg !== null) { | ||||||
$flagState = $this->bitwiseFlagAnalyser->bitwiseOrContainsConstant($flagArg->value, $scope, 'PREG_SPLIT_OFFSET_CAPTURE'); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
if ($flagState->yes()) { | ||||||
$capturedArrayListType = TypeCombinator::intersect( | ||||||
new ArrayType(new IntegerType(), $capturedArrayType), | ||||||
new AccessoryArrayListType(), | ||||||
); | ||||||
|
||||||
if ($subjectType->isNonEmptyString()->yes()) { | ||||||
$capturedArrayListType = TypeCombinator::intersect($capturedArrayListType, new NonEmptyArrayType()); | ||||||
} | ||||||
|
||||||
return TypeUtils::toBenevolentUnion(TypeCombinator::union($capturedArrayListType, new ConstantBooleanType(false))); | ||||||
} | ||||||
if ($flagState->maybe()) { | ||||||
$returnInternalValueType = TypeCombinator::union(new StringType(), $capturedArrayType); | ||||||
} | ||||||
} | ||||||
|
||||||
$returnListType = TypeCombinator::intersect(new ArrayType(new MixedType(), $returnInternalValueType), new AccessoryArrayListType()); | ||||||
if ($subjectType->isNonEmptyString()->yes()) { | ||||||
$returnListType = TypeCombinator::intersect( | ||||||
$returnListType, | ||||||
new NonEmptyArrayType(), | ||||||
); | ||||||
} | ||||||
|
||||||
return TypeUtils::toBenevolentUnion(TypeCombinator::union($returnListType, new ConstantBooleanType(false))); | ||||||
} | ||||||
|
||||||
$resultTypes = []; | ||||||
foreach ($patternConstantTypes as $patternConstantType) { | ||||||
foreach ($subjectConstantTypes as $subjectConstantType) { | ||||||
foreach ($limits as $limit) { | ||||||
foreach ($flags as $flag) { | ||||||
$result = @preg_split($patternConstantType->getValue(), $subjectConstantType->getValue(), $limit, $flag); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think using Strings::split here is not right because the limit is fixed to -1. |
||||||
if ($result === false) { | ||||||
continue; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if one of the static analysis time values make |
||||||
} | ||||||
$constantArray = ConstantArrayTypeBuilder::createEmpty(); | ||||||
foreach ($result as $key => $value) { | ||||||
if (is_array($value)) { | ||||||
$valueConstantArray = ConstantArrayTypeBuilder::createEmpty(); | ||||||
$valueConstantArray->setOffsetValueType(new ConstantIntegerType(0), new ConstantStringType($value[0])); | ||||||
$valueConstantArray->setOffsetValueType(new ConstantIntegerType(1), new ConstantIntegerType($value[1])); | ||||||
$returnInternalValueType = $valueConstantArray->getArray(); | ||||||
} else { | ||||||
$returnInternalValueType = new ConstantStringType($value); | ||||||
} | ||||||
$constantArray->setOffsetValueType(new ConstantIntegerType($key), $returnInternalValueType); | ||||||
} | ||||||
|
||||||
$resultTypes[] = $constantArray->getArray(); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
return null; | ||||||
return TypeCombinator::union(...$resultTypes); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we are missing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the above comment is still true and we are missing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, does this mean that every possible result of preg_split includes the possibility of false, and therefore we need to add false to the union type? I had implemented it to return an Error if preg_split returns false, as a warning. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The return type of The current "return ErrorType" could be turned into "return null" in case other rules will already report a phpstan error for the code examples. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have fixed about that in the following commit. Additionally, since handling for the false case is no longer necessary, I have removed |
||||||
} | ||||||
|
||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The other preg_ methods do not use a benevolent union, so I think it would be more consistent not to use a benevolent union here either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@VincentLanglet @ondrejmirtes
I understand. I would like to remove benevolent union.
On the other hand, I think that preg_split should not return false unless there is an issue with the regular expression. Furthermore, in this PR, I have modified the code so that if the regular expression is incorrect, an error is returned early in the parsing process.
Therefore, if the regular expression is correct, I am considering not adding false as a Union.
(In this case, this bug can also be fixed.
phpstan-src/tests/PHPStan/Analyser/AnalyserIntegrationTest.php
Lines 890 to 900 in 76740fd
If you would prefer not to use a benevolent union, do you think it would be fine to remove false as well? I would like to hear your opinion on this before making any modifications.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Like any other preg method, I think it can return false if an internal error occurs.
And in php.ini there are settings like pcre.recursion_limit or pcre.backtrack_limit.
So I would keep a non-benevolent union AND false.
If we decide to remove false from the signature, it should be removed from all the preg methods. But I don't think we should go this way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand.
I modified it to keep a non-benevolent union AND false.
37f9b3e