Skip to content

Commit cb60c6f

Browse files
committed
pythongh-74756: support precision field for integer formatting types
```pycon
>>> f"{-12:.8b}"
'11110100'
>>> f"{200:.8b}"
Traceback (most recent call last):
  File "<python-input-5>", line 1, in <module>
    f"{200:.8b}"
    ^^^^^^^^^
OverflowError: Expected integer in range [-2**7, 2**7)
>>> f"{123:.8d}"
'00000123'
>>> f"{-12:.8d}"
'-00000012'
```
1 parent 6aa88a2 commit cb60c6f

File tree

5 files changed

+135
-15
lines changed

5 files changed

+135
-15
lines changed

Diff for: Doc/library/string.rst

+12-2
Original file line numberDiff line numberDiff line change
@@ -460,8 +460,18 @@ displayed after the decimal point for presentation types
460460
``'f'`` and ``'F'``, or before and after the decimal point for presentation
461461
types ``'g'`` or ``'G'``. For string presentation types the field
462462
indicates the maximum field size - in other words, how many characters will be
463-
used from the field content. The *precision* is not allowed for integer
464-
presentation types.
463+
used from the field content.
464+
465+
For integer presentation types, the precision gives the minimum number of
466+
digits to appear, expanded with an appropriate number of leading zeros. Note
467+
that for non-decimal presentation types, two's complement is used to
468+
represent signed integers, accepting values in the range ``[-m, m)``, where
469+
``m=2**(k*precision-1)`` and ``k=1,3,4`` for ``'b'``, ``'o'`` and
470+
``'x'``/``'X'`` types, respectively. A precision of ``0`` is treated as
471+
equivalent to a precision of ``1`` here.
472+
473+
.. versionchanged:: next
474+
Precision specification allowed for integer presentation types.
465475

466476
The ``'_'`` or ``','`` option after *precision* means the use of an underscore
467477
or a comma for a thousands separator of the fractional part for floating-point

Diff for: Lib/test/test_long.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,7 @@ def test__format__(self):
675675
self.assertEqual(format(123456789, 'd'), '123456789')
676676
self.assertEqual(format(123456789, ','), '123,456,789')
677677
self.assertEqual(format(123456789, '_'), '123_456_789')
678+
self.assertEqual(format(3, '1.3'), '003')
678679

679680
# sign and aligning are interdependent
680681
self.assertEqual(format(1, "-"), '1')
@@ -706,6 +707,9 @@ def test__format__(self):
706707
self.assertRaises(ValueError, format, 1234567890, ',x')
707708
self.assertEqual(format(1234567890, '_x'), '4996_02d2')
708709
self.assertEqual(format(1234567890, '_X'), '4996_02D2')
710+
self.assertEqual(format(8086, '#.8x'), '0x00001f96')
711+
self.assertRaises(OverflowError, format, 2048, '.3x')
712+
self.assertRaises(OverflowError, format, -2049, '.3x')
709713

710714
# octal
711715
self.assertEqual(format(3, "o"), "3")
@@ -720,6 +724,9 @@ def test__format__(self):
720724
self.assertEqual(format(-1234, "+o"), "-2322")
721725
self.assertRaises(ValueError, format, 1234567890, ',o')
722726
self.assertEqual(format(1234567890, '_o'), '111_4540_1322')
727+
self.assertEqual(format(18, '#.3o'), '0o022')
728+
self.assertRaises(OverflowError, format, 256, '.3o')
729+
self.assertRaises(OverflowError, format, -257, '.3o')
723730

724731
# binary
725732
self.assertEqual(format(3, "b"), "11")
@@ -734,9 +741,15 @@ def test__format__(self):
734741
self.assertEqual(format(-1234, "+b"), "-10011010010")
735742
self.assertRaises(ValueError, format, 1234567890, ',b')
736743
self.assertEqual(format(12345, '_b'), '11_0000_0011_1001')
744+
self.assertEqual(format(-12, '.8b'), '11110100')
745+
self.assertEqual(format(73, '.8b'), '01001001')
746+
self.assertEqual(format(73, '#.8b'), '0b01001001')
747+
self.assertRaises(OverflowError, format, 300, '.8b')
748+
self.assertRaises(OverflowError, format, -200, '.8b')
749+
self.assertRaises(OverflowError, format, 128, '.8b')
750+
self.assertRaises(OverflowError, format, -129, '.8b')
737751

738752
# make sure these are errors
739-
self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed
740753
self.assertRaises(ValueError, format, 3, "_c") # underscore,
741754
self.assertRaises(ValueError, format, 3, ",c") # comma, and
742755
self.assertRaises(ValueError, format, 3, "+c") # sign not allowed

Diff for: Lib/test/test_types.py

-2
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,6 @@ def test(i, format_spec, result):
357357

358358
# make sure these are errors
359359

360-
# precision disallowed
361-
self.assertRaises(ValueError, 3 .__format__, "1.3")
362360
# sign not allowed with 'c'
363361
self.assertRaises(ValueError, 3 .__format__, "+c")
364362
# format spec must be string
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Support precision field for integer formatting types. For binary, octal and
2+
hexadecimal formatting types, two's complement is used to represent
3+
signed values. Patch by Sergey B Kirpichev.

Diff for: Python/formatter_unicode.c

+106-10
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,8 @@ parse_internal_render_format_spec(PyObject *obj,
358358
}
359359
}
360360

361-
if (format->type == 'n'
361+
if ((format->type == 'n' || format->type == 'd' || format->type == 'b'
362+
|| format->type == 'o' || format->type == 'x' || format->type == 'X')
362363
&& format->frac_thousands_separator != LT_NO_LOCALE)
363364
{
364365
invalid_thousands_separator_type(format->frac_thousands_separator,
@@ -979,12 +980,6 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
979980
from a hard-code pseudo-locale */
980981
LocaleInfo locale = LocaleInfo_STATIC_INIT;
981982

982-
/* no precision allowed on integers */
983-
if (format->precision != -1) {
984-
PyErr_SetString(PyExc_ValueError,
985-
"Precision not allowed in integer format specifier");
986-
goto done;
987-
}
988983
/* no negative zero coercion on integers */
989984
if (format->no_neg_0) {
990985
PyErr_SetString(PyExc_ValueError,
@@ -1063,6 +1058,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
10631058

10641059
if (format->sign != '+' && format->sign != ' '
10651060
&& format->width == -1
1061+
&& format->precision == -1
10661062
&& format->type != 'X' && format->type != 'n'
10671063
&& !format->thousands_separators
10681064
&& PyLong_CheckExact(value))
@@ -1077,9 +1073,109 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
10771073
n_prefix = leading_chars_to_skip;
10781074

10791075
/* Do the hard part, converting to a string in a given base */
1080-
tmp = _PyLong_Format(value, base);
1081-
if (tmp == NULL)
1082-
goto done;
1076+
if (format->precision != -1) {
1077+
/* Use two's complement for 'b', 'o' and 'x' formatting types */
1078+
if (format->type == 'b' || format->type == 'x'
1079+
|| format->type == 'o' || format->type == 'X')
1080+
{
1081+
int64_t shift = Py_MAX(1, format->precision);
1082+
1083+
if (format->type == 'x' || format->type == 'X') {
1084+
shift *= 4;
1085+
}
1086+
else if (format->type == 'o') {
1087+
shift *= 3;
1088+
}
1089+
1090+
PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), shift);
1091+
PyObject *mod2 = _PyLong_Rshift(mod, 1);
1092+
PyObject *value2 = value;
1093+
1094+
if (mod == NULL || mod2 == NULL) {
1095+
Py_XDECREF(mod);
1096+
Py_XDECREF(mod2);
1097+
goto done;
1098+
}
1099+
if (PyLong_IsNegative(value)) {
1100+
value2 = PyNumber_Negative(mod2);
1101+
if (value2 == NULL) {
1102+
Py_DECREF(mod2);
1103+
goto done;
1104+
}
1105+
Py_SETREF(mod2, value2);
1106+
if (PyObject_RichCompareBool(value, mod2, Py_LT)) {
1107+
Py_DECREF(mod2);
1108+
PyErr_Format(PyExc_OverflowError,
1109+
"Expected integer in range [-2**%ld, 2**%ld)",
1110+
shift - 1, shift - 1);
1111+
goto done;
1112+
}
1113+
Py_DECREF(mod2);
1114+
value2 = PyNumber_Add(value, mod);
1115+
Py_DECREF(mod);
1116+
if (value2 == NULL) {
1117+
goto done;
1118+
}
1119+
}
1120+
else {
1121+
if (PyObject_RichCompareBool(value2, mod2, Py_GE)) {
1122+
Py_DECREF(mod);
1123+
Py_DECREF(mod2);
1124+
PyErr_Format(PyExc_OverflowError,
1125+
"Expected integer in range [-2**%ld, 2**%ld)",
1126+
shift - 1, shift - 1);
1127+
goto done;
1128+
}
1129+
Py_DECREF(mod);
1130+
Py_DECREF(mod2);
1131+
Py_INCREF(value2);
1132+
}
1133+
tmp = _PyLong_Format(value2, base);
1134+
Py_DECREF(value2);
1135+
}
1136+
else {
1137+
tmp = _PyLong_Format(value, base);
1138+
}
1139+
1140+
/* Prepend enough leading zeros (after the sign) */
1141+
1142+
int sign = PyUnicode_READ_CHAR(tmp, leading_chars_to_skip) == '-';
1143+
Py_ssize_t tmp2_len = format->precision + leading_chars_to_skip + sign;
1144+
Py_ssize_t tmp_len = PyUnicode_GET_LENGTH(tmp);
1145+
Py_ssize_t gap = tmp2_len - tmp_len;
1146+
1147+
if (gap > 0) {
1148+
PyObject *tmp2 = PyUnicode_New(tmp2_len, 127);
1149+
1150+
if (PyUnicode_CopyCharacters(tmp2, leading_chars_to_skip + gap + sign,
1151+
tmp, leading_chars_to_skip + sign,
1152+
tmp2_len - leading_chars_to_skip - sign) == -1) {
1153+
Py_DECREF(tmp2);
1154+
goto done;
1155+
}
1156+
if (PyUnicode_Fill(tmp2, leading_chars_to_skip + sign, gap, '0') == -1) {
1157+
Py_DECREF(tmp2);
1158+
goto done;
1159+
}
1160+
if (sign && PyUnicode_WriteChar(tmp2, leading_chars_to_skip, '-') == -1) {
1161+
Py_DECREF(tmp2);
1162+
goto done;
1163+
}
1164+
if (leading_chars_to_skip
1165+
&& PyUnicode_CopyCharacters(tmp2, 0, tmp, 0,
1166+
leading_chars_to_skip) == -1) {
1167+
Py_DECREF(tmp2);
1168+
goto done;
1169+
}
1170+
Py_SETREF(tmp, tmp2);
1171+
}
1172+
}
1173+
else {
1174+
tmp = _PyLong_Format(value, base);
1175+
if (tmp == NULL) {
1176+
goto done;
1177+
}
1178+
}
10831179

10841180
inumeric_chars = 0;
10851181
n_digits = PyUnicode_GET_LENGTH(tmp);

0 commit comments

Comments
 (0)