python · gh-74756: support precision field for integer formatting types

skirpichev · skirpichev · commit cb60c6f9200e · 2025-03-31T07:48:31.000+03:00
```pycon
>>> f"{-12:.8b}"
'11110100'
>>> f"{200:.8b}"
Traceback (most recent call last):
  File "<python-input-5>", line 1, in <module>
    f"{200:.8b}"
      ^^^^^^^^^
OverflowError: Expected integer in range [-2**7, 2**7)
>>> f"{123:.8d}"
'00000123'
>>> f"{-12:.8d}"
'-00000012'
```
diff --git a/Doc/library/string.rst b/Doc/library/string.rst
@@ -460,8 +460,18 @@ displayed after the decimal point for presentation types
 ``'f'`` and ``'F'``, or before and after the decimal point for presentation
 types ``'g'`` or ``'G'``.  For string presentation types the field
 indicates the maximum field size - in other words, how many characters will be
-used from the field content.  The *precision* is not allowed for integer
-presentation types.
+used from the field content.
+
+For integer presentation types, the precision gives the minimum number of
+digits to appear, padded with an appropriate number of leading zeros.  Note
+that for non-decimal presentation types, two's complement is used to
+represent signed integers, accepting values in the range ``[-m, m)``, where
+``m = 2**(k*precision - 1)`` and ``k = 1, 3, 4`` for ``'b'``, ``'o'`` and
+``'x'``/``'X'`` types, respectively.  A precision of ``0`` is treated as
+equivalent to a precision of ``1`` here.
+
+.. versionchanged:: next
+   Precision specification allowed for integer presentation types.
 
 The ``'_'`` or ``','`` option after *precision* means the use of an underscore
 or a comma for a thousands separator of the fractional part for floating-point
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py
@@ -675,6 +675,7 @@ def test__format__(self):
         self.assertEqual(format(123456789, 'd'), '123456789')
         self.assertEqual(format(123456789, ','), '123,456,789')
         self.assertEqual(format(123456789, '_'), '123_456_789')
+        self.assertEqual(format(3, '1.3'), '003')
 
         # sign and aligning are interdependent
         self.assertEqual(format(1, "-"), '1')
@@ -706,6 +707,9 @@ def test__format__(self):
         self.assertRaises(ValueError, format, 1234567890, ',x')
         self.assertEqual(format(1234567890, '_x'), '4996_02d2')
         self.assertEqual(format(1234567890, '_X'), '4996_02D2')
+        self.assertEqual(format(8086, '#.8x'), '0x00001f96')
+        self.assertRaises(OverflowError, format, 2048, '.3x')
+        self.assertRaises(OverflowError, format, -2049, '.3x')
 
         # octal
         self.assertEqual(format(3, "o"), "3")
@@ -720,6 +724,9 @@ def test__format__(self):
         self.assertEqual(format(-1234, "+o"), "-2322")
         self.assertRaises(ValueError, format, 1234567890, ',o')
         self.assertEqual(format(1234567890, '_o'), '111_4540_1322')
+        self.assertEqual(format(18, '#.3o'), '0o022')
+        self.assertRaises(OverflowError, format, 256, '.3o')
+        self.assertRaises(OverflowError, format, -257, '.3o')
 
         # binary
         self.assertEqual(format(3, "b"), "11")
@@ -734,9 +741,15 @@ def test__format__(self):
         self.assertEqual(format(-1234, "+b"), "-10011010010")
         self.assertRaises(ValueError, format, 1234567890, ',b')
         self.assertEqual(format(12345, '_b'), '11_0000_0011_1001')
+        self.assertEqual(format(-12, '.8b'), '11110100')
+        self.assertEqual(format(73, '.8b'), '01001001')
+        self.assertEqual(format(73, '#.8b'), '0b01001001')
+        self.assertRaises(OverflowError, format, 300, '.8b')
+        self.assertRaises(OverflowError, format, -200, '.8b')
+        self.assertRaises(OverflowError, format, 128, '.8b')
+        self.assertRaises(OverflowError, format, -129, '.8b')
 
         # make sure these are errors
-        self.assertRaises(ValueError, format, 3, "1.3")  # precision disallowed
         self.assertRaises(ValueError, format, 3, "_c")   # underscore,
         self.assertRaises(ValueError, format, 3, ",c")   # comma, and
         self.assertRaises(ValueError, format, 3, "+c")   # sign not allowed
diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py
@@ -357,8 +357,6 @@ def test(i, format_spec, result):
 
         # make sure these are errors
 
-        # precision disallowed
-        self.assertRaises(ValueError, 3 .__format__, "1.3")
         # sign not allowed with 'c'
         self.assertRaises(ValueError, 3 .__format__, "+c")
         # format spec must be string
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-07-25-18.gh-issue-74756.7aCPNT.rst
@@ -0,0 +1,3 @@
+Support precision field for integer formatting types.  For binary, octal and
+hexadecimal formatting types, two's complement is used to represent
+signed values.  Patch by Sergey B Kirpichev.
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
@@ -358,7 +358,8 @@ parse_internal_render_format_spec(PyObject *obj,
         }
     }
 
-    if (format->type == 'n'
+    if ((format->type == 'n' || format->type == 'd' || format->type == 'b'
+         || format->type == 'o' || format->type == 'x' || format->type == 'X')
         && format->frac_thousands_separator != LT_NO_LOCALE)
     {
         invalid_thousands_separator_type(format->frac_thousands_separator,
@@ -979,12 +980,6 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
        from a hard-code pseudo-locale */
     LocaleInfo locale = LocaleInfo_STATIC_INIT;
 
-    /* no precision allowed on integers */
-    if (format->precision != -1) {
-        PyErr_SetString(PyExc_ValueError,
-                        "Precision not allowed in integer format specifier");
-        goto done;
-    }
     /* no negative zero coercion on integers */
     if (format->no_neg_0) {
         PyErr_SetString(PyExc_ValueError,
@@ -1063,6 +1058,7 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
 
         if (format->sign != '+' && format->sign != ' '
             && format->width == -1
+            && format->precision == -1
             && format->type != 'X' && format->type != 'n'
             && !format->thousands_separators
             && PyLong_CheckExact(value))
@@ -1077,9 +1073,109 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
             n_prefix = leading_chars_to_skip;
 
         /* Do the hard part, converting to a string in a given base */
-        tmp = _PyLong_Format(value, base);
-        if (tmp == NULL)
-            goto done;
+        if (format->precision != -1) {
+            /* Use two's complement for 'b', 'o' and 'x' formatting types */
+            if (format->type == 'b' || format->type == 'x'
+                || format->type == 'o' || format->type == 'X')
+            {
+                int64_t shift = Py_MAX(1, format->precision);
+
+                if (format->type == 'x' || format->type == 'X') {
+                    shift *= 4;
+                }
+                else if (format->type == 'o') {
+                    shift *= 3;
+                }
+
+                PyObject *mod = _PyLong_Lshift(PyLong_FromLong(1), shift);
+                PyObject *mod2 = _PyLong_Rshift(mod, 1);
+                PyObject *value2 = value;
+
+                if (mod == NULL || mod2 == NULL) {
+                    Py_XDECREF(mod);
+                    Py_XDECREF(mod2);
+                    goto done;
+                }
+                if (PyLong_IsNegative(value)) {
+                    value2 = PyNumber_Negative(mod2);
+                    if (value2 == NULL) {
+                        Py_DECREF(mod2);
+                        goto done;
+                    }
+                    Py_SETREF(mod2, value2);
+                    if (PyObject_RichCompareBool(value, mod2, Py_LT)) {
+                        Py_DECREF(mod2);
+                        PyErr_Format(PyExc_OverflowError,
+                                     "Expected integer in range [-2**%ld, 2**%ld)",
+                                     shift - 1, shift - 1);
+                        goto done;
+                    }
+                    Py_DECREF(mod2);
+                    value2 = PyNumber_Add(value, mod);
+                    Py_DECREF(mod);
+                    if (value2 == NULL) {
+                        goto done;
+                    }
+                }
+                else {
+                    if (PyObject_RichCompareBool(value2, mod2, Py_GE)) {
+                        Py_DECREF(mod);
+                        Py_DECREF(mod2);
+                        PyErr_Format(PyExc_OverflowError,
+                                     "Expected integer in range [-2**%ld, 2**%ld)",
+                                     shift - 1, shift - 1);
+                        goto done;
+                    }
+                    Py_DECREF(mod);
+                    Py_DECREF(mod2);
+                    Py_INCREF(value2);
+                }
+                tmp = _PyLong_Format(value2, base);
+                Py_DECREF(value2);
+            }
+            else {
+                tmp = _PyLong_Format(value, base);
+            }
+
+            /* Prepend enough leading zeros (after the sign) */
+
+            int sign = PyUnicode_READ_CHAR(tmp, leading_chars_to_skip) == '-';
+            Py_ssize_t tmp2_len = format->precision + leading_chars_to_skip + sign;
+            Py_ssize_t tmp_len = PyUnicode_GET_LENGTH(tmp);
+            Py_ssize_t gap = tmp2_len - tmp_len;
+
+            if (gap > 0) {
+                PyObject *tmp2 = PyUnicode_New(tmp2_len, 127);
+
+                if (PyUnicode_CopyCharacters(tmp2, leading_chars_to_skip + gap + sign,
+                                             tmp, leading_chars_to_skip + sign,
+                                             tmp2_len - leading_chars_to_skip - sign) == -1) {
+                    Py_DECREF(tmp2);
+                    goto done;
+                }
+                if (PyUnicode_Fill(tmp2, leading_chars_to_skip + sign, gap, '0') == -1) {
+                    Py_DECREF(tmp2);
+                    goto done;
+                }
+                if (sign && PyUnicode_WriteChar(tmp2, leading_chars_to_skip, '-') == -1) {
+                    Py_DECREF(tmp2);
+                    goto done;
+                }
+                if (leading_chars_to_skip
+                    && PyUnicode_CopyCharacters(tmp2, 0, tmp, 0,
+                                                leading_chars_to_skip) == -1) {
+                    Py_DECREF(tmp2);
+                    goto done;
+                }
+                Py_SETREF(tmp, tmp2);
+            }
+        }
+        else {
+            tmp = _PyLong_Format(value, base);
+            if (tmp == NULL) {
+                goto done;
+            }
+        }
 
         inumeric_chars = 0;
         n_digits = PyUnicode_GET_LENGTH(tmp);

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+Support precision field for integer formatting types. For binary, octal and`
	`2`	`+hexadecimal formatting types, two's complement is used to represent`
	`3`	`+signed values. Patch by Sergey B Kirpichev.`