Add extension point for letting TestLoader specify a custom sharding scheme.

Abseil Team · copybara-github · commit c3dd5a431c9a · 2025-04-16T00:02:34.000-07:00
PiperOrigin-RevId: 731407072
diff --git a/absl/testing/absltest.py b/absl/testing/absltest.py
@@ -26,6 +26,7 @@
 import enum
 import errno
 import faulthandler
+import functools
 import getpass
 import inspect
 import io
@@ -2505,6 +2506,35 @@ def getTestCaseNames(self, testCaseClass):  # pylint:disable=invalid-name
       self._random.shuffle(names)
     return names
 
+  def shardTestCaseNames(
+      self, ordered_names: Sequence[str], shard_index: int, total_shards: int
+  ) -> Sequence[str]:
+    """Filters and returns test case names for a specific shard.
+
+    This method is intended to be used in conjunction with test sharding
+    (e.g., when running tests on a distributed system or when running tests
+    with bazel's test sharding feature). It will return a subset of the
+    input test case names, based on the shard index and total shard count.
+
+    Args:
+      ordered_names: Test case names in the order returned by the base loader.
+      shard_index: The index of the current shard.
+      total_shards: The total number of shards.
+
+    Returns:
+      The names assigned to this shard, in their original relative order.
+    """
+    bucket_iterator = itertools.cycle(range(total_shards))
+    filtered_names = set()
+    # We need to sort the list of tests in order to determine which tests this
+    # shard is responsible for; however, it's important to preserve the order
+    # returned by the base loader, e.g. in the case of randomized test ordering.
+    for testcase in sorted(ordered_names):
+      bucket = next(bucket_iterator)
+      if bucket == shard_index:
+        filtered_names.add(testcase)
+    return [x for x in ordered_names if x in filtered_names]
+
 
 def get_default_xml_output_filename() -> Optional[str]:
   if os.environ.get('XML_OUTPUT_FILE'):
@@ -2626,21 +2656,19 @@ def _setup_sharding(
   # the test case names for this shard.
   delegate_get_names = base_loader.getTestCaseNames
 
-  bucket_iterator = itertools.cycle(range(total_shards))
+  def getSharedTestCaseNames(testCaseClass):
+    has_shard_test_case_names = hasattr(base_loader, 'shardTestCaseNames')
+    if has_shard_test_case_names:
+      sharder = getattr(base_loader, 'shardTestCaseNames')
+    else:
+      sharder = functools.partial(TestLoader.shardTestCaseNames, base_loader)
 
-  def getShardedTestCaseNames(testCaseClass):
-    filtered_names = []
-    # We need to sort the list of tests in order to determine which tests this
-    # shard is responsible for; however, it's important to preserve the order
-    # returned by the base loader, e.g. in the case of randomized test ordering.
-    ordered_names = delegate_get_names(testCaseClass)
-    for testcase in sorted(ordered_names):
-      bucket = next(bucket_iterator)
-      if bucket == shard_index:
-        filtered_names.append(testcase)
-    return [x for x in ordered_names if x in filtered_names]
+    names = sharder(
+        delegate_get_names(testCaseClass), shard_index, total_shards
+    )
+    return names
 
-  base_loader.getTestCaseNames = getShardedTestCaseNames  # type: ignore[method-assign]
+  base_loader.getTestCaseNames = getSharedTestCaseNames  # type: ignore[method-assign]
   return base_loader, shard_index