diff --git a/elasticsearch_dsl/search.py b/elasticsearch_dsl/search.py
index 94778dfa7..c4943aa00 100644
--- a/elasticsearch_dsl/search.py
+++ b/elasticsearch_dsl/search.py
@@ -264,6 +264,26 @@ def _clone(self):
 
         return s
 
+
+def _reverse_sort_entry(sort_entry):
+    """Reverse the direction of a single sort entry, in any of its three forms."""
+    # "field"
+    if isinstance(sort_entry, string_types):
+        if sort_entry == '_score':
+            return {'_score': 'asc'}
+        return {sort_entry: 'desc'}
+
+    f, sort_entry = sort_entry.copy().popitem()
+    # {"field": "asc/desc"}
+    if isinstance(sort_entry, string_types):
+        return {f: 'asc' if sort_entry == 'desc' else 'desc'}
+
+    # {"field": {"order": "asc/desc"}}
+    sort_entry = sort_entry.copy()
+    sort_entry['order'] = 'asc' if sort_entry['order'] == 'desc' else 'desc'
+    return {f: sort_entry}
+
+
 class Search(Request):
     query = ProxyDescriptor('query')
     post_filter = ProxyDescriptor('post_filter')
@@ -337,6 +357,60 @@ def __getitem__(self, n):
             s._extra['size'] = 1
         return s
 
+    def get_page_count(self, size=None):
+        """Return the total number of pages, given ``size`` hits per page."""
+        size = size if size is not None else self._extra.get("size", 10)
+        if size == 0:
+            return 0
+        pages, docs_left = divmod(self.count(), size)
+        if docs_left:
+            pages += 1
+        return pages
+
+    def get_page(self, page_no, size=None):
+        """Return the response for page ``page_no``; negative pages count from the back."""
+        if page_no == 0:
+            raise ValueError("Search pagination is 1-based.")
+        size = size if size is not None else self._extra.get("size", 10)
+        s = self._clone()
+        s._extra["from"] = size * (abs(page_no) - 1)
+        s._extra["size"] = size
+
+        # reverse the sort order when paginating from the back
+        if page_no < 0:
+            s._sort = [_reverse_sort_entry(se) for se in self._sort]
+
+        resp = s.execute()
+
+        # reverse the hits in the page when paginating from the back
+        if page_no < 0:
+            resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1]
+
+        return resp
+
+    def get_next_page(self, last_hit, size=None):
+        """Return the page following the hit with sort values ``last_hit``."""
+        size = size if size is not None else self._extra.get("size", 10)
+        s = self._clone()
+        s._extra["from"] = 0
+        s._extra["size"] = size
+        s._extra["search_after"] = list(last_hit)
+        return s.execute()
+
+    def get_previous_page(self, first_hit, size=None):
+        """Return the page preceding the hit with sort values ``first_hit``."""
+        size = size if size is not None else self._extra.get("size", 10)
+        s = self._clone()
+        s._extra["from"] = 0
+        s._extra["size"] = size
+        s._extra["search_after"] = list(first_hit)
+        # reverse the sort order
+        s._sort = [_reverse_sort_entry(se) for se in self._sort]
+        resp = s.execute()
+        # reverse the hits in the page
+        resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1]
+        return resp
+
     @classmethod
     def from_dict(cls, d):
         """
diff --git a/test_elasticsearch_dsl/test_integration/test_pagination.py b/test_elasticsearch_dsl/test_integration/test_pagination.py
new file mode 100644
index 000000000..ddaabe7aa
--- /dev/null
+++ b/test_elasticsearch_dsl/test_integration/test_pagination.py
@@ -0,0 +1,85 @@
+from random import shuffle
+
+from elasticsearch_dsl import Search
+
+from pytest import fixture
+
+@fixture(scope="session")
+def sorted_search(data_client):
+    return Search(index='flat-git').sort(
+        'stats.lines',
+        '-stats.files',
+        {'_id': {'order': 'desc'}})
+
+@fixture(scope="session")
+def commits(sorted_search):
+    """
+    List of all commits, as sorted by ``sorted_search``.
+    """
+    return list(sorted_search.params(preserve_order=True).scan())
+
+def get_commit_page(commits, page, size=10):
+    """
+    Get the appropriate page using plain Python slicing, as a control.
+    """
+    start = (page - 1) * size
+    return commits[start:start + size]
+
+def test_get_page_count(sorted_search):
+    assert sorted_search.get_page_count() == 6
+    assert sorted_search[:2].get_page_count() == 26
+    assert sorted_search[:1].get_page_count() == 52
+    assert sorted_search[:100].get_page_count() == 1
+    assert sorted_search[:0].get_page_count() == 0
+
+def test_get_page(sorted_search, commits):
+    # set page size to 2
+    s = sorted_search[:2]
+
+    # process pages in random order to rule out order-dependent side effects
+    pages = list(range(1, 27))
+    shuffle(pages)
+
+    for page_no in pages:
+        page = get_commit_page(commits, page_no, 2)
+        assert page == s.get_page(page_no).hits
+
+    # a non-existent page returns no hits
+    assert len(s.get_page(27).hits) == 0
+    assert len(s.get_page(42).hits) == 0
+
+def test_get_negative_page(sorted_search, commits):
+    # set page size to 2
+    s = sorted_search[:2]
+
+    # process pages in random order to rule out order-dependent side effects
+    pages = list(range(-1, -27, -1))
+    shuffle(pages)
+
+    for page_no in pages:
+        page = get_commit_page(commits, 27 + page_no, 2)
+        assert page == s.get_page(page_no).hits
+
+    # a non-existent page returns no hits
+    assert len(s.get_page(-27).hits) == 0
+    assert len(s.get_page(-42).hits) == 0
+
+def test_get_next_page(sorted_search, commits):
+    # manually retrieve page 4 of size 5
+    page4 = sorted_search[15:20].execute()
+    assert page4.hits == get_commit_page(commits, 4, 5)
+
+    # set page size to 5
+    s = sorted_search[:5]
+    page5 = s.get_next_page(page4.hits[-1].meta.sort)
+    assert page5.hits == get_commit_page(commits, 5, 5)
+
+def test_get_previous_page(sorted_search, commits):
+    # manually retrieve page 4 of size 5
+    page4 = sorted_search[15:20].execute()
+    assert page4.hits == get_commit_page(commits, 4, 5)
+
+    # set page size to 5
+    s = sorted_search[:5]
+    page3 = s.get_previous_page(page4.hits[0].meta.sort)
+    assert page3.hits == get_commit_page(commits, 3, 5)
diff --git a/test_elasticsearch_dsl/test_pagination.py b/test_elasticsearch_dsl/test_pagination.py
new file mode 100644
index 000000000..10bb56979
--- /dev/null
+++ b/test_elasticsearch_dsl/test_pagination.py
@@ -0,0 +1,60 @@
+from elasticsearch_dsl.utils import AttrDict
+from elasticsearch_dsl.search import Search
+
+from pytest import raises
+
+class DummySearch(Search):
+    """Search subclass whose ``execute`` fakes a response echoing the request."""
+    def __init__(self, *args, **kwargs):
+        super(DummySearch, self).__init__(*args, **kwargs)
+        self._executions = []
+
+    def execute(self, *args, **kwargs):
+        return AttrDict({
+            'req': self.to_dict(),
+            'hits': {
+                'hits': list(range(self._extra.get('size', 10)))
+            }
+        })
+
+def test_pages_are_1_based():
+    body = DummySearch().get_page(1)
+    assert body['req']["size"] == 10
+    assert body['req']["from"] == 0
+
+def test_pages_respect_page_size():
+    body = DummySearch()[:6].get_page(2)
+    assert body['req']["size"] == 6
+    assert body['req']["from"] == 6
+
+def test_page_size_can_be_overwritten():
+    body = DummySearch()[:6].get_page(2, size=10)
+    assert body['req']["size"] == 10
+    assert body['req']["from"] == 10
+
+def test_get_page_doesnt_allow_0():
+    with raises(ValueError):
+        DummySearch().get_page(0)
+
+def test_next_page_respects_size():
+    body = DummySearch()[123:124].get_next_page([1, 2])
+    assert body['req']["size"] == 1
+    assert body['req']["from"] == 0
+    assert body['req']["search_after"] == [1, 2]
+
+def test_previous_page_reverses_sort_and_hits():
+    body = DummySearch()[:5].sort(
+        '_score',
+        '-publish_date',
+        {'author.keyword': 'asc'}
+    ).get_previous_page([1, 2])
+
+    assert body['req']["size"] == 5
+    assert body['req']["from"] == 0
+    assert body['req']["search_after"] == [1, 2]
+    assert body['req']['sort'] == [
+        {'_score': 'asc'},
+        {"publish_date": {"order": "asc"}},
+        {'author.keyword': 'desc'}
+    ]
+    assert body['hits']['hits'] == [4, 3, 2, 1, 0]