Skip to content

[WIP] Pagination #806

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions elasticsearch_dsl/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,25 @@ def _clone(self):
return s




def _reverse_sort_entry(sort_entry):
    """Return *sort_entry* with its sort direction flipped.

    Handles the three shapes a sort entry can take: a bare field name,
    ``{"field": "asc/desc"}``, and ``{"field": {"order": ..., ...}}``.
    """
    # "field" - bare strings sort asc by default (desc for _score)
    if isinstance(sort_entry, string_types):
        if sort_entry == '_score':
            return {'_score': 'asc'}
        return {sort_entry: 'desc'}

    f, definition = sort_entry.copy().popitem()
    # {"field": "asc/desc"}
    if isinstance(definition, string_types):
        return {f: 'asc' if definition == 'desc' else 'desc'}

    # {"field": {"order": "asc/desc", ...}} - flip "order", keep any other
    # options (mode, missing, ...). A missing "order" key means the
    # elasticsearch default (asc), which therefore reverses to desc; the
    # original code raised KeyError in that case.
    definition = definition.copy()
    definition['order'] = 'asc' if definition.get('order') == 'desc' else 'desc'
    return {f: definition}

class Search(Request):
query = ProxyDescriptor('query')
post_filter = ProxyDescriptor('post_filter')
Expand Down Expand Up @@ -337,6 +356,56 @@ def __getitem__(self, n):
s._extra['size'] = 1
return s

def get_page_count(self, size=None):
    """Number of pages needed to show all hits, ``size`` hits per page.

    ``size`` defaults to the size configured on the search itself
    (falling back to elasticsearch's default of 10).
    """
    if size is None:
        size = self._extra.get("size", 10)
    if size == 0:
        return 0
    # integer ceil(count / size)
    full_pages, remainder = divmod(self.count(), size)
    return full_pages + 1 if remainder else full_pages

def get_page(self, page_no, size=None):
    """Execute the search and return page ``page_no`` of the results.

    Pages are 1-based; negative numbers page from the back (``-1`` is
    the last ``size`` hits). ``size`` defaults to the size configured
    on the search itself (falling back to 10).
    """
    if page_no == 0:
        raise ValueError("Search pagination is 1-based.")
    if size is None:
        size = self._extra.get("size", 10)

    paging_backwards = page_no < 0

    s = self._clone()
    s._extra["from"] = (abs(page_no) - 1) * size
    s._extra["size"] = size
    # when paging from the back, flip every sort criterion so that
    # from/size count from the end of the result set instead
    if paging_backwards:
        s._sort = [_reverse_sort_entry(se) for se in self._sort]

    resp = s.execute()

    # undo the reversal inside the page so hits read in requested order
    if paging_backwards:
        resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1]

    return resp

def get_next_page(self, last_hit, size=None):
    """Return the page that follows the hit ``last_hit``.

    ``last_hit`` is the sort key of the final hit on the current page
    (its ``meta.sort``); pagination is done with ``search_after`` so it
    stays efficient at any depth. ``size`` defaults to the size
    configured on the search itself (falling back to 10).
    """
    if size is None:
        size = self._extra.get("size", 10)
    s = self._clone()
    s._extra.update(
        {"from": 0, "size": size, "search_after": list(last_hit)})
    return s.execute()

def get_previous_page(self, first_hit, size=None):
    """Return the page that precedes the hit ``first_hit``.

    ``first_hit`` is the sort key of the first hit on the current page
    (its ``meta.sort``). Implemented as ``search_after`` over the
    reversed sort order, with the hits flipped back afterwards.
    ``size`` defaults to the size configured on the search itself
    (falling back to 10).
    """
    if size is None:
        size = self._extra.get("size", 10)
    s = self._clone()
    s._extra.update(
        {"from": 0, "size": size, "search_after": list(first_hit)})
    # search backwards from first_hit...
    s._sort = [_reverse_sort_entry(se) for se in self._sort]
    resp = s.execute()
    # ...then restore the requested order inside the page
    resp['hits']['hits'] = resp.to_dict()['hits']['hits'][::-1]
    return resp

@classmethod
def from_dict(cls, d):
"""
Expand Down
85 changes: 85 additions & 0 deletions test_elasticsearch_dsl/test_integration/test_pagination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
from random import shuffle

from elasticsearch_dsl import Search

from pytest import fixture

@fixture(scope="session")
def sorted_search(data_client):
    """Search over flat-git with a fully deterministic sort order."""
    # tie-break on _id so the ordering is total and stable across requests
    criteria = ('stats.lines', '-stats.files', {'_id': {'order': 'desc'}})
    return Search(index='flat-git').sort(*criteria)

@fixture(scope="session")
def commits(sorted_search):
    """
    List of all commits as sorted by ``sorted_search``
    """
    scan = sorted_search.params(preserve_order=True).scan()
    return list(scan)

def get_commit_page(commits, page, size=10):
    """Slice out 1-based page ``page`` of ``commits`` as the ground truth."""
    offset = (page - 1) * size
    return commits[offset:offset + size]

def test_get_page_count(sorted_search):
    # 52 commits at the default page size of 10
    assert sorted_search.get_page_count() == 6
    # explicit sizes, including one larger than the result set
    for size, expected in ((2, 26), (1, 52), (100, 1), (0, 0)):
        assert sorted_search[:size].get_page_count() == expected

def test_get_page(sorted_search, commits):
    size = 2
    s = sorted_search[:size]

    # visit pages in random order to avoid possible side effects
    page_numbers = list(range(1, 27))
    shuffle(page_numbers)

    for page_no in page_numbers:
        expected = get_commit_page(commits, page_no, size)
        assert expected == s.get_page(page_no).hits

    # pages past the end come back empty
    for missing in (27, 42):
        assert len(s.get_page(missing).hits) == 0

def test_get_negative_page(sorted_search, commits):
    size = 2
    s = sorted_search[:size]

    # negative page -k corresponds to positive page 27 - k (26 pages total);
    # randomize the order to avoid possible side effects
    page_numbers = list(range(-26, 0))
    shuffle(page_numbers)

    for page_no in page_numbers:
        expected = get_commit_page(commits, 27 + page_no, size)
        assert expected == s.get_page(page_no).hits

    # pages before the start come back empty
    for missing in (-27, -42):
        assert len(s.get_page(missing).hits) == 0

def test_get_next_page(sorted_search, commits):
    # fetch page 4 of size 5 directly via from/size slicing
    page4 = sorted_search[15:20].execute()
    assert page4.hits == get_commit_page(commits, 4, 5)

    # search_after the last hit of page 4 must yield page 5
    cursor = page4.hits[-1].meta.sort
    page5 = sorted_search[:5].get_next_page(cursor)
    assert page5.hits == get_commit_page(commits, 5, 5)

def test_get_previous_page(sorted_search, commits):
    # fetch page 4 of size 5 directly via from/size slicing
    page4 = sorted_search[15:20].execute()
    assert page4.hits == get_commit_page(commits, 4, 5)

    # search_after (backwards) from the first hit of page 4 must yield page 3
    cursor = page4.hits[0].meta.sort
    page3 = sorted_search[:5].get_previous_page(cursor)
    assert page3.hits == get_commit_page(commits, 3, 5)
59 changes: 59 additions & 0 deletions test_elasticsearch_dsl/test_pagination.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from elasticsearch_dsl.utils import AttrDict
from elasticsearch_dsl.search import Search

from pytest import raises

class DummySearch(Search):
    """In-memory stand-in for ``Search`` that never talks to elasticsearch.

    ``execute`` returns the request body it *would* have sent (under
    ``req``) together with fake hits, and records every request body in
    ``self._executions`` for later inspection.
    """
    def __init__(self, *args, **kwargs):
        super(DummySearch, self).__init__(*args, **kwargs)
        # request bodies produced by execute(), in call order
        self._executions = []

    def execute(self, *args, **kwargs):
        req = self.to_dict()
        # record the request; the original initialized _executions but
        # never populated it
        self._executions.append(req)
        return AttrDict({
            'req': req,
            'hits': {
                # one dummy hit per requested document (default size 10)
                'hits': list(range(self._extra.get('size', 10)))
            }
        })

def test_pages_are_1_based():
    # page 1 must translate to from=0 with the default size of 10
    request = DummySearch().get_page(1)['req']
    assert request["from"] == 0
    assert request["size"] == 10

def test_pages_respect_page_size():
    # the size set on the search ([:6]) drives both size and offset
    request = DummySearch()[:6].get_page(2)['req']
    assert (request["from"], request["size"]) == (6, 6)

def test_page_size_can_be_overwritten():
    # an explicit size= wins over the size set on the search ([:6])
    request = DummySearch()[:6].get_page(2, size=10)['req']
    assert (request["from"], request["size"]) == (10, 10)

def test_get_page_doesnt_allow_0():
    search = DummySearch()
    # page numbers are 1-based; zero must be rejected
    with raises(ValueError):
        search.get_page(0)

def test_next_page_respects_size():
    request = DummySearch()[123:124].get_next_page([1, 2])['req']
    # size comes from the slice (124 - 123 == 1); from is always reset
    assert request["size"] == 1
    assert request["from"] == 0
    assert request["search_after"] == [1, 2]

def test_previous_page_reverses_sort_and_hits():
    search = DummySearch()[:5].sort(
        '_score',
        '-publish_date',
        {'author.keyword': 'asc'}
    )
    resp = search.get_previous_page([1, 2])
    request = resp['req']

    assert request["size"] == 5
    assert request["from"] == 0
    assert request["search_after"] == [1, 2]
    # every sort criterion is flipped so search_after walks backwards
    assert request['sort'] == [
        {'_score': 'asc'},
        {"publish_date": {"order": "asc"}},
        {'author.keyword': 'desc'}
    ]
    # and the hits are re-reversed back into display order
    assert resp['hits']['hits'] == [4, 3, 2, 1, 0]