diff options
author | Linus Nordberg <linus@nordu.net> | 2016-11-26 22:43:13 +0100 |
---|---|---|
committer | Linus Nordberg <linus@nordu.net> | 2016-11-26 22:43:13 +0100 |
commit | 8b96f7c91591ac0c667538a9c9316f28891ccc72 (patch) | |
tree | 9fe8f6376fd4e4b391946edc78dffb5dde7a3f4b | |
parent | af5ada8e45b992093947b8c370217ddac7165367 (diff) |
Add back chunking for get_entries in parallelised merge_fetch.
Even if it's not important for parallelism on the merge node, it's not
a bad idea to keep individual get_entries requests from growing too large.
-rwxr-xr-x | tools/merge_fetch.py | 19 |
1 file changed, 9 insertions, 10 deletions
diff --git a/tools/merge_fetch.py b/tools/merge_fetch.py
index 10fd406..be9edce 100755
--- a/tools/merge_fetch.py
+++ b/tools/merge_fetch.py
@@ -127,17 +127,16 @@ def merge_fetch_worker(args, localconfig, storagenode, pipe):
         if to_fetch:
             logging.info("%s: fetching %d entries", storagenode["name"],
                          len(to_fetch))
-            fetchlist = list(to_fetch)
             with requests.sessions.Session() as session:
-                entries = get_entries(storagenode["name"],
-                                      "https://%s/" % storagenode["address"],
-                                      own_key, paths, fetchlist,
-                                      session=session)
-                for ehash in fetchlist:
-                    entry = entries[ehash]
-                    verify_entry(verifycert, entry, ehash)
-                    pipe.send(('FETCHED', ehash, entry))
-                    to_fetch.remove(ehash)
+                for chunk in chunks(list(to_fetch), 100):
+                    entries = get_entries(storagenode["name"],
+                                          "https://%s/" % storagenode["address"],
+                                          own_key, paths, chunk, session=session)
+                    for ehash in chunk:
+                        entry = entries[ehash]
+                        verify_entry(verifycert, entry, ehash)
+                        pipe.send(('FETCHED', ehash, entry))
+                        to_fetch.remove(ehash)

         new_entries = get_new_entries(storagenode["name"],
                                       "https://%s/" % storagenode["address"],