diff options
author | Patrick Simianer <p@simianer.de> | 2017-12-20 22:26:57 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2017-12-20 22:26:57 +0100 |
commit | 32e8ad53760ad5743f39fd6a522ca7ba1e9516c3 (patch) | |
tree | 3c0f28b4ee088112dc06ddacadafa86850457794 /python | |
parent | a7b64caa0e22a974d352b091866d346da90c0ab5 (diff) |
python/shards.py
Diffstat (limited to 'python')
-rw-r--r-- | python/shards.py | 23 |
1 files changed, 23 insertions, 0 deletions
# python/shards.py (added as a new file by this commit)
#
# Split `total` items into contiguous, roughly equal shards, print the size
# of every shard, and finally print the summed size as a sanity check.

total = 100000
data = range(total)
jobSize = 5000

# One job more than total // jobSize, so the per-job size computed below
# always stays under jobSize.
numJobs = (total // jobSize) + 1
numSegmentsPerJob = total // numJobs

# Call-form print with a single argument behaves the same on Python 2 and
# Python 3; the statement form `print x` is a SyntaxError on Python 3.
print(numSegmentsPerJob)
print(numJobs)
print('---')

start = 0
count = 0
for i in range(numJobs):
    if i == numJobs - 1:
        # Integer division rounds down, so the last shard absorbs whatever
        # is left over and runs to the end of the data.
        end = total
    else:
        end = start + numSegmentsPerJob
    shard = data[start:end]
    print(len(shard))
    count += len(shard)
    start += numSegmentsPerJob

# Should equal `total` when the shards cover the data exactly once.
print(count)