summary | refs | log | tree | commit | diff
path: root/python/shards.py
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2017-12-20 22:26:57 +0100
committer: Patrick Simianer <p@simianer.de> 2017-12-20 22:26:57 +0100
commit: 32e8ad53760ad5743f39fd6a522ca7ba1e9516c3 (patch)
tree: 3c0f28b4ee088112dc06ddacadafa86850457794 /python/shards.py
parent: a7b64caa0e22a974d352b091866d346da90c0ab5 (diff)
python/shards.py
Diffstat (limited to 'python/shards.py')
-rw-r--r-- python/shards.py | 23
1 files changed, 23 insertions, 0 deletions
diff --git a/python/shards.py b/python/shards.py
new file mode 100644
index 0000000..f47921f
--- /dev/null
+++ b/python/shards.py
@@ -0,0 +1,23 @@
+# Split `total` items into jobs of roughly `jobSize` items; check full coverage.
+total = 100000
+data = range(total)
+jobSize = 5000
+# Ceiling division: an exact multiple of jobSize must not yield an extra job.
+numJobs = max(1, -(-total // jobSize))  # max() avoids 0 jobs when total == 0
+numSegmentsPerJob = total // numJobs
+print(numSegmentsPerJob)  # call form: valid on both Python 2 and Python 3
+print(numJobs)
+print('---')
+
+start = 0
+count = 0
+for i in range(numJobs):
+    # The last job absorbs the remainder left by the floor division above.
+    end = total if i == numJobs - 1 else start + numSegmentsPerJob
+    shard = data[start:end]
+    print(len(shard))
+    count += len(shard)
+    start += numSegmentsPerJob
+# Sanity check: prints `total` when the shards cover every item exactly once.
+print(count)
+