summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- python/shards.py 23
1 files changed, 23 insertions, 0 deletions
diff --git a/python/shards.py b/python/shards.py
new file mode 100644
index 0000000..f47921f
--- /dev/null
+++ b/python/shards.py
@@ -0,0 +1,23 @@
+"""Split `total` items into roughly `jobSize`-sized shards; print each size."""
+total = 100000
+data = range(total)
+jobSize = 5000
+# Ceiling division: `// jobSize + 1` gave a spare job on exact multiples;
+# max(1, ...) keeps the divisor below non-zero when total is 0.
+numJobs = max(1, -(-total // jobSize))
+numSegmentsPerJob = total // numJobs
+print(numSegmentsPerJob)  # call form works on Python 2 and Python 3
+print(numJobs)
+print('---')
+
+start = count = 0
+for i in range(numJobs):
+    # The last shard runs to `total`, absorbing the division remainder.
+    end = total if i == numJobs - 1 else start + numSegmentsPerJob
+    shard = data[start:end]
+    print(len(shard))
+    count += len(shard)
+    start += numSegmentsPerJob
+
+print(count)  # equals total: the shards cover data without gaps or overlap
+