summaryrefslogtreecommitdiff
path: root/python/shards.py
blob: f47921f468e2f20435f13162839088e954ad009b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Experiment: split `total` items into roughly equal shards and check that the
# shard sizes add up to the full data set.
total = 100000
data = range(total)
jobSize = 5000  # target number of items per job

# NOTE(review): `total // jobSize + 1` yields one job more than strictly needed
# when `total` is an exact multiple of `jobSize` (21 instead of 20 here).
# Kept as-is so the script's output is unchanged; confirm whether a ceiling
# division was intended.
numJobs = (total // jobSize) + 1
numSegmentsPerJob = total // numJobs

# Use the call form of print everywhere: with a single argument it emits the
# same text on Python 2 and Python 3, whereas the bare `print x` statement is
# a SyntaxError on Python 3.
print(numSegmentsPerJob)
print(numJobs)
print('---')

start = 0
count = 0
for i in range(numJobs):
    # The last job absorbs the remainder left by the floor division above,
    # so every item ends up in exactly one shard.
    end = total if i == numJobs - 1 else start + numSegmentsPerJob
    shard = data[start:end]
    print(len(shard))
    count += len(shard)
    start += numSegmentsPerJob

# Sanity check: the shard sizes should sum to `total`.
print(count)