random sampling
This commit is contained in:
parent
b4de2ee0f9
commit
4172b43c0f
@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
import argparse
|
import argparse
|
||||||
import traceback
|
import traceback
|
||||||
|
import random
|
||||||
from celery.execute import send_task
|
from celery.execute import send_task
|
||||||
from celery import current_app
|
from celery import current_app
|
||||||
|
|
||||||
@ -22,14 +23,15 @@ def main():
|
|||||||
docs = kernel.db.tracked.find({'doc_class': args.doc_class,
|
docs = kernel.db.tracked.find({'doc_class': args.doc_class,
|
||||||
'versions': {'$ne': []}
|
'versions': {'$ne': []}
|
||||||
}, timeout=False)
|
}, timeout=False)
|
||||||
print '%s docs in %s' % (docs.count(), args.doc_class)
|
total = docs.count()
|
||||||
|
print '{0} docs in {1}'.format(total, args.doc_class)
|
||||||
|
|
||||||
if args.sample:
|
if args.sample:
|
||||||
print 'sampling 100 documents'
|
limit = 100
|
||||||
docs = docs.limit(100)
|
print 'sampling {0} documents'.format(limit)
|
||||||
|
docs = docs.limit(limit).offset(random.randint(0, total-limit))
|
||||||
args.immediate = True
|
args.immediate = True
|
||||||
|
|
||||||
total = docs.count()
|
|
||||||
errors = 0
|
errors = 0
|
||||||
|
|
||||||
if args.immediate:
|
if args.immediate:
|
||||||
|
Loading…
Reference in New Issue
Block a user