-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathupdate_db.py
45 lines (40 loc) · 1.6 KB
/
update_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import pandas as pd
import json
import mturk
import time
import pymongo
import sys
from datetime import datetime
""" Run this with 'python update_db.py x y' where x is 0 if using the sandbox, 1 if using the marketplace
and y is the amount of seconds to wait between loops """
# Fail fast with a usage message when an argument is missing, instead of an
# IndexError here (first argument) or after a full update pass (second argument).
if len(sys.argv) < 3:
    sys.exit(
        "Usage: python update_db.py x y "
        "(x: 0 = sandbox, 1 = marketplace; y: seconds to wait between loops)"
    )

# Only the exact string '1' selects production; any other value means sandbox.
create_hits_in_production = (sys.argv[1] == '1')

# Change the string in MongoClient to a connection string to a Mongodb base/cluster of your choice
db_client = pymongo.MongoClient(
    "mongodb+srv://<username>:<password>@cluster0-hjstc.mongodb.net/test?retryWrites=true&w=majority"
)
db = db_client['tbfy']

# Production and sandbox results are kept in separate collections.
if create_hits_in_production:
    hit_result_collection = db.hit_results
else:
    hit_result_collection = db.hit_results_sandbox

# The flag is forwarded to the project's MTurk wrapper; presumably it selects
# the production vs. sandbox endpoint -- confirm against mturk.MTurk.
mt = mturk.MTurk()
mt.launch_client(create_hits_in_production)

# Number of failed HIT updates seen so far; read and incremented by the update loop.
fails = 0
# Parse the polling interval once, before any work is done, so that a missing
# or non-integer second argument fails immediately rather than after a full
# pass that has already approved HITs.
poll_interval_seconds = int(sys.argv[2])

while True:
    # Update all hits in the database with correct results.
    # Only entries whose stored HITStatus is not 'Disposed' are refreshed.
    hit_result_collection_list = list(
        hit_result_collection.find({'hit.HITStatus': {'$not': {'$eq': 'Disposed'}}})
    )
    for hit in hit_result_collection_list:
        try:
            # The document _id is used as the MTurk HIT id. The HIT is fetched
            # first, then its answers are approved and collected; the document
            # is only written if both calls succeed.
            hit_result_collection.update_one(
                {'_id': hit['_id']},
                {
                    "$set": {
                        "hit": mt.client.get_hit(HITId=hit['_id'])['HIT'],
                        'answers': mt.approve_and_get_hit_answers(hit['_id']),
                    }
                },
            )
        except Exception as e:
            # Best effort: report which HIT failed and move on to the next one.
            print('Failed to update HIT {}: {}'.format(hit['_id'], e))
            # NOTE(review): the counter is cumulative over the whole run (it is
            # never reset), so the script exits on the fifth failure overall --
            # confirm this is intended rather than "five consecutive failures".
            fails = fails + 1
            if fails > 4:
                sys.exit(-1)
    print('{}: Updated db has {} non-disposed entries'.format(datetime.now(),len(hit_result_collection_list)))
    time.sleep(poll_interval_seconds)