-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzoomarks_csvextract_planet9.py
74 lines (63 loc) · 3.45 KB
/
zoomarks_csvextract_planet9.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -------------------------------------------------------------
# Panoptes Marking Export Script
#
# This script extracts individual markings from a Zooniverse
# Panoptes classification data export CSV. It is configured
# to export circular marker info only for classifications
# collected for the latest workflow version.
#
# Customizations are set for use with the following project:
# planet-9-rogue-worlds
#
# Column names, annotation info, and marking task ID may need
# to be altered for this script to work for data exports from
# other projects.
#
# Written by: Cliff Johnson ([email protected])
# Last Edited: 10 January 2017
# Based on scripts by Brooke Simmons
# -------------------------------------------------------------
#Python 3.5.1
import sys
# Read the input and output paths from the command line.
#   sys.argv[1] -> classfile_in  (classifications export CSV to read)
#   sys.argv[2] -> markfile_out  (markings CSV to write)
# If either positional argument is missing, print a usage summary and exit.
try:
    classfile_in = sys.argv[1]
    markfile_out = sys.argv[2]
except IndexError:
    # Only a missing argument should trigger the usage text; a bare
    # ``except:`` would also intercept KeyboardInterrupt and SystemExit.
    print("\nUsage: "+sys.argv[0]+" classifications_infile markings_outfile")
    print(" classifications_infile: a Zooniverse (Panoptes) classifications data export CSV.")
    print(" markings_outfile: a CSV file with marking information from classifications.")
    print("\nExample: "+sys.argv[0]+" ap-aas229-test-classifications.csv ap-aas229-test-markings.csv")
    # Exit status is kept at 0 so existing callers see the same behavior.
    sys.exit(0)
#classfile_in = 'ap-aas229-test-classifications.csv'
#markfile_out = 'ap-aas229-test-markings.csv'
import pandas as pd
import json
# Load the classification export, then decode each JSON-encoded text column
# into Python objects stored in a sibling "<column>_json" column.
classifications = pd.read_csv(classfile_in)
for json_column in ('metadata', 'annotations', 'subject_data'):
    classifications[json_column + '_json'] = [
        json.loads(raw) for raw in classifications[json_column]
    ]

# Count the markings recorded in each classification.
# Note: the [0] index picks the first annotation entry, which corresponds to
# the task number (i.e., 0); the length of its 'value' is the marking count.
classifications['n_markings'] = [
    len(tasks[0]['value']) for tasks in classifications['annotations_json']
]
### Classification Selection / CURRENT SETTING: most recent workflow version
# OPTION 1: keep only the classifications whose workflow_version equals the
# largest workflow_version value present in the export.
latest_version = classifications['workflow_version'].max()
is_latest = classifications['workflow_version'] == latest_version
iclass = classifications[is_latest]
# OPTION 2: Select most/all valid classifications using workflow_id and workflow_version
#iclass = classifications[(classifications['workflow_id'] == 1687) & (classifications['workflow_version'] > 40)]
# Flatten the selected classifications in iclass into one record per marking:
# a list of dictionaries (prep for a pandas dataframe).
# Applicable for workflows with the marking task as the first task. Each
# record carries the classification's identifying columns plus the marker's
# tool, tool_label, x, y and frame fields (no radius is exported).
clist = []
for _, c in iclass.iterrows():
    # Classifications without markings contribute no rows.
    if c['n_markings'] > 0:
        # Note: index of annotations_json corresponds to task number (i.e., 0)
        for q in c.annotations_json[0]['value']:
            # OPTIONAL EXPANSION: could use if statement here to split marker types
            clist.append({
                'classification_id': c.classification_id,
                'user_name': c.user_name,
                'user_id': c.user_id,
                'created_at': c.created_at,
                'subject_ids': c.subject_ids,
                'tool': q['tool'],
                'tool_label': q['tool_label'],
                'x': q['x'],
                'y': q['y'],
                'frame': q['frame'],
            })
# Turn the list of marking dictionaries into a pandas dataframe with a fixed
# column order and export it to CSV; the row index is written as 'mark_id'.
col_order = ['classification_id', 'user_name', 'user_id', 'created_at', 'subject_ids',
             'tool', 'tool_label', 'x', 'y', 'frame']
# Passing columns= to the constructor (rather than indexing afterwards) keeps
# this working when clist is empty: the result is a header-only CSV instead
# of a KeyError on the missing columns.
out = pd.DataFrame(clist, columns=col_order)
out.to_csv(markfile_out, index_label='mark_id')