Skip to content

Commit

Permalink
Another quickfix: removed more aff_raw
Browse files (browse the repository at this point in the history)
  • Loading branch information
romanchyla committed Mar 30, 2021
1 parent e4e80da commit dc6c8b7
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 14 deletions.
12 changes: 2 additions & 10 deletions adsmp/solr_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,22 +101,15 @@ def extract_augments_pipeline(db_augments, solrdoc):
aff is a solr virtual field so it should never be set"""
if db_augments is None or len(db_augments) == 0:
return {}
return {'aff': db_augments.get('aff_raw', None),
# Make sure that preference is given to affiliations extracted by augment pipeline
return {'aff': db_augments.get('aff_raw', db_augments.get('aff', solrdoc.get('aff', None))),
'aff_abbrev': db_augments.get('aff_abbrev', None),
'aff_canonical': db_augments.get('aff_canonical', None),
'aff_facet': db_augments.get('aff_facet', None),
'aff_facet_hier': db_augments.get('aff_facet_hier', None),
'aff_id': db_augments.get('aff_id', None),
'institution': db_augments.get('institution', None)}

def modify_affiliations(data, solrdoc):
"""Make sure that preference is given to affiliations extracted
by augment pipeline
"""
# if solrdoc.get('aff_raw', None):
# solrdoc.pop('aff', None)
if solrdoc.get('aff', None):
solrdoc['aff_raw'] = solrdoc.get('aff', None)

def extract_fulltext(data, solrdoc):
out = {}
Expand Down Expand Up @@ -236,7 +229,6 @@ def get_timestamps(db_record, out):
('fulltext', extract_fulltext),
('#timestamps', get_timestamps), # use 'id' to be always called
('augments', extract_augments_pipeline), # over aff field, adds aff_*
('#affiliations', modify_affiliations)
]


Expand Down
7 changes: 3 additions & 4 deletions adsmp/tests/test_solr_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def test_solr_transformer(self):
rec = self.app.get_record('bibcode')
x = solr_updater.transform_json_record(rec)
# self.assertFalse('aff' in x, 'virtual field should not be in solr output')
self.assertTrue('aff' in x) # temporarily populating both aff and aff_raw
self.assertTrue(x['aff_raw'] == rec['bib_data']['aff'],

self.assertTrue(x['aff'] == rec['bib_data']['aff'],
'solr record should include aff from bib data when augment is not available')
self.assertFalse('aff_abbrev' in x,
'augment field should not be in solr record when augment is not available')
Expand All @@ -187,7 +187,6 @@ def test_solr_transformer(self):
u'aff_facet': [u'-', u'-', u'-', u'-'],
u'aff_facet_hier': [u'-', u'-', u'-', u'-'],
u'aff_id': [u'-', u'-', u'-', u'-'],
u'aff_raw': [u'augment pipeline aff', u'-', u'-', u'-'],
u'institution': [u'-', u'-', u'-', u'-']})

rec = self.app.get_record('bibcode')
Expand Down Expand Up @@ -305,7 +304,7 @@ def test_solr_transformer(self):

rec = self.app.get_record('bibcode')
x = solr_updater.transform_json_record(rec)
# self.assertFalse('aff' in x) # virtual field should not be in solr output

self.assertTrue('aff' in x) # aff is no longer a virtual field
self.assertEqual(x['aff'], rec['augments']['aff']) # solr record should prioritize aff data from augment
self.assertEqual(x['aff_abbrev'], rec['augments']['aff_abbrev']) # solr record should include augment data
Expand Down

0 comments on commit dc6c8b7

Please sign in to comment.