Do not override affiliation stated in user profile in default data

This patch changes the priority of affiliation resolution between the user
profile and the email hostname. Previously the email hostname had higher
priority, but now Stackalytics will always use the data from a static profile
defined in default_data.json.

Closes bug 1375474

Change-Id: I0a6bef901b0c53c4edf8355ca95e204dbe192b19
This commit is contained in:
Ilya Shakhat 2014-10-03 15:46:05 +04:00
parent a8bc77ac66
commit 358671934a
3 changed files with 42 additions and 4 deletions

View File

@ -140,6 +140,7 @@ def _store_users(runtime_storage_inst, users):
if stored_user:
stored_user.update(user)
user = stored_user
user['static'] = True
utils.store_user(runtime_storage_inst, user)

View File

@ -163,10 +163,13 @@ class RecordProcessor(object):
# collect ordinary fields
for key in ['seq', 'user_name', 'user_id',
'launchpad_id', 'companies']:
'launchpad_id', 'companies', 'static']:
merged_user[key] = next((v.get(key) for v in user_profiles
if v.get(key)), None)
if not merged_user['static']:
del merged_user['static']
# update user_id, prefer it to be equal to launchpad_id
merged_user['user_id'] = (merged_user['launchpad_id'] or
merged_user['user_id'])
@ -248,9 +251,11 @@ class RecordProcessor(object):
record['author_name'] = user['user_name']
company, policy = self._find_company(user['companies'], record['date'])
if company != '*robots' and policy == 'open':
company = (self._get_company_by_email(record.get('author_email'))
or company)
if not user.get('static'):
# for auto-generated profiles affiliation may be overridden
if company != '*robots' and policy == 'open':
company = (self._get_company_by_email(
record.get('author_email')) or company)
record['company_name'] = company
def _process_commit(self, record):

View File

@ -208,6 +208,38 @@ class TestRecordProcessor(testtools.TestCase):
self.assertIn('johndoe@ibm.com', utils.load_user(
record_processor_inst.runtime_storage_inst, 'john_doe')['emails'])
def test_process_commit_existing_user_new_email_known_company_static(self):
    # The profile below carries the 'static' flag, i.e. it is treated as
    # one configured in default_data. The commit comes from an email that
    # is not listed in the profile and whose domain maps to a different
    # company (IBM). The company stated in the profile (NEC) must still
    # be used, and the new email must end up attached to the stored user.
    new_email = 'johndoe@ibm.com'
    static_profile = {
        'user_id': 'john_doe',
        'launchpad_id': 'john_doe',
        'user_name': 'John Doe',
        'static': True,
        'emails': ['johndoe@nec.co.jp'],
        'companies': [{'company_name': 'NEC', 'end_date': 0}],
    }
    processor = self.make_record_processor(
        users=[static_profile],
        companies=[{'company_name': 'IBM', 'domains': ['ibm.com']}],
        lp_info={new_email: {'name': 'john_doe',
                             'display_name': 'John Doe'}})

    # consume every processed record before picking the first one
    commits = list(processor.process(
        generate_commits(author_email=new_email,
                         author_name='John Doe')))
    actual = commits[0]

    expected = {
        'launchpad_id': 'john_doe',
        'author_email': 'johndoe@ibm.com',
        'author_name': 'John Doe',
        'company_name': 'NEC',
    }
    self.assertRecordsMatch(expected, actual)

    stored_user = utils.load_user(
        processor.runtime_storage_inst, 'john_doe')
    self.assertIn(new_email, stored_user['emails'])
def test_process_commit_existing_user_old_job_not_overridden(self):
# User is known to LP, his email is new to us, and maps to other
# company. Have some record with new email, but from the period when