From 358671934ac402eaeb7b66b9fbfa315e108bbfb3 Mon Sep 17 00:00:00 2001
From: Ilya Shakhat <ishakhat@mirantis.com>
Date: Fri, 3 Oct 2014 15:46:05 +0400
Subject: [PATCH] Do not override affiliation stated in user profile in default
 data

This patch changes the priority of affiliation resolution between the
user profile and the email hostname. Previously the email hostname had
higher priority; now Stackalytics always uses the affiliation from the
static profile in default_data.json.

Closes bug 1375474

Change-Id: I0a6bef901b0c53c4edf8355ca95e204dbe192b19
---
 .../processor/default_data_processor.py       |  1 +
 stackalytics/processor/record_processor.py    | 13 +++++---
 tests/unit/test_record_processor.py           | 32 +++++++++++++++++++
 3 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/stackalytics/processor/default_data_processor.py b/stackalytics/processor/default_data_processor.py
index 5dd08ce01..d2270b3c7 100644
--- a/stackalytics/processor/default_data_processor.py
+++ b/stackalytics/processor/default_data_processor.py
@@ -140,6 +140,7 @@ def _store_users(runtime_storage_inst, users):
         if stored_user:
             stored_user.update(user)
             user = stored_user
+        user['static'] = True
         utils.store_user(runtime_storage_inst, user)
 
 
diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py
index 1dae6ac6f..aa079c8bf 100644
--- a/stackalytics/processor/record_processor.py
+++ b/stackalytics/processor/record_processor.py
@@ -163,10 +163,13 @@ class RecordProcessor(object):
 
         # collect ordinary fields
         for key in ['seq', 'user_name', 'user_id',
-                    'launchpad_id', 'companies']:
+                    'launchpad_id', 'companies', 'static']:
             merged_user[key] = next((v.get(key) for v in user_profiles
                                      if v.get(key)), None)
 
+        if not merged_user['static']:
+            del merged_user['static']
+
         # update user_id, prefer it to be equal to launchpad_id
         merged_user['user_id'] = (merged_user['launchpad_id'] or
                                   merged_user['user_id'])
@@ -248,9 +251,11 @@ class RecordProcessor(object):
             record['author_name'] = user['user_name']
 
         company, policy = self._find_company(user['companies'], record['date'])
-        if company != '*robots' and policy == 'open':
-            company = (self._get_company_by_email(record.get('author_email'))
-                       or company)
+        if not user.get('static'):
+            # for auto-generated profiles affiliation may be overridden
+            if company != '*robots' and policy == 'open':
+                company = (self._get_company_by_email(
+                    record.get('author_email')) or company)
         record['company_name'] = company
 
     def _process_commit(self, record):
diff --git a/tests/unit/test_record_processor.py b/tests/unit/test_record_processor.py
index 540c7ccc0..b57c01326 100644
--- a/tests/unit/test_record_processor.py
+++ b/tests/unit/test_record_processor.py
@@ -208,6 +208,38 @@ class TestRecordProcessor(testtools.TestCase):
         self.assertIn('johndoe@ibm.com', utils.load_user(
             record_processor_inst.runtime_storage_inst, 'john_doe')['emails'])
 
+    def test_process_commit_existing_user_new_email_known_company_static(self):
+        # User profile is configured in default_data. Email is new to us,
+        # and maps to other company. We still use a company specified
+        # in the profile
+        record_processor_inst = self.make_record_processor(
+            users=[
+                {'user_id': 'john_doe',
+                 'launchpad_id': 'john_doe',
+                 'user_name': 'John Doe',
+                 'static': True,
+                 'emails': ['johndoe@nec.co.jp'],
+                 'companies': [{'company_name': 'NEC', 'end_date': 0}]}
+            ],
+            companies=[{'company_name': 'IBM', 'domains': ['ibm.com']}],
+            lp_info={'johndoe@ibm.com':
+                     {'name': 'john_doe', 'display_name': 'John Doe'}})
+
+        processed_commit = list(record_processor_inst.process(
+            generate_commits(author_email='johndoe@ibm.com',
+                             author_name='John Doe')))[0]
+
+        expected_commit = {
+            'launchpad_id': 'john_doe',
+            'author_email': 'johndoe@ibm.com',
+            'author_name': 'John Doe',
+            'company_name': 'NEC',
+        }
+
+        self.assertRecordsMatch(expected_commit, processed_commit)
+        self.assertIn('johndoe@ibm.com', utils.load_user(
+            record_processor_inst.runtime_storage_inst, 'john_doe')['emails'])
+
     def test_process_commit_existing_user_old_job_not_overridden(self):
         # User is known to LP, his email is new to us, and maps to other
         # company. Have some record with new email, but from the period when