diff --git a/stackalytics/processor/mps.py b/stackalytics/processor/mps.py index c66c6d637..933d321e4 100644 --- a/stackalytics/processor/mps.py +++ b/stackalytics/processor/mps.py @@ -18,6 +18,7 @@ import re import time from oslo_log import log as logging +import requests import six from stackalytics.processor import utils @@ -36,9 +37,9 @@ def strip_garbage(s): return re.sub(r'\s+', ' ', re.sub(GARBAGE_PATTERN, '', s)) -def _retrieve_member(uri, member_id, html_parser): +def _retrieve_member(requests_session, uri, member_id, html_parser): - content = utils.read_uri(uri) + content = utils.read_uri(uri, session=requests_session) if not content: return {} @@ -84,11 +85,13 @@ def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead): cnt_empty = 0 cur_index = last_member_index + 1 html_parser = six.moves.html_parser.HTMLParser() + requests_session = requests.Session() while cnt_empty < members_look_ahead: profile_uri = uri + str(cur_index) - member = _retrieve_member(profile_uri, str(cur_index), html_parser) + member = _retrieve_member(requests_session, profile_uri, + str(cur_index), html_parser) if 'member_name' not in member: cnt_empty += 1 @@ -103,5 +106,6 @@ def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead): time.sleep(random.random() * 5) + requests_session.close() LOG.debug('Last_member_index: %s', last_member_index) runtime_storage_inst.set_by_key('last_member_index', last_member_index) diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py index c6c8c60a1..79f42f518 100644 --- a/stackalytics/processor/record_processor.py +++ b/stackalytics/processor/record_processor.py @@ -593,7 +593,7 @@ class RecordProcessor(object): yield r def _update_records_with_releases(self, release_index): - LOG.debug('Update records with releases') + LOG.info('Update records with releases') for record in self.runtime_storage_inst.get_all_records(): if record['primary_key'] in release_index: @@ -606,7 +606,7 @@ class RecordProcessor(object): yield record def _update_records_with_user_info(self): - LOG.debug('Update user info in records') + LOG.info('Update user info in records') for record in self.runtime_storage_inst.get_all_records(): company_name = record['company_name'] @@ -625,7 +625,7 @@ class RecordProcessor(object): yield record def _update_commits_with_merge_date(self): - LOG.debug('Update commits with merge date') + LOG.info('Update commits with merge date') change_id_to_date = {} for record in self.runtime_storage_inst.get_all_records(): @@ -649,7 +649,7 @@ class RecordProcessor(object): yield record def _update_blueprints_with_mention_info(self): - LOG.debug('Process blueprints and calculate mention info') + LOG.info('Process blueprints and calculate mention info') valid_blueprints = {} mentioned_blueprints = {} @@ -706,7 +706,7 @@ class RecordProcessor(object): yield record def _determine_core_contributors(self): - LOG.debug('Determine core contributors') + LOG.info('Determine core contributors') module_branches = collections.defaultdict(set) quarter_ago = int(time.time()) - 60 * 60 * 24 * 30 * 3 # a quarter ago @@ -752,7 +752,7 @@ class RecordProcessor(object): yield mark def _update_marks_with_disagreement(self): - LOG.debug('Process marks to find disagreements') + LOG.info('Process marks to find disagreements') cores = set() for user in self.runtime_storage_inst.get_all_users(): @@ -788,7 +788,7 @@ class RecordProcessor(object): yield processed def _update_members_company_name(self): - LOG.debug('Update members with company names') + LOG.info('Update members with company names') for record in self.runtime_storage_inst.get_all_records(): if record['record_type'] != 'member': @@ -819,7 +819,7 @@ class RecordProcessor(object): user_processor.store_user(self.runtime_storage_inst, user) def _update_self_made_marks(self): - LOG.debug('Update self-made marks') + LOG.info('Update self-made marks') patch_id_to_user_id = {} for record in self.runtime_storage_inst.get_all_records(): if record['record_type'] == 'patch':