Reuse a single requests session when retrieving member profiles

Also raise the progress messages in record processing from debug to info level

Change-Id: I5a78293025c672c9e0c9583470a4aae66e7acfdd
This commit is contained in:
Ilya Shakhat 2015-10-14 13:16:33 +03:00
parent fcfe45bf13
commit 307b96efc1
2 changed files with 15 additions and 11 deletions

View File

@ -18,6 +18,7 @@ import re
import time
from oslo_log import log as logging
import requests
import six
from stackalytics.processor import utils
@ -36,9 +37,9 @@ def strip_garbage(s):
return re.sub(r'\s+', ' ', re.sub(GARBAGE_PATTERN, '', s))
def _retrieve_member(uri, member_id, html_parser):
def _retrieve_member(requests_session, uri, member_id, html_parser):
content = utils.read_uri(uri)
content = utils.read_uri(uri, session=requests_session)
if not content:
return {}
@ -84,11 +85,13 @@ def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead):
cnt_empty = 0
cur_index = last_member_index + 1
html_parser = six.moves.html_parser.HTMLParser()
requests_session = requests.Session()
while cnt_empty < members_look_ahead:
profile_uri = uri + str(cur_index)
member = _retrieve_member(profile_uri, str(cur_index), html_parser)
member = _retrieve_member(requests_session, profile_uri,
str(cur_index), html_parser)
if 'member_name' not in member:
cnt_empty += 1
@ -103,5 +106,6 @@ def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead):
time.sleep(random.random() * 5)
requests_session.close()
LOG.debug('Last_member_index: %s', last_member_index)
runtime_storage_inst.set_by_key('last_member_index', last_member_index)

View File

@ -593,7 +593,7 @@ class RecordProcessor(object):
yield r
def _update_records_with_releases(self, release_index):
LOG.debug('Update records with releases')
LOG.info('Update records with releases')
for record in self.runtime_storage_inst.get_all_records():
if record['primary_key'] in release_index:
@ -606,7 +606,7 @@ class RecordProcessor(object):
yield record
def _update_records_with_user_info(self):
LOG.debug('Update user info in records')
LOG.info('Update user info in records')
for record in self.runtime_storage_inst.get_all_records():
company_name = record['company_name']
@ -625,7 +625,7 @@ class RecordProcessor(object):
yield record
def _update_commits_with_merge_date(self):
LOG.debug('Update commits with merge date')
LOG.info('Update commits with merge date')
change_id_to_date = {}
for record in self.runtime_storage_inst.get_all_records():
@ -649,7 +649,7 @@ class RecordProcessor(object):
yield record
def _update_blueprints_with_mention_info(self):
LOG.debug('Process blueprints and calculate mention info')
LOG.info('Process blueprints and calculate mention info')
valid_blueprints = {}
mentioned_blueprints = {}
@ -706,7 +706,7 @@ class RecordProcessor(object):
yield record
def _determine_core_contributors(self):
LOG.debug('Determine core contributors')
LOG.info('Determine core contributors')
module_branches = collections.defaultdict(set)
quarter_ago = int(time.time()) - 60 * 60 * 24 * 30 * 3 # a quarter ago
@ -752,7 +752,7 @@ class RecordProcessor(object):
yield mark
def _update_marks_with_disagreement(self):
LOG.debug('Process marks to find disagreements')
LOG.info('Process marks to find disagreements')
cores = set()
for user in self.runtime_storage_inst.get_all_users():
@ -788,7 +788,7 @@ class RecordProcessor(object):
yield processed
def _update_members_company_name(self):
LOG.debug('Update members with company names')
LOG.info('Update members with company names')
for record in self.runtime_storage_inst.get_all_records():
if record['record_type'] != 'member':
@ -819,7 +819,7 @@ class RecordProcessor(object):
user_processor.store_user(self.runtime_storage_inst, user)
def _update_self_made_marks(self):
LOG.debug('Update self-made marks')
LOG.info('Update self-made marks')
patch_id_to_user_id = {}
for record in self.runtime_storage_inst.get_all_records():
if record['record_type'] == 'patch':