Backend for openstack.org members report
Implemented backend Change-Id: I5c2fbb51eeed3a70f22fa7bde2e77b492e2060a3
This commit is contained in:
parent
81c3df03d9
commit
ed515b4be9
@ -6767,6 +6767,8 @@
|
||||
}
|
||||
],
|
||||
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
|
||||
"member_lists": ["http://www.openstack.org/community/members/profile/"],
|
||||
"voting_date": "2014-Jan-01",
|
||||
"project_types": [
|
||||
{
|
||||
"id": "all",
|
||||
|
@ -181,6 +181,15 @@
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"member_lists": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"voting_date": {
|
||||
"type": "string"
|
||||
},
|
||||
"project_types": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
@ -17,6 +17,9 @@
|
||||
# Port where dashboard listens on
|
||||
# listen_port = 8080
|
||||
|
||||
# Number of days after which the members list is re-read from the first profile
|
||||
# days_to_update_members = 7
|
||||
|
||||
# The address of file with corrections data
|
||||
# corrections_uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json
|
||||
|
||||
|
@ -146,6 +146,8 @@
|
||||
],
|
||||
|
||||
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
|
||||
"member_lists": ["http://www.openstack.org/community/members/profile/"],
|
||||
"voting_date": "2014-Jan-01",
|
||||
|
||||
"project_types": [
|
||||
{
|
||||
|
@ -28,6 +28,8 @@ OPTS = [
|
||||
help='The address dashboard listens on'),
|
||||
cfg.IntOpt('listen-port', default=8080,
|
||||
help='The port dashboard listens on'),
|
||||
cfg.IntOpt('days_to_update_members', default=7,
|
||||
help='Number of days to update members'),
|
||||
cfg.StrOpt('corrections-uri',
|
||||
default=('https://raw.github.com/stackforge/stackalytics/'
|
||||
'master/etc/corrections.json'),
|
||||
|
@ -127,6 +127,11 @@ def _store_companies(runtime_storage_inst, companies):
|
||||
for company in companies:
|
||||
for domain in company['domains']:
|
||||
domains_index[domain] = company['company_name']
|
||||
|
||||
if 'aliases' in company:
|
||||
for alias in company['aliases']:
|
||||
domains_index[alias] = company['company_name']
|
||||
|
||||
runtime_storage_inst.set_by_key('companies', domains_index)
|
||||
|
||||
|
||||
|
@ -26,6 +26,7 @@ from stackalytics.processor import config
|
||||
from stackalytics.processor import default_data_processor
|
||||
from stackalytics.processor import lp
|
||||
from stackalytics.processor import mls
|
||||
from stackalytics.processor import mps
|
||||
from stackalytics.processor import rcs
|
||||
from stackalytics.processor import record_processor
|
||||
from stackalytics.processor import runtime_storage
|
||||
@ -140,10 +141,24 @@ def process_mail_list(uri, runtime_storage_inst, record_processor_inst):
|
||||
runtime_storage_inst.set_records(processed_mail_iterator)
|
||||
|
||||
|
||||
def update_records(runtime_storage_inst):
|
||||
def process_member_list(uri, runtime_storage_inst, record_processor_inst):
    """Read members from *uri*, process them and store the resulting records.

    :param uri: base address of the member profiles, handed to ``mps.log``
    :param runtime_storage_inst: storage passed to ``mps.log`` and used to
                                 save the processed records
    :param record_processor_inst: object whose ``process`` method is applied
                                  to the typed member records
    """
    raw_members = mps.log(uri, runtime_storage_inst,
                          cfg.CONF.days_to_update_members)
    # presumably marks each record with the 'member' record type so the
    # processor can dispatch on it -- confirm in _record_typer
    typed_members = _record_typer(raw_members, 'member')
    runtime_storage_inst.set_records(
        record_processor_inst.process(typed_members))
|
||||
|
||||
|
||||
def update_members(runtime_storage_inst, record_processor_inst):
    """Process every member list stored under the 'member_lists' key.

    :param runtime_storage_inst: storage holding 'member_lists'; also passed
                                 on to ``process_member_list``
    :param record_processor_inst: record processor passed on unchanged
    """
    # A missing or empty 'member_lists' value means there is nothing to do.
    for list_uri in runtime_storage_inst.get_by_key('member_lists') or []:
        process_member_list(list_uri, runtime_storage_inst,
                            record_processor_inst)
|
||||
|
||||
|
||||
def update_records(runtime_storage_inst, record_processor_inst):
|
||||
repos = utils.load_repos(runtime_storage_inst)
|
||||
record_processor_inst = record_processor.RecordProcessor(
|
||||
runtime_storage_inst)
|
||||
|
||||
for repo in repos:
|
||||
process_repo(repo, runtime_storage_inst, record_processor_inst)
|
||||
@ -244,10 +259,16 @@ def main():
|
||||
|
||||
update_pids(runtime_storage_inst)
|
||||
|
||||
update_records(runtime_storage_inst)
|
||||
record_processor_inst = record_processor.RecordProcessor(
|
||||
runtime_storage_inst)
|
||||
|
||||
update_records(runtime_storage_inst, record_processor_inst)
|
||||
|
||||
apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)
|
||||
|
||||
# long operation should be the last
|
||||
update_members(runtime_storage_inst, record_processor_inst)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
110
stackalytics/processor/mps.py
Normal file
110
stackalytics/processor/mps.py
Normal file
@ -0,0 +1,110 @@
|
||||
# Copyright (c) 2013 Mirantis Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import re
|
||||
import time
|
||||
|
||||
import six
|
||||
|
||||
from stackalytics.openstack.common import log as logging
|
||||
from stackalytics.processor import utils
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
NAME_AND_DATE_PATTERN = r'<h3>(?P<member_name>[^<]*)[\s\S]*?' \
|
||||
r'<div class="span-7 last">(?P<date_joined>[^<]*)'
|
||||
COMPANY_PATTERN = r'<strong>Date\sJoined[\s\S]*?<b>(?P<company_draft>[^<]*)' \
|
||||
r'[\s\S]*?From\s(?P<date_from>[\s\S]*?)\(Current\)'
|
||||
|
||||
CNT_EMPTY_MEMBERS = 50
|
||||
|
||||
|
||||
def _convert_str_fields_to_unicode(result):
    """Decode, in place, every UTF-8 byte-string value of *result*.

    Values that are not byte strings, and byte strings that are not valid
    UTF-8, are left untouched.

    :param result: dict whose values are updated in place
    """
    # Iterate over a snapshot so the dict is never walked while it is being
    # updated; items() exists on both Python 2 and Python 3.
    for field, value in list(result.items()):
        # On Python 2 ``bytes`` is an alias of ``str``.
        if not isinstance(value, bytes):
            continue
        try:
            result[field] = value.decode('utf8')
        except UnicodeDecodeError:
            # Best effort: keep the raw bytes rather than drop the field.
            pass
|
||||
|
||||
|
||||
def _retrieve_member(uri, member_id):
    """Read one profile page and extract the member details from it.

    :param uri: address of the profile page
    :param member_id: value stored in the result under 'member_id'
    :returns: an empty dict when the page has no content; otherwise a dict
              that always contains 'company_draft' and, when the name/date
              pattern matches, also 'member_id', 'member_name',
              'date_joined' and 'member_uri'
    """
    page = utils.read_uri(uri)
    if not page:
        return {}

    member = {}

    # Only the first name/date occurrence on the page is taken.
    name_and_date = re.search(NAME_AND_DATE_PATTERN, page)
    if name_and_date:
        member['member_id'] = member_id
        member['member_name'] = name_and_date.group('member_name')
        member['date_joined'] = name_and_date.group('date_joined')
        member['member_uri'] = uri

    # Default affiliation; overwritten by each company match, so the last
    # match on the page wins.
    member['company_draft'] = '*independent'
    for company in re.finditer(COMPANY_PATTERN, page):
        member['company_draft'] = company.group('company_draft')

    return member
|
||||
|
||||
|
||||
def log(uri, runtime_storage_inst, days_to_update_members):
    """Yield member records found after the last stored member index.

    Profiles are requested one by one at ``uri + str(index)``. Scanning stops
    after CNT_EMPTY_MEMBERS consecutive profiles without a 'member_name'.
    When the stored 'last_update_members_date' is at least
    *days_to_update_members* days old, scanning restarts from index 1.

    'last_member_index' is written back to the storage only after the
    generator has been fully consumed.

    :param uri: base address that the numeric member index is appended to
    :param runtime_storage_inst: storage with ``get_by_key``/``set_by_key``
    :param days_to_update_members: age, in days, that triggers a full rescan
    """
    LOG.debug('Retrieving new openstack.org members')

    last_update_members_date = runtime_storage_inst.get_by_key(
        'last_update_members_date') or 0
    last_member_index = runtime_storage_inst.get_by_key(
        'last_member_index') or 0

    end_update_date = int(time.time()) - days_to_update_members * 24 * 60 * 60

    # The previous full scan is too old: start over from the first profile.
    if last_update_members_date <= end_update_date:
        last_member_index = 0
        last_update_members_date = int(time.time())

    runtime_storage_inst.set_by_key('last_update_members_date',
                                    last_update_members_date)

    misses_in_a_row = 0
    index = last_member_index + 1

    while misses_in_a_row < CNT_EMPTY_MEMBERS:
        index_str = str(index)
        member = _retrieve_member(uri + index_str, index_str)

        if 'member_name' in member:
            _convert_str_fields_to_unicode(member)
            misses_in_a_row = 0
            last_member_index = index
            LOG.debug('New member: %s', member['member_id'])
            yield member
        else:
            misses_in_a_row += 1

        index += 1

    LOG.debug('Last_member_index: %s', last_member_index)
    runtime_storage_inst.set_by_key('last_member_index', last_member_index)
|
@ -397,6 +397,38 @@ class RecordProcessor(object):
|
||||
|
||||
yield bpc
|
||||
|
||||
def _process_member(self, record):
    """Turn a raw member record into a processed record and a stored user.

    The record is updated in place (primary key, date, author name, module,
    company name) and the user stored under the member key gets the member's
    name and a single current company.

    :param record: dict with 'member_id', 'date_joined', 'member_name' and
                   'company_draft'
    :returns: generator yielding the updated record once
    """
    member_key = "member:" + record['member_id']
    record['primary_key'] = member_key
    record['date'] = utils.member_date_to_timestamp(record['date_joined'])
    record['author_name'] = record['member_name']
    record['module'] = 'unknown'

    # Map the scraped company text through the domains index; fall back to
    # the text itself when there is no entry for it.
    draft = record['company_draft']
    company = self.domains_index.get(draft) or draft

    # author_email is a key to create new user
    record['author_email'] = member_key
    record['company_name'] = company
    # _update_record_and_user function will create new user if needed
    self._update_record_and_user(record)
    # Set again after the call -- presumably because the helper can
    # overwrite company_name; confirm in _update_record_and_user.
    record['company_name'] = company

    user = utils.load_user(self.runtime_storage_inst, member_key)
    # The temporary e-mail key is not part of the final member record.
    del record['author_email']

    user['user_name'] = record['author_name']
    user['companies'] = [{
        'company_name': company,
        'end_date': 0,
    }]
    user['company_name'] = company
    utils.store_user(self.runtime_storage_inst, user)

    yield record
|
||||
|
||||
def _apply_type_based_processing(self, record):
|
||||
if record['record_type'] == 'commit':
|
||||
for r in self._process_commit(record):
|
||||
@ -410,6 +442,9 @@ class RecordProcessor(object):
|
||||
elif record['record_type'] == 'bp':
|
||||
for r in self._process_blueprint(record):
|
||||
yield r
|
||||
elif record['record_type'] == 'member':
|
||||
for r in self._process_member(record):
|
||||
yield r
|
||||
|
||||
def _renew_record_date(self, record):
|
||||
record['week'] = utils.timestamp_to_week(record['date'])
|
||||
|
@ -46,6 +46,13 @@ def date_to_timestamp_ext(d):
|
||||
return int(d)
|
||||
|
||||
|
||||
def member_date_to_timestamp(d):
    """Convert a member profile date such as 'June 25, 2013' to a timestamp.

    Surrounding whitespace is ignored, so the value scraped from the profile
    page parses whether or not it carries a trailing blank.

    :param d: date string in '<Month name> <day>, <year>' form, or a falsy
              value
    :returns: integer local-time timestamp; 0 when *d* is empty, None or
              whitespace only
    :raises ValueError: when a non-empty *d* does not match the format
    """
    if d:
        d = d.strip()
    if not d:
        return 0
    return int(time.mktime(
        datetime.datetime.strptime(d, '%B %d, %Y').timetuple()))
|
||||
|
||||
|
||||
def iso8601_to_timestamp(s):
|
||||
return int(time.mktime(iso8601.parse_date(s).timetuple()))
|
||||
|
||||
|
60
tests/unit/test_mps.py
Normal file
60
tests/unit/test_mps.py
Normal file
@ -0,0 +1,60 @@
|
||||
# Copyright (c) 2013 Mirantis Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import re
|
||||
|
||||
import testtools
|
||||
|
||||
from stackalytics.processor import mps
|
||||
|
||||
|
||||
class TestMps(testtools.TestCase):
    """Checks the regular expressions used to parse member profile pages."""

    # Sample profile page used as the parsing fixture.
    PROFILE_PAGE = '''<h1>Individual Member Profile</h1>
<div class="candidate span-14">
<div class="span-4">
<img src="/themes/openstack/images/generic-profile-photo.png"><p> </p>
</div>
<a name="profile-10501"></a>
<div class="details span-10 last">
<div class="last name-and-title">
<h3>Jim Battenberg</h3>
</div>
<hr><div class="span-3"><strong>Date Joined</strong></div>
<div class="span-7 last">June 25, 2013 <br><br></div>
<div class="span-3"><strong>Affiliations</strong></div>
<div class="span-7 last">
<div>
<b>Rackspace</b> From (Current)
</div>
</div>
<div class="span-3"><strong>Statement of Interest </strong></div>
<div class="span-7 last">
<p>contribute logic and evangelize openstack</p>
</div>
<p> </p>'''

    def test_member_parse_regex(self):
        """Name, join date and current company are found in the page."""
        name_and_date = re.search(mps.NAME_AND_DATE_PATTERN,
                                  self.PROFILE_PAGE)
        self.assertTrue(name_and_date)
        self.assertEqual('Jim Battenberg',
                         name_and_date.group('member_name'))
        self.assertEqual('June 25, 2013 ',
                         name_and_date.group('date_joined'))

        company = re.search(mps.COMPANY_PATTERN, self.PROFILE_PAGE)
        self.assertTrue(company)
        self.assertEqual('Rackspace', company.group('company_draft'))
|
@ -536,6 +536,58 @@ class TestRecordProcessor(testtools.TestCase):
|
||||
self.assertEqual(user, utils.load_user(
|
||||
record_processor_inst.runtime_storage_inst, 'john_doe@gmail.com'))
|
||||
|
||||
def test_create_member(self):
    """A first-seen member yields a filled record and a stored user."""
    member_record = {'member_id': '123456789',
                     'member_name': 'John Doe',
                     'member_uri': 'http://www.openstack.org/community'
                                   '/members/profile/123456789',
                     'date_joined': 'August 01, 2012 ',
                     'company_draft': 'Mirantis'}

    record_processor_inst = self.make_record_processor()
    # next() works on Python 2.6+ and 3, unlike the generator's .next()
    result_member = next(
        record_processor_inst._process_member(member_record))

    # Expected value first, matching the other assertions in the tests.
    self.assertEqual('member:123456789', result_member['primary_key'])
    self.assertEqual(
        utils.member_date_to_timestamp('August 01, 2012 '),
        result_member['date'])
    self.assertEqual('John Doe', result_member['author_name'])
    self.assertEqual('Mirantis', result_member['company_name'])

    result_user = utils.load_user(
        record_processor_inst.runtime_storage_inst, 'member:123456789')

    self.assertEqual('John Doe', result_user['user_name'])
    self.assertEqual('Mirantis', result_user['company_name'])
    self.assertEqual([{'company_name': 'Mirantis', 'end_date': 0}],
                     result_user['companies'])
|
||||
|
||||
def test_update_member(self):
    """Re-processing a known member overwrites its name and company."""
    member_record = {'member_id': '123456789',
                     'member_name': 'John Doe',
                     'member_uri': 'http://www.openstack.org/community'
                                   '/members/profile/123456789',
                     'date_joined': 'August 01, 2012 ',
                     'company_draft': 'Mirantis'}
    # Independent copy: _process_member changes the record it is given, so
    # sharing one dict would hide whether an update really happened.
    updated_member_record = dict(member_record,
                                 member_name='Bill Smith',
                                 company_draft='Rackspace')

    record_processor_inst = self.make_record_processor()

    # Create the member first so that the second pass is a genuine update.
    next(record_processor_inst._process_member(member_record))

    result_member = next(
        record_processor_inst._process_member(updated_member_record))
    self.assertEqual('Bill Smith', result_member['author_name'])
    self.assertEqual('Rackspace', result_member['company_name'])

    result_user = utils.load_user(
        record_processor_inst.runtime_storage_inst, 'member:123456789')

    self.assertEqual('Bill Smith', result_user['user_name'])
    self.assertEqual('Rackspace', result_user['company_name'])
    self.assertEqual([{'company_name': 'Rackspace', 'end_date': 0}],
                     result_user['companies'])
|
||||
|
||||
def test_process_email_then_review(self):
|
||||
# it is expected that the user profile will contain both email and
|
||||
# LP id
|
||||
|
Loading…
x
Reference in New Issue
Block a user