Scrape reviews the new way.

This commit is contained in:
Michael Still 2014-03-28 16:04:37 +11:00
parent 0da8fdff4e
commit c5fa8c4c93
4 changed files with 37 additions and 209 deletions

View File

@ -6,26 +6,7 @@ import sys
import urllib import urllib
import conf import conf
import utilities
def read_remote_lines(url):
    """Yield the lines of the document at ``url`` one at a time.

    The response is read in 100-byte chunks and split on '\n', so complete
    lines are yielded as soon as they arrive rather than after the whole
    body has been downloaded.  Yielded lines do not include the trailing
    newline.  A final line without a terminating newline is yielded only
    if it is non-empty.

    The connection is closed once the generator is exhausted, closed, or
    garbage collected.
    """
    remote = urllib.urlopen(url)
    try:
        pending = ''
        while True:
            chunk = remote.read(100)
            if not chunk:
                break

            pending += chunk
            if '\n' in pending:
                pieces = pending.split('\n')
                # Everything before the last piece is a complete line; the
                # last piece may be a partial line, so keep buffering it.
                for line in pieces[:-1]:
                    yield line
                pending = pieces[-1]

        # Flush whatever is left when the body did not end with a newline.
        if pending:
            yield pending
    finally:
        # The original never closed the response; release it explicitly.
        remote.close()
if __name__ == '__main__': if __name__ == '__main__':
@ -36,9 +17,9 @@ if __name__ == '__main__':
while day < datetime.datetime.now(): while day < datetime.datetime.now():
print 'Processing %s/%s/%s' % (day.year, day.month, day.day) print 'Processing %s/%s/%s' % (day.year, day.month, day.day)
for line in read_remote_lines('http://www.rcbops.com/gerrit/merged/' for line in utilities.read_remote_lines(
'%s/%s/%s' 'http://www.rcbops.com/gerrit/merged/%s/%s/%s'
% (day.year, day.month, day.day)): % (day.year, day.month, day.day)):
try: try:
j = json.loads(line) j = json.loads(line)
except: except:

View File

@ -85,11 +85,11 @@ while day < datetime.datetime.now():
day += one_day day += one_day
#for dirpath, subdirs, files in os.walk('merged'): for dirpath, subdirs, files in os.walk('merged'):
# for filename in files: for filename in files:
# if filename.endswith('.json'): if filename.endswith('.json'):
# continue continue
# changed_merge_files[os.path.join(dirpath, filename)] = True changed_merge_files[os.path.join(dirpath, filename)] = True
print 'Processing changed merge files' print 'Processing changed merge files'
for filename in changed_merge_files: for filename in changed_merge_files:
@ -117,11 +117,13 @@ for filename in changed_merge_files:
number = j['change']['number'] number = j['change']['number']
patchset = j['patchSet']['number'] patchset = j['patchSet']['number']
project = j['change']['project']
for approval in j.get('approvals', []): for approval in j.get('approvals', []):
reviews.setdefault(author, []) reviews.setdefault(author, [])
reviews[author].append({'number': number, reviews[author].append({'number': number,
'patchset': patchset, 'patchset': patchset,
'project': project,
'type': approval['type'], 'type': approval['type'],
'value': approval['value']}) 'value': approval['value']})

View File

@ -2,200 +2,45 @@
# Scrape review information from gerrit # Scrape review information from gerrit
import base64
import datetime import datetime
import hashlib
import json import json
import re
import sys
import time import time
import MySQLdb
import dbcachingexecute
import feedutils import feedutils
import sql import sql
def Reviews(component): def reviews():
cursor = feedutils.GetCursor() cursor = feedutils.GetCursor()
for l in dbcachingexecute.Execute(time.time() - 60,
'gerrit_query_approvals_json',
('ssh -i ~/.ssh/id_gerrit '
'review.openstack.org gerrit query '
'project:%s '
'--all-approvals --patch-sets '
'--format JSON'),
component, cleanup=True):
try: day = datetime.datetime.now()
d = json.loads(l) day -= datetime.timedelta(days=7)
except:
continue
if d.has_key('id'): while day < datetime.datetime.now():
b64 = base64.encodestring(l) print 'Processing %s/%s/%s' % (day.year, day.month, day.day)
checksum = hashlib.sha1(l).hexdigest() data = utilities.read_remote_file(
last_updated = datetime.datetime.fromtimestamp(d['lastUpdated']) 'http://www.rcbops.com/gerrit/merged/%s/%s/%s_reviews.json'
timestamp = sql.FormatSqlValue('timestamp', last_updated) % (day.year, day.month, day.day)):
insert = ('insert ignore into changes (changeid, timestamp, parsed, ' j = json.loads(data)
'checksum) values ("%s", %s, "%s", "%s");'
%(d['id'], timestamp, b64, checksum))
cursor.execute(insert)
if cursor.rowcount == 0:
cursor.execute('select * from changes where changeid="%s";'
% d['id'])
stored_checksum = cursor.fetchone()['checksum']
if checksum != stored_checksum:
cursor.execute('delete from changes where changeid="%s";'
% d['id'])
cursor.execute(insert)
cursor.execute('commit;')
for ps in d.get('patchSets', {}): for username in j:
patchset = ps.get('number') summary = {}
for review in j[username]:
summary.setdefault(review['project'], 0)
summary.setdefault('__total__', 0)
summary[review['project']] += 1
summary['__total__'] += 1
for review in ps.get('approvals', []): cursor.execute('delete from reviewsummary where '
# Deliberately leave the timezone alone here so its consistant 'username="%s" and day=date(%s);'
# with reports others generate. %(username, day))
updated_at = datetime.datetime.fromtimestamp(review['grantedOn']) cursor.execute('insert into reviewsummary'
username = review['by'].get('username', 'unknown') '(day, username, data, epoch) '
'values (date(%s), "%s", \'%s\', %d);'
if username in ['jenkins', 'smokestack']: %(day, username, json.dumps(summary),
continue int(time.time())))
cursor.execute('commit;')
timestamp = sql.FormatSqlValue('timestamp', updated_at)
score = review.get('value', 0)
cursor.execute('insert ignore into reviews '
'(changeid, username, timestamp, day, component, '
'patchset, score) '
'values ("%s", "%s", %s, date(%s), "%s", %s, %s);'
%(d['id'], username, timestamp, timestamp,
component, patchset, score))
if cursor.rowcount > 0:
# This is a new review, we assume we're the only writer
print 'New review from %s' % username
cursor.execute('select * from reviewsummary where '
'username="%s" and day=date(%s);'
%(username, timestamp))
if cursor.rowcount > 0:
row = cursor.fetchone()
summary = json.loads(row['data'])
else:
summary = {}
summary.setdefault(component, 0)
summary.setdefault('__total__', 0)
summary[component] += 1
summary['__total__'] += 1
cursor.execute('delete from reviewsummary where '
'username="%s" and day=date(%s);'
%(username, timestamp))
cursor.execute('insert into reviewsummary'
'(day, username, data, epoch) '
'values (date(%s), "%s", \'%s\', %d);'
%(timestamp, username,
json.dumps(summary),
int(time.time())))
cursor.execute('commit;')
if __name__ == '__main__': if __name__ == '__main__':
Reviews('openstack/heat-cfntools') reviews()
Reviews('openstack/heat')
Reviews('openstack/heat-templates')
Reviews('openstack/python-heatclient')
Reviews('openstack-infra/askbot-theme')
Reviews('openstack-infra/devstack-gate')
Reviews('openstack-infra/gear')
Reviews('openstack-infra/gerrit')
Reviews('openstack-infra/gerritbot')
Reviews('openstack-infra/gerritlib')
Reviews('openstack-infra/jeepyb')
Reviews('openstack-infra/gitdm')
Reviews('openstack-infra/git-review')
Reviews('openstack-infra/jenkins-job-builder')
Reviews('openstack-infra/lodgeit')
Reviews('openstack-infra/meetbot')
Reviews('openstack-infra/nose-html-output')
Reviews('openstack-infra/puppet-apparmor')
Reviews('openstack-infra/puppet-dashboard')
Reviews('openstack-infra/puppet-vcsrepo')
Reviews('openstack-infra/reviewday')
Reviews('openstack-infra/statusbot')
Reviews('openstack-infra/zmq-event-publisher')
Reviews('openstack-infra/zuul')
Reviews('openstack-dev/devstack')
Reviews('openstack-dev/grenade')
Reviews('openstack-dev/hacking')
Reviews('openstack-dev/pbr')
Reviews('openstack-dev/openstack-nose')
Reviews('openstack-dev/openstack-qa')
Reviews('openstack-dev/sandbox')
Reviews('openstack/api-site')
Reviews('openstack/ceilometer')
Reviews('openstack/cinder')
Reviews('openstack/compute-api')
Reviews('openstack/glance')
Reviews('openstack/horizon')
Reviews('openstack/identity-api')
Reviews('openstack/image-api')
Reviews('openstack/keystone')
Reviews('openstack/netconn-api')
Reviews('openstack/nova')
Reviews('openstack/object-api')
Reviews('openstack/openstack-chef')
Reviews('openstack-infra/config')
Reviews('openstack/openstack-manuals')
Reviews('openstack/openstack-planet')
Reviews('openstack/oslo-incubator')
Reviews('openstack/oslo.config')
Reviews('openstack/python-ceilometerclient')
Reviews('openstack/python-cinderclient')
Reviews('openstack/python-glanceclient')
Reviews('openstack/python-keystoneclient')
Reviews('openstack/python-novaclient')
Reviews('openstack/python-openstackclient')
Reviews('openstack/python-quantumclient')
Reviews('openstack/python-swiftclient')
Reviews('openstack/quantum')
Reviews('openstack/requirements')
Reviews('openstack/swift')
Reviews('openstack/tempest')
Reviews('openstack/volume-api')
Reviews('stackforge/MRaaS')
Reviews('stackforge/diskimage-builder')
Reviews('stackforge/tripleo-image-elements')
Reviews('stackforge/healthnmon')
Reviews('stackforge/libra')
Reviews('stackforge/python-libraclient')
Reviews('stackforge/marconi')
Reviews('stackforge/moniker')
Reviews('stackforge/python-monikerclient')
Reviews('stackforge/python-reddwarfclient')
Reviews('stackforge/reddwarf')
Reviews('stackforge/reddwarf-integration')
Reviews('stackforge/bufunfa')
Reviews('stackforge/kwapi')
Reviews('stackforge/climate')
Reviews('openstack-infra/gearman-plugin')
Reviews('stackforge/packstack')
Reviews('stackforge/database-api')
Reviews('stackforge/anvil')
Reviews('stackforge/savanna')
Reviews('stackforge/python-savannaclient')
Reviews('stackforge/os-config-applier')
Reviews('stackforge/os-refresh-config')
Reviews('stackforge/puppet-cinder')
Reviews('stackforge/puppet-glance')
Reviews('stackforge/puppet-horizon')
Reviews('stackforge/puppet-keystone')
Reviews('stackforge/puppet-nova')
Reviews('stackforge/puppet-openstack')
Reviews('stackforge/puppet-openstack_dev_env')
Reviews('stackforge/puppet-swift')
Reviews('stackforge/puppet-quantum')
Reviews('stackforge/opencafe')
Reviews('stackforge/cloudcafe')
Reviews('stackforge/cloudroast')