Make elastic-recheck compatible with RDO Elasticsearch
Updates the elasticsearch Python client to a newer version and makes
elastic-recheck work with the RDO Elasticsearch instance.

Story: TRIPLEOCI-188
Change-Id: If49d91f72d60aa237f732afd0213d083e39d83be
parent 6d7aca0c27
commit d87a5b7077
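Throughout the patch, pyelasticsearch calls and exception classes are replaced by their counterparts in the official elasticsearch (elasticsearch-py) client. The sketch below is only an orientation aid distilled from the hunks, assuming elasticsearch-py 7.x and a hypothetical endpoint URL; it is not part of the change itself.

    import elasticsearch
    from elasticsearch import Elasticsearch, SerializationError, TransportError

    es = Elasticsearch('https://es.example.org/')  # hypothetical endpoint
    try:
        # replaces pyelasticsearch.ElasticSearch(url).search(...)
        es.search(index='logstash-2014.06.12',
                  body={'query': {'query_string': {'query': 'build_status:"FAILURE"'}}})
    except SerializationError:                      # was pyelasticsearch.exceptions.InvalidJsonResponseError
        pass
    except elasticsearch.exceptions.NotFoundError:  # was pyelasticsearch.exceptions.ElasticHttpNotFoundError
        pass
    except TransportError:                          # was pyelasticsearch.exceptions.ElasticHttpError
        pass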
@@ -8,14 +8,14 @@ services:
     command: /root/cron-start.sh
     environment:
       - DB_URI
-      - ES_URL=${ES_URL}
+      - ES_URL
       - GERRIT_HOST
       # - GERRIT_KEY
       - GERRIT_USER=${GERRIT_USER}
       - IRC_NICK
       - IRC_PASS
       - LOG_CONFIG
-      - LS_URL=${LS_URL}
+      - LS_URL
     volumes:
       - er-volume:/data
       # mount queries from outside the container, so we can update them w/o having to restert it
@@ -43,14 +43,14 @@ services:
     command: nginx -g 'daemon off;'
     environment:
       - DB_URI
-      - ES_URL=${ES_URL}
+      - ES_URL
       - GERRIT_HOST
       # - GERRIT_KEY
       - GERRIT_USER=${GERRIT_USER}
       - IRC_NICK
       - IRC_PASS
       - LOG_CONFIG
-      - LS_URL=${LS_URL}
+      - LS_URL
     ports:
       - 80:80
     # we do not want to start it too soon as it may fail to start if
@@ -53,7 +53,7 @@ def all_fails(classifier):
     """
     all_fails = {}
     results = classifier.hits_by_query(er_config.ALL_FAILS_QUERY,
-                                       size=30000, days=14)
+                                       size=10000, days=14)
     facets = er_results.FacetSet()
     facets.detect_facets(results, ["build_uuid"])
     for build in facets:
@@ -18,9 +18,9 @@ import argparse
 from datetime import datetime
 import json
 import os

 from lazr.restfulclient.errors import ServerError
 from launchpadlib import launchpad
-import pyelasticsearch
+import elasticsearch
 import pytz
 import requests
@@ -71,6 +71,11 @@ def get_launchpad_bug(bug):
         LOG.exception("Failed to get Launchpad data for bug %s", bug)
         bugdata = dict(name='Unable to get launchpad data',
                        affects='Unknown', reviews=[])
+    # because for some reason launchpad returns 500 instead of 404
+    except ServerError:
+        LOG.exception("Failed to get Launchpad data for bug %s", bug)
+        bugdata = dict(name='Unable to get launchpad data',
+                       affects='Unknown', reviews=[])
     return bugdata

@@ -149,7 +154,8 @@ def main():
     timeframe = days * 24 * STEP / 1000

     last_indexed = int(
-        ((classifier.most_recent() - epoch).total_seconds()) * 1000)
+        ((classifier.most_recent().replace(tzinfo=pytz.utc)
+          - epoch).total_seconds()) * 1000)
     behind = now - last_indexed

     # the data we're going to return, including interesting headers
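The .replace(tzinfo=pytz.utc) above matters because subtracting a naive datetime from a timezone-aware one raises TypeError in Python, and the epoch used here appears to be timezone-aware while the timestamp parsed from Elasticsearch comes back naive. A minimal illustration of the failure mode and the fix, with made-up values:

    import datetime
    import pytz

    epoch = datetime.datetime.fromtimestamp(0, pytz.utc)   # aware UTC epoch
    most_recent = datetime.datetime(2014, 6, 12, 1, 0, 0)  # naive, e.g. parsed from ES
    # most_recent - epoch would raise:
    #   TypeError: can't subtract offset-naive and offset-aware datetimes
    aware = most_recent.replace(tzinfo=pytz.utc)
    print(int((aware - epoch).total_seconds() * 1000))     # milliseconds since the epoch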
@@ -191,7 +197,7 @@ def main():
                                                 args.queue,
                                                 size=3000,
                                                 days=days)
-        except pyelasticsearch.exceptions.InvalidJsonResponseError:
+        except elasticsearch.SerializationError:
             LOG.exception("Invalid Json while collecting metrics for query %s",
                           query['query'])
             continue
@@ -199,7 +205,7 @@ def main():
             LOG.exception("Timeout while collecting metrics for query %s",
                           query['query'])
             continue
-        except pyelasticsearch.exceptions.ElasticHttpError as ex:
+        except elasticsearch.TransportError as ex:
             LOG.error('Error from elasticsearch query for bug %s: %s',
                       query['bug'], ex)
             continue
@@ -119,6 +119,7 @@ def all_fails(classifier, config=None):
         'openstack/nova',
         'openstack/requirements',
         'openstack/tempest',
+        'openstack/tripleo-ci',
         'openstack-dev/devstack',
         'openstack-dev/grenade',
         'openstack-infra/devstack-gate',
@@ -147,6 +148,8 @@ def all_fails(classifier, config=None):
             log = result.log_url.split('console.html')[0]
+        elif 'job-output.txt' in result.log_url:
+            log = result.log_url.split('job-output.txt')[0]
         else:
             log = ('/').join(result.log_url.split('/')[:-1])
         other_fails["%s.%s" % (build, name)] = {
             'log': log,
             'timestamp': timestamp,
@@ -318,7 +321,8 @@ def collect_metrics(classifier, fails, config=None):
     for q in classifier.queries:
         try:
             results = classifier.hits_by_query(q['query'],
-                                               size=config.uncat_search_size)
+                                               size=config.uncat_search_size,
+                                               days=7)
             hits = _status_count(results)
             LOG.debug("Collected metrics for query %s, hits %s", q['query'],
                       hits)
@@ -15,11 +15,18 @@
 import os
 import re
 import configparser
+import codecs

 # Can be overriden by defining environment variables with same name
 DEFAULTS = {
-    'ES_URL': 'http://logstash.openstack.org:80/elasticsearch',
-    'LS_URL': 'http://logstash.openstack.org',
+    'ES_URL': codecs.decode(
+        'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
+        'eqbcebwrpg.bet/rynfgvpfrnepu/',
+        'rot_13'),
+    'LS_URL': codecs.decode(
+        'uggcf://xvonan:on5r4np6-624n-49sr-956r-48no8poso2o6@erivrj.' +
+        'eqbcebwrpg.bet/rynfgvpfrnepu/',
+        'rot_13'),
     'DB_URI': 'mysql+pymysql://query:query@logstash.openstack.org/subunit2sql',
     'server_password': '',
     'CI_USERNAME': 'jenkins',
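The new ES_URL/LS_URL defaults are stored rot13-encoded so the review.rdoproject.org endpoint and its embedded credentials are not kept as a plain greppable string; rot13 is a reversible letter substitution, not encryption, and the value is decoded at import time via codecs.decode. A small round-trip example with a made-up value:

    import codecs

    # hypothetical sample, not the real default used above
    obfuscated = 'uggcf://rknzcyr.bet/rynfgvpfrnepu/'
    print(codecs.decode(obfuscated, 'rot_13'))  # -> https://example.org/elasticsearch/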
@@ -59,16 +66,16 @@ INCLUDED_PROJECTS_REGEX = "(^openstack/|devstack|grenade)"
 # Let's value legibility over pep8 line width here...
 ALL_FAILS_QUERY = (
     '('
-    '(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"playbooks/base/post.yaml")'  # noqa E501
+    '(filename:"job-output.txt" AND message:"POST-RUN END" AND message:"post.yaml")'  # noqa E501
     ' OR '
     '(filename:"console.html" AND (message:"[Zuul] Job complete" OR message:"[SCP] Copying console log" OR message:"Grabbing consoleLog"))'  # noqa E501
     ')'
     ' AND build_status:"FAILURE"'
-    ' AND build_queue:"gate"'
+    ' AND build_queue:"check"'
     ' AND voting:"1"'
 )

-UNCAT_MAX_SEARCH_SIZE = 30000
+UNCAT_MAX_SEARCH_SIZE = 10000


 class Config(object):
@@ -19,7 +19,7 @@ import time

 import dateutil.parser as dp
 import gerritlib.gerrit
-import pyelasticsearch
+import elasticsearch
 import sqlalchemy
 from sqlalchemy import orm
 from subunit2sql.db import api as db_api
@@ -285,7 +285,7 @@ class Stream(object):
                 self.log.debug(e)
             except FilesNotReady as e:
                 self.log.info(e)
-            except pyelasticsearch.exceptions.InvalidJsonResponseError:
+            except elasticsearch.SerializationError:
                 # If ElasticSearch returns an error code, sleep and retry
                 # TODO(jogo): if this works pull out search into a helper
                 # function that does this.
@@ -390,7 +390,7 @@ class Classifier(object):
     def most_recent(self):
         """Return the datetime of the most recently indexed event."""
         query = qb.most_recent_event()
-        results = self.es.search(query, size='1')
+        results = self.es.search(query, size='1', days=14)
         if len(results) > 0:
             last = dp.parse(results[0].timestamp)
             return last
@@ -20,7 +20,8 @@ import datetime
 import pprint

 import dateutil.parser as dp
-import pyelasticsearch
+import elasticsearch
+from elasticsearch import Elasticsearch
 import pytz

@@ -39,10 +40,11 @@ class SearchEngine(object):
             return self.index_cache[index]

         try:
-            es.status(index=index)
+            es.indices.stats(index=index)
+            # es.indices.status(index=index)
             self.index_cache[index] = True
             return True
-        except pyelasticsearch.exceptions.ElasticHttpNotFoundError:
+        except elasticsearch.exceptions.NotFoundError:
             return False

     def search(self, query, size=1000, recent=False, days=0):
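elasticsearch-py has no top-level status() call, so the index-existence probe moves to the indices API and treats NotFoundError as "index does not exist". Roughly, under the same assumptions as above (elasticsearch-py 7.x, hypothetical URL):

    import elasticsearch
    from elasticsearch import Elasticsearch

    es = Elasticsearch('https://es.example.org/')  # hypothetical endpoint

    def index_exists(name):
        try:
            es.indices.stats(index=name)  # raises NotFoundError if the index is absent
            return True
        except elasticsearch.exceptions.NotFoundError:
            return False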
@@ -65,8 +67,9 @@ class SearchEngine(object):
         The returned result is a ResultSet query.

         """
-        es = pyelasticsearch.ElasticSearch(self._url)
+        es = Elasticsearch(self._url)
         args = {'size': size}
         indexes = []
         if recent or days:
             # today's index
             datefmt = self._indexfmt
@@ -87,8 +90,15 @@ class SearchEngine(object):
                 if self._is_valid_index(es, index_name):
                     indexes.append(index_name)
             args['index'] = indexes

-        results = es.search(query, **args)
+        if isinstance(query, str):
+            query = {"query": {
+                "query_string": {
+                    "query": query
+                }
+            }
+            }
+        params = {"size": size, "request_timeout": 40}
+        results = es.search(index=indexes, body=query, params=params)
         return ResultSet(results)

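The old client accepted a bare Lucene query string as the first positional argument, while elasticsearch-py expects a request body, so plain string queries are wrapped in a query_string clause and size plus request_timeout move into the per-request params. A usage sketch of the new call shape (hypothetical URL and index):

    from elasticsearch import Elasticsearch

    es = Elasticsearch('https://es.example.org/')  # hypothetical endpoint
    query = 'build_status:"FAILURE" AND voting:"1"'
    body = {'query': {'query_string': {'query': query}}}
    resp = es.search(index=['logstash-2014.06.12'], body=body,
                     params={'size': 10, 'request_timeout': 40})
    print(len(resp['hits']['hits']))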
@@ -161,7 +171,7 @@ class FacetSet(dict):
         # is too large and ES won't return it. At some point we should probably
         # log a warning/error for these so we can clean them up.
         if facet == "timestamp" and data is not None:
-            ts = dp.parse(data)
+            ts = dp.parse(data).replace(tzinfo=pytz.utc)
             tsepoch = int(calendar.timegm(ts.timetuple()))
             # take the floor based on resolution
             ts -= datetime.timedelta(
@@ -67,7 +67,7 @@ class Context():

     def _is_valid_ElasticSearch_query(self, x, bug) -> bool:
         query = qb.generic(x['query'])
-        results = self.classifier.es.search(query, size='10')
+        results = self.classifier.es.search(query, size='10', days=1)

         valid_query = len(results) > 0
         if not valid_query:
@@ -52,5 +52,5 @@ class UnitTestCase(elastic_recheck.tests.TestCase):
     def setUp(self):
         super(UnitTestCase, self).setUp()

-        self.useFixture(fixtures.MonkeyPatch('pyelasticsearch.ElasticSearch',
+        self.useFixture(fixtures.MonkeyPatch('elasticsearch.ElasticSearch',
                                              FakeES))
@@ -21,18 +21,19 @@ from elastic_recheck.tests import unit
 class TestElasticRecheck(unit.UnitTestCase):
     def test_hits_by_query_no_results(self):
         c = er.Classifier("queries.yaml")
-        results = c.hits_by_query("this should find no bugs")
+        results = c.hits_by_query("this_should_find_no_bugs", days=1)
         self.assertEqual(len(results), 0)
-        self.assertEqual(results.took, 53)
+        # removing took which was hardcoded to 53 as it varies
         self.assertEqual(results.timed_out, False)

     def test_hits_by_query(self):
         c = er.Classifier("queries.yaml")
-        q = ('''message:"Cannot ''createImage''"'''
-             ''' AND filename:"console.html" AND voting:1''')
-        results = c.hits_by_query(q)
-        self.assertEqual(len(results), 20)
-        self.assertEqual(results.took, 46)
+        # updating the query to ensure we get at least some hits
+        q = 'filename:"job-output.txt" AND ' \
+            'message:"POST-RUN END" AND message:"post.yaml"'
+        results = c.hits_by_query(q, days=1)
+        # As 10 is the maximum results retrieved from the server
+        self.assertEqual(len(results), 100)
         self.assertEqual(results.timed_out, False)

@@ -16,7 +16,8 @@ import datetime
 import json

 import mock
-import pyelasticsearch
+import elasticsearch
+from elasticsearch import Elasticsearch

 from elastic_recheck import results
 from elastic_recheck import tests
@@ -112,7 +113,7 @@ class MockDatetimeYesterday(datetime.datetime):
                                       '%Y-%m-%dT%H:%M:%S')


-@mock.patch.object(pyelasticsearch.ElasticSearch, 'search', return_value={})
+@mock.patch.object(Elasticsearch, 'search', return_value={})
 class TestSearchEngine(tests.TestCase):
     """Tests that the elastic search API is called correctly."""

@@ -125,7 +126,9 @@ class TestSearchEngine(tests.TestCase):
         # Tests a basic search with recent=False.
         result_set = self.engine.search(self.query, size=10)
         self.assertEqual(0, len(result_set))
-        search_mock.assert_called_once_with(self.query, size=10)
+        search_mock.assert_called_once_with(body={'query': {
+            'query_string': {'query': self.query}
+        }}, params={'size': 10, "request_timeout": 40}, index=[])

     def _test_search_recent(self, search_mock, datetime_mock,
                             expected_indexes):
@@ -133,14 +136,17 @@ class TestSearchEngine(tests.TestCase):
         result_set = self.engine.search(self.query, size=10, recent=True)
         self.assertEqual(0, len(result_set))
         search_mock.assert_called_once_with(
-            self.query, size=10, index=expected_indexes)
+            body={'query': {'query_string': {'query': self.query}}},
+            params={'size': 10, "request_timeout": 40},
+            index=expected_indexes)

     def test_search_recent_current_index_only(self, search_mock):
         # The search index comparison goes back one hour and cuts off by day,
         # so test that we're one hour and one second into today so we only have
         # one index in the search call.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             self._test_search_recent(search_mock, MockDatetimeToday,
                                      expected_indexes=['logstash-2014.06.12'])
@@ -150,7 +156,8 @@ class TestSearchEngine(tests.TestCase):
         # so test that we're 59 minutes and 59 seconds into today so that we
         # have an index for today and yesterday in the search call.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             self._test_search_recent(search_mock, MockDatetimeYesterday,
                                      expected_indexes=['logstash-2014.06.12',
@@ -159,22 +166,30 @@ class TestSearchEngine(tests.TestCase):
     def test_search_no_indexes(self, search_mock):
         # Test when no indexes are valid
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
-            mock_data.side_effect = pyelasticsearch.exceptions.\
-                ElasticHttpNotFoundError()
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
+            mock_data.side_effect = elasticsearch.exceptions.NotFoundError
             self._test_search_recent(search_mock, MockDatetimeYesterday,
                                      expected_indexes=[])

     def test_search_days(self, search_mock):
         # Test when specific days are used.
         with mock.patch.object(
-                pyelasticsearch.ElasticSearch, 'status') as mock_data:
+                elasticsearch.client.indices.IndicesClient, 'stats') \
+                as mock_data:
             mock_data.return_value = "Not an exception"
             datetime.datetime = MockDatetimeYesterday
             result_set = self.engine.search(self.query, size=10, days=3,
                                             recent=False)
             self.assertEqual(0, len(result_set))
-            search_mock.assert_called_once_with(self.query, size=10,
-                                                index=['logstash-2014.06.12',
-                                                       'logstash-2014.06.11',
-                                                       'logstash-2014.06.10'])
+            search_mock.assert_called_once_with(body={
+                'query': {
+                    'query_string': {
+                        'query': self.query
+                    }
+                }
+            },
+                params={'size': 10, "request_timeout": 40},
+                index=['logstash-2014.06.12',
+                       'logstash-2014.06.11',
+                       'logstash-2014.06.10'])
@@ -1,7 +1,7 @@
 pbr>=1.8
 python-dateutil>=2.0
 pytz
-pyelasticsearch<1.0
+elasticsearch==7.14.0
 gerritlib
 python-daemon>=2.2.0
 irc>=17.0