
Allow async action execution to be paused and resumed by updating the action execution via API. When an action execution is paused, the state transition will cascade up to the task execution. Implements: blueprint mistral-action-ex-pause-resume Change-Id: I87233d27c46cfe86a23beb8dfdc96f77e58d24c1
478 lines
14 KiB
Python
478 lines
14 KiB
Python
# Copyright 2015 - Mirantis, Inc.
|
|
# Copyright 2015 - StackStorm, Inc.
|
|
# Copyright 2016 - Nokia Networks.
|
|
# Copyright 2016 - Brocade Communications Systems, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from oslo_log import log as logging
|
|
from osprofiler import profiler
|
|
import traceback as tb
|
|
|
|
from mistral.db.v2 import api as db_api
|
|
from mistral.db.v2.sqlalchemy import models
|
|
from mistral.engine import action_queue
|
|
from mistral.engine import tasks
|
|
from mistral.engine import workflow_handler as wf_handler
|
|
from mistral import exceptions as exc
|
|
from mistral.lang import parser as spec_parser
|
|
from mistral.services import scheduler
|
|
from mistral.workflow import base as wf_base
|
|
from mistral.workflow import commands as wf_cmds
|
|
from mistral.workflow import states
|
|
|
|
|
|
"""Responsible for running tasks and handling results."""
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
_REFRESH_TASK_STATE_PATH = (
|
|
'mistral.engine.task_handler._refresh_task_state'
|
|
)
|
|
|
|
_SCHEDULED_ON_ACTION_COMPLETE_PATH = (
|
|
'mistral.engine.task_handler._scheduled_on_action_complete'
|
|
)
|
|
|
|
_SCHEDULED_ON_ACTION_UPDATE_PATH = (
|
|
'mistral.engine.task_handler._scheduled_on_action_update'
|
|
)
|
|
|
|
|
|
@profiler.trace('task-handler-run-task', hide_args=True)
|
|
def run_task(wf_cmd):
|
|
"""Runs workflow task.
|
|
|
|
:param wf_cmd: Workflow command.
|
|
"""
|
|
|
|
task = _build_task_from_command(wf_cmd)
|
|
|
|
try:
|
|
task.run()
|
|
except exc.MistralException as e:
|
|
wf_ex = wf_cmd.wf_ex
|
|
task_spec = wf_cmd.task_spec
|
|
|
|
msg = (
|
|
"Failed to run task [error=%s, wf=%s, task=%s]:\n%s" %
|
|
(e, wf_ex.name, task_spec.get_name(), tb.format_exc())
|
|
)
|
|
|
|
LOG.error(msg)
|
|
|
|
task.set_state(states.ERROR, msg)
|
|
|
|
wf_handler.force_fail_workflow(wf_ex, msg)
|
|
|
|
return
|
|
|
|
if task.is_waiting() and (task.is_created() or task.is_state_changed()):
|
|
_schedule_refresh_task_state(task.task_ex, 1)
|
|
|
|
|
|
@profiler.trace('task-handler-on-action-complete', hide_args=True)
|
|
def _on_action_complete(action_ex):
|
|
"""Handles action completion event.
|
|
|
|
:param action_ex: Action execution.
|
|
"""
|
|
|
|
task_ex = action_ex.task_execution
|
|
|
|
if not task_ex:
|
|
return
|
|
|
|
task_spec = spec_parser.get_task_spec(task_ex.spec)
|
|
|
|
wf_ex = task_ex.workflow_execution
|
|
|
|
task = _create_task(
|
|
wf_ex,
|
|
spec_parser.get_workflow_spec_by_execution_id(wf_ex.id),
|
|
task_spec,
|
|
task_ex.in_context,
|
|
task_ex
|
|
)
|
|
|
|
try:
|
|
task.on_action_complete(action_ex)
|
|
except exc.MistralException as e:
|
|
wf_ex = task_ex.workflow_execution
|
|
|
|
msg = ("Failed to handle action completion [error=%s, wf=%s, task=%s,"
|
|
" action=%s]:\n%s" %
|
|
(e, wf_ex.name, task_ex.name, action_ex.name, tb.format_exc()))
|
|
|
|
LOG.error(msg)
|
|
|
|
task.set_state(states.ERROR, msg)
|
|
|
|
wf_handler.force_fail_workflow(wf_ex, msg)
|
|
|
|
|
|
@profiler.trace('task-handler-on-action-update', hide_args=True)
|
|
def _on_action_update(action_ex):
|
|
"""Handles action update event.
|
|
|
|
:param action_ex: Action execution.
|
|
"""
|
|
|
|
task_ex = action_ex.task_execution
|
|
|
|
if not task_ex:
|
|
return
|
|
|
|
task_spec = spec_parser.get_task_spec(task_ex.spec)
|
|
|
|
wf_ex = task_ex.workflow_execution
|
|
|
|
task = _create_task(
|
|
wf_ex,
|
|
spec_parser.get_workflow_spec_by_execution_id(wf_ex.id),
|
|
task_spec,
|
|
task_ex.in_context,
|
|
task_ex
|
|
)
|
|
|
|
try:
|
|
task.on_action_update(action_ex)
|
|
except exc.MistralException as e:
|
|
wf_ex = task_ex.workflow_execution
|
|
|
|
msg = ("Failed to handle action update [error=%s, wf=%s, task=%s,"
|
|
" action=%s]:\n%s" %
|
|
(e, wf_ex.name, task_ex.name, action_ex.name, tb.format_exc()))
|
|
|
|
LOG.error(msg)
|
|
|
|
task.set_state(states.ERROR, msg)
|
|
|
|
wf_handler.force_fail_workflow(wf_ex, msg)
|
|
|
|
return
|
|
|
|
|
|
def force_fail_task(task_ex, msg):
|
|
"""Forces the given task to fail.
|
|
|
|
This method implements the 'forced' task fail without giving a chance
|
|
to a workflow controller to handle the error. Its main purpose is to
|
|
reflect errors caused by workflow structure (errors 'publish', 'on-xxx'
|
|
clauses etc.) rather than failed actions. If such an error happens
|
|
we should also force the entire workflow to fail. I.e., this kind of
|
|
error must be propagated to a higher level, to the workflow.
|
|
|
|
:param task_ex: Task execution.
|
|
:param msg: Error message.
|
|
"""
|
|
wf_spec = spec_parser.get_workflow_spec_by_execution_id(
|
|
task_ex.workflow_execution_id
|
|
)
|
|
|
|
task = _build_task_from_execution(wf_spec, task_ex)
|
|
|
|
task.set_state(states.ERROR, msg)
|
|
|
|
wf_handler.force_fail_workflow(task_ex.workflow_execution, msg)
|
|
|
|
|
|
def continue_task(task_ex):
|
|
wf_spec = spec_parser.get_workflow_spec_by_execution_id(
|
|
task_ex.workflow_execution_id
|
|
)
|
|
|
|
task = _build_task_from_execution(wf_spec, task_ex)
|
|
|
|
try:
|
|
task.set_state(states.RUNNING, None)
|
|
|
|
task.run()
|
|
except exc.MistralException as e:
|
|
wf_ex = task_ex.workflow_execution
|
|
|
|
msg = (
|
|
"Failed to run task [error=%s, wf=%s, task=%s]:\n%s" %
|
|
(e, wf_ex.name, task_ex.name, tb.format_exc())
|
|
)
|
|
|
|
LOG.error(msg)
|
|
|
|
task.set_state(states.ERROR, msg)
|
|
|
|
wf_handler.force_fail_workflow(wf_ex, msg)
|
|
|
|
return
|
|
|
|
|
|
def complete_task(task_ex, state, state_info):
|
|
wf_spec = spec_parser.get_workflow_spec_by_execution_id(
|
|
task_ex.workflow_execution_id
|
|
)
|
|
|
|
task = _build_task_from_execution(wf_spec, task_ex)
|
|
|
|
try:
|
|
task.complete(state, state_info)
|
|
except exc.MistralException as e:
|
|
wf_ex = task_ex.workflow_execution
|
|
|
|
msg = (
|
|
"Failed to complete task [error=%s, wf=%s, task=%s]:\n%s" %
|
|
(e, wf_ex.name, task_ex.name, tb.format_exc())
|
|
)
|
|
|
|
LOG.error(msg)
|
|
|
|
task.set_state(states.ERROR, msg)
|
|
|
|
wf_handler.force_fail_workflow(wf_ex, msg)
|
|
|
|
return
|
|
|
|
|
|
def _build_task_from_execution(wf_spec, task_ex):
|
|
return _create_task(
|
|
task_ex.workflow_execution,
|
|
wf_spec,
|
|
wf_spec.get_task(task_ex.name),
|
|
task_ex.in_context,
|
|
task_ex
|
|
)
|
|
|
|
|
|
@profiler.trace('task-handler-build-task-from-command', hide_args=True)
|
|
def _build_task_from_command(cmd):
|
|
if isinstance(cmd, wf_cmds.RunExistingTask):
|
|
task = _create_task(
|
|
cmd.wf_ex,
|
|
cmd.wf_spec,
|
|
spec_parser.get_task_spec(cmd.task_ex.spec),
|
|
cmd.ctx,
|
|
task_ex=cmd.task_ex,
|
|
unique_key=cmd.task_ex.unique_key,
|
|
waiting=cmd.task_ex.state == states.WAITING,
|
|
triggered_by=cmd.triggered_by
|
|
)
|
|
|
|
if cmd.reset:
|
|
task.reset()
|
|
|
|
return task
|
|
|
|
if isinstance(cmd, wf_cmds.RunTask):
|
|
task = _create_task(
|
|
cmd.wf_ex,
|
|
cmd.wf_spec,
|
|
cmd.task_spec,
|
|
cmd.ctx,
|
|
unique_key=cmd.unique_key,
|
|
waiting=cmd.is_waiting(),
|
|
triggered_by=cmd.triggered_by
|
|
)
|
|
|
|
return task
|
|
|
|
raise exc.MistralError('Unsupported workflow command: %s' % cmd)
|
|
|
|
|
|
def _create_task(wf_ex, wf_spec, task_spec, ctx, task_ex=None,
|
|
unique_key=None, waiting=False, triggered_by=None):
|
|
if task_spec.get_with_items():
|
|
cls = tasks.WithItemsTask
|
|
else:
|
|
cls = tasks.RegularTask
|
|
|
|
return cls(
|
|
wf_ex,
|
|
wf_spec,
|
|
task_spec,
|
|
ctx,
|
|
task_ex=task_ex,
|
|
unique_key=unique_key,
|
|
waiting=waiting,
|
|
triggered_by=triggered_by
|
|
)
|
|
|
|
|
|
@action_queue.process
|
|
@profiler.trace('task-handler-refresh-task-state', hide_args=True)
|
|
def _refresh_task_state(task_ex_id):
|
|
with db_api.transaction():
|
|
task_ex = db_api.load_task_execution(task_ex_id)
|
|
|
|
if not task_ex:
|
|
return
|
|
|
|
wf_ex = task_ex.workflow_execution
|
|
|
|
if states.is_completed(wf_ex.state):
|
|
return
|
|
|
|
wf_spec = spec_parser.get_workflow_spec_by_execution_id(
|
|
task_ex.workflow_execution_id
|
|
)
|
|
|
|
wf_ctrl = wf_base.get_controller(wf_ex, wf_spec)
|
|
|
|
log_state = wf_ctrl.get_logical_task_state(
|
|
task_ex
|
|
)
|
|
|
|
state = log_state.state
|
|
state_info = log_state.state_info
|
|
|
|
# Update 'triggered_by' because it could have changed.
|
|
task_ex.runtime_context['triggered_by'] = log_state.triggered_by
|
|
|
|
if state == states.RUNNING:
|
|
continue_task(task_ex)
|
|
elif state == states.ERROR:
|
|
complete_task(task_ex, state, state_info)
|
|
elif state == states.WAITING:
|
|
# Let's assume that a task takes 0.01 sec in average to complete
|
|
# and based on this assumption calculate a time of the next check.
|
|
# The estimation is very rough, of course, but this delay will be
|
|
# decreasing as task preconditions will be completing which will
|
|
# give a decent asymptotic approximation.
|
|
# For example, if a 'join' task has 100 inbound incomplete tasks
|
|
# then the next 'refresh_task_state' call will happen in 10
|
|
# seconds. For 500 tasks it will be 50 seconds. The larger the
|
|
# workflow is, the more beneficial this mechanism will be.
|
|
delay = int(log_state.cardinality * 0.01)
|
|
|
|
_schedule_refresh_task_state(task_ex, max(1, delay))
|
|
else:
|
|
# Must never get here.
|
|
raise RuntimeError(
|
|
'Unexpected logical task state [task_ex_id=%s, task_name=%s, '
|
|
'state=%s]' % (task_ex_id, task_ex.name, state)
|
|
)
|
|
|
|
|
|
def _schedule_refresh_task_state(task_ex, delay=0):
|
|
"""Schedules task preconditions check.
|
|
|
|
This method provides transactional decoupling of task preconditions
|
|
check from events that can potentially satisfy those preconditions.
|
|
|
|
It's needed in non-locking model in order to avoid 'phantom read'
|
|
phenomena when reading state of multiple tasks to see if a task that
|
|
depends on them can start. Just starting a separate transaction
|
|
without using scheduler is not safe due to concurrency window that
|
|
we'll have in this case (time between transactions) whereas scheduler
|
|
is a special component that is designed to be resistant to failures.
|
|
|
|
:param task_ex: Task execution.
|
|
:param delay: Delay.
|
|
"""
|
|
key = 'th_c_t_s_a-%s' % task_ex.id
|
|
|
|
scheduler.schedule_call(
|
|
None,
|
|
_REFRESH_TASK_STATE_PATH,
|
|
delay,
|
|
key=key,
|
|
task_ex_id=task_ex.id
|
|
)
|
|
|
|
|
|
@action_queue.process
|
|
def _scheduled_on_action_complete(action_ex_id, wf_action):
|
|
with db_api.transaction():
|
|
if wf_action:
|
|
action_ex = db_api.get_workflow_execution(action_ex_id)
|
|
else:
|
|
action_ex = db_api.get_action_execution(action_ex_id)
|
|
|
|
_on_action_complete(action_ex)
|
|
|
|
|
|
def schedule_on_action_complete(action_ex, delay=0):
|
|
"""Schedules task completion check.
|
|
|
|
This method provides transactional decoupling of action completion from
|
|
task completion check. It's needed in non-locking model in order to
|
|
avoid 'phantom read' phenomena when reading state of multiple actions
|
|
to see if a task is completed. Just starting a separate transaction
|
|
without using scheduler is not safe due to concurrency window that we'll
|
|
have in this case (time between transactions) whereas scheduler is a
|
|
special component that is designed to be resistant to failures.
|
|
|
|
:param action_ex: Action execution.
|
|
:param delay: Minimum amount of time before task completion check
|
|
should be made.
|
|
"""
|
|
|
|
# Optimization to avoid opening a new transaction if it's not needed.
|
|
if not action_ex.task_execution.spec.get('with-items'):
|
|
_on_action_complete(action_ex)
|
|
|
|
return
|
|
|
|
key = 'th_on_a_c-%s' % action_ex.task_execution_id
|
|
|
|
scheduler.schedule_call(
|
|
None,
|
|
_SCHEDULED_ON_ACTION_COMPLETE_PATH,
|
|
delay,
|
|
key=key,
|
|
action_ex_id=action_ex.id,
|
|
wf_action=isinstance(action_ex, models.WorkflowExecution)
|
|
)
|
|
|
|
|
|
@action_queue.process
|
|
def _scheduled_on_action_update(action_ex_id, wf_action):
|
|
with db_api.transaction():
|
|
if wf_action:
|
|
action_ex = db_api.get_workflow_execution(action_ex_id)
|
|
else:
|
|
action_ex = db_api.get_action_execution(action_ex_id)
|
|
|
|
_on_action_update(action_ex)
|
|
|
|
|
|
def schedule_on_action_update(action_ex, delay=0):
|
|
"""Schedules task update check.
|
|
|
|
This method provides transactional decoupling of action update from
|
|
task update check. It's needed in non-locking model in order to
|
|
avoid 'phantom read' phenomena when reading state of multiple actions
|
|
to see if a task is updated. Just starting a separate transaction
|
|
without using scheduler is not safe due to concurrency window that we'll
|
|
have in this case (time between transactions) whereas scheduler is a
|
|
special component that is designed to be resistant to failures.
|
|
|
|
:param action_ex: Action execution.
|
|
:param delay: Minimum amount of time before task update check
|
|
should be made.
|
|
"""
|
|
|
|
# Optimization to avoid opening a new transaction if it's not needed.
|
|
if not action_ex.task_execution.spec.get('with-items'):
|
|
_on_action_update(action_ex)
|
|
|
|
return
|
|
|
|
key = 'th_on_a_c-%s' % action_ex.task_execution_id
|
|
|
|
scheduler.schedule_call(
|
|
None,
|
|
_SCHEDULED_ON_ACTION_UPDATE_PATH,
|
|
delay,
|
|
key=key,
|
|
action_ex_id=action_ex.id,
|
|
wf_action=isinstance(action_ex, models.WorkflowExecution)
|
|
)
|