#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
| This file is part of the web2py Web Framework
| Created by niphlod@gmail.com
| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)

Scheduler with redis backend
---------------------------------
"""
from __future__ import print_function

import os
import time
import socket
import datetime
import logging
from json import loads, dumps
from gluon.utils import web2py_uuid
from gluon.storage import Storage
from gluon.scheduler import *
from gluon.scheduler import _decode_dict
from gluon.contrib.redis_utils import RWatchError

USAGE = """
## Example

For any existing app

Create File: app/models/scheduler.py ======
from gluon.contrib.redis_utils import RConn
from gluon.contrib.redis_scheduler import RScheduler

def demo1(*args, **vars):
    print('you passed args=%s and vars=%s' % (args, vars))
    return 'done!'

def demo2():
    1/0

rconn = RConn()
mysched = RScheduler(db, dict(demo1=demo1, demo2=demo2), ...., redis_conn=rconn)

## run worker nodes with:

cd web2py
python web2py.py -K app

"""

path = os.getcwd()

if 'WEB2PY_PATH' not in os.environ:
    os.environ['WEB2PY_PATH'] = path

IDENTIFIER = "%s#%s" % (socket.gethostname(), os.getpid())

logger = logging.getLogger('web2py.scheduler.%s' % IDENTIFIER)

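# POLLING is an extra worker status specific to this redis backend:
# pop_task() reports it while the worker is blocked on BRPOPLPUSH, waiting
# for a task id to show up on one of its group queues.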
POLLING = 'POLLING'


class RScheduler(Scheduler):

    def __init__(self, db, tasks=None, migrate=True,
                 worker_name=None, group_names=None, heartbeat=HEARTBEAT,
                 max_empty_runs=0, discard_results=False, utc_time=False,
                 redis_conn=None, mode=1):

        """
        Highly-experimental coordination with redis.
        Takes all args from Scheduler except redis_conn, which
        must be something close to a StrictRedis instance.

        My only regret - and the reason why I kept this under the hood for a
        while - is that it's hard to hook up in web2py to something happening
        right after the commit to a table, which would enable this version of
        the scheduler to process "immediate" tasks right away instead of
        waiting a few seconds (see FIXME in queue_task()).

        mode is reserved for future usage patterns.
        Right now it moves the coordination of workers (which is the most
        IPC-intensive routine in the scheduler) to redis.
        I'd like to have incrementally redis-backed modes of operation,
        such as, e.g.:

        - 1: IPC through redis (the current implementation)
        - 2: store task results in redis (which will relieve further pressure
             from the db, leaving the scheduler_run table empty, and possibly
             keep things smooth, as task results can be set to expire
             after a bit of time)
        - 3: move all the logic for storing and queueing tasks to redis
             itself - which means no scheduler_task usage either - and use
             the database only for historical record-keeping
             (e.g. for reporting)

        As usual, I'm eager to see your comments.
        """

        Scheduler.__init__(self, db, tasks=tasks, migrate=migrate,
                           worker_name=worker_name, group_names=group_names,
                           heartbeat=heartbeat, max_empty_runs=max_empty_runs,
                           discard_results=discard_results, utc_time=utc_time)

        self.r_server = redis_conn
        from gluon import current
        self._application = current.request.application or 'appname'

    def _nkey(self, key):
        """Helper to restrict all keys to a namespace and track them."""
        prefix = 'w2p:rsched:%s' % self._application
        allkeys = '%s:allkeys' % prefix
        newkey = "%s:%s" % (prefix, key)
        self.r_server.sadd(allkeys, newkey)
        return newkey
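
    # For illustration, assuming an app named 'myapp', _nkey() produces keys
    # such as:
    #   w2p:rsched:myapp:allkeys              (set tracking every key created)
    #   w2p:rsched:myapp:worker_statuses      (set of worker status hash keys)
    #   w2p:rsched:myapp:queued:<group_name>  (per-group queue of task ids)
    # Every generated key is also added to the allkeys set, so prune_all()
    # can wipe the whole namespace in one pass.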

    def prune_all(self):
        """Global housekeeping."""
        all_keys = self._nkey('allkeys')
        with self.r_server.pipeline() as pipe:
            while True:
                try:
                    pipe.watch('PRUNE_ALL')
                    while True:
                        k = pipe.spop(all_keys)
                        if k is None:
                            break
                        pipe.delete(k)
                    pipe.execute()
                    break
                except RWatchError:
                    time.sleep(0.1)
                    continue
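
    # The pipeline/WATCH/execute/except-RWatchError pattern above recurs
    # throughout this class: a sentinel key (here 'PRUNE_ALL') is WATCHed,
    # the commands are queued on the pipeline, and the whole transaction is
    # retried after a short sleep if another client touched the watched key
    # in the meantime.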

    def dt2str(self, value):
        return value.strftime('%Y-%m-%d %H:%M:%S')

    def str2date(self, value):
        return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')

    def send_heartbeat(self, counter):
        """
        Coordinate workers in redis.

        It has evolved into something that is not that simple anymore.
        Here we try to do everything we need in a single transaction,
        and retry that transaction if something goes wrong.
        """
        with self.r_server.pipeline() as pipe:
            while True:
                try:
                    pipe.watch('SEND_HEARTBEAT')
                    self.inner_send_heartbeat(counter, pipe)
                    pipe.execute()
                    self.adj_hibernation()
                    self.sleep()
                    break
                except RWatchError:
                    time.sleep(0.1)
                    continue

    def inner_send_heartbeat(self, counter, pipe):
        """
        Do a few things in the "maintenance" thread.

        Specifically:
        - registers the worker
        - accepts commands sent to workers (KILL, TERMINATE, PICK, DISABLED, etc.)
        - adjusts sleep
        - saves stats
        - elects the master (ticker)
        - does "housecleaning" for dead workers
        - triggers task assignment
        """
        r_server = pipe
        status_keyset = self._nkey('worker_statuses')
        status_key = self._nkey('worker_status:%s' % (self.worker_name))
        now = self.now()
        mybackedstatus = r_server.hgetall(status_key)
        if not mybackedstatus:
            r_server.hmset(
                status_key,
                dict(
                    status=ACTIVE, worker_name=self.worker_name,
                    first_heartbeat=self.dt2str(now),
                    last_heartbeat=self.dt2str(now),
                    group_names=dumps(self.group_names), is_ticker=False,
                    worker_stats=dumps(self.w_stats))
            )
            r_server.sadd(status_keyset, status_key)
            if not self.w_stats.status == POLLING:
                self.w_stats.status = ACTIVE
            self.w_stats.sleep = self.heartbeat
            mybackedstatus = ACTIVE
        else:
            mybackedstatus = mybackedstatus['status']
            if mybackedstatus == DISABLED:
                # keep sleeping
                self.w_stats.status = DISABLED
                r_server.hmset(
                    status_key,
                    dict(last_heartbeat=self.dt2str(now),
                         worker_stats=dumps(self.w_stats))
                )
            elif mybackedstatus == TERMINATE:
                self.w_stats.status = TERMINATE
                logger.debug("Waiting to terminate the current task")
                self.give_up()
            elif mybackedstatus == KILL:
                self.w_stats.status = KILL
                self.die()
            else:
                if mybackedstatus == STOP_TASK:
                    logger.info('Asked to kill the current task')
                    self.terminate_process()
                logger.info('........recording heartbeat (%s)',
                            self.w_stats.status)
                r_server.hmset(
                    status_key,
                    dict(
                        last_heartbeat=self.dt2str(now), status=ACTIVE,
                        worker_stats=dumps(self.w_stats)
                    )
                )
                # newroutine: give the status key a TTL, so workers that die
                # without cleaning up eventually disappear from redis
                r_server.expire(status_key, self.heartbeat * 3 * 15)
                self.w_stats.sleep = self.heartbeat  # re-activating the process
                if self.w_stats.status not in (RUNNING, POLLING):
                    self.w_stats.status = ACTIVE

        self.do_assign_tasks = False
        if counter % 5 == 0 or mybackedstatus == PICK:
            try:
                logger.info(
                    ' freeing workers that have not sent heartbeat')
                registered_workers = r_server.smembers(status_keyset)
                allkeys = self._nkey('allkeys')
                for worker in registered_workers:
                    w = r_server.hgetall(worker)
                    w = Storage(w)
                    if not w:
                        r_server.srem(status_keyset, worker)
                        logger.info('removing %s from %s', worker, allkeys)
                        r_server.srem(allkeys, worker)
                        continue
                try:
                    self.is_a_ticker = self.being_a_ticker(pipe)
                except:
                    pass
                if self.w_stats.status in (ACTIVE, POLLING):
                    self.do_assign_tasks = True
                if self.is_a_ticker and self.do_assign_tasks:
                    # I'm a ticker, and 5 loops passed without reassigning tasks:
                    # let's do that and loop again
                    if not self.db_thread:
                        logger.debug('thread building own DAL object')
                        self.db_thread = DAL(
                            self.db._uri, folder=self.db._adapter.folder)
                        self.define_tables(self.db_thread, migrate=False)
                    db = self.db_thread
                    self.wrapped_assign_tasks(db)
                return None
            except:
                logger.error('Error assigning tasks')
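
    # Each worker's state lives in a redis hash at
    # w2p:rsched:<app>:worker_status:<worker_name>, with the fields
    # status, worker_name, first_heartbeat, last_heartbeat, group_names
    # (JSON), is_ticker and worker_stats (JSON); the set
    # w2p:rsched:<app>:worker_statuses indexes all such hashes.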

    def being_a_ticker(self, pipe):
        """
        Elects a ticker.

        This is slightly more convoluted than the original,
        but is far more efficient.
        """
        r_server = pipe
        status_keyset = self._nkey('worker_statuses')
        registered_workers = r_server.smembers(status_keyset)
        ticker = None
        all_active = []
        all_workers = []
        for worker in registered_workers:
            w = r_server.hgetall(worker)
            if w['worker_name'] != self.worker_name and w['status'] == ACTIVE:
                all_active.append(w)
            if w['is_ticker'] == 'True' and ticker is None:
                ticker = w
            all_workers.append(w)
        not_busy = self.w_stats.status in (ACTIVE, POLLING)
        if not ticker:
            if not_busy:
                # only if this worker isn't busy, otherwise wait for a free one
                for worker in all_workers:
                    key = self._nkey('worker_status:%s' % worker['worker_name'])
                    if worker['worker_name'] == self.worker_name:
                        r_server.hset(key, 'is_ticker', True)
                    else:
                        r_server.hset(key, 'is_ticker', False)
                logger.info("TICKER: I'm a ticker")
            else:
                # giving up, only if I'm not alone
                if len(all_active) > 1:
                    key = self._nkey('worker_status:%s' % (self.worker_name))
                    r_server.hset(key, 'is_ticker', False)
                else:
                    not_busy = True
            return not_busy
        else:
            logger.info(
                "%s is a ticker, I'm a poor worker" % ticker['worker_name'])
            return False

    def assign_tasks(self, db):
        """
        The real beauty.

        We don't need to ASSIGN tasks, we just put
        them into the relevant queue.
        """
        st, sd = db.scheduler_task, db.scheduler_task_deps
        r_server = self.r_server
        now = self.now()
        status_keyset = self._nkey('worker_statuses')
        with r_server.pipeline() as pipe:
            while True:
                try:
                    # making sure we're the only one doing the job
                    pipe.watch('ASSIGN_TASKS')
                    registered_workers = pipe.smembers(status_keyset)
                    all_workers = []
                    for worker in registered_workers:
                        w = pipe.hgetall(worker)
                        if w['status'] == ACTIVE:
                            all_workers.append(Storage(w))
                    pipe.execute()
                    break
                except RWatchError:
                    time.sleep(0.1)
                    continue

        # build workers as dict of groups
        wkgroups = {}
        for w in all_workers:
            group_names = loads(w.group_names)
            for gname in group_names:
                if gname not in wkgroups:
                    wkgroups[gname] = dict(
                        workers=[{'name': w.worker_name, 'c': 0}])
                else:
                    wkgroups[gname]['workers'].append(
                        {'name': w.worker_name, 'c': 0})
        # set queued tasks that expired between "runs" (i.e., you turned off
        # the scheduler): then it wasn't expired, but now it is
        db(
            (st.status.belongs((QUEUED, ASSIGNED))) &
            (st.stop_time < now)
        ).update(status=EXPIRED)

        # calculate dependencies
        deps_with_no_deps = db(
            (sd.can_visit == False) &
            (~sd.task_child.belongs(
                db(sd.can_visit == False)._select(sd.task_parent)
            ))
        )._select(sd.task_child)
        no_deps = db(
            (st.status.belongs((QUEUED, ASSIGNED))) &
            (
                (sd.id == None) | (st.id.belongs(deps_with_no_deps))
            )
        )._select(st.id, distinct=True, left=sd.on(
            (st.id == sd.task_parent) &
            (sd.can_visit == False)
        ))

        all_available = db(
            (st.status.belongs((QUEUED, ASSIGNED))) &
            (st.next_run_time <= now) &
            (st.enabled == True) &
            (st.id.belongs(no_deps))
        )

        limit = len(all_workers) * (50 / (len(wkgroups) or 1))

        # let's freeze it up
        db.commit()
        x = 0
        r_server = self.r_server
        for group in wkgroups.keys():
            queued_list = self._nkey('queued:%s' % group)
            queued_set = self._nkey('queued_set:%s' % group)
            # if tasks are already running, let's not assign them again
            running_list = self._nkey('running:%s' % group)
            while True:
                # the joys of rpoplpush!
                t = r_server.rpoplpush(running_list, queued_list)
                if not t:
                    # no more
                    break
                r_server.sadd(queued_set, t)

            tasks = all_available(st.group_name == group).select(
                limitby=(0, limit), orderby=st.next_run_time)

            # put tasks in the processing list

            for task in tasks:
                x += 1
                gname = task.group_name

                if r_server.sismember(queued_set, task.id):
                    # already queued, we don't put it on the list again
                    continue
                r_server.sadd(queued_set, task.id)
                r_server.lpush(queued_list, task.id)
                d = dict(status=QUEUED)
                if not task.task_name:
                    d['task_name'] = task.function_name
                db(
                    (st.id == task.id) &
                    (st.status.belongs((QUEUED, ASSIGNED)))
                ).update(**d)
            db.commit()
        # I didn't report tasks but I'm working nonetheless!
        if x > 0:
            self.w_stats.empty_runs = 0
        self.w_stats.queue = x
        self.w_stats.distribution = wkgroups
        self.w_stats.workers = len(all_workers)
        # I'll be greedy only if tasks queued are equal to the limit
        # (meaning there could be others ready to be queued)
        self.greedy = x >= limit
        logger.info('TICKER: workers are %s', len(all_workers))
        logger.info('TICKER: tasks are %s', x)
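
    # Per group, assign_tasks()/pop_task() use four namespaced redis keys:
    #   queued:<group>        list of task ids ready to be picked up
    #   queued_set:<group>    set mirroring the list, for O(1) membership tests
    #   running:<group>       list of task ids currently being processed
    #   running_dict:<group>  hash mapping task id -> worker_name running it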

    def pop_task(self, db):
        """Lift a task off a queue."""
        r_server = self.r_server
        st = self.db.scheduler_task
        task = None
        # ready to process something
        for group in self.group_names:
            queued_set = self._nkey('queued_set:%s' % group)
            queued_list = self._nkey('queued:%s' % group)
            running_list = self._nkey('running:%s' % group)
            running_dict = self._nkey('running_dict:%s' % group)
            self.w_stats.status = POLLING
            # the timeout is split among the groups, so that polling
            # still takes 1 minute in total however many groups are in
            logger.debug(' polling on %s', group)
            task_id = r_server.brpoplpush(queued_list, running_list,
                                          timeout=60 / len(self.group_names))
            logger.debug(' finished polling')
            self.w_stats.status = ACTIVE
            if task_id:
                r_server.hset(running_dict, task_id, self.worker_name)
                r_server.srem(queued_set, task_id)
                task = db(
                    (st.id == task_id) &
                    (st.status == QUEUED)
                ).select().first()
                if not task:
                    r_server.lrem(running_list, 0, task_id)
                    r_server.hdel(running_dict, task_id)
                    r_server.lrem(queued_list, 0, task_id)
                    logger.error("we received a task that isn't there (%s)",
                                 task_id)
                    return None
                break
        now = self.now()
        if task:
            task.update_record(status=RUNNING, last_run_time=now)
            # no one will touch my task!
            db.commit()
            logger.debug(' work to do %s', task.id)
        else:
            logger.info('nothing to do')
            return None
        times_run = task.times_run + 1
        if task.cronline:
            cron_recur = CronParser(task.cronline, now.replace(second=0))
            next_run_time = cron_recur.get_next()
        elif not task.prevent_drift:
            next_run_time = task.last_run_time + datetime.timedelta(
                seconds=task.period
            )
        else:
            # calc next_run_time based on available slots
            # see #1191
            next_run_time = task.start_time
            secondspassed = self.total_seconds(now - next_run_time)
            steps = secondspassed // task.period + 1
            next_run_time += datetime.timedelta(seconds=task.period * steps)

        if times_run < task.repeats or task.repeats == 0:
            # need to run again (repeating task)
            run_again = True
        else:
            # no need to run again
            run_again = False
        run_id = 0
        while not self.discard_results:
            logger.debug(' new scheduler_run record')
            try:
                run_id = db.scheduler_run.insert(
                    task_id=task.id,
                    status=RUNNING,
                    start_time=now,
                    worker_name=self.worker_name)
                db.commit()
                break
            except:
                time.sleep(0.5)
                db.rollback()
        logger.info('new task %(id)s "%(task_name)s"'
                    ' %(application_name)s.%(function_name)s' % task)
        return Task(
            app=task.application_name,
            function=task.function_name,
            timeout=task.timeout,
            args=task.args,  # in json
            vars=task.vars,  # in json
            task_id=task.id,
            run_id=run_id,
            run_again=run_again,
            next_run_time=next_run_time,
            times_run=times_run,
            stop_time=task.stop_time,
            retry_failed=task.retry_failed,
            times_failed=task.times_failed,
            sync_output=task.sync_output,
            uuid=task.uuid,
            group_name=task.group_name)
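
    # Note: BRPOPLPUSH moves the task id from the queued list to the running
    # list atomically, so even if a worker dies right after popping, the id
    # is still parked in running:<group> and will be pushed back onto the
    # queued list by the ticker in assign_tasks().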

    def report_task(self, task, task_report):
        """
        Override of the base method.

        Needed only because we have to pop the finished task
        from the redis "running" structures.
        """
        r_server = self.r_server
        db = self.db
        now = self.now()
        st = db.scheduler_task
        sr = db.scheduler_run
        if not self.discard_results:
            if task_report.result != 'null' or task_report.tb:
                # result is 'null' as a string if task completed
                # if it's stopped it's None as NoneType, so we record
                # the STOPPED "run" anyway
                logger.debug(' recording task report in db (%s)',
                             task_report.status)
                db(sr.id == task.run_id).update(
                    status=task_report.status,
                    stop_time=now,
                    run_result=task_report.result,
                    run_output=task_report.output,
                    traceback=task_report.tb)
            else:
                logger.debug(' deleting task report in db because of no result')
                db(sr.id == task.run_id).delete()
        # if there is a stop_time and the following run would exceed it
        is_expired = (task.stop_time and
                      task.next_run_time > task.stop_time and
                      True or False)
        # equivalent to: EXPIRED if (task.run_again and is_expired)
        # else (QUEUED if task.run_again else COMPLETED)
        status = (task.run_again and is_expired and EXPIRED or
                  task.run_again and not is_expired and
                  QUEUED or COMPLETED)
        if task_report.status == COMPLETED:
            # assigned calculations
            d = dict(status=status,
                     next_run_time=task.next_run_time,
                     times_run=task.times_run,
                     times_failed=0,
                     assigned_worker_name=self.worker_name
                     )
            db(st.id == task.task_id).update(**d)
            if status == COMPLETED:
                self.update_dependencies(db, task.task_id)
        else:
            st_mapping = {'FAILED': 'FAILED',
                          'TIMEOUT': 'TIMEOUT',
                          'STOPPED': 'FAILED'}[task_report.status]
            status = (task.retry_failed and
                      task.times_failed < task.retry_failed and
                      QUEUED or task.retry_failed == -1 and
                      QUEUED or st_mapping)
            db(st.id == task.task_id).update(
                times_failed=st.times_failed + 1,
                next_run_time=task.next_run_time,
                status=status,
                assigned_worker_name=self.worker_name
            )
        logger.info('task completed (%s)', task_report.status)
        running_list = self._nkey('running:%s' % task.group_name)
        running_dict = self._nkey('running_dict:%s' % task.group_name)
        r_server.lrem(running_list, 0, task.task_id)
        r_server.hdel(running_dict, task.task_id)

    def wrapped_pop_task(self):
        """Commodity function to call `pop_task` and trap exceptions.

        If an exception is raised, assume it happened because of database
        contention and retry `pop_task` after 0.5 seconds.
        """
        db = self.db
        db.commit()  # another nifty db.commit() only for MySQL
        x = 0
        while x < 10:
            try:
                rtn = self.pop_task(db)
                return rtn
                break
            # this is here to "interrupt" any brpoplpush op easily
            except KeyboardInterrupt:
                self.give_up()
                break
            except:
                self.w_stats.errors += 1
                db.rollback()
                logger.error(' error popping tasks')
                x += 1
                time.sleep(0.5)

    def get_workers(self, only_ticker=False):
        """Return a dict holding worker_name : {**columns}
        representing all "registered" workers.

        only_ticker returns only the worker running as a TICKER,
        if there is any.
        """
        r_server = self.r_server
        status_keyset = self._nkey('worker_statuses')
        registered_workers = r_server.smembers(status_keyset)
        all_workers = {}
        for worker in registered_workers:
            w = r_server.hgetall(worker)
            w = Storage(w)
            if not w:
                continue
            all_workers[w.worker_name] = Storage(
                status=w.status,
                first_heartbeat=self.str2date(w.first_heartbeat),
                last_heartbeat=self.str2date(w.last_heartbeat),
                group_names=loads(w.group_names, object_hook=_decode_dict),
                is_ticker=w.is_ticker == 'True' and True or False,
                worker_stats=loads(w.worker_stats, object_hook=_decode_dict)
            )
        if only_ticker:
            for k, v in all_workers.iteritems():
                if v['is_ticker']:
                    return {k: v}
            return {}
        return all_workers
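
    # For example, from a web2py shell or controller (assuming `mysched` is
    # the RScheduler instance created in the model, as in USAGE above):
    #
    #   workers = mysched.get_workers()
    #   ticker = mysched.get_workers(only_ticker=True)
    #
    # each value is a Storage with status, heartbeats, group_names,
    # is_ticker and worker_stats.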

    def set_worker_status(self, group_names=None, action=ACTIVE,
                          exclude=None, limit=None, worker_name=None):
        """Internal function to set worker's status"""
        r_server = self.r_server
        all_workers = self.get_workers()
        if not group_names:
            group_names = self.group_names
        elif isinstance(group_names, str):
            group_names = [group_names]
        exclusion = exclude and exclude.append(action) or [action]
        workers = []
        if worker_name is not None:
            if worker_name in all_workers.keys():
                workers = [worker_name]
        else:
            for k, v in all_workers.iteritems():
                if v.status not in exclusion and set(group_names) & set(v.group_names):
                    workers.append(k)
        if limit and worker_name is None:
            workers = workers[:limit]
        if workers:
            with r_server.pipeline() as pipe:
                while True:
                    try:
                        pipe.watch('SET_WORKER_STATUS')
                        for w in workers:
                            worker_key = self._nkey('worker_status:%s' % w)
                            pipe.hset(worker_key, 'status', action)
                        pipe.execute()
                        break
                    except RWatchError:
                        time.sleep(0.1)
                        continue

    def queue_task(self, function, pargs=[], pvars={}, **kwargs):
        """
        Queue a task. This takes care of handling the validation of all
        parameters.

        FIXME: immediate should put the item in the queue directly. The hard
        part is that currently there are no hooks happening at post-commit
        time.

        Args:
            function: the function (anything callable with a __name__)
            pargs: "raw" args to be passed to the function. Automatically
                jsonified.
            pvars: "raw" kwargs to be passed to the function. Automatically
                jsonified.
            kwargs: all the parameters available (basically, every
                `scheduler_task` column). If args and vars are here, they
                should be jsonified already, and they will override pargs
                and pvars.

        Returns:
            a dict just as a normal validate_and_insert(), plus a uuid key
            holding the uuid of the queued task. If validation is not passed
            (i.e. some parameters are invalid) both id and uuid will be None,
            and you'll get an "error" dict holding the errors found.
        """
        if hasattr(function, '__name__'):
            function = function.__name__
        targs = 'args' in kwargs and kwargs.pop('args') or dumps(pargs)
        tvars = 'vars' in kwargs and kwargs.pop('vars') or dumps(pvars)
        tuuid = 'uuid' in kwargs and kwargs.pop('uuid') or web2py_uuid()
        tname = 'task_name' in kwargs and kwargs.pop('task_name') or function
        immediate = 'immediate' in kwargs and kwargs.pop('immediate') or None
        cronline = kwargs.get('cronline')
        kwargs.update(function_name=function,
                      task_name=tname,
                      args=targs,
                      vars=tvars,
                      uuid=tuuid)
        if cronline:
            try:
                start_time = kwargs.get('start_time', self.now)
                next_run_time = CronParser(cronline, start_time).get_next()
                kwargs.update(start_time=start_time, next_run_time=next_run_time)
            except:
                pass
        rtn = self.db.scheduler_task.validate_and_insert(**kwargs)
        if not rtn.errors:
            rtn.uuid = tuuid
            if immediate:
                r_server = self.r_server
                ticker = self.get_workers(only_ticker=True)
                if ticker.keys():
                    ticker = ticker.keys()[0]
                    with r_server.pipeline() as pipe:
                        while True:
                            try:
                                pipe.watch('SET_WORKER_STATUS')
                                worker_key = self._nkey('worker_status:%s' % ticker)
                                pipe.hset(worker_key, 'status', 'PICK')
                                pipe.execute()
                                break
                            except RWatchError:
                                time.sleep(0.1)
                                continue
        else:
            rtn.uuid = None
        return rtn
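
    # For example, with the `mysched` instance from the USAGE snippet above,
    # a task could be queued with something along these lines:
    #
    #   r = mysched.queue_task(demo1, pargs=[1, 2], pvars={'a': 3},
    #                          timeout=60, immediate=True)
    #   print(r.id, r.uuid, r.errors)
    #
    # With immediate=True the ticker is flipped to PICK, so the task gets
    # assigned on the next heartbeat instead of waiting for the 5-cycle loop.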

    def stop_task(self, ref):
        """Shortcut for task termination.

        If the task is RUNNING it will terminate it, meaning that status
        will be set as FAILED.

        If the task is QUEUED, its stop_time will be set to "now",
        the enabled flag will be set to False, and the status to STOPPED.

        Args:
            ref: can be

                - an integer : lookup will be done by scheduler_task.id
                - a string : lookup will be done by scheduler_task.uuid

        Returns:
            - 1 if task was stopped (meaning an update has been done)
            - None if task was not found, or if task was not RUNNING or QUEUED

        Note:
            Experimental
        """
        r_server = self.r_server
        st = self.db.scheduler_task
        if isinstance(ref, int):
            q = st.id == ref
        elif isinstance(ref, str):
            q = st.uuid == ref
        else:
            raise SyntaxError(
                "You can retrieve results only by id or uuid")
        task = self.db(q).select(st.id, st.status, st.group_name)
        task = task.first()
        rtn = None
        if not task:
            return rtn
        running_dict = self._nkey('running_dict:%s' % task.group_name)
        if task.status == 'RUNNING':
            worker_key = r_server.hget(running_dict, task.id)
            worker_key = self._nkey('worker_status:%s' % (worker_key))
            r_server.hset(worker_key, 'status', STOP_TASK)
        elif task.status == 'QUEUED':
            rtn = self.db(q).update(
                stop_time=self.now(),
                enabled=False,
                status=STOPPED)
        return rtn
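
# For example, given the `mysched` instance from the USAGE snippet, a queued
# or running task can be stopped with either of:
#   mysched.stop_task(42)         # lookup by scheduler_task.id
#   mysched.stop_task(task_uuid)  # lookup by scheduler_task.uuid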