Scale workers (#2617)
This commit is contained in:
@@ -98,16 +98,32 @@ Or, a path to a directory to run as a simple HTTP server:
|
||||
except ValueError as e:
|
||||
error_logger.exception(f"Failed to run app: {e}")
|
||||
else:
|
||||
if self.args.inspect or self.args.inspect_raw or self.args.trigger:
|
||||
if (
|
||||
self.args.inspect
|
||||
or self.args.inspect_raw
|
||||
or self.args.trigger
|
||||
or self.args.scale is not None
|
||||
):
|
||||
os.environ["SANIC_IGNORE_PRODUCTION_WARNING"] = "true"
|
||||
else:
|
||||
for http_version in self.args.http:
|
||||
app.prepare(**kwargs, version=http_version)
|
||||
|
||||
if self.args.inspect or self.args.inspect_raw or self.args.trigger:
|
||||
action = self.args.trigger or (
|
||||
"raw" if self.args.inspect_raw else "pretty"
|
||||
)
|
||||
if (
|
||||
self.args.inspect
|
||||
or self.args.inspect_raw
|
||||
or self.args.trigger
|
||||
or self.args.scale is not None
|
||||
):
|
||||
if self.args.scale is not None:
|
||||
if self.args.scale <= 0:
|
||||
error_logger.error("There must be at least 1 worker")
|
||||
sys.exit(1)
|
||||
action = f"scale={self.args.scale}"
|
||||
else:
|
||||
action = self.args.trigger or (
|
||||
"raw" if self.args.inspect_raw else "pretty"
|
||||
)
|
||||
inspect(
|
||||
app.config.INSPECTOR_HOST,
|
||||
app.config.INSPECTOR_PORT,
|
||||
|
||||
@@ -115,6 +115,12 @@ class ApplicationGroup(Group):
|
||||
const="shutdown",
|
||||
help=("Trigger all processes to shutdown"),
|
||||
)
|
||||
group.add_argument(
|
||||
"--scale",
|
||||
dest="scale",
|
||||
type=int,
|
||||
help=("Scale number of workers"),
|
||||
)
|
||||
|
||||
|
||||
class HTTPVersionGroup(Group):
|
||||
|
||||
@@ -55,17 +55,20 @@ class Inspector:
|
||||
else:
|
||||
action = conn.recv(64)
|
||||
if action == b"reload":
|
||||
conn.send(b"\n")
|
||||
self.reload()
|
||||
elif action == b"shutdown":
|
||||
conn.send(b"\n")
|
||||
self.shutdown()
|
||||
elif action.startswith(b"scale"):
|
||||
num_workers = int(action.split(b"=", 1)[-1])
|
||||
logger.info("Scaling to %s", num_workers)
|
||||
self.scale(num_workers)
|
||||
else:
|
||||
data = dumps(self.state_to_json())
|
||||
conn.send(data.encode())
|
||||
conn.close()
|
||||
conn.send(b"\n")
|
||||
conn.close()
|
||||
finally:
|
||||
logger.debug("Inspector closing")
|
||||
logger.info("Inspector closing")
|
||||
sock.close()
|
||||
|
||||
def stop(self, *_):
|
||||
@@ -80,6 +83,10 @@ class Inspector:
|
||||
message = "__ALL_PROCESSES__:"
|
||||
self._publisher.send(message)
|
||||
|
||||
def scale(self, num_workers: int):
    """Send a ``__SCALE__:<num_workers>`` message through the publisher.

    :param num_workers: worker count to embed in the message
    """
    self._publisher.send(f"__SCALE__:{num_workers}")
|
||||
|
||||
def shutdown(self):
|
||||
message = "__TERMINATE__"
|
||||
self._publisher.send(message)
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import os
|
||||
|
||||
from itertools import count
|
||||
from random import choice
|
||||
from signal import SIGINT, SIGTERM, Signals
|
||||
from signal import signal as signal_func
|
||||
from typing import List, Optional
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from sanic.compat import OS_IS_WINDOWS
|
||||
from sanic.exceptions import ServerKilled
|
||||
@@ -30,33 +32,61 @@ class WorkerManager:
|
||||
):
|
||||
self.num_server = number
|
||||
self.context = context
|
||||
self.transient: List[Worker] = []
|
||||
self.durable: List[Worker] = []
|
||||
self.transient: Dict[str, Worker] = {}
|
||||
self.durable: Dict[str, Worker] = {}
|
||||
self.monitor_publisher, self.monitor_subscriber = monitor_pubsub
|
||||
self.worker_state = worker_state
|
||||
self.worker_state["Sanic-Main"] = {"pid": self.pid}
|
||||
self.terminated = False
|
||||
self._serve = serve
|
||||
self._server_settings = server_settings
|
||||
self._server_count = count()
|
||||
|
||||
if number == 0:
|
||||
raise RuntimeError("Cannot serve with no workers")
|
||||
|
||||
for i in range(number):
|
||||
self.manage(
|
||||
f"{WorkerProcess.SERVER_LABEL}-{i}",
|
||||
serve,
|
||||
server_settings,
|
||||
transient=True,
|
||||
)
|
||||
for _ in range(number):
|
||||
self.create_server()
|
||||
|
||||
signal_func(SIGINT, self.shutdown_signal)
|
||||
signal_func(SIGTERM, self.shutdown_signal)
|
||||
|
||||
def manage(self, ident, func, kwargs, transient=False):
|
||||
def manage(self, ident, func, kwargs, transient=False) -> Worker:
|
||||
container = self.transient if transient else self.durable
|
||||
container.append(
|
||||
Worker(ident, func, kwargs, self.context, self.worker_state)
|
||||
worker = Worker(ident, func, kwargs, self.context, self.worker_state)
|
||||
container[worker.ident] = worker
|
||||
return worker
|
||||
|
||||
def create_server(self) -> Worker:
    """Register one more transient server worker and return it.

    The worker is labelled ``<WorkerProcess.SERVER_LABEL>-<n>``, where
    ``n`` is the next value drawn from ``self._server_count``. It is
    registered through ``self.manage`` with the stored serve callable and
    server settings.
    """
    index = next(self._server_count)
    label = f"{WorkerProcess.SERVER_LABEL}-{index}"
    return self.manage(
        label, self._serve, self._server_settings, transient=True
    )
|
||||
|
||||
def shutdown_server(self, ident: Optional[str] = None) -> None:
    """Terminate one transient worker and drop it from ``self.transient``.

    :param ident: key of the worker in ``self.transient``. When falsy, a
        worker is picked at random among the transient workers whose
        ident starts with ``WorkerProcess.SERVER_LABEL``.
    :raises KeyError: if ``ident`` is given but is not a key of
        ``self.transient``.

    If no ident is given and no matching worker exists, an error is
    logged and nothing is terminated.
    """
    if ident:
        target = self.transient[ident]
    else:
        candidates = [
            candidate
            for candidate in self.transient.values()
            if candidate.ident.startswith(WorkerProcess.SERVER_LABEL)
        ]
        if not candidates:
            error_logger.error(
                "Server shutdown failed because a server was not found."
            )
            return
        # Non-cryptographic randomness is fine for picking a victim.
        target = choice(candidates)  # nosec B311

    for process in target.processes:
        process.terminate()

    del self.transient[target.ident]
|
||||
|
||||
def run(self):
|
||||
self.start()
|
||||
self.monitor()
|
||||
@@ -94,6 +124,28 @@ class WorkerManager:
|
||||
if not process_names or process.name in process_names:
|
||||
process.restart(**kwargs)
|
||||
|
||||
def scale(self, num_worker: int):
    """Grow or shrink the set of server workers to ``num_worker``.

    When growing, each new worker comes from ``create_server`` and its
    processes are started immediately. When shrinking,
    ``shutdown_server`` is called once per surplus worker.
    ``self.num_server`` is updated only after the adjustment completes.

    :param num_worker: desired number of server workers
    :raises ValueError: if ``num_worker`` is zero or negative
    """
    if num_worker <= 0:
        raise ValueError("Cannot scale to 0 workers.")

    current = self.num_server
    if num_worker == current:
        logger.info(
            f"No change needed. There are already {num_worker} workers."
        )
        return

    logger.info(f"Scaling from {current} to {num_worker} workers")
    delta = num_worker - current
    if delta < 0:
        for _ in range(-delta):
            self.shutdown_server()
    else:
        for _ in range(delta):
            for process in self.create_server().processes:
                process.start()

    self.num_server = num_worker
|
||||
|
||||
def monitor(self):
|
||||
self.wait_for_ack()
|
||||
while True:
|
||||
@@ -109,6 +161,9 @@ class WorkerManager:
|
||||
self.shutdown()
|
||||
break
|
||||
split_message = message.split(":", 1)
|
||||
if message.startswith("__SCALE__"):
|
||||
self.scale(int(split_message[-1]))
|
||||
continue
|
||||
processes = split_message[0]
|
||||
reloaded_files = (
|
||||
split_message[1] if len(split_message) > 1 else None
|
||||
@@ -161,8 +216,8 @@ class WorkerManager:
|
||||
self.kill()
|
||||
|
||||
@property
|
||||
def workers(self):
|
||||
return self.transient + self.durable
|
||||
def workers(self) -> List[Worker]:
|
||||
return list(self.transient.values()) + list(self.durable.values())
|
||||
|
||||
@property
|
||||
def processes(self):
|
||||
@@ -172,7 +227,7 @@ class WorkerManager:
|
||||
|
||||
@property
|
||||
def transient_processes(self):
|
||||
for worker in self.transient:
|
||||
for worker in self.transient.values():
|
||||
for process in worker.processes:
|
||||
yield process
|
||||
|
||||
|
||||
@@ -33,6 +33,10 @@ class WorkerMultiplexer:
|
||||
|
||||
reload = restart # no cov
|
||||
|
||||
def scale(self, num_workers: int):
    """Send a ``__SCALE__:<num_workers>`` message on the monitor publisher.

    :param num_workers: worker count to embed in the message
    """
    self._monitor_publisher.send(f"__SCALE__:{num_workers}")
|
||||
|
||||
def terminate(self, early: bool = False):
|
||||
message = "__TERMINATE_EARLY__" if early else "__TERMINATE__"
|
||||
self._monitor_publisher.send(message)
|
||||
|
||||
@@ -133,6 +133,8 @@ class WorkerProcess:
|
||||
|
||||
|
||||
class Worker:
|
||||
WORKER_PREFIX = "Sanic-"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ident: str,
|
||||
@@ -152,7 +154,7 @@ class Worker:
|
||||
def create_process(self) -> WorkerProcess:
|
||||
process = WorkerProcess(
|
||||
factory=self.context.Process,
|
||||
name=f"Sanic-{self.ident}-{len(self.processes)}",
|
||||
name=f"{self.WORKER_PREFIX}{self.ident}-{len(self.processes)}",
|
||||
target=self.serve,
|
||||
kwargs={**self.server_settings},
|
||||
worker_state=self.worker_state,
|
||||
|
||||
Reference in New Issue
Block a user