offload: Use (guix inferior) instead of (ssh dist node).

Using inferiors and thus 'guix repl' simplifies setup on build
machines (no need to worry about GUILE_LOAD_PATH etc.)

Furthermore, the 'guix repl -t machine' protocol running in a remote
pipe addresses several issues with the current implementation of nodes
and RREPLs in Guile-SSH: fewer round trips, doesn't leave a 'guile
--listen' process behind it, stateless (since a new process is started
each time), more efficient (the SSH channel can be reused), more
reliable (no 'pgrep', 'pkill', and shellology; see
<https://github.com/artyom-poptsov/guile-ssh/issues/11> as an example.)

* guix/ssh.scm (inferior-remote-eval): New procedure.
(send-files): Use it instead of 'make-node' and 'node-eval'.
* guix/scripts/offload.scm (node-guile-version): New procedure.
(node-free-disk-space, transfer-and-offload, node-load)
(choose-build-machine, assert-node-has-guix): Use 'remote-inferior'
instead of 'make-node' and 'inferior-eval' instead of 'node-eval'.
(assert-node-can-import, assert-node-can-export): Likewise, and add
'session' parameter.
(check-machine-availability): Likewise, and add calls to
'close-inferior' and 'disconnect!'.
(check-machine-status): Likewise.
* doc/guix.texi (Daemon Offload Setup): Remove bit related to 'guile' in
$PATH and $GUILE_LOAD_PATH; mention 'guix' alone.
This commit is contained in:
Ludovic Courtès 2018-12-24 15:40:04 +01:00
parent af15fe13b6
commit ed7b44370f
No known key found for this signature in database
GPG Key ID: 090B11993D9AEBB5
3 changed files with 80 additions and 63 deletions

View File

@ -1051,13 +1051,11 @@ name, and they will be scheduled on matching build machines.
@end table
@end deftp
The @code{guile} command must be in the search path on the build
machines. In addition, the Guix modules must be in
@code{$GUILE_LOAD_PATH} on the build machine---you can check whether
this is the case by running:
The @command{guix} command must be in the search path on the build
machines. You can check whether this is the case by running:
@example
ssh build-machine guile -c "'(use-modules (guix config))'"
ssh build-machine guix repl --version
@end example
There is one last thing to do once @file{machines.scm} is in place. As

View File

@ -23,13 +23,12 @@
#:use-module (ssh session)
#:use-module (ssh channel)
#:use-module (ssh popen)
#:use-module (ssh dist)
#:use-module (ssh dist node)
#:use-module (ssh version)
#:use-module (guix config)
#:use-module (guix records)
#:use-module (guix ssh)
#:use-module (guix store)
#:use-module (guix inferior)
#:use-module (guix derivations)
#:use-module ((guix serialization)
#:select (nar-error? nar-error-file))
@ -321,12 +320,15 @@ hook."
(set-port-revealed! port 1)
port))
(define (node-guile-version node)
(inferior-eval '(version) node))
(define (node-free-disk-space node)
"Return the free disk space, in bytes, in NODE's store."
(node-eval node
`(begin
(use-modules (guix build syscalls))
(free-disk-space ,(%store-prefix)))))
(inferior-eval `(begin
(use-modules (guix build syscalls))
(free-disk-space ,(%store-prefix)))
node))
(define* (transfer-and-offload drv machine
#:key
@ -367,8 +369,12 @@ MACHINE."
(derivation-file-name drv)
(build-machine-name machine)
(nix-protocol-error-message c))
(let* ((space (false-if-exception
(node-free-disk-space (make-node session)))))
(let* ((inferior (false-if-exception (remote-inferior session)))
(space (false-if-exception
(node-free-disk-space inferior))))
(when inferior
(close-inferior inferior))
;; Use exit code 100 for a permanent build failure. The daemon
;; interprets other non-zero codes as transient build failures.
@ -417,11 +423,11 @@ of free disk space on '~a'~%")
(define (node-load node)
"Return the load on NODE. Return +∞ if NODE is misbehaving."
(let ((line (node-eval node
'(begin
(use-modules (ice-9 rdelim))
(call-with-input-file "/proc/loadavg"
read-string)))))
(let ((line (inferior-eval '(begin
(use-modules (ice-9 rdelim))
(call-with-input-file "/proc/loadavg"
read-string))
node)))
(if (eof-object? line)
+inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
(match (string-tokenize line)
@ -508,9 +514,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
;; Note: We call 'node-load' only as a last resort because it is
;; too costly to call it once for every machine.
(let* ((session (false-if-exception (open-ssh-session best)))
(node (and session (make-node session)))
(node (and session (remote-inferior session)))
(load (and node (normalized-load best (node-load node))))
(space (and node (node-free-disk-space node))))
(when node (close-inferior node))
(when session (disconnect! session))
(if (and node (< load 2.) (>= space %minimum-disk-space))
(match others
@ -613,18 +620,17 @@ If TIMEOUT is #f, simply evaluate EXP..."
(#f
(report-guile-error name))
((? string? version)
;; Note: The version string already contains the word "Guile".
(info (G_ "'~a' is running ~a~%")
(info (G_ "'~a' is running GNU Guile ~a~%")
name (node-guile-version node)))))
(define (assert-node-has-guix node name)
"Bail out if NODE lacks the (guix) module, or if its daemon is not running."
(catch 'node-repl-error
(lambda ()
(match (node-eval node
'(begin
(use-modules (guix))
(and add-text-to-store 'alright)))
(match (inferior-eval '(begin
(use-modules (guix))
(and add-text-to-store 'alright))
node)
('alright #t)
(_ (report-module-error name))))
(lambda (key . args)
@ -632,12 +638,12 @@ If TIMEOUT is #f, simply evaluate EXP..."
(catch 'node-repl-error
(lambda ()
(match (node-eval node
'(begin
(use-modules (guix))
(with-store store
(add-text-to-store store "test"
"Hello, build machine!"))))
(match (inferior-eval '(begin
(use-modules (guix))
(with-store store
(add-text-to-store store "test"
"Hello, build machine!")))
node)
((? string? str)
(info (G_ "Guix is usable on '~a' (test returned ~s)~%")
name str))
@ -656,25 +662,23 @@ If TIMEOUT is #f, simply evaluate EXP..."
(string-append name "-"
(number->string (random 1000000 (force %random-state)))))
(define (assert-node-can-import node name daemon-socket)
(define (assert-node-can-import session node name daemon-socket)
"Bail out if NODE refuses to import our archives."
(let ((session (node-session node)))
(with-store store
(let* ((item (add-text-to-store store "export-test" (nonce)))
(remote (connect-to-remote-daemon session daemon-socket)))
(with-store local
(send-files local (list item) remote))
(with-store store
(let* ((item (add-text-to-store store "export-test" (nonce)))
(remote (connect-to-remote-daemon session daemon-socket)))
(with-store local
(send-files local (list item) remote))
(if (valid-path? remote item)
(info (G_ "'~a' successfully imported '~a'~%")
name item)
(leave (G_ "'~a' was not properly imported on '~a'~%")
item name))))))
(if (valid-path? remote item)
(info (G_ "'~a' successfully imported '~a'~%")
name item)
(leave (G_ "'~a' was not properly imported on '~a'~%")
item name)))))
(define (assert-node-can-export node name daemon-socket)
(define (assert-node-can-export session node name daemon-socket)
"Bail out if we cannot import signed archives from NODE."
(let* ((session (node-session node))
(remote (connect-to-remote-daemon session daemon-socket))
(let* ((remote (connect-to-remote-daemon session daemon-socket))
(item (add-text-to-store remote "import-test" (nonce name))))
(with-store store
(if (and (retrieve-files store (list item) remote)
@ -701,11 +705,13 @@ machine."
(let* ((names (map build-machine-name machines))
(sockets (map build-machine-daemon-socket machines))
(sessions (map open-ssh-session machines))
(nodes (map make-node sessions)))
(nodes (map remote-inferior sessions)))
(for-each assert-node-repl nodes names)
(for-each assert-node-has-guix nodes names)
(for-each assert-node-can-import nodes names sockets)
(for-each assert-node-can-export nodes names sockets))))
(for-each assert-node-can-import sessions nodes names sockets)
(for-each assert-node-can-export sessions nodes names sockets)
(for-each close-inferior nodes)
(for-each disconnect! sessions))))
(define (check-machine-status machine-file pred)
"Print the load of each machine matching PRED in MACHINE-FILE."
@ -722,10 +728,11 @@ machine."
(length machines) machine-file)
(for-each (lambda (machine)
(let* ((session (open-ssh-session machine))
(node (make-node session))
(uts (node-eval node '(uname)))
(load (node-load node))
(free (node-free-disk-space node)))
(inferior (remote-inferior session))
(uts (inferior-eval '(uname) inferior))
(load (node-load inferior))
(free (node-free-disk-space inferior)))
(close-inferior inferior)
(disconnect! session)
(format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\
host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%"

View File

@ -27,8 +27,6 @@
#:use-module (ssh channel)
#:use-module (ssh popen)
#:use-module (ssh session)
#:use-module (ssh dist)
#:use-module (ssh dist node)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
@ -102,6 +100,20 @@ Throw an error on failure."
"guix" "repl" "-t" "machine")))
(port->inferior pipe)))
(define (inferior-remote-eval exp session)
"Evaluate EXP in a new inferior running in SESSION, and close the inferior
right away."
(let ((inferior (remote-inferior session)))
(dynamic-wind
(const #t)
(lambda ()
(inferior-eval exp inferior))
(lambda ()
;; Close INFERIOR right away to prevent finalization from happening in
;; another thread at the wrong time (see
;; <https://bugs.gnu.org/26976>.)
(close-inferior inferior)))))
(define* (remote-daemon-channel session
#:optional
(socket-name
@ -277,15 +289,15 @@ Return the list of store items actually sent."
;; Compute the subset of FILES missing on SESSION and send them.
(let* ((files (if recursive? (requisites local files) files))
(session (channel-get-session (nix-server-socket remote)))
(node (make-node session))
(missing (node-eval node
`(begin
(use-modules (guix)
(srfi srfi-1) (srfi srfi-26))
(missing (inferior-remote-eval
`(begin
(use-modules (guix)
(srfi srfi-1) (srfi srfi-26))
(with-store store
(remove (cut valid-path? store <>)
',files)))))
(with-store store
(remove (cut valid-path? store <>)
',files)))
session))
(count (length missing))
(sizes (map (lambda (item)
(path-info-nar-size (query-path-info local item)))