Skip to content

Commit a3d123a

Browse files
committed
cmd/makemac, cmd/coordinator: export warnings/error from makemac to coordinator
This adds information on warnings & errors to makemac's JSON status handler. That information is then parsed by the coordinator's health checking code, which already polls this JSON endpoint.

Updates golang/go#32449
Updates golang/go#15760

Change-Id: I69bea7b07c184d1f62a358bc317376aa97018230
Reviewed-on: https://go-review.googlesource.com/c/build/+/181217
Reviewed-by: Brad Fitzpatrick <[email protected]>
1 parent 4c1c063 commit a3d123a

File tree

3 files changed

+99
-27
lines changed

3 files changed

+99
-27
lines changed

cmd/coordinator/status.go

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"bufio"
1111
"bytes"
1212
"context"
13+
"encoding/json"
1314
"fmt"
1415
"html"
1516
"html/template"
@@ -236,31 +237,21 @@ func newMacHealthChecker() *healthChecker {
236237
// And check that the makemac daemon is listening.
237238
var makeMac struct {
238239
sync.Mutex
239-
lastErr error
240-
lastCheck time.Time // currently unused
240+
lastCheck time.Time // currently unused
241+
lastErrors []string
242+
lastWarns []string
241243
}
242-
setMakeMacErr := func(err error) {
244+
setMakeMacStatus := func(errs, warns []string) {
243245
makeMac.Lock()
244246
defer makeMac.Unlock()
245-
makeMac.lastErr = err
246247
makeMac.lastCheck = time.Now()
248+
makeMac.lastErrors = errs
249+
makeMac.lastWarns = warns
247250
}
248251
go func() {
249-
c := &http.Client{Timeout: 15 * time.Second}
250252
for {
251-
res, err := c.Get("http://macstadiumd.golang.org:8713")
252-
if err != nil {
253-
setMakeMacErr(err)
254-
} else {
255-
res.Body.Close()
256-
if res.StatusCode != 200 {
257-
setMakeMacErr(fmt.Errorf("HTTP status %v", res.Status))
258-
} else if res.Header.Get("Content-Type") != "application/json" {
259-
setMakeMacErr(fmt.Errorf("unexpected content-type %q", res.Header.Get("Content-Type")))
260-
} else {
261-
setMakeMacErr(nil)
262-
}
263-
}
253+
errs, warns := fetchMakeMacStatus()
254+
setMakeMacStatus(errs, warns)
264255
time.Sleep(15 * time.Second)
265256
}
266257
}()
@@ -274,13 +265,39 @@ func newMacHealthChecker() *healthChecker {
274265
// Check makemac daemon.
275266
makeMac.Lock()
276267
defer makeMac.Unlock()
277-
if makeMac.lastErr != nil {
278-
w.errorf("makemac daemon: %v", makeMac.lastErr)
268+
for _, v := range makeMac.lastWarns {
269+
w.warnf("makemac daemon: %v", v)
270+
}
271+
for _, v := range makeMac.lastErrors {
272+
w.errorf("makemac daemon: %v", v)
279273
}
280274
},
281275
}
282276
}
283277

278+
// fetchMakeMacStatus polls the makemac daemon's JSON status endpoint once and
// returns the errors and warnings that it reports.
//
// Any failure to obtain or decode the status (a request error, a non-200
// response, a Content-Type header other than exactly "application/json", or a
// body that does not decode as JSON) is returned as a single entry in errs,
// with warns left nil.
func fetchMakeMacStatus() (errs, warns []string) {
	// oneErr builds the (errs, warns) result for a single failure message.
	oneErr := func(format string, args ...interface{}) ([]string, []string) {
		return []string{fmt.Sprintf(format, args...)}, nil
	}

	client := &http.Client{Timeout: 15 * time.Second}
	resp, err := client.Get("http://macstadiumd.golang.org:8713")
	if err != nil {
		return oneErr("failed to fetch status: %v", err)
	}
	defer resp.Body.Close()

	// Validate the response envelope before trying to decode the body.
	contentType := resp.Header.Get("Content-Type")
	switch {
	case resp.StatusCode != http.StatusOK:
		return oneErr("HTTP status %v", resp.Status)
	case contentType != "application/json":
		return oneErr("unexpected content-type %q; want JSON", contentType)
	}

	var payload struct {
		Errors   []string
		Warnings []string
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return oneErr("reading status response body: %v", err)
	}
	return payload.Errors, payload.Warnings
}
300+
284301
func newJoyentSolarisChecker() *healthChecker {
285302
return &healthChecker{
286303
ID: "joyent-solaris",

cmd/makemac/README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
# golang.org/x/build/cmd/makemac
44

5-
The makemac command starts OS X VMs for the builders.
5+
The makemac command manages creating & destroying macOS VMs for the
6+
builders. See the README in x/build/env/darwin/macstadium for some
7+
more background.
68

79
## Deploying `makemac`
810

@@ -30,3 +32,12 @@ On that host,
3032
$ sudo systemctl restart makemac
3133
$ sudo journalctl -f -u makemac # watch it
3234
```
35+
36+
## Checking that it's running:
37+
38+
```
39+
$ curl -v http://macstadiumd.golang.org:8713
40+
```
41+
42+
(Note that URL won't work in a browser due to HSTS requirements on
43+
*.golang.org)

cmd/makemac/makemac.go

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Usage:
1616
package main
1717

1818
import (
19+
"bufio"
1920
"context"
2021
"encoding/json"
2122
"errors"
@@ -313,6 +314,9 @@ func govc(ctx context.Context, args ...string) error {
313314
fmt.Fprintf(os.Stderr, "$ govc %v\n", strings.Join(args, " "))
314315
out, err := exec.CommandContext(ctx, "govc", args...).CombinedOutput()
315316
if err != nil {
317+
if isFileSystemReadOnly() {
318+
out = append(out, "; filesystem is read-only"...)
319+
}
316320
return fmt.Errorf("govc %s ...: %v, %s", args[0], err, out)
317321
}
318322
return nil
@@ -372,7 +376,7 @@ func getState(ctx context.Context) (*State, error) {
372376

373377
var hosts elementList
374378
if err := govcJSONDecode(ctx, &hosts, "ls", "-json", "/MacStadium-ATL/host/MacMini_Cluster"); err != nil {
375-
return nil, fmt.Errorf("Reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
379+
return nil, fmt.Errorf("getState: reading /MacStadium-ATL/host/MacMini_Cluster: %v", err)
376380
}
377381
for _, h := range hosts.Elements {
378382
if h.Object.Self.Type == "HostSystem" {
@@ -384,7 +388,7 @@ func getState(ctx context.Context) (*State, error) {
384388

385389
var vms elementList
386390
if err := govcJSONDecode(ctx, &vms, "ls", "-json", "/MacStadium-ATL/vm"); err != nil {
387-
return nil, fmt.Errorf("Reading /MacStadium-ATL/vm: %v", err)
391+
return nil, fmt.Errorf("getState: reading /MacStadium-ATL/vm: %v", err)
388392
}
389393
for _, h := range vms.Elements {
390394
if h.Object.Self.Type != "VirtualMachine" {
@@ -528,6 +532,8 @@ var status struct {
528532
lastCheck time.Time
529533
lastLog string
530534
lastState *State
535+
warnings []string
536+
errors []string
531537
}
532538

533539
func init() {
@@ -581,14 +587,20 @@ func autoAdjust() {
581587

582588
st, err := getState(ctx)
583589
if err != nil {
584-
log.Printf("getting VMWare state: %v", err)
590+
status.Lock()
591+
status.errors = []string{err.Error()}
592+
status.Unlock()
593+
log.Print(err)
585594
return
586595
}
596+
var warnings, errors []string
587597
defer func() {
588598
// Set status.lastState once we're no longer using it.
589599
if st != nil {
590600
status.Lock()
591601
status.lastState = st
602+
status.warnings = warnings
603+
status.errors = errors
592604
status.Unlock()
593605
}
594606
}()
@@ -597,12 +609,14 @@ func autoAdjust() {
597609
req = req.WithContext(ctx)
598610
res, err := http.DefaultClient.Do(req)
599611
if err != nil {
612+
errors = append(errors, fmt.Sprintf("getting /status/reverse.json from coordinator: %v", err))
600613
log.Printf("getting reverse status: %v", err)
601614
return
602615
}
603616
defer res.Body.Close()
604617
var rstat types.ReverseBuilderStatus
605618
if err := json.NewDecoder(res.Body).Decode(&rstat); err != nil {
619+
errors = append(errors, fmt.Sprintf("decoding /status/reverse.json from coordinator: %v", err))
606620
log.Printf("decoding reverse.json: %v", err)
607621
return
608622
}
@@ -618,6 +632,7 @@ func autoAdjust() {
618632
}
619633

620634
// Destroy running VMs that appear to be dead and not connected to the coordinator.
635+
// TODO: do these all concurrently.
621636
dirty := false
622637
for name, vi := range st.VMInfo {
623638
if vi.BootTime.After(time.Now().Add(-3 * time.Minute)) {
@@ -632,18 +647,22 @@ func autoAdjust() {
632647
// Look it up by its slot name instead.
633648
rh = revHost[vi.SlotName]
634649
}
635-
if rh == nil { // || (!rh.Busy && rh.ConnectedSec > 50 && rh.HostType == "host-darwin-10_12") {
650+
if rh == nil {
636651
log.Printf("Destroying VM %q unknown to coordinator...", name)
637652
err := govc(ctx, "vm.destroy", name)
638653
log.Printf("vm.destroy(%q) = %v", name, err)
639654
dirty = true
655+
if err != nil {
656+
warnings = append(warnings, fmt.Sprintf("vm.destroy(%q) = %v", name, err))
657+
}
640658
}
641659
}
642660
for {
643661
if dirty {
644662
st, err = getState(ctx)
645663
if err != nil {
646-
log.Printf("getState: %v", err)
664+
errors = append(errors, err.Error())
665+
log.Print(err)
647666
return
648667
}
649668
}
@@ -661,7 +680,9 @@ func autoAdjust() {
661680
dedupLogf("Have capacity for %d more Mac VMs; creating requested 10.%d ...", canCreate, ver)
662681
slotName, err := st.CreateMac(ctx, ver)
663682
if err != nil {
664-
log.Printf("Error creating 10.%d: %v", ver, err)
683+
errStr := fmt.Sprintf("Error creating 10.%d: %v", ver, err)
684+
errors = append(errors, errStr)
685+
log.Print(errStr)
665686
return
666687
}
667688
log.Printf("Created 10.%d VM on %q", ver, slotName)
@@ -715,10 +736,14 @@ func handleStatus(w http.ResponseWriter, r *http.Request) {
715736
LastCheck string
716737
LastLog string
717738
LastState *State
739+
Warnings []string
740+
Errors []string
718741
}{
719742
LastCheck: status.lastCheck.UTC().Format(time.RFC3339),
720743
LastLog: status.lastLog,
721744
LastState: status.lastState,
745+
Warnings: status.warnings,
746+
Errors: status.errors,
722747
}
723748
j, _ := json.MarshalIndent(res, "", "\t")
724749
w.Write(j)
@@ -823,3 +848,22 @@ func (h onlyAtRoot) ServeHTTP(w http.ResponseWriter, r *http.Request) {
823848
}
824849
h.h.ServeHTTP(w, r)
825850
}
851+
852+
// isFileSystemReadOnly reports whether the root filesystem ("/") is mounted
// read-only according to /proc/mounts.
//
// It returns false if /proc/mounts cannot be opened or if no well-formed
// entry for "/" is found. The first entry whose mount point is "/" decides
// the result.
func isFileSystemReadOnly() bool {
	f, err := os.Open("/proc/mounts")
	if err != nil {
		return false
	}
	defer f.Close()
	// Look for line:
	//    /dev/sda1 / ext4 rw,relatime,errors=remount-ro,data=ordered 0 0
	bs := bufio.NewScanner(f)
	for bs.Scan() {
		if isRoot, readOnly := rootMountReadOnly(bs.Text()); isRoot {
			return readOnly
		}
	}
	// A scan error is treated the same as a missing entry: the result is
	// only used as a hint, so "unknown" is reported as not read-only.
	return false
}

// rootMountReadOnly parses one /proc/mounts line. isRoot reports whether the
// line describes the "/" mount point; readOnly reports whether that mount's
// option list contains "ro". Blank or truncated lines (fewer than four
// whitespace-separated fields) yield (false, false) instead of panicking.
func rootMountReadOnly(line string) (isRoot, readOnly bool) {
	fields := strings.Fields(line)
	if len(fields) < 4 || fields[1] != "/" {
		return false, false
	}
	// Compare whole options so that "ro" alone matches and values such as
	// "errors=remount-ro" do not.
	for _, opt := range strings.Split(fields[3], ",") {
		if opt == "ro" {
			return true, true
		}
	}
	return true, false
}

0 commit comments

Comments
 (0)