~niemeyer/gozk/fix-exists

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
package zookeeper_test

import (
	"bufio"
	"flag"
	"fmt"
	. "launchpad.net/gocheck"
	zk "launchpad.net/gozk/zookeeper"
	"os"
	"os/exec"
	"strings"
	"testing"
	"time"
)

// Command-line flags used when the test binary re-executes itself (see
// startServerIndirect). reattach switches on the body of
// TestStartNonChildServer; reattachRunDir and reattachAbnormalStop are
// handed to startServer as its run directory and abort arguments.
var (
	reattach             = flag.Bool("zktest.reattach", false, "internal flag used for testing")
	reattachRunDir       = flag.String("zktest.rundir", "", "internal flag used for testing")
	reattachAbnormalStop = flag.Bool("zktest.stop", false, "internal flag used for testing")
)

// TestStartNonChildServer is the re-entry point for starting a
// ZooKeeper server from a process that is not a direct child of the
// main testing process. Unless the -zktest.reattach flag is set it
// does nothing. The test itself never fails: the outcome is printed
// to stdout as a "zktest:" status line, to be read by the process
// that launched it (see startServerIndirect).
func TestStartNonChildServer(t *testing.T) {
	if *reattach {
		// Assume success; replace the status line if starting fails.
		status := "zktest:done\n"
		if err := startServer(*reattachRunDir, *reattachAbnormalStop); err != nil {
			status = fmt.Sprintf("zktest:error:%v\n", err)
		}
		fmt.Print(status)
	}
}

// startServer calls the package-level startServer on s.zkTestRoot,
// forwarding abort, and fails the test if it returns an error.
func (s *S) startServer(c *C, abort bool) {
	c.Assert(startServer(s.zkTestRoot, abort), IsNil)
}

// startServerIndirect starts a ZooKeeper server that is not
// a direct child of the current process. If abort is true,
// the server will be started and then terminated abnormally.
//
// It re-executes the current test binary (os.Args[0]) with the
// -zktest.* flags set and -test.run restricted to StartNonChildServer,
// then reads the child's combined stdout and stderr line by line until
// the output ends:
//
//   - a "zktest:error:<message>" line fails the test with <message>;
//   - a "zktest:done" line records success;
//   - any other line is logged.
//
// If the output ends without a "zktest:done" line, the test fails.
func (s *S) startServerIndirect(c *C, abort bool) {
	// errorPrefix must match the status line printed by
	// TestStartNonChildServer on failure.
	const errorPrefix = "zktest:error:"

	if len(os.Args) == 0 {
		c.Fatal("Cannot find self executable name")
	}
	cmd := exec.Command(
		os.Args[0],
		"-zktest.reattach",
		"-zktest.rundir", s.zkTestRoot,
		"-zktest.stop="+fmt.Sprint(abort),
		"-test.run", "StartNonChildServer",
	)
	r, err := cmd.StdoutPipe()
	c.Assert(err, IsNil)
	defer r.Close()
	// Route stderr into the same pipe so that everything the child
	// prints is seen (and logged) by the loop below.
	cmd.Stderr = cmd.Stdout
	if err := cmd.Start(); err != nil {
		c.Fatalf("cannot start re-entrant gotest process: %v", err)
	}
	defer cmd.Wait()
	bio := bufio.NewReader(r)
	done := false
	for {
		// ReadString is used rather than ReadSlice so that a line longer
		// than the reader's buffer does not end the loop with
		// bufio.ErrBufferFull.
		line, err := bio.ReadString('\n')
		if err != nil {
			if !done {
				c.Fatalf("indirect server status line not found: %v", err)
			}
			return
		}
		// A nil error guarantees that line ends with the delimiter.
		text := line[:len(line)-1]
		switch {
		case strings.HasPrefix(text, errorPrefix):
			// Strip the whole status prefix so that only the child's
			// error message is reported.
			c.Fatalf("indirect server error: %s", text[len(errorPrefix):])
		case text == "zktest:done":
			done = true
		default:
			// Log output that doesn't match what we're expecting - it
			// can be informative.
			c.Logf("subcommand: %s", text)
		}
	}
}

// startServer attaches to the ZooKeeper server whose run directory is
// runDir and starts it. When abort is true it then waits half a second
// and kills the server process. Each failing step is reported as an
// error naming that step.
func startServer(runDir string, abort bool) error {
	server, attachErr := zk.AttachServer(runDir)
	if attachErr != nil {
		return fmt.Errorf("cannot attach to server at %q: %v", runDir, attachErr)
	}
	if startErr := server.Start(); startErr != nil {
		return fmt.Errorf("cannot start server: %v", startErr)
	}
	if !abort {
		return nil
	}

	// Let the server come up before killing its process abnormally,
	// which leaves the pid.txt file behind.
	time.Sleep(time.Second / 2)
	proc, procErr := server.Process()
	if procErr != nil {
		return fmt.Errorf("cannot get server process: %v", procErr)
	}
	defer proc.Release()
	if killErr := proc.Kill(); killErr != nil {
		return fmt.Errorf("cannot kill server process: %v", killErr)
	}
	return nil
}

// checkCookie obtains a connection from s.init, reads the
// /testAttachCookie node, asserts that the read succeeded and that the
// node holds the expected value, and then closes the connection.
func (s *S) checkCookie(c *C) {
	zkConn, _ := s.init(c)
	value, _, getErr := zkConn.Get("/testAttachCookie")
	c.Assert(getErr, IsNil)
	c.Assert(value, Equals, "testAttachCookie")
	zkConn.Close()
}

// TestAttachServer exercises zk.AttachServer against the test server's
// run directory. It creates a cookie node, stops the suite's server,
// and then runs the attach scenarios with servers started both directly
// and indirectly, stopped normally and killed abnormally. Finally it
// attaches and starts a server again, stores it back in s.zkServer, and
// deletes the cookie.
//
// cases to test:
// child server, stopped normally; reattach, start
// non-direct child server, killed abnormally; reattach, start (->error), remove pid.txt; start
// non-direct child server, still running; reattach, start (->error), stop, start
// child server, still running; reattach, start (-> error)
// child server, still running; reattach, stop, start.
// non-direct child server, still running; reattach, stop, start.
func (s *S) TestAttachServer(c *C) {
	// Create a cookie so that we know we are reattaching to the same instance.
	conn, _ := s.init(c)
	_, err := conn.Create("/testAttachCookie", "testAttachCookie", 0, zk.WorldACL(zk.PERM_ALL))
	c.Assert(err, IsNil)
	s.checkCookie(c)

	// The scenarios below start their own servers on the same run
	// directory, so the suite's server has to be stopped first. Check
	// the result like every other Stop in this file rather than
	// dropping the error.
	err = s.zkServer.Stop()
	c.Assert(err, IsNil)
	s.zkServer = nil

	s.testAttachServer(c, (*S).startServer)
	s.testAttachServer(c, (*S).startServerIndirect)
	s.testAttachServerAbnormalTerminate(c, (*S).startServer)
	s.testAttachServerAbnormalTerminate(c, (*S).startServerIndirect)

	// Re-attach and start a server again, storing it back in s.zkServer.
	srv, err := zk.AttachServer(s.zkTestRoot)
	c.Assert(err, IsNil)

	s.zkServer = srv
	err = s.zkServer.Start()
	c.Assert(err, IsNil)

	// Remove the cookie created at the top of the test.
	conn, _ = s.init(c)
	err = conn.Delete("/testAttachCookie", -1)
	c.Assert(err, IsNil)
}

// testAttachServer starts a server using start (with abort=false) and
// verifies the cookie. It then attaches to the same run directory and
// checks that Start fails while the server is running, that Stop
// followed by Start succeeds with the cookie still present, and
// finally stops the server again.
func (s *S) testAttachServer(c *C, start func(*S, *C, bool)) {
	start(s, c, false)
	s.checkCookie(c)

	// The server is still running, so starting it through a newly
	// attached handle should fail.
	attached, attachErr := zk.AttachServer(s.zkTestRoot)
	c.Assert(attachErr, IsNil)
	c.Assert(attached.Start(), NotNil)

	// Once stopped, it should be possible to start it again.
	c.Assert(attached.Stop(), IsNil)
	c.Assert(attached.Start(), IsNil)
	s.checkCookie(c)

	c.Assert(attached.Stop(), IsNil)
}

// testAttachServerAbnormalTerminate starts a server using start with
// abort=true, so that it is killed abnormally. It then attaches to the
// run directory and checks that Start fails, that Stop followed by
// Start succeeds with the cookie still present, and finally stops the
// server again.
func (s *S) testAttachServerAbnormalTerminate(c *C, start func(*S, *C, bool)) {
	start(s, c, true)

	// Starting through a newly attached handle should fail, because
	// pid.txt won't have been removed.
	attached, attachErr := zk.AttachServer(s.zkTestRoot)
	c.Assert(attachErr, IsNil)
	c.Assert(attached.Start(), NotNil)

	// A Stop should bring things back to normal, so that the next
	// Start succeeds.
	c.Assert(attached.Stop(), IsNil)
	c.Assert(attached.Start(), IsNil)
	s.checkCookie(c)

	c.Assert(attached.Stop(), IsNil)
}