1
// Copyright 2012, 2013 Canonical Ltd.
2
// Licensed under the AGPLv3, see LICENCE file for details.
12
"labix.org/v2/mgo/txn"
14
"launchpad.net/juju-core/constraints"
15
"launchpad.net/juju-core/errors"
16
"launchpad.net/juju-core/instance"
17
"launchpad.net/juju-core/state/api/params"
18
"launchpad.net/juju-core/state/presence"
19
"launchpad.net/juju-core/utils"
22
// Machine represents the state of a machine.
29
// MachineJob values define responsibilities that machines may be
30
// expected to fulfil.
40
var jobNames = []params.MachineJob{
41
JobHostUnits: params.JobHostUnits,
42
JobManageEnviron: params.JobManageEnviron,
43
JobManageState: params.JobManageState,
46
func (job MachineJob) String() string {
48
if j <= 0 || j >= len(jobNames) {
49
return fmt.Sprintf("<unknown job %d>", j)
51
return string(jobNames[j])
54
// machineDoc represents the internal state of a machine in MongoDB.
55
// Note the correspondence with MachineInfo in state/api/params.
56
type machineDoc struct {
57
Id string `bson:"_id"`
63
Tools *Tools `bson:",omitempty"`
64
TxnRevno int64 `bson:"txn-revno"`
68
// Deprecated. InstanceId now lives on instanceData.
69
// This attribute is retained so that data from existing machines can be read.
71
// TODO(wallyworld): remove this attribute when schema upgrades are possible.
72
InstanceId instance.Id
75
func newMachine(st *State, doc *machineDoc) *Machine {
80
machine.annotator = annotator{
81
globalKey: machine.globalKey(),
88
// Id returns the machine id.
89
func (m *Machine) Id() string {
93
// Series returns the operating system series running on the machine.
94
func (m *Machine) Series() string {
98
// ContainerType returns the type of container hosting this machine.
99
func (m *Machine) ContainerType() instance.ContainerType {
100
return instance.ContainerType(m.doc.ContainerType)
103
// machineGlobalKey returns the global database key for the identified machine.
104
func machineGlobalKey(id string) string {
108
// globalKey returns the global database key for the machine.
109
func (m *Machine) globalKey() string {
110
return machineGlobalKey(m.doc.Id)
113
// instanceData holds attributes relevant to a provisioned machine.
114
type instanceData struct {
115
Id string `bson:"_id"`
116
InstanceId instance.Id `bson:"instanceid"`
117
Arch *string `bson:"arch,omitempty"`
118
Mem *uint64 `bson:"mem,omitempty"`
119
CpuCores *uint64 `bson:"cpucores,omitempty"`
120
CpuPower *uint64 `bson:"cpupower,omitempty"`
121
TxnRevno int64 `bson:"txn-revno"`
124
// TODO(wallyworld): move this method to a service.
125
func (m *Machine) HardwareCharacteristics() (*instance.HardwareCharacteristics, error) {
126
hc := &instance.HardwareCharacteristics{}
127
instData, err := getInstanceData(m.st, m.Id())
131
hc.Arch = instData.Arch
132
hc.Mem = instData.Mem
133
hc.CpuCores = instData.CpuCores
134
hc.CpuPower = instData.CpuPower
138
func getInstanceData(st *State, id string) (instanceData, error) {
139
var instData instanceData
140
err := st.instanceData.FindId(id).One(&instData)
141
if err == mgo.ErrNotFound {
142
return instanceData{}, errors.NotFoundf("instance data for machine %v", id)
145
return instanceData{}, fmt.Errorf("cannot get instance data for machine %v: %v", id, err)
150
// machineTagPrefix is the prefix that MachineTag prepends to a machine id to
// form its tag, and that MachineIdFromTag checks for and strips off again.
const machineTagPrefix = "machine-"
152
// MachineTag returns the tag for the
153
// machine with the given id.
154
func MachineTag(id string) string {
155
tag := fmt.Sprintf("%s%s", machineTagPrefix, id)
156
// Containers require "/" to be replaced by "-".
157
tag = strings.Replace(tag, "/", "-", -1)
161
// MachineIdFromTag returns the machine id that was used to create the tag.
162
func MachineIdFromTag(tag string) string {
163
// TODO(dimitern): Possibly change this to return (string, error),
164
// so the case below can be reported.
165
if !strings.HasPrefix(tag, machineTagPrefix) {
168
// Strip off the "machine-" prefix.
169
id := tag[len(machineTagPrefix):]
170
// Put the slashes back.
171
id = strings.Replace(id, "-", "/", -1)
175
// Tag returns a name identifying the machine that is safe to use
176
// as a file name. The returned name will be different from other
177
// Tag values returned by any other entities from the same state.
178
func (m *Machine) Tag() string {
179
return MachineTag(m.Id())
182
// Life returns whether the machine is Alive, Dying or Dead.
183
func (m *Machine) Life() Life {
187
// Jobs returns the responsibilities that must be fulfilled by m's agent.
188
func (m *Machine) Jobs() []MachineJob {
192
// AgentTools returns the tools that the agent is currently running.
193
// It returns an error that satisfies IsNotFoundError if the tools have not yet been set.
194
func (m *Machine) AgentTools() (*Tools, error) {
195
if m.doc.Tools == nil {
196
return nil, errors.NotFoundf("agent tools for machine %v", m)
198
tools := *m.doc.Tools
202
// SetAgentTools sets the tools that the agent is currently running.
203
func (m *Machine) SetAgentTools(t *Tools) (err error) {
204
defer utils.ErrorContextf(&err, "cannot set agent tools for machine %v", m)
205
if t.Series == "" || t.Arch == "" {
206
return fmt.Errorf("empty series or arch")
209
C: m.st.machines.Name,
212
Update: D{{"$set", D{{"tools", t}}}},
214
if err := m.st.runTransaction(ops); err != nil {
215
return onAbort(err, errDead)
222
// SetMongoPassword sets the password the agent responsible for the machine
223
// should use to communicate with the state servers. Previous passwords
225
func (m *Machine) SetMongoPassword(password string) error {
226
return m.st.setMongoPassword(m.Tag(), password)
229
// SetPassword sets the password for the machine's agent.
230
func (m *Machine) SetPassword(password string) error {
231
hp := utils.PasswordHash(password)
233
C: m.st.machines.Name,
236
Update: D{{"$set", D{{"passwordhash", hp}}}},
238
if err := m.st.runTransaction(ops); err != nil {
239
return fmt.Errorf("cannot set password of machine %v: %v", m, onAbort(err, errDead))
241
m.doc.PasswordHash = hp
245
// PasswordValid returns whether the given password is valid
246
// for the given machine.
247
func (m *Machine) PasswordValid(password string) bool {
248
return utils.PasswordHash(password) == m.doc.PasswordHash
251
// Destroy sets the machine lifecycle to Dying if it is Alive. It does
252
// nothing otherwise. Destroy will fail if the machine has principal
253
// units assigned, or if the machine has JobManageEnviron.
254
// If the machine has assigned units, Destroy will return
255
// a HasAssignedUnitsError.
256
func (m *Machine) Destroy() error {
257
return m.advanceLifecycle(Dying)
260
// EnsureDead sets the machine lifecycle to Dead if it is Alive or Dying.
261
// It does nothing otherwise. EnsureDead will fail if the machine has
262
// principal units assigned, or if the machine has JobManageEnviron.
263
// If the machine has assigned units, EnsureDead will return
264
// a HasAssignedUnitsError.
265
func (m *Machine) EnsureDead() error {
266
return m.advanceLifecycle(Dead)
269
type HasAssignedUnitsError struct {
274
func (e *HasAssignedUnitsError) Error() string {
275
return fmt.Sprintf("machine %s has unit %q assigned", e.MachineId, e.UnitNames[0])
278
func IsHasAssignedUnitsError(err error) bool {
279
_, ok := err.(*HasAssignedUnitsError)
283
// Containers returns the container ids belonging to a parent machine.
284
// TODO(wallyworld): move this method to a service
285
func (m *Machine) Containers() ([]string, error) {
286
var mc machineContainers
287
err := m.st.containerRefs.FindId(m.Id()).One(&mc)
289
return mc.Children, nil
291
if err == mgo.ErrNotFound {
292
return nil, errors.NotFoundf("container info for machine %v", m.Id())
297
// ParentId returns the Id of the host machine if this machine is a container.
298
func (m *Machine) ParentId() (string, bool) {
299
parentId := ParentId(m.Id())
300
return parentId, parentId != ""
303
type HasContainersError struct {
305
ContainerIds []string
308
func (e *HasContainersError) Error() string {
309
return fmt.Sprintf("machine %s is hosting containers %q", e.MachineId, strings.Join(e.ContainerIds, ","))
312
func IsHasContainersError(err error) bool {
313
_, ok := err.(*HasContainersError)
317
// advanceLifecycle ensures that the machine's lifecycle is no earlier
318
// than the supplied value. If the machine already has that lifecycle
319
// value, or a later one, no changes will be made to remote state. If
320
// the machine has any responsibilities that preclude a valid change in
321
// lifecycle, it will return an error.
322
func (original *Machine) advanceLifecycle(life Life) (err error) {
323
containers, err := original.Containers()
327
if len(containers) > 0 {
328
return &HasContainersError{
329
MachineId: original.doc.Id,
330
ContainerIds: containers,
336
// The machine's lifecycle is known to have advanced; it may be
337
// known to have already advanced further than requested, in
338
// which case we set the latest known valid value.
341
} else if m.doc.Life > life {
344
original.doc.Life = life
349
C: m.st.machines.Name,
351
Update: D{{"$set", D{{"life", life}}}},
354
{"jobs", D{{"$nin", []MachineJob{JobManageEnviron}}}},
356
{{"principals", D{{"$size", 0}}}},
357
{{"principals", D{{"$exists", false}}}},
360
// 3 attempts: one with original data, one with refreshed data, and a final
361
// one intended to determine the cause of failure of the preceding attempt.
362
for i := 0; i < 3; i++ {
363
// If the transaction was aborted, grab a fresh copy of the machine data.
364
// We don't write to original, because the expectation is that state-
365
// changing methods only set the requested change on the receiver; a case
366
// could perhaps be made that this is not a helpful convention in the
367
// context of the new state API, but we maintain consistency in the
368
// face of uncertainty.
370
if m, err = m.st.Machine(m.doc.Id); errors.IsNotFoundError(err) {
372
} else if err != nil {
376
// Check that the life change is sane, and collect the assertions
377
// necessary to determine that it remains so.
380
if m.doc.Life != Alive {
383
op.Assert = append(advanceAsserts, isAliveDoc...)
385
if m.doc.Life == Dead {
388
op.Assert = append(advanceAsserts, notDeadDoc...)
390
panic(fmt.Errorf("cannot advance lifecycle to %v", life))
392
// Check that the machine does not have any responsibilities that
393
// prevent a lifecycle change.
394
for _, j := range m.doc.Jobs {
395
if j == JobManageEnviron {
396
// (NOTE: When we enable multiple JobManageEnviron machines,
397
// the restriction will become "there must be at least one
398
// machine with this job".)
399
return fmt.Errorf("machine %s is required by the environment", m.doc.Id)
402
if len(m.doc.Principals) != 0 {
403
return &HasAssignedUnitsError{
405
UnitNames: m.doc.Principals,
408
// Run the transaction...
409
if err := m.st.runTransaction([]txn.Op{op}); err != txn.ErrAborted {
412
// ...and retry on abort.
414
// In very rare circumstances, the final iteration above will have determined
415
// no cause of failure, and attempted a final transaction: if this also failed,
416
// we can be sure that the machine document is changing very fast, in a somewhat
417
// surprising fashion, and that it is sensible to back off for now.
418
return fmt.Errorf("machine %s cannot advance lifecycle: %v", m, ErrExcessiveContention)
421
// Remove removes the machine from state. It will fail if the machine is not
423
func (m *Machine) Remove() (err error) {
424
defer utils.ErrorContextf(&err, "cannot remove machine %s", m.doc.Id)
425
if m.doc.Life != Dead {
426
return fmt.Errorf("machine is not dead")
430
C: m.st.machines.Name,
432
Assert: txn.DocExists,
436
C: m.st.instanceData.Name,
440
removeStatusOp(m.st, m.globalKey()),
441
removeConstraintsOp(m.st, m.globalKey()),
442
annotationRemoveOp(m.st, m.globalKey()),
444
ops = append(ops, removeContainerRefOps(m.st, m.Id())...)
445
// The only abort conditions in play indicate that the machine has already
447
return onAbort(m.st.runTransaction(ops), nil)
450
// Refresh refreshes the contents of the machine from the underlying
451
// state. It returns an error that satisfies IsNotFoundError if the machine has
453
func (m *Machine) Refresh() error {
455
err := m.st.machines.FindId(m.doc.Id).One(&doc)
456
if err == mgo.ErrNotFound {
457
return errors.NotFoundf("machine %v", m)
460
return fmt.Errorf("cannot refresh machine %v: %v", m, err)
466
// AgentAlive returns whether the respective remote agent is alive.
467
func (m *Machine) AgentAlive() (bool, error) {
468
return m.st.pwatcher.Alive(m.globalKey())
471
// WaitAgentAlive blocks until the respective agent is alive.
472
func (m *Machine) WaitAgentAlive(timeout time.Duration) (err error) {
473
defer utils.ErrorContextf(&err, "waiting for agent of machine %v", m)
474
ch := make(chan presence.Change)
475
m.st.pwatcher.Watch(m.globalKey(), ch)
476
defer m.st.pwatcher.Unwatch(m.globalKey(), ch)
477
for i := 0; i < 2; i++ {
483
case <-time.After(timeout):
484
return fmt.Errorf("still not alive after timeout")
485
case <-m.st.pwatcher.Dead():
486
return m.st.pwatcher.Err()
489
panic(fmt.Sprintf("presence reported dead status twice in a row for machine %v", m))
492
// SetAgentAlive signals that the agent for machine m is alive.
493
// It returns the started pinger.
494
func (m *Machine) SetAgentAlive() (*presence.Pinger, error) {
495
p := presence.NewPinger(m.st.presence, m.globalKey())
503
// InstanceId returns the provider specific instance id for this machine
504
// or a NotProvisionedError if it has not been set.
505
func (m *Machine) InstanceId() (instance.Id, error) {
507
// TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible
508
// (we first check for InstanceId stored on the machineDoc)
509
if m.doc.InstanceId != "" {
510
return m.doc.InstanceId, nil
512
instData, err := getInstanceData(m.st, m.Id())
513
if (err == nil && instData.InstanceId == "") || (err != nil && errors.IsNotFoundError(err)) {
514
err = &NotProvisionedError{m.Id()}
519
return instData.InstanceId, nil
522
// Units returns all the units that have been assigned to the machine.
523
func (m *Machine) Units() (units []*Unit, err error) {
524
defer utils.ErrorContextf(&err, "cannot get units assigned to machine %v", m)
525
pudocs := []unitDoc{}
526
err = m.st.units.Find(D{{"machineid", m.doc.Id}}).All(&pudocs)
530
for _, pudoc := range pudocs {
531
units = append(units, newUnit(m.st, &pudoc))
533
err = m.st.units.Find(D{{"principal", pudoc.Name}}).All(&docs)
537
for _, doc := range docs {
538
units = append(units, newUnit(m.st, &doc))
544
// SetProvisioned sets the provider specific instance id, nonce and hardware characteristics for
545
// this machine. Once set, the instance id cannot be changed.
546
func (m *Machine) SetProvisioned(id instance.Id, nonce string, characteristics *instance.HardwareCharacteristics) (err error) {
547
defer utils.ErrorContextf(&err, "cannot set instance data for machine %q", m)
549
if id == "" || nonce == "" {
550
return fmt.Errorf("instance id and nonce cannot be empty")
553
if characteristics == nil {
554
characteristics = &instance.HardwareCharacteristics{}
559
Arch: characteristics.Arch,
560
Mem: characteristics.Mem,
561
CpuCores: characteristics.CpuCores,
562
CpuPower: characteristics.CpuPower,
565
// TODO(wallyworld) - do not check instanceId on machineDoc after schema is upgraded
566
notSetYet := D{{"instanceid", ""}, {"nonce", ""}}
569
C: m.st.machines.Name,
571
Assert: append(isAliveDoc, notSetYet...),
572
Update: D{{"$set", D{{"instanceid", id}, {"nonce", nonce}}}},
574
C: m.st.instanceData.Name,
576
Assert: txn.DocMissing,
581
if err = m.st.runTransaction(ops); err == nil {
584
// TODO(wallyworld) - remove this backward compatibility code when schema upgrades are possible
585
// (InstanceId is stored on the instanceData document but we duplicate the value on the machineDoc.)
586
m.doc.InstanceId = id
588
} else if err != txn.ErrAborted {
590
} else if alive, err := isAlive(m.st.machines, m.doc.Id); err != nil {
595
return fmt.Errorf("already set")
598
// NotProvisionedError records an error when a machine is not provisioned.
599
type NotProvisionedError struct {
603
// IsNotProvisionedError returns true if err is a NotProvisionedError.
604
func IsNotProvisionedError(err error) bool {
605
if _, ok := err.(*NotProvisionedError); ok {
611
func (e *NotProvisionedError) Error() string {
612
return fmt.Sprintf("machine %v is not provisioned", e.machineId)
615
// CheckProvisioned returns true if the machine was provisioned with the given nonce.
616
func (m *Machine) CheckProvisioned(nonce string) bool {
617
return nonce == m.doc.Nonce && nonce != ""
620
// String returns a unique description of this machine.
621
func (m *Machine) String() string {
625
// Constraints returns the exact constraints that should apply when provisioning
626
// an instance for the machine.
627
func (m *Machine) Constraints() (constraints.Value, error) {
628
return readConstraints(m.st, m.globalKey())
631
// SetConstraints sets the exact constraints to apply when provisioning an
632
// instance for the machine. It will fail if the machine is Dead, or if it
633
// is already provisioned.
634
func (m *Machine) SetConstraints(cons constraints.Value) (err error) {
635
defer utils.ErrorContextf(&err, "cannot set constraints")
636
notSetYet := D{{"nonce", ""}}
639
C: m.st.machines.Name,
641
Assert: append(isAliveDoc, notSetYet...),
643
setConstraintsOp(m.st, m.globalKey(), cons),
645
// 3 attempts is enough to push the ErrExcessiveContention case out of the
646
// realm of plausibility: it implies local state indicating unprovisioned,
647
// and remote state indicating provisioned (reasonable); but which changes
648
// back to unprovisioned and then to provisioned again with *very* specific
649
// timing in the course of this loop.
650
for i := 0; i < 3; i++ {
651
if m.doc.Life != Alive {
654
if _, err := m.InstanceId(); err == nil {
655
return fmt.Errorf("machine is already provisioned")
656
} else if !IsNotProvisionedError(err) {
659
if err := m.st.runTransaction(ops); err != txn.ErrAborted {
662
if m, err = m.st.Machine(m.doc.Id); err != nil {
666
return ErrExcessiveContention
669
// Status returns the status of the machine.
670
func (m *Machine) Status() (status params.Status, info string, err error) {
671
doc, err := getStatus(m.st, m.globalKey())
676
info = doc.StatusInfo
680
// SetStatus sets the status of the machine.
681
func (m *Machine) SetStatus(status params.Status, info string) error {
682
if status == params.StatusError && info == "" {
683
panic("machine error status with no info")
685
if status == params.StatusPending {
686
panic("machine status cannot be set to pending")
693
C: m.st.machines.Name,
697
updateStatusOp(m.st, m.globalKey(), doc),
699
if err := m.st.runTransaction(ops); err != nil {
700
return fmt.Errorf("cannot set status of machine %q: %v", m, onAbort(err, errNotAlive))
705
// Clean returns true if the machine does not have any deployed units or containers.
706
func (m *Machine) Clean() bool {