1
// Copyright 2014 Canonical Ltd.
2
// Licensed under the AGPLv3, see LICENCE file for details.
9
"github.com/juju/errors"
10
"gopkg.in/juju/charm.v6-unstable"
11
"gopkg.in/juju/names.v2"
13
"gopkg.in/mgo.v2/bson"
17
// cleanupKind identifies the kind of work a cleanup document asks for. It is
// the type of the cleanup* constants, the kind argument of newCleanupOp, and
// the key type of cleanupHandlers.
type cleanupKind string
20
// SCHEMACHANGE: the names are expressive, the values not so much.
21
cleanupRelationSettings cleanupKind = "settings"
22
cleanupUnitsForDyingService cleanupKind = "units"
23
cleanupCharmForDyingService cleanupKind = "charm"
24
cleanupDyingUnit cleanupKind = "dyingUnit"
25
cleanupRemovedUnit cleanupKind = "removedUnit"
26
cleanupServicesForDyingModel cleanupKind = "applications"
27
cleanupDyingMachine cleanupKind = "dyingMachine"
28
cleanupForceDestroyedMachine cleanupKind = "machine"
29
cleanupAttachmentsForDyingStorage cleanupKind = "storageAttachments"
30
cleanupAttachmentsForDyingVolume cleanupKind = "volumeAttachments"
31
cleanupAttachmentsForDyingFilesystem cleanupKind = "filesystemAttachments"
32
cleanupModelsForDyingController cleanupKind = "models"
33
cleanupMachinesForDyingModel cleanupKind = "modelMachines"
36
// cleanupDoc represents a potentially large set of documents that should be
38
type cleanupDoc struct {
39
DocID string `bson:"_id"`
40
ModelUUID string `bson:"model-uuid"`
45
// newCleanupOp returns a txn.Op that creates a cleanup document with a unique
46
// id and the supplied kind and prefix.
47
func (st *State) newCleanupOp(kind cleanupKind, prefix string) txn.Op {
49
DocID: st.docID(fmt.Sprint(bson.NewObjectId())),
50
ModelUUID: st.ModelUUID(),
61
// NeedsCleanup returns true if documents previously marked for removal exist.
62
func (st *State) NeedsCleanup() (bool, error) {
63
cleanups, closer := st.getCollection(cleanupsC)
65
count, err := cleanups.Count()
72
// Cleanup removes all documents that were previously marked for removal, if
73
// any such exist. It should be called periodically by at least one element
75
func (st *State) Cleanup() (err error) {
77
cleanups, closer := st.getCollection(cleanupsC)
79
iter := cleanups.Find(nil).Iter()
80
defer closeIter(iter, &err, "reading cleanup document")
83
logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix)
85
case cleanupRelationSettings:
86
err = st.cleanupRelationSettings(doc.Prefix)
87
case cleanupCharmForDyingService:
88
err = st.cleanupCharmForDyingService(doc.Prefix)
89
case cleanupUnitsForDyingService:
90
err = st.cleanupUnitsForDyingService(doc.Prefix)
91
case cleanupDyingUnit:
92
err = st.cleanupDyingUnit(doc.Prefix)
93
case cleanupRemovedUnit:
94
err = st.cleanupRemovedUnit(doc.Prefix)
95
case cleanupServicesForDyingModel:
96
err = st.cleanupServicesForDyingModel()
97
case cleanupDyingMachine:
98
err = st.cleanupDyingMachine(doc.Prefix)
99
case cleanupForceDestroyedMachine:
100
err = st.cleanupForceDestroyedMachine(doc.Prefix)
101
case cleanupAttachmentsForDyingStorage:
102
err = st.cleanupAttachmentsForDyingStorage(doc.Prefix)
103
case cleanupAttachmentsForDyingVolume:
104
err = st.cleanupAttachmentsForDyingVolume(doc.Prefix)
105
case cleanupAttachmentsForDyingFilesystem:
106
err = st.cleanupAttachmentsForDyingFilesystem(doc.Prefix)
107
case cleanupModelsForDyingController:
108
err = st.cleanupModelsForDyingController()
109
case cleanupMachinesForDyingModel:
110
err = st.cleanupMachinesForDyingModel()
112
handler, ok := cleanupHandlers[doc.Kind]
114
err = errors.Errorf("unknown cleanup kind %q", doc.Kind)
116
persist := st.newPersistence()
117
err = handler(st, persist, doc.Prefix)
121
logger.Errorf("cleanup failed: %v", err)
129
if err := st.runTransaction(ops); err != nil {
130
return errors.Annotate(err, "cannot remove empty cleanup document")
136
// CleanupHandler is a function that state may call during cleanup
137
// to perform cleanup actions for some cleanup type.
138
// Cleanup calls the handler as handler(st, persist, doc.Prefix), where
// persist comes from st.newPersistence() and doc is the cleanup document
// currently being processed.
type CleanupHandler func(st *State, persist Persistence, prefix string) error
140
// cleanupHandlers holds the CleanupHandler registered for each cleanupKind.
// RegisterCleanupHandler adds entries, and Cleanup looks a handler up by the
// cleanup document's Kind.
var cleanupHandlers = make(map[cleanupKind]CleanupHandler)
142
// RegisterCleanupHandler identifies the handler to use for a given
144
func RegisterCleanupHandler(kindStr string, handler CleanupHandler) error {
145
kind := cleanupKind(kindStr)
146
if _, ok := cleanupHandlers[kind]; ok {
147
return errors.NewAlreadyExists(nil, fmt.Sprintf("cleanup handler for %q already registered", kindStr))
149
cleanupHandlers[kind] = handler
153
func (st *State) cleanupRelationSettings(prefix string) error {
154
settings, closer := st.getCollection(settingsC)
156
// Documents marked for cleanup are not otherwise referenced in the
157
// system, and will not be under watch, and are therefore safe to
159
settingsW := settings.Writeable()
161
sel := bson.D{{"_id", bson.D{{"$regex", "^" + st.docID(prefix)}}}}
162
if count, err := settingsW.Find(sel).Count(); err != nil {
163
return fmt.Errorf("cannot detect cleanup targets: %v", err)
164
} else if count != 0 {
165
if _, err := settingsW.RemoveAll(sel); err != nil {
166
return fmt.Errorf("cannot remove documents marked for cleanup: %v", err)
172
// cleanupModelsForDyingController sets all models to Dying, if
173
// they are not already Dying or Dead. It's expected to be used when a
174
// controller is destroyed.
175
func (st *State) cleanupModelsForDyingController() (err error) {
176
models, err := st.AllModels()
178
return errors.Trace(err)
180
for _, model := range models {
181
if err := model.Destroy(); err != nil {
182
return errors.Trace(err)
188
// cleanupMachinesForDyingModel sets all non-manager, non-manual
189
// machines to Dying, if they are not already Dying or Dead. It's expected to
190
// be used when a model is destroyed.
191
func (st *State) cleanupMachinesForDyingModel() (err error) {
192
// This won't miss machines, because a Dying model cannot have
193
// machines added to it. But we do have to remove the machines themselves
194
// via individual transactions, because they could be in any state at all.
195
machines, err := st.AllMachines()
197
return errors.Trace(err)
199
for _, m := range machines {
203
if _, isContainer := m.ParentId(); isContainer {
206
manual, err := m.IsManual()
212
err = m.ForceDestroy()
214
return errors.Trace(err)
220
// cleanupServicesForDyingModel sets all services to Dying, if they are
221
// not already Dying or Dead. It's expected to be used when a model is
223
func (st *State) cleanupServicesForDyingModel() (err error) {
224
// This won't miss services, because a Dying model cannot have
225
// services added to it. But we do have to remove the services themselves
226
// via individual transactions, because they could be in any state at all.
227
applications, closer := st.getCollection(applicationsC)
229
application := Application{st: st}
230
sel := bson.D{{"life", Alive}}
231
iter := applications.Find(sel).Iter()
232
defer closeIter(iter, &err, "reading service document")
233
for iter.Next(&application.doc) {
234
if err := application.Destroy(); err != nil {
241
// cleanupUnitsForDyingService sets all units of the given application to Dying,
242
// if they are not already Dying or Dead. It's expected to be used when a
243
// service is destroyed.
244
func (st *State) cleanupUnitsForDyingService(applicationname string) (err error) {
245
// This won't miss units, because a Dying service cannot have units added
246
// to it. But we do have to remove the units themselves via individual
247
// transactions, because they could be in any state at all.
248
units, closer := st.getCollection(unitsC)
252
sel := bson.D{{"application", applicationname}, {"life", Alive}}
253
iter := units.Find(sel).Iter()
254
defer closeIter(iter, &err, "reading unit document")
255
for iter.Next(&unit.doc) {
256
if err := unit.Destroy(); err != nil {
263
func (st *State) cleanupCharmForDyingService(charmURL string) error {
264
curl, err := charm.ParseURL(charmURL)
266
return errors.Annotatef(err, "invalid charm URL %v", charmURL)
268
ch, err := st.Charm(curl)
269
if errors.IsNotFound(err) {
270
// Charm already removed.
274
return errors.Annotate(err, "cannot read charm record from state")
276
if err := st.deleteCharmArchive(curl, ch.StoragePath()); err != nil && !errors.IsNotFound(err) {
277
return errors.Annotate(err, "cannot remove charm archive from storage")
282
// cleanupDyingUnit marks resources owned by the unit as dying, to ensure
283
// they are cleaned up as well.
284
func (st *State) cleanupDyingUnit(name string) error {
285
unit, err := st.Unit(name)
286
if errors.IsNotFound(err) {
288
} else if err != nil {
291
// Mark the unit as departing from its joined relations, allowing
292
// related units to start converging to a state in which that unit
293
// is gone as quickly as possible.
294
relations, err := unit.RelationsJoined()
298
for _, relation := range relations {
299
relationUnit, err := relation.Unit(unit)
300
if errors.IsNotFound(err) {
302
} else if err != nil {
305
if err := relationUnit.PrepareLeaveScope(); err != nil {
309
// Mark storage attachments as dying, so that they are detached
310
// and removed from state, allowing the unit to terminate.
311
return st.cleanupUnitStorageAttachments(unit.UnitTag(), false)
314
func (st *State) cleanupUnitStorageAttachments(unitTag names.UnitTag, remove bool) error {
315
storageAttachments, err := st.UnitStorageAttachments(unitTag)
319
for _, storageAttachment := range storageAttachments {
320
storageTag := storageAttachment.StorageInstance()
321
err := st.DestroyStorageAttachment(storageTag, unitTag)
322
if errors.IsNotFound(err) {
324
} else if err != nil {
330
err = st.RemoveStorageAttachment(storageTag, unitTag)
331
if errors.IsNotFound(err) {
333
} else if err != nil {
340
// cleanupRemovedUnit takes care of all the final cleanup required when
341
// a unit is removed.
342
func (st *State) cleanupRemovedUnit(unitId string) error {
343
actions, err := st.matchingActionsByReceiverId(unitId)
345
return errors.Trace(err)
347
cancelled := ActionResults{
348
Status: ActionCancelled,
349
Message: "unit removed",
351
for _, action := range actions {
352
if _, err = action.Finish(cancelled); err != nil {
353
return errors.Trace(err)
357
change := payloadCleanupChange{
360
if err := Apply(st.database, change); err != nil {
361
return errors.Trace(err)
366
// cleanupDyingMachine marks resources owned by the machine as dying, to ensure
367
// they are cleaned up as well.
368
func (st *State) cleanupDyingMachine(machineId string) error {
369
machine, err := st.Machine(machineId)
370
if errors.IsNotFound(err) {
372
} else if err != nil {
375
return cleanupDyingMachineResources(machine)
378
// cleanupForceDestroyedMachine systematically destroys and removes all entities
379
// that depend upon the supplied machine, and removes the machine from state. It's
380
// expected to be used in response to destroy-machine --force.
381
func (st *State) cleanupForceDestroyedMachine(machineId string) error {
382
machine, err := st.Machine(machineId)
383
if errors.IsNotFound(err) {
385
} else if err != nil {
388
// In an ideal world, we'd call machine.Destroy() here, and thus prevent
389
// new dependencies being added while we clean up the ones we know about.
390
// But machine destruction is unsophisticated, and doesn't allow for
391
// destruction while dependencies exist; so we just have to deal with that
392
// possibility below.
393
if err := st.cleanupContainers(machine); err != nil {
396
for _, unitName := range machine.doc.Principals {
397
if err := st.obliterateUnit(unitName); err != nil {
401
if err := cleanupDyingMachineResources(machine); err != nil {
404
// We need to refresh the machine at this point, because the local copy
405
// of the document will not reflect changes caused by the unit cleanups
406
// above, and may thus fail immediately.
407
if err := machine.Refresh(); errors.IsNotFound(err) {
409
} else if err != nil {
412
// TODO(fwereade): 2013-11-11 bug 1250104
413
// If this fails, it's *probably* due to a race in which new dependencies
414
// were added while we cleaned up the old ones. If the cleanup doesn't run
415
// again -- which it *probably* will anyway -- the issue can be resolved by
416
// force-destroying the machine again; that's better than adding layer
417
// upon layer of complication here.
418
if err := machine.EnsureDead(); err != nil {
421
removePortsOps, err := machine.removePortsOps()
425
return st.runTransaction(removePortsOps)
427
// Note that we do *not* remove the machine entirely: we leave it for the
428
// provisioner to clean up, so that we don't end up with an unreferenced
429
// instance that would otherwise be ignored when in provisioner-safe-mode.
432
// cleanupContainers recursively calls cleanupForceDestroyedMachine on the supplied
433
// machine's containers, and removes them from state entirely.
434
func (st *State) cleanupContainers(machine *Machine) error {
435
containerIds, err := machine.Containers()
436
if errors.IsNotFound(err) {
438
} else if err != nil {
441
for _, containerId := range containerIds {
442
if err := st.cleanupForceDestroyedMachine(containerId); err != nil {
445
container, err := st.Machine(containerId)
446
if errors.IsNotFound(err) {
448
} else if err != nil {
451
if err := container.Remove(); err != nil {
458
func cleanupDyingMachineResources(m *Machine) error {
459
volumeAttachments, err := m.st.MachineVolumeAttachments(m.MachineTag())
461
return errors.Annotate(err, "getting machine volume attachments")
463
for _, va := range volumeAttachments {
464
if err := m.st.DetachVolume(va.Machine(), va.Volume()); err != nil {
465
if IsContainsFilesystem(err) {
466
// The volume will be destroyed when the
467
// contained filesystem is removed, whose
468
// destruction is initiated below.
471
return errors.Trace(err)
474
filesystemAttachments, err := m.st.MachineFilesystemAttachments(m.MachineTag())
476
return errors.Annotate(err, "getting machine filesystem attachments")
478
for _, fsa := range filesystemAttachments {
479
if err := m.st.DetachFilesystem(fsa.Machine(), fsa.Filesystem()); err != nil {
480
return errors.Trace(err)
486
// obliterateUnit removes a unit from state completely. It is not safe or
487
// sane to obliterate any unit in isolation; its only reasonable use is in
488
// the context of machine obliteration, in which we can be sure that unclean
489
// shutdown of units is not going to leave a machine in a difficult state.
490
func (st *State) obliterateUnit(unitName string) error {
491
unit, err := st.Unit(unitName)
492
if errors.IsNotFound(err) {
494
} else if err != nil {
497
// Unlike the machine, we *can* always destroy the unit, and (at least)
498
// prevent further dependencies being added. If we're really lucky, the
499
// unit will be removed immediately.
500
if err := unit.Destroy(); err != nil {
501
return errors.Annotatef(err, "cannot destroy unit %q", unitName)
503
if err := unit.Refresh(); errors.IsNotFound(err) {
505
} else if err != nil {
508
// Destroy and remove all storage attachments for the unit.
509
if err := st.cleanupUnitStorageAttachments(unit.UnitTag(), true); err != nil {
510
return errors.Annotatef(err, "cannot destroy storage for unit %q", unitName)
512
for _, subName := range unit.SubordinateNames() {
513
if err := st.obliterateUnit(subName); err != nil {
517
if err := unit.EnsureDead(); err != nil {
523
// cleanupAttachmentsForDyingStorage sets all storage attachments related
524
// to the specified storage instance to Dying, if they are not already Dying
525
// or Dead. It's expected to be used when a storage instance is destroyed.
526
func (st *State) cleanupAttachmentsForDyingStorage(storageId string) (err error) {
527
storageTag := names.NewStorageTag(storageId)
529
// This won't miss attachments, because a Dying storage instance cannot
530
// have attachments added to it. But we do have to remove the attachments
531
// themselves via individual transactions, because they could be in
533
coll, closer := st.getCollection(storageAttachmentsC)
536
var doc storageAttachmentDoc
537
fields := bson.D{{"unitid", 1}}
538
iter := coll.Find(bson.D{{"storageid", storageId}}).Select(fields).Iter()
539
defer closeIter(iter, &err, "reading storage attachment document")
540
for iter.Next(&doc) {
541
unitTag := names.NewUnitTag(doc.Unit)
542
if err := st.DestroyStorageAttachment(storageTag, unitTag); err != nil {
543
return errors.Annotate(err, "destroying storage attachment")
549
// cleanupAttachmentsForDyingVolume sets all volume attachments related
550
// to the specified volume to Dying, if they are not already Dying or
551
// Dead. It's expected to be used when a volume is destroyed.
552
func (st *State) cleanupAttachmentsForDyingVolume(volumeId string) (err error) {
553
volumeTag := names.NewVolumeTag(volumeId)
555
// This won't miss attachments, because a Dying volume cannot have
556
// attachments added to it. But we do have to remove the attachments
557
// themselves via individual transactions, because they could be in
559
coll, closer := st.getCollection(volumeAttachmentsC)
562
var doc volumeAttachmentDoc
563
fields := bson.D{{"machineid", 1}}
564
iter := coll.Find(bson.D{{"volumeid", volumeId}}).Select(fields).Iter()
565
defer closeIter(iter, &err, "reading volume attachment document")
566
for iter.Next(&doc) {
567
machineTag := names.NewMachineTag(doc.Machine)
568
if err := st.DetachVolume(machineTag, volumeTag); err != nil {
569
return errors.Annotate(err, "destroying volume attachment")
575
// cleanupAttachmentsForDyingFilesystem sets all filesystem attachments related
576
// to the specified filesystem to Dying, if they are not already Dying or
577
// Dead. It's expected to be used when a filesystem is destroyed.
578
func (st *State) cleanupAttachmentsForDyingFilesystem(filesystemId string) (err error) {
579
filesystemTag := names.NewFilesystemTag(filesystemId)
581
// This won't miss attachments, because a Dying filesystem cannot have
582
// attachments added to it. But we do have to remove the attachments
583
// themselves via individual transactions, because they could be in
585
coll, closer := st.getCollection(filesystemAttachmentsC)
588
var doc filesystemAttachmentDoc
589
fields := bson.D{{"machineid", 1}}
590
iter := coll.Find(bson.D{{"filesystemid", filesystemId}}).Select(fields).Iter()
591
defer closeIter(iter, &err, "reading filesystem attachment document")
592
for iter.Next(&doc) {
593
machineTag := names.NewMachineTag(doc.Machine)
594
if err := st.DetachFilesystem(machineTag, filesystemTag); err != nil {
595
return errors.Annotate(err, "destroying filesystem attachment")
601
func closeIter(iter *mgo.Iter, errOut *error, message string) {
606
err = errors.Annotate(err, message)
611
logger.Errorf("%v", err)