@@ -326,7 +326,6 @@
         that before). Because of this, it keeps rebalancing until the device
         skew (number of partitions a device wants compared to what it has) gets
         below 1% or doesn't change by more than 1% (only happens with a ring that
-        can't be balanced no matter what -- like with 3 zones of differing
-        weights with replicas set to 3).
+        can't be balanced no matter what).

        :returns: (number_of_partitions_altered, resulting_balance)
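
The resulting_balance in that return value is the worst-case device skew described above. As a rough standalone sketch of how such a metric falls out of the weights (a hypothetical balance_of helper, not the builder's actual method):

    def balance_of(devs, parts, replicas):
        """Worst deviation (in percent) of any device from its fair share."""
        total_weight = sum(d['weight'] for d in devs)
        weight_of_one_part = parts * replicas / total_weight
        worst = 0.0
        for d in devs:
            desired = d['weight'] * weight_of_one_part
            worst = max(worst, abs(100.0 * d['parts'] / desired - 100.0))
        return worst

    devs = [{'weight': 100.0, 'parts': 1024},
            {'weight': 100.0, 'parts': 1034},
            {'weight': 100.0, 'parts': 1014}]
    print(balance_of(devs, parts=1024, replicas=3))  # ~0.98, under the 1% cutoff
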
@@ -346,4 +345,6 @@
         new_parts, removed_part_count = self._adjust_replica2part2dev_size()
         retval += removed_part_count
+        if new_parts or removed_part_count:
+            self._set_parts_wanted()
         self._reassign_parts(new_parts)
         retval += len(new_parts)
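
Calling self._set_parts_wanted() only when the partition rows actually changed makes sense because each device's fair share depends on the total number of assignments. A toy illustration of that dependency (standalone arithmetic with made-up numbers, mirroring the weight_of_one_part idea):

    import math

    parts, total_weight, dev_weight = 1024, 256.0, 100.0
    for replicas in (3, 4):
        weight_of_one_part = parts * replicas / total_weight
        # parts_wanted for this device if it currently holds 1200 partitions:
        # adding a replica row changes its wants even though nothing else did.
        print(int(math.ceil(weight_of_one_part * dev_weight)) - 1200)
    # prints 0, then 400
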
@@ -515,7 +516,17 @@
                 # indicate its strong desire to give up everything it has.
                 dev['parts_wanted'] = -self.parts * self.replicas
             else:
-                dev['parts_wanted'] = \
-                    int(weight_of_one_part * dev['weight']) - dev['parts']
+                dev['parts_wanted'] = (
+                    # Round up here so that every partition ultimately ends up
+                    # assigned to a device.
+                    #
+                    # Imagine 5 partitions to be placed on 4 devices. If we
+                    # didn't use math.ceil() here, each device would have a
+                    # parts_wanted of 1, so 4 partitions would be placed but
+                    # the last would not, probably resulting in a crash. This
+                    # way, some devices end up with leftover parts_wanted, but
+                    # at least every partition ends up somewhere.
+                    int(math.ceil(weight_of_one_part * dev['weight'])) -
+                    dev['parts'])

    def _adjust_replica2part2dev_size(self):
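
The failure mode the new comment describes is easy to reproduce in miniature: 5 partitions, 4 equal devices, 1 replica. A quick standalone check (not builder code):

    import math

    parts, replicas = 5, 1
    weights = [1.0, 1.0, 1.0, 1.0]
    weight_of_one_part = parts * replicas / sum(weights)  # 1.25

    truncated = [int(weight_of_one_part * w) for w in weights]
    rounded_up = [int(math.ceil(weight_of_one_part * w)) for w in weights]
    print(sum(truncated))   # 4 -- the fifth partition has nowhere to go
    print(sum(rounded_up))  # 8 -- leftover wants, but every partition fits
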
@@ -584,6 +595,6 @@
             self._replica2part2dev.append(
                 array('H', (0 for _junk in xrange(desired_length))))

-        return (list(to_assign.iteritems()), removed_replicas)
+        return (to_assign.items(), removed_replicas)

    def _initial_balance(self):
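
A side note on the array('H', ...) rows kept by _adjust_replica2part2dev_size: each replica's partition-to-device mapping is a flat array of unsigned 16-bit integers, so the table costs roughly two bytes per partition per replica, and device ids must fit in 65535. For instance:

    from array import array

    desired_length = 2 ** 16          # one row: partition -> device id
    row = array('H', (0 for _junk in range(desired_length)))
    print(row.itemsize)               # 2 bytes per entry
    print(row.itemsize * len(row))    # 131072 bytes for 65536 partitions
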
@@ -752,9 +763,24 @@
         replicas_to_replace may be shared for multiple
         partitions, so be sure you do not modify it.
         """
+        parts_available_in_tier = defaultdict(int)
         for dev in self._iter_devs():
             dev['sort_key'] = self._sort_key_for(dev)
-            dev['tiers'] = tiers_for_dev(dev)
+            tiers = tiers_for_dev(dev)
+            dev['tiers'] = tiers
+            for tier in tiers:
+                # Note: this represents how many partitions may be assigned to
+                # a given tier (region/zone/server/disk). It does not take
+                # into account how many partitions a given tier wants to shed.
+                #
+                # If we did not do this, we could have a zone where, at some
+                # point during assignment, number-of-parts-to-gain equals
+                # number-of-parts-to-shed. At that point, no further placement
+                # into that zone would occur since its parts_available_in_tier
+                # would be 0. This would happen any time a zone had any device
+                # with partitions to shed, which is any time a device is being
+                # removed, which is a pretty frequent operation.
+                parts_available_in_tier[tier] += max(dev['parts_wanted'], 0)

        available_devs = \
            sorted((d for d in self._iter_devs() if d['weight']),
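
The accumulation above credits a device's spare capacity to every tier prefix it belongs to, while clamping shedding devices to zero. A toy version with tier tuples written out the way tiers_for_dev produces them (all values made up):

    from collections import defaultdict

    def tiers_of(dev):
        # (region, zone, ip, device_id) prefixes, shortest to longest.
        t = (dev['region'], dev['zone'], dev['ip'], dev['id'])
        return [t[:i] for i in range(1, 5)]

    devs = [
        {'region': 1, 'zone': 1, 'ip': '10.0.0.1', 'id': 0, 'parts_wanted': 7},
        {'region': 1, 'zone': 1, 'ip': '10.0.0.1', 'id': 1, 'parts_wanted': -3},
        {'region': 1, 'zone': 2, 'ip': '10.0.0.2', 'id': 2, 'parts_wanted': 5},
    ]

    parts_available_in_tier = defaultdict(int)
    for dev in devs:
        for tier in tiers_of(dev):
            # max(..., 0): a shedding device contributes no capacity, but it
            # does not cancel out its neighbors' capacity either.
            parts_available_in_tier[tier] += max(dev['parts_wanted'], 0)

    print(parts_available_in_tier[(1,)])    # 12: the whole region
    print(parts_available_in_tier[(1, 1)])  # 7: zone 1 (the -3 is clamped)
    print(parts_available_in_tier[(1, 2)])  # 5: zone 2
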
@@ -793,10 +819,10 @@
             # Gather up what other tiers (regions, zones, ip/ports, and
             # devices) the replicas not-to-be-moved are in for this part.
             other_replicas = defaultdict(int)
-            unique_tiers_by_tier_len = defaultdict(set)
+            occupied_tiers_by_tier_len = defaultdict(set)
             for replica in self._replicas_for_part(part):
                 if replica not in replace_replicas:
                     dev = self.devs[self._replica2part2dev[replica][part]]
                     for tier in dev['tiers']:
                         other_replicas[tier] += 1
-                        unique_tiers_by_tier_len[len(tier)].add(tier)
+                        occupied_tiers_by_tier_len[len(tier)].add(tier)
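
The rename from unique_tiers_by_tier_len to occupied_tiers_by_tier_len is more than cosmetic: keyed by tier-tuple depth, these sets answer "which tiers at this level already hold a replica of this partition". A toy run with hand-written tier tuples:

    from collections import defaultdict

    # Suppose the two replicas staying put live on devices with these tiers:
    staying = [
        [(1,), (1, 1), (1, 1, '10.0.0.1'), (1, 1, '10.0.0.1', 0)],
        [(1,), (1, 2), (1, 2, '10.0.0.2'), (1, 2, '10.0.0.2', 2)],
    ]

    other_replicas = defaultdict(int)
    occupied_tiers_by_tier_len = defaultdict(set)
    for tiers in staying:
        for tier in tiers:
            other_replicas[tier] += 1
            occupied_tiers_by_tier_len[len(tier)].add(tier)

    print(other_replicas[(1,)])           # 2: both replicas share region 1
    print(occupied_tiers_by_tier_len[2])  # {(1, 1), (1, 2)}: zones holding one
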
@@ -804,23 +830,26 @@
             for replica in replace_replicas:
+                # Find a new home for this replica
                 tier = ()
                 depth = 1
                 while depth <= max_tier_depth:
                     # Order the tiers by how many replicas of this
                     # partition they already have. Then, of the ones
-                    # with the smallest number of replicas, pick the
-                    # tier with the hungriest drive and then continue
-                    # searching in that subtree.
+                    # with the smallest number of replicas and that have
+                    # room to accept more partitions, pick the tier with
+                    # the hungriest drive and then continue searching in
+                    # that subtree.
                     #
                     # There are other strategies we could use here,
                     # such as hungriest-tier (i.e. biggest
                     # difference between parts_wanted and parts).
                     #
                     # However, hungriest-drive is what was used here
                     # before, and it worked pretty well in practice.
                     #
-                    # Note that this allocator will balance things as
-                    # evenly as possible at each level of the device
-                    # layout. If your layout is extremely unbalanced,
-                    # this may produce poor results.
+                    # Note that this allocator prioritizes even device
+                    # filling over dispersion, so if your layout is
+                    # extremely unbalanced, you may not get the replica
+                    # dispersion that you expect, and your durability
+                    # may suffer.
                     #
                     # This used to be a cute, recursive function, but it's been
                     # unrolled for performance.
@@ -832,1 +861,1 @@
                     # short-circuit the search while still ensuring we get the
@@ -834,15 +863,25 @@
                     candidates_with_replicas = \
-                        unique_tiers_by_tier_len[len(tier) + 1]
-                    # Find a tier with the minimal replica count and the
-                    # hungriest drive among all the tiers with the minimal
-                    # replica counts.
-                    if len(tier2children[tier]) > \
+                        occupied_tiers_by_tier_len[len(tier) + 1]
+
+                    # Among the tiers with room for more partitions,
+                    # find one with the smallest possible number of
+                    # replicas already in it, breaking ties by which one
+                    # has the hungriest drive.
+                    candidates_with_room = [
+                        t for t in tier2children[tier]
+                        if parts_available_in_tier[t] > 0]
+
+                    if len(candidates_with_room) > \
                             len(candidates_with_replicas):
-                        # There exists at least one tier with 0 other replicas
-                        tier = max((t for t in tier2children[tier]
+                        # There exists at least one tier with room for
+                        # another partition and 0 other replicas already
+                        # in it, so we can use a faster search. The else
+                        # branch's search would work here, but it's
+                        # significantly slower.
+                        tier = max((t for t in candidates_with_room
                                     if other_replicas[t] == 0),
                                    key=tier2sort_key.__getitem__)
                     else:
-                        tier = max(tier2children[tier],
+                        tier = max(candidates_with_room,
                                    key=lambda t: (-other_replicas[t],
                                                   tier2sort_key[t]))
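
Putting the two branches together: at each level the chooser prefers a child tier that has room and holds zero replicas of the partition, and only falls back to the full (fewest-replicas, hungriest-drive) ordering when no such child exists. A condensed sketch of one descent step (the dict arguments stand in for the builder's precomputed lookup tables):

    def choose_child(tier, tier2children, tier2sort_key, other_replicas,
                     parts_available_in_tier, occupied_tiers_by_tier_len):
        candidates_with_replicas = occupied_tiers_by_tier_len[len(tier) + 1]
        candidates_with_room = [t for t in tier2children[tier]
                                if parts_available_in_tier[t] > 0]
        if len(candidates_with_room) > len(candidates_with_replicas):
            # Fast path: some roomy child holds no replica of this partition,
            # so just take the replica-free child with the hungriest drive.
            return max((t for t in candidates_with_room
                        if other_replicas[t] == 0),
                       key=tier2sort_key.__getitem__)
        # Slow path: minimize replica count first, then break ties by
        # hungriest drive.
        return max(candidates_with_room,
                   key=lambda t: (-other_replicas[t], tier2sort_key[t]))

The length comparison is a pigeonhole argument: if there are more children with room than occupied tiers at that depth, at least one roomy child must be replica-free, so the faster zero-replica search cannot come up empty.
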
@@ -853,7 +892,8 @@
                 old_sort_key = dev['sort_key']
                 new_sort_key = dev['sort_key'] = self._sort_key_for(dev)
                 for tier in dev['tiers']:
+                    parts_available_in_tier[tier] -= 1
                     other_replicas[tier] += 1
-                    unique_tiers_by_tier_len[len(tier)].add(tier)
+                    occupied_tiers_by_tier_len[len(tier)].add(tier)

                    index = bisect.bisect_left(tier2dev_sort_key[tier],
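
The bisect call that begins here is part of keeping each tier's device list sorted by hunger as sort keys change: the device is removed at its old key's position and re-inserted at the new one. In miniature (plain lists of keys, not builder state):

    import bisect

    tier_sort_keys = [10, 20, 30, 40]    # sorted sort-keys of one tier's devices
    old_sort_key, new_sort_key = 30, 15  # device just got a part; less hungry

    # Remove the entry at its old position...
    index = bisect.bisect_left(tier_sort_keys, old_sort_key)
    tier_sort_keys.pop(index)
    # ...and re-insert it where the new key belongs, keeping the list sorted.
    bisect.insort(tier_sort_keys, new_sort_key)
    print(tier_sort_keys)  # [10, 15, 20, 40]
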