~ubuntu-branches/ubuntu/saucy/db/saucy-proposed

« back to all changes in this revision

Viewing changes to repmgr/repmgr_util.c

  • Committer: Bazaar Package Importer
  • Author(s): Colin Watson
  • Date: 2010-10-25 22:16:35 UTC
  • mfrom: (13.1.11 sid)
  • Revision ID: james.westby@ubuntu.com-20101025221635-k0o38lxdx9kle3mh
Tags: 5.0.26-3ubuntu1
* Resynchronise with Debian.  Remaining changes:
  - Pass --build/--host to configure to support cross-building, and don't
    override CC.
  - Disable the Java build when cross-building, for now.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*-
2
2
 * See the file LICENSE for redistribution information.
3
3
 *
4
 
 * Copyright (c) 2005-2009 Oracle.  All rights reserved.
 
4
 * Copyright (c) 2005, 2010 Oracle and/or its affiliates.  All rights reserved.
5
5
 *
6
6
 * $Id$
7
7
 */
8
8
 
9
9
#include "db_config.h"
10
10
 
11
 
#define __INCLUDE_NETWORKING    1
12
11
#include "db_int.h"
13
12
 
14
13
#define INITIAL_SITES_ALLOCATION        10           /* Arbitrary guess. */
176
175
 
177
176
/*
178
177
 * PUBLIC: int __repmgr_new_site __P((ENV *, REPMGR_SITE**,
179
 
 * PUBLIC:     const char *, u_int, int));
 
178
 * PUBLIC:     const char *, u_int, int, int));
180
179
 *
181
180
 * Manipulates the process-local copy of the sites list.  So, callers should
182
181
 * hold the db_rep->mutex (except for single-threaded, pre-open configuration).
183
182
 */
184
183
int
185
 
__repmgr_new_site(env, sitep, host, port, state)
 
184
__repmgr_new_site(env, sitep, host, port, state, peer)
186
185
        ENV *env;
187
186
        REPMGR_SITE **sitep;
188
187
        const char *host;
189
188
        u_int port;
190
189
        int state;
 
190
        int peer;
191
191
{
192
192
        DB_REP *db_rep;
193
193
        REPMGR_SITE *site;
217
217
 
218
218
        ZERO_LSN(site->max_ack);
219
219
        site->flags = 0;
 
220
        if (peer)
 
221
                F_SET(site, SITE_IS_PEER);
220
222
        timespecclear(&site->last_rcvd_timestamp);
221
223
        TAILQ_INIT(&site->sub_conns);
222
224
        site->state = state;
579
581
        if ((ret = __mutex_alloc(env, MTX_REPMGR, 0, &rep->mtx_repmgr)) != 0)
580
582
                return (ret);
581
583
 
582
 
        DB_ASSERT(env, rep->siteaddr_seq == 0 && db_rep->siteaddr_seq == 0);
583
 
        rep->netaddr_off = INVALID_ROFF;
584
 
        rep->siteaddr_seq = 0;
 
584
        DB_ASSERT(env, rep->siteinfo_seq == 0 && db_rep->siteinfo_seq == 0);
 
585
        rep->siteinfo_off = INVALID_ROFF;
 
586
        rep->siteinfo_seq = 0;
585
587
        if ((ret = __repmgr_share_netaddrs(env, rep, 0, db_rep->site_cnt)) != 0)
586
588
                return (ret);
587
 
        rep->peer = db_rep->peer;
588
589
 
589
590
        if ((host = db_rep->my_addr.host) != NULL) {
590
591
                sz = strlen(host) + 1;
593
594
                (void)strcpy(hostbuf, host);
594
595
                rep->my_addr.host = R_OFFSET(infop, hostbuf);
595
596
                rep->my_addr.port = db_rep->my_addr.port;
596
 
                rep->siteaddr_seq++;
 
597
                rep->siteinfo_seq++;
597
598
        } else
598
599
                rep->my_addr.host = INVALID_ROFF;
599
600
 
624
625
        DB_REP *db_rep;
625
626
        REGINFO *infop;
626
627
        REP *rep;
627
 
        SITEADDR *p;
 
628
        SITEINFO *p;
628
629
        REPMGR_SITE temp, *unused;
629
630
        repmgr_netaddr_t *addrp;
630
631
        char *host;
667
668
         * shared list.
668
669
         */
669
670
        i = 0;
670
 
        if (rep->netaddr_off != INVALID_ROFF) {
671
 
                p = R_ADDR(infop, rep->netaddr_off);
 
671
        if (rep->siteinfo_off != INVALID_ROFF) {
 
672
                p = R_ADDR(infop, rep->siteinfo_off);
672
673
 
673
674
                /* For each address in the shared list ... */
674
675
                for (; i < rep->site_cnt; i++) {
675
 
                        host = R_ADDR(infop, p[i].host);
 
676
                        host = R_ADDR(infop, p[i].addr.host);
676
677
 
677
 
                        RPRINT(env, DB_VERB_REPMGR_MISC,
678
 
                            (env, "Site %s:%lu found at EID %u",
679
 
                                host, (u_long)p[i].port, i));
 
678
                        RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 
679
                            "Site %s:%lu found at EID %u",
 
680
                                host, (u_long)p[i].addr.port, i));
680
681
                        /*
681
682
                         * Find it in the local list.  Everything before 'i'
682
683
                         * already matches the shared list, and is therefore in
683
684
                         * the right place.  So we only need to search starting
684
 
                         * from 'i'.
 
685
                         * from 'i'.  When found, local peer value will be used
 
686
                         * because it is assumed to be "fresher".
685
687
                         */
686
688
                        for (j = i; j < db_rep->site_cnt; j++) {
687
689
                                addrp = &db_rep->sites[j].net_addr;
688
690
                                if (strcmp(host, addrp->host) == 0 &&
689
 
                                    p[i].port == addrp->port)
 
691
                                    p[i].addr.port == addrp->port)
690
692
                                        break;
691
693
                        }
692
694
 
 
695
                        /*
 
696
                         * When not found in local list, copy peer value
 
697
                         * from shared list.
 
698
                         */
693
699
                        if (j == db_rep->site_cnt &&
694
700
                            (ret = __repmgr_new_site(env, &unused,
695
 
                            host, p[i].port, SITE_IDLE)) != 0)
 
701
                            host, p[i].addr.port, SITE_IDLE, p[i].peer)) != 0)
696
702
                                goto unlock;
697
703
                        DB_ASSERT(env, j < db_rep->site_cnt);
698
704
 
701
707
                                temp = db_rep->sites[j];
702
708
                                db_rep->sites[j] = db_rep->sites[i];
703
709
                                db_rep->sites[i] = temp;
704
 
 
705
 
                                /*
706
 
                                 * Keep peer pointer in sync with swapped
707
 
                                 * location.
708
 
                                 */
709
 
                                if (db_rep->peer == (int)j)
710
 
                                        db_rep->peer = (int)i;
711
 
                                else if (db_rep->peer == (int)i)
712
 
                                        db_rep->peer = (int)j;
713
710
                        }
714
711
                }
715
712
        }
716
713
        if ((ret = __repmgr_share_netaddrs(env, rep, i, db_rep->site_cnt)) != 0)
717
714
                goto unlock;
718
715
 
719
 
        /*
720
 
         * Assume that any config settings I've made locally are "fresher" than
721
 
         * anything lying around in the shared region, so the local setting
722
 
         * overrides here.
723
 
         */
724
 
        if (IS_VALID_EID(db_rep->peer))
725
 
                rep->peer = db_rep->peer;
726
 
        db_rep->siteaddr_seq = rep->siteaddr_seq;
 
716
        db_rep->siteinfo_seq = rep->siteinfo_seq;
727
717
 
728
718
unlock:
729
719
        MUTEX_UNLOCK(env, rep->mtx_repmgr);
766
756
}
767
757
 
768
758
/*
769
 
 * Copy network address information from the indicated local array slots into
770
 
 * the shared region.
 
759
 * Copy network address information from the indicated local array slots,
 
760
 * and peer information changes from any of the local array slots, into the
 
761
 * shared region.
771
762
 *
772
763
 * PUBLIC: int __repmgr_share_netaddrs __P((ENV *, void *, u_int, u_int));
773
764
 *
786
777
        REP *rep;
787
778
        REGINFO *infop;
788
779
        REGENV *renv;
789
 
        SITEADDR *orig, *shared_array;
 
780
        SITEINFO *orig, *shared_array;
790
781
        char *host, *hostbuf;
791
782
        size_t sz;
792
783
        u_int i, n;
804
795
        for (i = start; i < limit; i++) {
805
796
                if (rep->site_cnt >= rep->site_max) {
806
797
                        /* Table is full, we need more space. */
807
 
                        if (rep->netaddr_off == INVALID_ROFF) {
 
798
                        if (rep->siteinfo_off == INVALID_ROFF) {
808
799
                                n = INITIAL_SITES_ALLOCATION;
809
 
                                sz = n * sizeof(SITEADDR);
 
800
                                sz = n * sizeof(SITEINFO);
810
801
                                if ((ret = __env_alloc(infop,
811
802
                                    sz, &shared_array)) != 0)
812
803
                                        goto out;
813
804
                        } else {
814
805
                                n = 2 * rep->site_max;
815
 
                                sz = n * sizeof(SITEADDR);
 
806
                                sz = n * sizeof(SITEINFO);
816
807
                                if ((ret = __env_alloc(infop,
817
808
                                    sz, &shared_array)) != 0)
818
809
                                        goto out;
819
 
                                orig = R_ADDR(infop, rep->netaddr_off);
 
810
                                orig = R_ADDR(infop, rep->siteinfo_off);
820
811
                                memcpy(shared_array, orig,
821
 
                                    sizeof(SITEADDR) * rep->site_cnt);
 
812
                                    sizeof(SITEINFO) * rep->site_cnt);
822
813
                                __env_alloc_free(infop, orig);
823
814
                        }
824
 
                        rep->netaddr_off = R_OFFSET(infop, shared_array);
 
815
                        rep->siteinfo_off = R_OFFSET(infop, shared_array);
825
816
                        rep->site_max = n;
826
817
                } else
827
 
                        shared_array = R_ADDR(infop, rep->netaddr_off);
 
818
                        shared_array = R_ADDR(infop, rep->siteinfo_off);
828
819
 
829
820
                DB_ASSERT(env, rep->site_cnt < rep->site_max &&
830
 
                    rep->netaddr_off != INVALID_ROFF);
 
821
                    rep->siteinfo_off != INVALID_ROFF);
831
822
 
832
823
                host = db_rep->sites[i].net_addr.host;
833
824
                sz = strlen(host) + 1;
835
826
                        goto out;
836
827
                eid = (int)rep->site_cnt++;
837
828
                (void)strcpy(hostbuf, host);
838
 
                shared_array[eid].host = R_OFFSET(infop, hostbuf);
839
 
                shared_array[eid].port = db_rep->sites[i].net_addr.port;
840
 
                RPRINT(env, DB_VERB_REPMGR_MISC,
841
 
                    (env, "EID %d is assigned for site %s:%lu",
842
 
                        eid, host, (u_long)shared_array[eid].port));
 
829
                shared_array[eid].addr.host = R_OFFSET(infop, hostbuf);
 
830
                shared_array[eid].addr.port = db_rep->sites[i].net_addr.port;
 
831
                shared_array[eid].peer =
 
832
                    F_ISSET(&db_rep->sites[i], SITE_IS_PEER) ? TRUE : FALSE;
 
833
                RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 
834
                    "EID %d is assigned for site %s:%lu",
 
835
                        eid, host, (u_long)shared_array[eid].addr.port));
843
836
                touched = TRUE;
844
837
        }
845
838
 
 
839
        /* Get any peer information changes from local copy. */
 
840
        if (rep->siteinfo_off != INVALID_ROFF) {
 
841
                shared_array = R_ADDR(infop, rep->siteinfo_off);
 
842
                for (i = 0; i < rep->site_cnt; i++) {
 
843
                        if (!F_ISSET(&db_rep->sites[i], SITE_IS_PEER) &&
 
844
                            shared_array[i].peer) {
 
845
                                shared_array[i].peer = FALSE;
 
846
                                touched = TRUE;
 
847
                        } else if (F_ISSET(&db_rep->sites[i], SITE_IS_PEER) &&
 
848
                            !shared_array[i].peer) {
 
849
                                shared_array[i].peer = TRUE;
 
850
                                touched = TRUE;
 
851
                        }
 
852
                }
 
853
        }
 
854
 
846
855
out:
847
856
        if (touched)
848
 
                rep->siteaddr_seq++;
 
857
                rep->siteinfo_seq++;
849
858
        MUTEX_UNLOCK(env, renv->mtx_regenv);
850
859
        return (ret);
851
860
}
852
861
 
853
862
/*
854
 
 * Copy into our local list any newly added remote site addresses that we
855
 
 * haven't seen yet.
 
863
 * Copy into our local list any newly added/changed remote site
 
864
 * configuration information.
856
865
 *
857
866
 * !!! Caller must hold db_rep->mutex and mtx_repmgr locks.
858
867
 *
865
874
        DB_REP *db_rep;
866
875
        REP *rep;
867
876
        REGINFO *infop;
868
 
        SITEADDR *base, *p;
 
877
        SITEINFO *base, *p;
869
878
        REPMGR_SITE *site;
870
879
        char *host;
871
880
        int ret;
874
883
        db_rep = env->rep_handle;
875
884
        rep = db_rep->region;
876
885
 
877
 
        if (rep->netaddr_off == INVALID_ROFF)
 
886
        if (rep->siteinfo_off == INVALID_ROFF)
878
887
                return (0);
879
888
 
880
889
        infop = env->reginfo;
881
 
        base = R_ADDR(infop, rep->netaddr_off);
 
890
        base = R_ADDR(infop, rep->siteinfo_off);
 
891
        /* Update existing local site peer values with shared values. */
 
892
        for (i = 0; i < db_rep->site_cnt; i++) {
 
893
                p = &base[i];
 
894
                if (p->peer)
 
895
                        F_SET(&db_rep->sites[i], SITE_IS_PEER);
 
896
                else
 
897
                        F_CLR(&db_rep->sites[i], SITE_IS_PEER);
 
898
        }
882
899
        for (i = db_rep->site_cnt; i < rep->site_cnt; i++) {
883
900
                p = &base[i];
884
 
                host = R_ADDR(infop, p->host);
 
901
                host = R_ADDR(infop, p->addr.host);
885
902
                if ((ret = __repmgr_new_site(env,
886
 
                    &site, host, p->port, SITE_IDLE)) != 0)
 
903
                    &site, host, p->addr.port, SITE_IDLE, p->peer)) != 0)
887
904
                        return (ret);
888
 
                RPRINT(env, DB_VERB_REPMGR_MISC,
889
 
                    (env, "Site %s:%lu found at EID %u",
890
 
                        host, (u_long)p->port, i));
 
905
                RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 
906
                    "Site %s:%lu found at EID %u",
 
907
                        host, (u_long)p->addr.port, i));
891
908
        }
892
909
 
893
910
        DB_ASSERT(env, db_rep->site_cnt == rep->site_cnt);
894
 
        db_rep->peer = rep->peer;
895
 
        db_rep->siteaddr_seq = rep->siteaddr_seq;
 
911
        db_rep->siteinfo_seq = rep->siteinfo_seq;
896
912
        return (0);
897
913
}
898
914
 
990
1006
 
991
1007
        return (0);
992
1008
}
 
1009
 
 
1010
/*
 
1011
 * PUBLIC: int __repmgr_master_is_known __P((ENV *));
 *
 * Report whether a master is currently usable from this site's point of
 * view.  Evaluates to non-zero when the region's master_id equals SELF_EID,
 * or when __repmgr_master_connection() hands back a non-NULL connection;
 * evaluates to zero otherwise.
 *
 * NOTE(review): no mutex is acquired in this function; presumably the
 * caller is expected to hold db_rep->mutex -- confirm against call sites.
 
1012
 */
 
1013
int
 
1014
__repmgr_master_is_known(env)
 
1015
        ENV *env;
 
1016
{
 
1017
        DB_REP *db_rep;
 
1018
        REP *rep;
 
1019
        int master;
 
1020
 
 
1021
        db_rep = env->rep_handle;
 
1022
        rep = db_rep->region;
 
1023
        /* Take a single snapshot of the master EID from the region. */
        master = rep->master_id;
 
1024
 
 
1025
        /*
 
1026
         * We are the master, or we know of a master and have a healthy
 
1027
         * connection to it.
 
1028
         */
 
1029
        return (master == SELF_EID || __repmgr_master_connection(env) != NULL);
 
1030
}
 
1031
 
 
1032
/*
 
1033
 * PUBLIC: int __repmgr_stable_lsn __P((ENV *, DB_LSN *));
 
1034
 *
 
1035
 * This function may be called before any of repmgr's threads have
 
1036
 * been started.  This code must not be called before env open.
 
1037
 * Currently that is impossible since its only caller is log_archive
 
1038
 * which itself cannot be called before env_open.
 *
 * stable_lsn is an in/out parameter.  The function scans the local site
 * list for the smallest non-zero max_ack among sites that satisfy
 * IS_SITE_AVAILABLE (skipping the site whose EID is the current master_id)
 * and, if that value compares lower than the incoming *stable_lsn,
 * overwrites *stable_lsn with it.  Otherwise *stable_lsn is left as passed
 * in.  The scan runs with db_rep->mutex held.  Always returns 0.
 
1039
 */
 
1040
int
 
1041
__repmgr_stable_lsn(env, stable_lsn)
 
1042
        ENV *env;
 
1043
        DB_LSN *stable_lsn;
 
1044
{
 
1045
        DB_LSN min_lsn;
 
1046
        DB_REP *db_rep;
 
1047
        REP *rep;
 
1048
        REPMGR_SITE *site;
 
1049
        u_int eid;
 
1050
 
 
1051
        db_rep = env->rep_handle;
 
1052
        rep = db_rep->region;
 
1053
 
 
1054
        /* A zero min_lsn doubles as "no qualifying ack seen yet". */
        ZERO_LSN(min_lsn);
 
1055
        LOCK_MUTEX(db_rep->mutex);
 
1056
        for (eid = 0; eid < db_rep->site_cnt; eid++) {
 
1057
                site = SITE_FROM_EID(eid);
 
1058
                /*
 
1059
                 * Record the smallest ack'ed LSN from all connected sites.
 
1060
                 * If we're a client, ignore the master because the master
 
1061
                 * does not maintain nor send out its repmgr perm LSN in
 
1062
                 * this way.
 
1063
                 */
 
1064
                if ((int)eid == rep->master_id)
 
1065
                        continue;
 
1066
                /*
                 * Sites with a zero max_ack are ignored; the first
                 * qualifying site seeds min_lsn, later ones only lower it.
                 */
                if (IS_SITE_AVAILABLE(site) &&
 
1067
                    !IS_ZERO_LSN(site->max_ack) &&
 
1068
                    (IS_ZERO_LSN(min_lsn) ||
 
1069
                    LOG_COMPARE(&site->max_ack, &min_lsn) < 0))
 
1070
                        min_lsn = site->max_ack;
 
1071
        }
 
1072
        UNLOCK_MUTEX(db_rep->mutex);
 
1073
        /*
         * Only ever move the caller's LSN backwards; if no site supplied
         * a usable ack (min_lsn still zero), leave it untouched.
         */
        if (!IS_ZERO_LSN(min_lsn) && LOG_COMPARE(&min_lsn, stable_lsn) < 0)
 
1074
                *stable_lsn = min_lsn;
 
1075
        RPRINT(env, (env, DB_VERB_REPMGR_MISC,
 
1076
            "Repmgr_stable_lsn: Returning stable_lsn[%lu][%lu]",
 
1077
            (u_long)stable_lsn->file, (u_long)stable_lsn->offset));
 
1078
        return (0);
 
1079
}