~jtv/corpusfiltergraph/cross-python

if type(defaultreallimits) not in [ListType,TupleType] or len(defaultreallimits)==1: # only one limit given, assumed to be lower one & upper is calc'd

526

lowerreallimit = defaultreallimits

527

upperreallimit = 1.000001 * max(inlist)

528

else: # assume both limits given

529

lowerreallimit = defaultreallimits[0]

530

upperreallimit = defaultreallimits[1]

531

binsize = (upperreallimit-lowerreallimit)/float(numbins)

532

else: # no limits given for histogram, both must be calc'd

533

estbinwidth=(max(inlist)-min(inlist))/float(numbins) +1e-6 #1=>cover all

534

binsize = ((max(inlist)-min(inlist)+estbinwidth))/float(numbins)

535

lowerreallimit = min(inlist) - binsize/2 #lower real limit,1st bin

536

bins = [0]*(numbins)

537

extrapoints = 0

538

for num in inlist:

539

try:

540

if (num-lowerreallimit) < 0:

541

extrapoints = extrapoints + 1

542

else:

543

bintoincrement = int((num-lowerreallimit)/float(binsize))

544

bins[bintoincrement] = bins[bintoincrement] + 1

545

except:

546

extrapoints = extrapoints + 1

547

if (extrapoints > 0 and printextras == 1):

548

print '\nPoints outside given histogram range =',extrapoints

549

return (bins, lowerreallimit, binsize, extrapoints)

550

551

552

def lcumfreq(inlist,numbins=10,defaultreallimits=None):

553

'''

554

Returns a cumulative frequency histogram, using the histogram function.

555

556

Usage: lcumfreq(inlist,numbins=10,defaultreallimits=None)

557

Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints

558

'''

559

h,l,b,e = histogram(inlist,numbins,defaultreallimits)

560

cumhist = cumsum(copy.deepcopy(h))

561

return cumhist,l,b,e

562

563

564

def lrelfreq(inlist,numbins=10,defaultreallimits=None):

565

'''

566

Returns a relative frequency histogram, using the histogram function.

567

568

Usage: lrelfreq(inlist,numbins=10,defaultreallimits=None)

569

Returns: list of cumfreq bin values, lowerreallimit, binsize, extrapoints

570

'''

571

h,l,b,e = histogram(inlist,numbins,defaultreallimits)

572

for i in range(len(h)):

573

h[i] = h[i]/float(len(inlist))

574

return h,l,b,e

575

576

577

####################################

578

##### VARIABILITY FUNCTIONS ######

579

####################################

580

581

def lobrientransform(*args):

582

'''

583

Computes a transform on input data (any number of columns). Used to

584

test for homogeneity of variance prior to running one-way stats. From

585

Maxwell and Delaney, p.112.

586

587

Usage: lobrientransform(*args)

588

Returns: transformed data for use in an ANOVA

589

'''

590

TINY = 1e-10

591

k = len(args)

592

n = [0.0]*k

593

v = [0.0]*k

594

m = [0.0]*k

595

nargs = []

596

for i in range(k):

597

nargs.append(copy.deepcopy(args[i]))

598

n[i] = float(len(nargs[i]))

599

v[i] = var(nargs[i])

600

m[i] = mean(nargs[i])

601

for j in range(k):

602

for i in range(n[j]):

603

t1 = (n[j]-1.5)*n[j]*(nargs[j][i]-m[j])**2

604

t2 = 0.5*v[j]*(n[j]-1.0)

605

t3 = (n[j]-1.0)*(n[j]-2.0)

606

nargs[j][i] = (t1-t2) / float(t3)

607

check = 1

608

for j in range(k):

609

if v[j] - mean(nargs[j]) > TINY:

610

check = 0

611

if check <> 1:

612

raise ValueError, 'Problem in obrientransform.'

613

else:

614

return nargs

615

616

617

def lsamplevar(inlist):

618

'''

619

Returns the variance of the values in the passed list using

620

N for the denominator (i.e., DESCRIBES the sample variance only).

621

622

Usage: lsamplevar(inlist)

623

'''

624

n = len(inlist)

625

mn = mean(inlist)

626

deviations = []

627

for item in inlist:

628

deviations.append(item-mn)

629

return ss(deviations)/float(n)

630

631

632

def lsamplestdev(inlist):

633

'''

634

Returns the standard deviation of the values in the passed list using

635

N for the denominator (i.e., DESCRIBES the sample stdev only).

636

637

Usage: lsamplestdev(inlist)

638

'''

639

return math.sqrt(samplevar(inlist))

640

641

642

def lcov(x,y, keepdims=0):

643

'''

644

Returns the estimated covariance of the values in the passed

645

array (i.e., N-1). Dimension can equal None (ravel array first), an

646

integer (the dimension over which to operate), or a sequence (operate

647

over multiple dimensions). Set keepdims=1 to return an array with the

648

same number of dimensions as inarray.

649

650

Usage: lcov(x,y,keepdims=0)

651

'''

652

n = len(x)

653

xmn = mean(x)

654

ymn = mean(y)

655

xdeviations = [0]*len(x)

656

ydeviations = [0]*len(y)

657

for i in range(len(x)):

658

xdeviations[i] = x[i] - xmn

659

ydeviations[i] = y[i] - ymn

660

ss = 0.0

661

for i in range(len(xdeviations)):

662

ss = ss + xdeviations[i]*ydeviations[i]

663

return ss/float(n-1)

664

665

666

def lvar(inlist):

667

'''

668

Returns the variance of the values in the passed list using N-1

669

for the denominator (i.e., for estimating population variance).

670

671

Usage: lvar(inlist)

672

'''

673

n = len(inlist)

674

mn = mean(inlist)

675

deviations = [0]*len(inlist)

676

for i in range(len(inlist)):

677

deviations[i] = inlist[i] - mn

678

try:

679

x = ss(deviations)/float(n-1)

680

except Exception, e:

681

x = 0

682

return x

683

684

685

def lstdev(inlist):

686

'''

687

Returns the standard deviation of the values in the passed list

688

using N-1 in the denominator (i.e., to estimate population stdev).

689

690

Usage: lstdev(inlist)

691

'''

692

return math.sqrt(var(inlist))

693

694

695

def lsterr(inlist):

696

'''

697

Returns the standard error of the values in the passed list using N-1

698

in the denominator (i.e., to estimate population standard error).

699

700

Usage: lsterr(inlist)

701

'''

702

return stdev(inlist) / float(math.sqrt(len(inlist)))

703

704

705

def lsem(inlist):

706

'''

707

Returns the estimated standard error of the mean (sx-bar) of the

708

values in the passed list. sem = stdev / sqrt(n)

709

710

Usage: lsem(inlist)

711

'''

712

sd = stdev(inlist)

713

n = len(inlist)

714

return sd/math.sqrt(n)

715

716

717

def lz(inlist, score):

718

'''

719

Returns the z-score for a given input score, given that score and the

720

list from which that score came. Not appropriate for population calculations.

721

722

Usage: lz(inlist, score)

723

'''

724

z = (score-mean(inlist))/samplestdev(inlist)

725

return z

726

727

728

def lzs(inlist):

729

'''

730

Returns a list of z-scores, one for each score in the passed list.

731

732

Usage: lzs(inlist)

733

'''

734

zscores = []

735

for item in inlist:

736

zscores.append(z(inlist,item))

737

return zscores

738

739

740

####################################

741

####### TRIMMING FUNCTIONS #######

742

####################################

743

744

def ltrimboth(l,proportiontocut):

745

'''

746

Slices off the passed proportion of items from BOTH ends of the passed

747

list (i.e., with proportiontocut=0.1, slices 'leftmost' 10% AND 'rightmost'

748

10% of scores. Assumes list is sorted by magnitude. Slices off LESS if

749

proportion results in a non-integer slice index (i.e., conservatively

750

slices off proportiontocut).

751

752

Usage: ltrimboth (l,proportiontocut)

753

Returns: trimmed version of list l

754

'''

755

lowercut = int(proportiontocut*len(l))

756

uppercut = len(l) - lowercut

757

return l[lowercut:uppercut]

758

759

760

def ltrim1(l,proportiontocut,tail='right'):

761

'''

762

Slices off the passed proportion of items from ONE end of the passed

763

list (i.e., if proportiontocut=0.1, slices off 'leftmost' or 'rightmost'

764

10% of scores). Slices off LESS if proportion results in a non-integer

765

slice index (i.e., conservatively slices off proportiontocut).

766

767

Usage: ltrim1 (l,proportiontocut,tail='right') or set tail='left'

768

Returns: trimmed version of list l

769

'''

770

if tail == 'right':

771

lowercut = 0

772

uppercut = len(l) - int(proportiontocut*len(l))

773

elif tail == 'left':

774

lowercut = int(proportiontocut*len(l))

775

uppercut = len(l)

776

return l[lowercut:uppercut]

777

778

779

####################################

780

##### CORRELATION FUNCTIONS ######

781

####################################

782

783

def lpaired(x,y):

784

'''

785

Interactively determines the type of data and then runs the

786

appropriated statistic for paired group data.

787

788

Usage: lpaired(x,y)

789

Returns: appropriate statistic name, value, and probability

790

'''

791

samples = ''

792

while samples not in ['i','r','I','R','c','C']:

793

print '\nIndependent or related samples, or correlation (i,r,c): ',

794

samples = raw_input()

795

796

if samples in ['i','I','r','R']:

797

print '\nComparing variances ...',

798

# USE O'BRIEN'S TEST FOR HOMOGENEITY OF VARIANCE, Maxwell & delaney, p.112

799

r = obrientransform(x,y)

800

f,p = F_oneway(pstat.colex(r,0),pstat.colex(r,1))

801

if p<0.05:

802

vartype='unequal, p='+str(round(p,4))

803

else:

804

vartype='equal'

805

print vartype

806

if samples in ['i','I']:

807

if vartype[0]=='e':

808

t,p = ttest_ind(x,y,0)

809

print '\nIndependent samples t-test: ', round(t,4),round(p,4)

810

else:

811

if len(x)>20 or len(y)>20:

812

z,p = ranksums(x,y)

813

print '\nRank Sums test (NONparametric, n>20): ', round(z,4),round(p,4)

814

else:

815

u,p = mannwhitneyu(x,y)

816

print '\nMann-Whitney U-test (NONparametric, ns<20): ', round(u,4),round(p,4)

817

818

else: # RELATED SAMPLES

819

if vartype[0]=='e':

820

t,p = ttest_rel(x,y,0)

821

print '\nRelated samples t-test: ', round(t,4),round(p,4)

822

else:

823

t,p = ranksums(x,y)

824

print '\nWilcoxon T-test (NONparametric): ', round(t,4),round(p,4)

825

else: # CORRELATION ANALYSIS

826

corrtype = ''

827

while corrtype not in ['c','C','r','R','d','D']:

828

print '\nIs the data Continuous, Ranked, or Dichotomous (c,r,d): ',

829

corrtype = raw_input()

830

if corrtype in ['c','C']:

831

m,b,r,p,see = linregress(x,y)

832

print '\nLinear regression for continuous variables ...'

833

lol = [['Slope','Intercept','r','Prob','SEestimate'],[round(m,4),round(b,4),round(r,4),round(p,4),round(see,4)]]

834

pstat.printcc(lol)

835

elif corrtype in ['r','R']:

836

r,p = spearmanr(x,y)

837

print '\nCorrelation for ranked variables ...'

838

print "Spearman's r: ",round(r,4),round(p,4)

839

else: # DICHOTOMOUS

840

r,p = pointbiserialr(x,y)

841

print '\nAssuming x contains a dichotomous variable ...'

842

print 'Point Biserial r: ',round(r,4),round(p,4)

843

print '\n\n'

844

return None

845

846

847

def lpearsonr(x,y):

848

'''

849

Calculates a Pearson correlation coefficient and the associated

850

probability value. Taken from Heiman's Basic Statistics for the Behav.

851

Sci (2nd), p.195.

852

853

Usage: lpearsonr(x,y) where x and y are equal-length lists

854

Returns: Pearson's r value, two-tailed p-value

855

'''

856

TINY = 1.0e-30

857

if len(x) <> len(y):

858

raise ValueError, 'Input values not paired in pearsonr. Aborting.'

859

n = len(x)

860

x = map(float,x)

861

y = map(float,y)

862

xmean = mean(x)

863

ymean = mean(y)

864

r_num = n*(summult(x,y)) - sum(x)*sum(y)

865

r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y)))

866

r = (r_num / r_den) # denominator already a float

867

df = n-2

868

t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))

869

prob = betai(0.5*df,0.5,df/float(df+t*t))

870

return r, prob

871

872

873

def llincc(x,y):

874

'''

875

Calculates Lin's concordance correlation coefficient.

876

877

Usage: alincc(x,y) where x, y are equal-length arrays

878

Returns: Lin's CC

879

'''

880

covar = lcov(x,y)*(len(x)-1)/float(len(x)) # correct denom to n

881

xvar = lvar(x)*(len(x)-1)/float(len(x)) # correct denom to n

882

yvar = lvar(y)*(len(y)-1)/float(len(y)) # correct denom to n

883

lincc = (2 * covar) / ((xvar+yvar) +((amean(x)-amean(y))**2))

884

return lincc

885

886

887

def lspearmanr(x,y):

888

'''

889

Calculates a Spearman rank-order correlation coefficient. Taken

890

from Heiman's Basic Statistics for the Behav. Sci (1st), p.192.

891

892

Usage: lspearmanr(x,y) where x and y are equal-length lists

893

Returns: Spearman's r, two-tailed p-value

894

'''

895

TINY = 1e-30

896

if len(x) <> len(y):

897

raise ValueError, 'Input values not paired in spearmanr. Aborting.'

898

n = len(x)

899

rankx = rankdata(x)

900

ranky = rankdata(y)

901

dsq = sumdiffsquared(rankx,ranky)

902

rs = 1 - 6*dsq / float(n*(n**2-1))

903

t = rs * math.sqrt((n-2) / ((rs+1.0)*(1.0-rs)))

904

df = n-2

905

probrs = betai(0.5*df,0.5,df/(df+t*t))

906

''' t already a float

907

probability values for rs are from part 2 of the spearman function in

908

Numerical Recipies, p.510. They are close to tables, but not exact. (?)'''

909

return rs, probrs

910

911

912

def lpointbiserialr(x,y):

913

'''

914

Calculates a point-biserial correlation coefficient and the associated

915

probability value. Taken from Heiman's Basic Statistics for the Behav.

916

Sci (1st), p.194.

917

918

Usage: lpointbiserialr(x,y) where x,y are equal-length lists

919

Returns: Point-biserial r, two-tailed p-value

920

'''

921

TINY = 1e-30

922

if len(x) <> len(y):

923

raise ValueError, 'INPUT VALUES NOT PAIRED IN pointbiserialr. ABORTING.'

924

data = pstat.abut(x,y)

925

categories = pstat.unique(x)

926

if len(categories) <> 2:

927

raise ValueError, "Exactly 2 categories required for pointbiserialr()."

928

else: # there are 2 categories, continue

929

codemap = pstat.abut(categories,range(2))

930

recoded = pstat.recode(data,codemap,0)

931

x = pstat.linexand(data,0,categories[0])

932

y = pstat.linexand(data,0,categories[1])

933

xmean = mean(pstat.colex(x,1))

934

ymean = mean(pstat.colex(y,1))

935

n = len(data)

936

adjust = math.sqrt((len(x)/float(n))*(len(y)/float(n)))

937

rpb = (ymean - xmean)/samplestdev(pstat.colex(data,1))*adjust

938

df = n-2

939

t = rpb*math.sqrt(df/((1.0-rpb+TINY)*(1.0+rpb+TINY)))

940

prob = betai(0.5*df,0.5,df/(df+t*t)) # t already a float

941

return rpb, prob

942

943

944

def lkendalltau(x,y):

945

'''

946

Calculates Kendall's tau ... correlation of ordinal data. Adapted

947

from function kendl1 in Numerical Recipies. Needs good test-routine.@@@

948

949

Usage: lkendalltau(x,y)

950

Returns: Kendall's tau, two-tailed p-value

951

'''

952

n1 = 0

953

n2 = 0

954

iss = 0

955

for j in range(len(x)-1):

956

for k in range(j,len(y)):

957

a1 = x[j] - x[k]

958

a2 = y[j] - y[k]

959

aa = a1 * a2

960

if (aa): # neither list has a tie

961

n1 = n1 + 1

962

n2 = n2 + 1

963

if aa > 0:

964

iss = iss + 1

965

else:

966

iss = iss -1

967

else:

968

if (a1):

969

n1 = n1 + 1

970

else:

971

n2 = n2 + 1

972

tau = iss / math.sqrt(n1*n2)

973

svar = (4.0*len(x)+10.0) / (9.0*len(x)*(len(x)-1))

974

z = tau / math.sqrt(svar)

975

prob = erfcc(abs(z)/1.4142136)

976

return tau, prob

977

978

979

def llinregress(x,y):

980

'''

981

Calculates a regression line on x,y pairs.

982

983

Usage: llinregress(x,y) x,y are equal-length lists of x-y coordinates

984

Returns: slope, intercept, r, two-tailed prob, sterr-of-estimate

985

'''

986

TINY = 1.0e-20

987

if len(x) <> len(y):

988

raise ValueError, 'Input values not paired in linregress. Aborting.'

989

n = len(x)

990

x = map(float,x)

991

y = map(float,y)

992

xmean = mean(x)

993

ymean = mean(y)

994

r_num = float(n*(summult(x,y)) - sum(x)*sum(y))

995

r_den = math.sqrt((n*ss(x) - square_of_sums(x))*(n*ss(y)-square_of_sums(y)))

996

r = r_num / r_den

997

z = 0.5*math.log((1.0+r+TINY)/(1.0-r+TINY))

998

df = n-2

999

t = r*math.sqrt(df/((1.0-r+TINY)*(1.0+r+TINY)))

1000

prob = betai(0.5*df,0.5,df/(df+t*t))

1001

slope = r_num / float(n*ss(x) - square_of_sums(x))

1002

intercept = ymean - slope*xmean

1003

sterrest = math.sqrt(1-r*r)*samplestdev(y)

1004

return slope, intercept, r, prob, sterrest

1005

1006

1007

####################################

1008

##### INFERENTIAL STATISTICS #####

1009

####################################

1010

1011

def lttest_1samp(a,popmean,printit=0,name='Sample',writemode='a'):

1012

'''

1013

Calculates the t-obtained for the independent samples T-test on ONE group

1014

of scores a, given a population mean. If printit=1, results are printed

1015

to the screen. If printit='filename', the results are output to 'filename'

1016

using the given writemode (default=append). Returns t-value, and prob.

1017

1018

Usage: lttest_1samp(a,popmean,Name='Sample',printit=0,writemode='a')

1019

Returns: t-value, two-tailed prob

1020

'''

1021

x = mean(a)

1022

v = var(a)

1023

n = len(a)

1024

df = n-1

1025

svar = ((n-1)*v)/float(df)

1026

t = (x-popmean)/math.sqrt(svar*(1.0/n))

1027

prob = betai(0.5*df,0.5,float(df)/(df+t*t))

1028

1029

if printit <> 0:

1030

statname = 'Single-sample T-test.'

1031

outputpairedstats(printit,writemode,

1032

'Population','--',popmean,0,0,0,

1033

name,n,x,v,min(a),max(a),

1034

statname,t,prob)

1035

return t,prob

1036

1037

1038

def lttest_ind(a, b, printit=0, name1='Samp1', name2='Samp2', writemode='a'):

1039

'''

1040

Calculates the t-obtained T-test on TWO INDEPENDENT samples of

1041

scores a, and b. From Numerical Recipies, p.483. If printit=1, results

1042

are printed to the screen. If printit='filename', the results are output

1043

to 'filename' using the given writemode (default=append). Returns t-value,

1044

and prob.

1045

1046

Usage: lttest_ind(a,b,printit=0,name1='Samp1',name2='Samp2',writemode='a')

1047

Returns: t-value, two-tailed prob

1048

'''

1049

x1 = mean(a)

1050

x2 = mean(b)

1051

v1 = stdev(a)**2

1052

v2 = stdev(b)**2

1053

n1 = len(a)

1054

n2 = len(b)

1055

df = n1+n2-2

1056

svar = ((n1-1)*v1+(n2-1)*v2)/float(df)

1057

t = (x1-x2)/math.sqrt(svar*(1.0/n1 + 1.0/n2))

1058

prob = betai(0.5*df,0.5,df/(df+t*t))

1059

1060

if printit <> 0:

1061

statname = 'Independent samples T-test.'

1062

outputpairedstats(printit,writemode,

1063

name1,n1,x1,v1,min(a),max(a),

1064

name2,n2,x2,v2,min(b),max(b),

1065

statname,t,prob)

1066

return t,prob

1067

1068

1069

def lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a'):

1070

'''

1071

Calculates the t-obtained T-test on TWO RELATED samples of scores,

1072

a and b. From Numerical Recipies, p.483. If printit=1, results are

1073

printed to the screen. If printit='filename', the results are output to

1074

'filename' using the given writemode (default=append). Returns t-value,

1075

and prob.

1076

1077

Usage: lttest_rel(a,b,printit=0,name1='Sample1',name2='Sample2',writemode='a')

1078

Returns: t-value, two-tailed prob

1079

'''

1080

if len(a)<>len(b):

1081

raise ValueError, 'Unequal length lists in ttest_rel.'

1082

x1 = mean(a)

1083

x2 = mean(b)

1084

v1 = var(a)

1085

v2 = var(b)

1086

n = len(a)

1087

cov = 0

1088

for i in range(len(a)):

1089

cov = cov + (a[i]-x1) * (b[i]-x2)

1090

df = n-1

1091

cov = cov / float(df)

1092

sd = math.sqrt((v1+v2 - 2.0*cov)/float(n))

1093

t = (x1-x2)/sd

1094

prob = betai(0.5*df,0.5,df/(df+t*t))

1095

1096

if printit <> 0:

1097

statname = 'Related samples T-test.'

1098

outputpairedstats(printit,writemode,

1099

name1,n,x1,v1,min(a),max(a),

1100

name2,n,x2,v2,min(b),max(b),

1101

statname,t,prob)

1102

return t, prob

1103

1104

1105

def lchisquare(f_obs,f_exp=None):

1106

'''

1107

Calculates a one-way chi square for list of observed frequencies and returns

1108

the result. If no expected frequencies are given, the total N is assumed to

1109

be equally distributed across all groups.

1110

1111

Usage: lchisquare(f_obs, f_exp=None) f_obs = list of observed cell freq.

1112

Returns: chisquare-statistic, associated p-value

1113

'''

1114

k = len(f_obs) # number of groups

1115

if f_exp == None:

1116

f_exp = [sum(f_obs)/float(k)] * len(f_obs) # create k bins with = freq.

1117

chisq = 0

1118

for i in range(len(f_obs)):

1119

chisq = chisq + (f_obs[i]-f_exp[i])**2 / float(f_exp[i])

1120

return chisq, chisqprob(chisq, k-1)

1121

1122

1123

def lks_2samp(data1,data2):

1124

'''

1125

Computes the Kolmogorov-Smirnof statistic on 2 samples. From

1126

Numerical Recipies in C, page 493.

1127

1128

Usage: lks_2samp(data1,data2) data1&2 are lists of values for 2 conditions

1129

Returns: KS D-value, associated p-value

1130

'''

1131

j1 = 0

1132

j2 = 0

1133

fn1 = 0.0

1134

fn2 = 0.0

1135

n1 = len(data1)

1136

n2 = len(data2)

1137

en1 = n1

1138

en2 = n2

1139

d = 0.0

1140

data1.sort()

1141

data2.sort()

1142

while j1 < n1 and j2 < n2:

1143

d1=data1[j1]

1144

d2=data2[j2]

1145

if d1 <= d2:

1146

fn1 = (j1)/float(en1)

1147

j1 = j1 + 1

1148

if d2 <= d1:

1149

fn2 = (j2)/float(en2)

1150

j2 = j2 + 1

1151

dt = (fn2-fn1)

1152

if math.fabs(dt) > math.fabs(d):

1153

d = dt

1154

try:

1155

en = math.sqrt(en1*en2/float(en1+en2))

1156

prob = ksprob((en+0.12+0.11/en)*abs(d))

1157

except:

1158

prob = 1.0

1159

return d, prob

1160

1161

1162

def lmannwhitneyu(x,y):

1163

'''

1164

Calculates a Mann-Whitney U statistic on the provided scores and

1165

returns the result. Use only when the n in each condition is < 20 and

1166

you have 2 independent samples of ranks. NOTE: Mann-Whitney U is

1167

significant if the u-obtained is LESS THAN or equal to the critical

1168

value of U found in the tables. Equivalent to Kruskal-Wallis H with

1169

just 2 groups.

1170

1171

Usage: lmannwhitneyu(data)

1172

Returns: u-statistic, one-tailed p-value (i.e., p(z(U)))

1173

'''

1174

n1 = len(x)

1175

n2 = len(y)

1176

ranked = rankdata(x+y)

1177

rankx = ranked[0:n1] # get the x-ranks

1178

ranky = ranked[n1:] # the rest are y-ranks

1179

u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx) # calc U for x

1180

u2 = n1*n2 - u1 # remainder is U for y

1181

bigu = max(u1,u2)

1182

smallu = min(u1,u2)

1183

T = math.sqrt(tiecorrect(ranked)) # correction factor for tied scores

1184

if T == 0:

1185

raise ValueError, 'All numbers are identical in lmannwhitneyu'

1186

sd = math.sqrt(T*n1*n2*(n1+n2+1)/12.0)

1187

z = abs((bigu-n1*n2/2.0) / sd) # normal approximation for prob calc

1188

return smallu, 1.0 - zprob(z)

1189

1190

1191

def ltiecorrect(rankvals):

1192

'''

1193

Corrects for ties in Mann Whitney U and Kruskal Wallis H tests. See

1194

Siegel, S. (1956) Nonparametric Statistics for the Behavioral Sciences.

1195

New York: McGraw-Hill. Code adapted from |Stat rankind.c code.

1196

1197

Usage: ltiecorrect(rankvals)

1198

Returns: T correction factor for U or H

1199

'''

1200

sorted,posn = shellsort(rankvals)

1201

n = len(sorted)

1202

T = 0.0

1203

i = 0

1204

while (i<n-1):

1205

if sorted[i] == sorted[i+1]:

1206

nties = 1

1207

while (i<n-1) and (sorted[i] == sorted[i+1]):

1208

nties = nties +1

1209

i = i +1

1210

T = T + nties**3 - nties

1211

i = i+1

1212

T = T / float(n**3-n)

1213

return 1.0 - T

1214

1215

1216

def lranksums(x,y):

1217

'''

1218

Calculates the rank sums statistic on the provided scores and

1219

returns the result. Use only when the n in each condition is > 20 and you

1220

have 2 independent samples of ranks.

1221

1222

Usage: lranksums(x,y)

1223

Returns: a z-statistic, two-tailed p-value

1224

'''

1225

n1 = len(x)

1226

n2 = len(y)

1227

alldata = x+y

1228

ranked = rankdata(alldata)

1229

x = ranked[:n1]

1230

y = ranked[n1:]

1231

s = sum(x)

1232

expected = n1*(n1+n2+1) / 2.0

1233

z = (s - expected) / math.sqrt(n1*n2*(n1+n2+1)/12.0)

1234

prob = 2*(1.0 -zprob(abs(z)))

1235

return z, prob

1236

1237

1238

def lwilcoxont(x,y):

1239

'''

1240

Calculates the Wilcoxon T-test for related samples and returns the

1241

result. A non-parametric T-test.

1242

1243

Usage: lwilcoxont(x,y)

1244

Returns: a t-statistic, two-tail probability estimate

1245

'''

1246

if len(x) <> len(y):

1247

raise ValueError, 'Unequal N in wilcoxont. Aborting.'

1248

d=[]

1249

for i in range(len(x)):

1250

diff = x[i] - y[i]

1251

if diff <> 0:

1252

d.append(diff)

1253

count = len(d)

1254

absd = map(abs,d)

1255

absranked = rankdata(absd)

1256

r_plus = 0.0

1257

r_minus = 0.0

1258

for i in range(len(absd)):

1259

if d[i] < 0:

1260

r_minus = r_minus + absranked[i]

1261

else:

1262

r_plus = r_plus + absranked[i]

1263

wt = min(r_plus, r_minus)

1264

mn = count * (count+1) * 0.25

1265

se = math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0)

1266

z = math.fabs(wt-mn) / se

1267

prob = 2*(1.0 -zprob(abs(z)))

1268

return wt, prob

1269

1270

1271

def lkruskalwallish(*args):

1272

'''

1273

The Kruskal-Wallis H-test is a non-parametric ANOVA for 3 or more

1274

groups, requiring at least 5 subjects in each group. This function

1275

calculates the Kruskal-Wallis H-test for 3 or more independent samples

1276

and returns the result.

1277

1278

Usage: lkruskalwallish(*args)

1279

Returns: H-statistic (corrected for ties), associated p-value

1280

'''

1281

args = list(args)

1282

n = [0]*len(args)

1283

all = []

1284

n = map(len,args)

1285

for i in range(len(args)):

1286

all = all + args[i]

1287

ranked = rankdata(all)

1288

T = tiecorrect(ranked)

1289

for i in range(len(args)):

1290

args[i] = ranked[0:n[i]]

1291

del ranked[0:n[i]]

1292

rsums = []

1293

for i in range(len(args)):

1294

rsums.append(sum(args[i])**2)

1295

rsums[i] = rsums[i] / float(n[i])

1296

ssbn = sum(rsums)

1297

totaln = sum(n)

1298

h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1)

1299

df = len(args) - 1

1300

if T == 0:

1301

raise ValueError, 'All numbers are identical in lkruskalwallish'

1302

h = h / float(T)

1303

return h, chisqprob(h,df)

1304

1305

1306

def lfriedmanchisquare(*args):

1307

'''

1308

Friedman Chi-Square is a non-parametric, one-way within-subjects

1309

ANOVA. This function calculates the Friedman Chi-square test for repeated

1310

measures and returns the result, along with the associated probability

1311

value. It assumes 3 or more repeated measures. Only 3 levels requires a

1312

minimum of 10 subjects in the study. Four levels requires 5 subjects per

1313

level(??).

1314

1315

Usage: lfriedmanchisquare(*args)

1316

Returns: chi-square statistic, associated p-value

1317

'''

1318

k = len(args)

1319

if k < 3:

1320

raise ValueError, 'Less than 3 levels. Friedman test not appropriate.'

1321

n = len(args[0])

1322

data = apply(pstat.abut,tuple(args))

1323

for i in range(len(data)):

1324

data[i] = rankdata(data[i])

1325

ssbn = 0

1326

for i in range(k):

1327

ssbn = ssbn + sum(args[i])**2

1328

chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)

1329

return chisq, chisqprob(chisq,k-1)

1330

1331

1332

####################################

1333

#### PROBABILITY CALCULATIONS ####

1334

####################################

1335

1336

def lchisqprob(chisq,df):

1337

'''

1338

Returns the (1-tailed) probability value associated with the provided

1339

chi-square value and df. Adapted from chisq.c in Gary Perlman's |Stat.

1340

1341

Usage: lchisqprob(chisq,df)

1342

'''

1343

BIG = 20.0

1344

def ex(x):

1345

BIG = 20.0

1346

if x < -BIG:

1347

return 0.0

1348

else:

1349

return math.exp(x)

1350

1351

if chisq <=0 or df < 1:

1352

return 1.0

1353

a = 0.5 * chisq

1354

if df%2 == 0:

1355

even = 1

1356

else:

1357

even = 0

1358

if df > 1:

1359

y = ex(-a)

1360

if even:

1361

s = y

1362

else:

1363

s = 2.0 * zprob(-math.sqrt(chisq))

1364

if (df > 2):

1365

chisq = 0.5 * (df - 1.0)

1366

if even:

1367

z = 1.0

1368

else:

1369

z = 0.5

1370

if a > BIG:

1371

if even:

1372

e = 0.0

1373

else:

1374

e = math.log(math.sqrt(math.pi))

1375

c = math.log(a)

1376

while (z <= chisq):

1377

e = math.log(z) + e

1378

s = s + ex(c*z-a-e)

1379

z = z + 1.0

1380

return s

1381

else:

1382

if even:

1383

e = 1.0

1384

else:

1385

e = 1.0 / math.sqrt(math.pi) / math.sqrt(a)

1386

c = 0.0

1387

while (z <= chisq):

1388

e = e * (a/float(z))

1389

c = c + e

1390

z = z + 1.0

1391

return (c*y+s)

1392

else:

1393

return s

1394

1395

1396

def lerfcc(x):

1397

'''

1398

Returns the complementary error function erfc(x) with fractional

1399

error everywhere less than 1.2e-7. Adapted from Numerical Recipies.

1400

1401

Usage: lerfcc(x)

1402

'''

1403

z = abs(x)

1404

t = 1.0 / (1.0+0.5*z)

1405

ans = t * math.exp(-z*z-1.26551223 + t*(1.00002368+t*(0.37409196+t*(0.09678418+t*(-0.18628806+t*(0.27886807+t*(-1.13520398+t*(1.48851587+t*(-0.82215223+t*0.17087277)))))))))

1406

if x >= 0:

1407

return ans

1408

else:

1409

return 2.0 - ans

1410

1411

1412

def lzprob(z):

1413

'''

1414

Returns the area under the normal curve 'to the left of' the given z value.

1415

Thus,

1416

for z<0, zprob(z) = 1-tail probability

1417

for z>0, 1.0-zprob(z) = 1-tail probability

1418

for any z, 2.0*(1.0-zprob(abs(z))) = 2-tail probability

1419

Adapted from z.c in Gary Perlman's |Stat.

1420

1421

Usage: lzprob(z)

1422

'''

1423

Z_MAX = 6.0 # maximum meaningful z-value

1424

if z == 0.0:

1425

x = 0.0

1426

else:

1427

y = 0.5 * math.fabs(z)

1428

if y >= (Z_MAX*0.5):

1429

x = 1.0

1430

elif (y < 1.0):

1431

w = y*y

1432

x = ((((((((0.000124818987 * w

1433

-0.001075204047) * w +0.005198775019) * w

1434

-0.019198292004) * w +0.059054035642) * w

1435

-0.151968751364) * w +0.319152932694) * w

1436

-0.531923007300) * w +0.797884560593) * y * 2.0

1437

else:

1438

y = y - 2.0

1439

x = (((((((((((((-0.000045255659 * y

1440

+0.000152529290) * y -0.000019538132) * y

1441

-0.000676904986) * y +0.001390604284) * y

1442

-0.000794620820) * y -0.002034254874) * y

1443

+0.006549791214) * y -0.010557625006) * y

1444

+0.011630447319) * y -0.009279453341) * y

1445

+0.005353579108) * y -0.002141268741) * y

1446

+0.000535310849) * y +0.999936657524

1447

if z > 0.0:

1448

prob = ((x+1.0)*0.5)

1449

else:

1450

prob = ((1.0-x)*0.5)

1451

return prob

1452

1453

1454

def lksprob(alam):

1455

'''

1456

Computes a Kolmolgorov-Smirnov t-test significance level. Adapted from

1457

Numerical Recipies.

1458

1459

Usage: lksprob(alam)

1460

'''

1461

fac = 2.0

1462

sum = 0.0

1463

termbf = 0.0

1464

a2 = -2.0*alam*alam

1465

for j in range(1,201):

1466

term = fac*math.exp(a2*j*j)

1467

sum = sum + term

1468

if math.fabs(term) <= (0.001*termbf) or math.fabs(term) < (1.0e-8*sum):

1469

return sum

1470

fac = -fac

1471

termbf = math.fabs(term)

1472

return 1.0 # Get here only if fails to converge; was 0.0!!

1473

1474

1475

def lfprob(dfnum, dfden, F):

1476

'''

1477

Returns the (1-tailed) significance level (p-value) of an F

1478

statistic given the degrees of freedom for the numerator (dfR-dfF) and

1479

the degrees of freedom for the denominator (dfF).

1480

1481

Usage: lfprob(dfnum, dfden, F) where usually dfnum=dfbn, dfden=dfwn

1482

'''

1483

p = betai(0.5*dfden, 0.5*dfnum, dfden/float(dfden+dfnum*F))

1484

return p

1485

1486

1487

def lbetacf(a,b,x):

1488

'''

1489

This function evaluates the continued fraction form of the incomplete

1490

Beta function, betai. (Adapted from: Numerical Recipies in C.)

1491

1492

Usage: lbetacf(a,b,x)

1493

'''

1494

ITMAX = 200

1495

EPS = 3.0e-7

1496

1497

bm = az = am = 1.0

1498

qab = a+b

1499

qap = a+1.0

1500

qam = a-1.0

1501

bz = 1.0-qab*x/qap

1502

for i in range(ITMAX+1):

1503

em = float(i+1)

1504

tem = em + em

1505

d = em*(b-em)*x/((qam+tem)*(a+tem))

1506

ap = az + d*am

1507

bp = bz+d*bm

1508

d = -(a+em)*(qab+em)*x/((qap+tem)*(a+tem))

1509

app = ap+d*az

1510

bpp = bp+d*bz

1511

aold = az

1512

am = ap/bpp

1513

bm = bp/bpp

1514

az = app/bpp

1515

bz = 1.0

1516

if (abs(az-aold)<(EPS*abs(az))):

1517

return az

1518

print 'a or b too big, or ITMAX too small in Betacf.'

1519

1520

1521

def lgammln(xx):

1522

'''

1523

Returns the gamma function of xx.

1524

Gamma(z) = Integral(0,infinity) of t^(z-1)exp(-t) dt.

1525

(Adapted from: Numerical Recipies in C.)

1526

1527

Usage: lgammln(xx)

1528

'''

1529

coeff = [76.18009173, -86.50532033, 24.01409822, -1.231739516,

1530

0.120858003e-2, -0.536382e-5]

1531

x = xx - 1.0

1532

tmp = x + 5.5

1533

tmp = tmp - (x+0.5)*math.log(tmp)

1534

ser = 1.0

1535

for j in range(len(coeff)):

1536

x = x + 1

1537

ser = ser + coeff[j]/x

1538

return -tmp + math.log(2.50662827465*ser)

1539

1540

1541

def lbetai(a,b,x):

1542

'''

1543

Returns the incomplete beta function:

1544

1545

I-sub-x(a,b) = 1/B(a,b)*(Integral(0,x) of t^(a-1)(1-t)^(b-1) dt)

1546

1547

where a,b>0 and B(a,b) = G(a)*G(b)/(G(a+b)) where G(a) is the gamma

1548

function of a. The continued fraction formulation is implemented here,

1549

using the betacf function. (Adapted from: Numerical Recipies in C.)

1550

1551

Usage: lbetai(a,b,x)

1552

'''

1553

if (x<0.0 or x>1.0):

1554

raise ValueError, 'Bad x in lbetai'

1555

if (x==0.0 or x==1.0):

1556

bt = 0.0

1557

else:

1558

bt = math.exp(gammln(a+b)-gammln(a)-gammln(b)+a*math.log(x)+b*

1559

math.log(1.0-x))

1560

if (x<(a+1.0)/(a+b+2.0)):

1561

return bt*betacf(a,b,x)/float(a)

1562

else:

1563

return 1.0-bt*betacf(b,a,1.0-x)/float(b)

1564

1565

1566

####################################

1567

####### ANOVA CALCULATIONS #######

1568

####################################

1569

1570

def lF_oneway(*lists):

1571

'''

1572

Performs a 1-way ANOVA, returning an F-value and probability given

1573

any number of groups. From Heiman, pp.394-7.

1574

1575

Usage: F_oneway(*lists) where *lists is any number of lists, one per

1576

treatment group

1577

Returns: F value, one-tailed p-value

1578

'''

1579

a = len(lists) # ANOVA on 'a' groups, each in it's own list

1580

means = [0]*a

1581

vars = [0]*a

1582

ns = [0]*a

1583

alldata = []

1584

tmp = map(N.array,lists)

1585

means = map(amean,tmp)

1586

vars = map(avar,tmp)

1587

ns = map(len,lists)

1588

for i in range(len(lists)):

1589

alldata = alldata + lists[i]

1590

alldata = N.array(alldata)

1591

bign = len(alldata)

1592

sstot = ass(alldata)-(asquare_of_sums(alldata)/float(bign))

1593

ssbn = 0

1594

for list in lists:

1595

ssbn = ssbn + asquare_of_sums(N.array(list))/float(len(list))

1596

ssbn = ssbn - (asquare_of_sums(alldata)/float(bign))

1597

sswn = sstot-ssbn

1598

dfbn = a-1

1599

dfwn = bign - a

1600

msb = ssbn/float(dfbn)

1601

msw = sswn/float(dfwn)

1602

f = msb/msw

1603

prob = fprob(dfbn,dfwn,f)

1604

return f, prob

1605

1606

1607

def lF_value(ER,EF,dfnum,dfden):

1608

'''

1609

Returns an F-statistic given the following:

1610

ER = error associated with the null hypothesis (the Restricted model)

1611

EF = error associated with the alternate hypothesis (the Full model)

1612

dfR-dfF = degrees of freedom of the numerator

1613

dfF = degrees of freedom associated with the denominator/Full model

1614

1615

Usage: lF_value(ER,EF,dfnum,dfden)

1616

'''

1617

return ((ER-EF)/float(dfnum) / (EF/float(dfden)))

1618

1619

1620

####################################

1621

######## SUPPORT FUNCTIONS #######

1622

####################################

1623

1624

def writecc(listoflists,file,writetype='w',extra=2):

1625

'''

1626

Writes a list of lists to a file in columns, customized by the max

1627

size of items within the columns (max size of items in col, +2 characters)

1628

to specified file. File-overwrite is the default.

1629

1630

Usage: writecc (listoflists,file,writetype='w',extra=2)

1631

Returns: None

1632

'''

1633

if type(listoflists[0]) not in [ListType,TupleType]:

1634

listoflists = [listoflists]

1635

outfile = open(file,writetype)

1636

rowstokill = []

1637

list2print = copy.deepcopy(listoflists)

1638

for i in range(len(listoflists)):

1639

if listoflists[i] == ['\n'] or listoflists[i]=='\n' or listoflists[i]=='dashes':

1640

rowstokill = rowstokill + [i]

1641

rowstokill.reverse()

1642

for row in rowstokill:

1643

del list2print[row]

1644

maxsize = [0]*len(list2print[0])

1645

for col in range(len(list2print[0])):

1646

items = pstat.colex(list2print,col)

1647

items = map(pstat.makestr,items)

1648

maxsize[col] = max(map(len,items)) + extra

1649

for row in listoflists:

1650

if row == ['\n'] or row == '\n':

1651

outfile.write('\n')

1652

elif row == ['dashes'] or row == 'dashes':

1653

dashes = [0]*len(maxsize)

1654

for j in range(len(maxsize)):

1655

dashes[j] = '-'*(maxsize[j]-2)

1656

outfile.write(pstat.lineincustcols(dashes,maxsize))

1657

else:

1658

outfile.write(pstat.lineincustcols(row,maxsize))

1659

outfile.write('\n')

1660

outfile.close()

1661

return None

1662

1663

1664

def lincr(l,cap): # to increment a list up to a max-list of 'cap'

1665

'''

1666

Simulate a counting system from an n-dimensional list.

1667

1668

Usage: lincr(l,cap) l=list to increment, cap=max values for each list pos'n

1669

Returns: next set of values for list l, OR -1 (if overflow)

1670

'''

1671

l[0] = l[0] + 1 # e.g., [0,0,0] --> [2,4,3] (=cap)

1672

for i in range(len(l)):

1673

if l[i] > cap[i] and i < len(l)-1: # if carryover AND not done

1674

l[i] = 0

1675

l[i+1] = l[i+1] + 1

1676

elif l[i] > cap[i] and i == len(l)-1: # overflow past last column, must be finished

1677

l = -1

1678

return l

1679

1680

1681

def lsum(inlist):

1682

'''

1683

Returns the sum of the items in the passed list.

1684

1685

Usage: lsum(inlist)

1686

'''

1687

s = 0

1688

for item in inlist:

1689

s = s + item

1690

return s

1691

1692

1693

def lcumsum(inlist):

1694

'''

1695

Returns a list consisting of the cumulative sum of the items in the

1696

passed list.

1697

1698

Usage: lcumsum(inlist)

1699

'''

1700

newlist = copy.deepcopy(inlist)

1701

for i in range(1,len(newlist)):

1702

newlist[i] = newlist[i] + newlist[i-1]

1703

return newlist

1704

1705

1706

def lss(inlist):

1707

'''

1708

Squares each value in the passed list, adds up these squares and

1709

returns the result.

1710

1711

Usage: lss(inlist)

1712

'''

1713

ss = 0

1714

for item in inlist:

1715

ss = ss + item*item

1716

return ss

1717

1718

1719

def lsummult(list1,list2):

1720

'''

1721

Multiplies elements in list1 and list2, element by element, and

1722

returns the sum of all resulting multiplications. Must provide equal

1723

length lists.

1724

1725

Usage: lsummult(list1,list2)

1726

'''

1727

if len(list1) <> len(list2):

1728

raise ValueError, "Lists not equal length in summult."

1729

s = 0

1730

for item1,item2 in pstat.abut(list1,list2):

1731

s = s + item1*item2

1732

return s

1733

1734

1735

def lsumdiffsquared(x,y):

1736

'''

1737

Takes pairwise differences of the values in lists x and y, squares

1738

these differences, and returns the sum of these squares.

1739

1740

Usage: lsumdiffsquared(x,y)

1741

Returns: sum[(x[i]-y[i])**2]

1742

'''

1743

sds = 0

1744

for i in range(len(x)):

1745

sds = sds + (x[i]-y[i])**2

1746

return sds

1747

1748

1749

def lsquare_of_sums(inlist):

1750

'''

1751

Adds the values in the passed list, squares the sum, and returns

1752

the result.

1753

1754

Usage: lsquare_of_sums(inlist)

1755

Returns: sum(inlist[i])**2

1756

'''

1757

s = sum(inlist)

1758

return float(s)*s

1759

1760

1761

def lshellsort(inlist):

1762

'''

1763

Shellsort algorithm. Sorts a 1D-list.

1764

1765

Usage: lshellsort(inlist)

1766

Returns: sorted-inlist, sorting-index-vector (for original list)

1767

'''

1768

n = len(inlist)

1769

svec = copy.deepcopy(inlist)

1770

ivec = range(n)

1771

gap = n/2 # integer division needed

1772

while gap >0:

1773

for i in range(gap,n):

1774

for j in range(i-gap,-1,-gap):

1775

while j>=0 and svec[j]>svec[j+gap]:

1776

temp = svec[j]

1777

svec[j] = svec[j+gap]

1778

svec[j+gap] = temp

1779

itemp = ivec[j]

1780

ivec[j] = ivec[j+gap]

1781

ivec[j+gap] = itemp

1782

gap = gap / 2 # integer division needed

1783

# svec is now sorted inlist, and ivec has the order svec[i] = vec[ivec[i]]

1784

return svec, ivec

1785

1786

1787

def lrankdata(inlist):

1788

'''

1789

Ranks the data in inlist, dealing with ties appropritely. Assumes

1790

a 1D inlist. Adapted from Gary Perlman's |Stat ranksort.

1791

1792

Usage: lrankdata(inlist)

1793

Returns: a list of length equal to inlist, containing rank scores

1794

'''

1795

n = len(inlist)

1796

svec, ivec = shellsort(inlist)

1797

sumranks = 0

1798

dupcount = 0

1799

newlist = [0]*n

1800

for i in range(n):

1801

sumranks = sumranks + i

1802

dupcount = dupcount + 1

1803

if i==n-1 or svec[i] <> svec[i+1]:

1804

averank = sumranks / float(dupcount) + 1

1805

for j in range(i-dupcount+1,i+1):

1806

newlist[ivec[j]] = averank

1807

sumranks = 0

1808

dupcount = 0

1809

return newlist

1810

1811

1812

def outputpairedstats(fname,writemode,name1,n1,m1,se1,min1,max1,name2,n2,m2,se2,min2,max2,statname,stat,prob):

1813

'''

1814

Prints or write to a file stats for two groups, using the name, n,

1815

mean, sterr, min and max for each group, as well as the statistic name,

1816

its value, and the associated p-value.

1817

1818

Usage: outputpairedstats(fname,writemode,

1819

name1,n1,mean1,stderr1,min1,max1,

1820

name2,n2,mean2,stderr2,min2,max2,

1821

statname,stat,prob)

1822

Returns: None

1823

'''

1824

suffix = '' # for *s after the p-value

1825

try:

1826

x = prob.shape

1827

prob = prob[0]

1828

except:

1829

pass

1830

if prob < 0.001: suffix = ' ***'

1831

elif prob < 0.01: suffix = ' **'

1832

elif prob < 0.05: suffix = ' *'

1833

title = [['Name','N','Mean','SD','Min','Max']]

1834

lofl = title+[[name1,n1,round(m1,3),round(math.sqrt(se1),3),min1,max1],

1835

[name2,n2,round(m2,3),round(math.sqrt(se2),3),min2,max2]]

1836

if type(fname)<>StringType or len(fname)==0:

1837

1838

print statname

1839

1840

pstat.printcc(lofl)

1841

1842

try:

1843

if stat.shape == ():

1844

stat = stat[0]

1845

if prob.shape == ():

1846

prob = prob[0]

1847

except:

1848

pass

1849

print 'Test statistic = ',round(stat,3),' p = ',round(prob,3),suffix

1850

1851

else:

1852

file = open(fname,writemode)

1853

file.write('\n'+statname+'\n\n')

1854

file.close()

1855

writecc(lofl,fname,'a')

1856

file = open(fname,'a')

1857

try:

1858

if stat.shape == ():

1859

stat = stat[0]

1860

if prob.shape == ():

1861

prob = prob[0]

1862

except:

1863

pass

1864

file.write(pstat.list2string(['\nTest statistic = ',round(stat,4),' p = ',round(prob,4),suffix,'\n\n']))

1865

file.close()

1866

return None

1867

1868

1869

def lfindwithin(data):

1870

'''

1871

Returns an integer representing a binary vector, where 1=within-

1872

subject factor, 0=between. Input equals the entire data 2D list (i.e.,

1873

column 0=random factor, column -1=measured values (those two are skipped).

1874

Note: input data is in |Stat format ... a list of lists ("2D list") with

1875

one row per measured value, first column=subject identifier, last column=

1876

score, one in-between column per factor (these columns contain level

1877

designations on each factor). See also stats.anova.__doc__.

1878

1879

Usage: lfindwithin(data) data in |Stat format

1880

'''

1881

numfact = len(data[0])-1

1882

withinvec = 0

1883

for col in range(1,numfact):

1884

examplelevel = pstat.unique(pstat.colex(data,col))[0]

1885

rows = pstat.linexand(data,col,examplelevel) # get 1 level of this factor

1886

factsubjs = pstat.unique(pstat.colex(rows,0))

1887

allsubjs = pstat.unique(pstat.colex(data,0))

1888

if len(factsubjs) == len(allsubjs): # fewer Ss than scores on this factor?

1889

withinvec = withinvec + (1 << col)

1890

return withinvec

1891

1892

1893

#########################################################

1894

#########################################################

1895

####### DISPATCH LISTS AND TUPLES TO ABOVE FCNS #########

1896

#########################################################

1897

#########################################################

1898

1899

## CENTRAL TENDENCY:

1900

geometricmean = Dispatch ( (lgeometricmean, (ListType, TupleType)), )

1901

harmonicmean = Dispatch ( (lharmonicmean, (ListType, TupleType)), )

1902

mean = Dispatch ( (lmean, (ListType, TupleType)), )

1903

median = Dispatch ( (lmedian, (ListType, TupleType)), )

1904

medianscore = Dispatch ( (lmedianscore, (ListType, TupleType)), )

1905

mode = Dispatch ( (lmode, (ListType, TupleType)), )

1906

1907

## MOMENTS:

1908

moment = Dispatch ( (lmoment, (ListType, TupleType)), )

1909

variation = Dispatch ( (lvariation, (ListType, TupleType)), )

1910

skew = Dispatch ( (lskew, (ListType, TupleType)), )

1911

kurtosis = Dispatch ( (lkurtosis, (ListType, TupleType)), )

1912

describe = Dispatch ( (ldescribe, (ListType, TupleType)), )

1913

1914

## FREQUENCY STATISTICS:

1915

itemfreq = Dispatch ( (litemfreq, (ListType, TupleType)), )

1916

scoreatpercentile = Dispatch ( (lscoreatpercentile, (ListType, TupleType)), )

1917

percentileofscore = Dispatch ( (lpercentileofscore, (ListType, TupleType)), )

1918

histogram = Dispatch ( (lhistogram, (ListType, TupleType)), )

1919

cumfreq = Dispatch ( (lcumfreq, (ListType, TupleType)), )

1920

relfreq = Dispatch ( (lrelfreq, (ListType, TupleType)), )

1921

1922

## VARIABILITY:

1923

obrientransform = Dispatch ( (lobrientransform, (ListType, TupleType)), )

1924

samplevar = Dispatch ( (lsamplevar, (ListType, TupleType)), )

1925

samplestdev = Dispatch ( (lsamplestdev, (ListType, TupleType)), )

1926

var = Dispatch ( (lvar, (ListType, TupleType)), )

1927

stdev = Dispatch ( (lstdev, (ListType, TupleType)), )

1928

sterr = Dispatch ( (lsterr, (ListType, TupleType)), )

1929

sem = Dispatch ( (lsem, (ListType, TupleType)), )

1930

z = Dispatch ( (lz, (ListType, TupleType)), )

1931

zs = Dispatch ( (lzs, (ListType, TupleType)), )

1932

1933

## TRIMMING FCNS:

1934

trimboth = Dispatch ( (ltrimboth, (ListType, TupleType)), )

1935

trim1 = Dispatch ( (ltrim1, (ListType, TupleType)), )

1936

1937

## CORRELATION FCNS:

1938

paired = Dispatch ( (lpaired, (ListType, TupleType)), )

1939

pearsonr = Dispatch ( (lpearsonr, (ListType, TupleType)), )

1940

spearmanr = Dispatch ( (lspearmanr, (ListType, TupleType)), )

1941

pointbiserialr = Dispatch ( (lpointbiserialr, (ListType, TupleType)), )

1942

kendalltau = Dispatch ( (lkendalltau, (ListType, TupleType)), )

1943

linregress = Dispatch ( (llinregress, (ListType, TupleType)), )

1944

1945

## INFERENTIAL STATS:

1946

ttest_1samp = Dispatch ( (lttest_1samp, (ListType, TupleType)), )

1947

ttest_ind = Dispatch ( (lttest_ind, (ListType, TupleType)), )

1948

ttest_rel = Dispatch ( (lttest_rel, (ListType, TupleType)), )

1949

chisquare = Dispatch ( (lchisquare, (ListType, TupleType)), )

1950

ks_2samp = Dispatch ( (lks_2samp, (ListType, TupleType)), )

1951

mannwhitneyu = Dispatch ( (lmannwhitneyu, (ListType, TupleType)), )

1952

ranksums = Dispatch ( (lranksums, (ListType, TupleType)), )

1953

tiecorrect = Dispatch ( (ltiecorrect, (ListType, TupleType)), )

1954

wilcoxont = Dispatch ( (lwilcoxont, (ListType, TupleType)), )

1955

kruskalwallish = Dispatch ( (lkruskalwallish, (ListType, TupleType)), )

1956

friedmanchisquare = Dispatch ( (lfriedmanchisquare, (ListType, TupleType)), )

1957

1958

## PROBABILITY CALCS:

1959

chisqprob = Dispatch ( (lchisqprob, (IntType, FloatType)), )

1960

zprob = Dispatch ( (lzprob, (IntType, FloatType)), )

1961

ksprob = Dispatch ( (lksprob, (IntType, FloatType)), )

1962

fprob = Dispatch ( (lfprob, (IntType, FloatType)), )

1963

betacf = Dispatch ( (lbetacf, (IntType, FloatType)), )

1964

betai = Dispatch ( (lbetai, (IntType, FloatType)), )

1965

erfcc = Dispatch ( (lerfcc, (IntType, FloatType)), )

1966

gammln = Dispatch ( (lgammln, (IntType, FloatType)), )

1967

1968

## ANOVA FUNCTIONS:

1969

F_oneway = Dispatch ( (lF_oneway, (ListType, TupleType)), )

1970

F_value = Dispatch ( (lF_value, (ListType, TupleType)), )

1971

1972

## SUPPORT FUNCTIONS:

1973

incr = Dispatch ( (lincr, (ListType, TupleType)), )

1974

sum = Dispatch ( (lsum, (ListType, TupleType)), )

1975

cumsum = Dispatch ( (lcumsum, (ListType, TupleType)), )

1976

ss = Dispatch ( (lss, (ListType, TupleType)), )

1977

summult = Dispatch ( (lsummult, (ListType, TupleType)), )

1978

square_of_sums = Dispatch ( (lsquare_of_sums, (ListType, TupleType)), )

1979

sumdiffsquared = Dispatch ( (lsumdiffsquared, (ListType, TupleType)), )

1980

shellsort = Dispatch ( (lshellsort, (ListType, TupleType)), )

1981

rankdata = Dispatch ( (lrankdata, (ListType, TupleType)), )

1982

findwithin = Dispatch ( (lfindwithin, (ListType, TupleType)), )

1983

1984

1985