~ubuntu-branches/ubuntu/raring/plink/raring

Viewing changes to linear.cpp

Committer: Bazaar Package Importer
Author(s): Andreas Tille
Date: 2009-10-23 13:35:02 UTC
mfrom: (1.1.2 upstream)
Revision ID: james.westby@ubuntu.com-20091023133502-rmnkrfi314mevnvt

Tags: 1.07-1

* New upstream version
* Removed debian/patches/20_plink-1.06-gcc4.4.patch because it
is applied upstream
* Standards-Version: 3.8.3 (no changes needed)
* Debhelper 7
* Build-Depends: zlib1g-dev

files added:
annot.cpp

blox.cpp~

dosage.cpp

metaanal.cpp

webcheck.cpp~

zed.cpp

zed.h

zfstream.cpp

zfstream.h

files removed:
debian/patches/20_plink-1.06-gcc4.4.patch

files modified:
Makefile

Rsrv.h

assoc.cpp

binput.cpp

blox.cpp

bmerge.cpp

cluster.cpp

cnv.cpp

cnv.h

debian/changelog

debian/compat

debian/control

debian/patches/series

epi.cpp

filters.cpp

genepi.cpp

genome.cpp

glm.cpp

greport.cpp

helper.cpp

helper.h

hotel.cpp

idhelp.cpp

informative.cpp

input.cpp

lapackf.cpp

lapackf.h

linear.cpp

linear.h

linput.cpp

logistic.cpp

logistic.h

lookup2.cpp

mds.cpp

merge.cpp

mh.cpp

model.cpp

model.h

nlist.cpp

options.cpp

options.h

output.cpp

parse.cpp

plink.cpp

plink.h

profile.cpp

sets.cpp

simul.cpp

stats.cpp

stats.h

webcheck.cpp

Show diffs side-by-side

added added

removed removed

linear.cpp

//////////////////////////////////////////////////////////////////

// //

// This file is distributed under the GNU General Public //

// License, Version 2. Please see the file COPYING for more //

180

}

181

182

183

void LinearModel::setCluster(vector<int> & cl)

184

{

185

cluster=true;

186

clst = cl;

187

nc=0;

188

if (cl.size() != nind)

189

error("Setting LM cluster to wrong N -- internal error");

190

map<int,int> novelc;

191

int k=0;

192

for (int i=0; i<nind; i++)

193

if ( novelc.find(clst[i]) == novelc.end() )

194

novelc.insert(make_pair(clst[i],k++));

195

nc = novelc.size();

196

if (nc<2) error("Must have at least two clusters");

197

for (int i=0; i<nind; i++)

198

clst[i] = novelc.find(cl[i])->second;

199

200

}

201

202

void LinearModel::noCluster()

203

{

204

cluster=false;

205

clst.clear();

206

nc=0;

207

}

208

209

183

void LinearModel::setVariance()

210

184

{

211

185

varY=0;

212

double mean=0;

186

meanY = 0;

213

187

int actualN=0;

214

188

215

189

for (int i=0; i<nind; i++)

216

190

{

217

191

actualN++;

218

mean += Y[i];

192

meanY += Y[i];

219

193

}

220

194

if (actualN==0)

221

195

{

222

196

varY=0;

223

197

return;

224

198

}

225

mean /= (double)actualN;

199

meanY /= (double)actualN;

226

200

227

201

for (int i=0; i<nind; i++)

228

202

{

229

varY += (Y[i] - mean) * (Y[i] - mean);

203

varY += (Y[i] - meanY) * (Y[i] - meanY);

230

204

}

231

205

varY /= (double)(actualN-1);

232

206

234

208

error("actualN <> nind...");

235

209

}

236

210

211

void LinearModel::standardise()

212

{

213

214

// Get mean and variance for all variable

215

double sdY = sqrt(varY);

216

for (int i=0; i<nind; i++)

217

Y[i] = ( Y[i] - meanY ) / sdY;

218

219

vector_t mX(np,0);

220

vector_t sdX(np,0);

221

222

// Standardise all predictors, except intercept

223

for (int i=0; i<nind; i++)

224

for (int j=1; j<np; j++)

225

mX[j] += X[i][j];

226

for (int j=1; j<np; j++)

227

mX[j] /= nind;

228

for (int i=0; i<nind; i++)

229

for (int j=1; j<np; j++)

230

sdX[j] += (X[i][j] - mX[j]) * (X[i][j] - mX[j]);

231

for (int j=1; j<np; j++)

232

{

233

sdX[j] = sqrt(sdX[j]/(nind-1));

234

if ( sdX[j]==0 )

235

sdX[j] = 1;

236

}

237

for (int i=0; i<nind; i++)

238

for (int j=1; j<np; j++)

239

X[i][j] = ( X[i][j] - mX[j] ) / sdX[j];

240

}

241

242

237

243

void LinearModel::fitLM()

238

244

{

239

245

243

249

{

244

250

cout << "VO " << i << "\t"

245

251

<< Y[i] << "\t";

246

for (int j=0; j<np; j++)

252

for (int j=0; j<np; j++)

247

253

cout << X[i][j] << "\t";

248

254

cout << "\n";

249

255

}

263

269

}

264

270

265

271

setVariance();

272

273

if ( par::standard_beta )

274

standardise();

275

276

266

277

sig.resize(nind, sqrt(1.0/sqrt((double)nind)) );

267

278

268

279

w.resize(np);

285

296

b[i]=Y[i]*tmp;

286

297

}

287

298

288

svdcmp(u,w,v);

299

bool flag = svdcmp(u,w,v);

300

301

if ( ! flag )

302

{

303

all_valid = false;

304

return;

305

}

289

306

290

307

wmax=0.0;

291

308

for (j=0;j<np;j++)

309

326

/////////////////////////////////////////

310

327

// Obtain covariance matrix of estimates

311

328

312

// OLS variance estimator = s^2 * ( X'X )^-1

313

// where s^2 = (1/(N-k)) \sum_i=1^N e_i^2

314

329

315

330

// Robust cluster variance estimator

316

331

// V_cluster = (X'X)^-1 * \sum_{j=1}^{n_C} u_{j}' * u_j * (X'X)^-1

324

339

325

340

// The formula for the clustered estimator is simply that of the

326

341

// robust (unclustered) estimator with the individual ei*s replaced

327

// by their sums over each cluster. xi

342

// by their sums over each cluster.

343

344

// http://www.stata.com/support/faqs/stat/cluster.html

345

// SEE http://aje.oxfordjournals.org/cgi/content/full/kwm223v1#APP1

328

346

329

347

// Williams, R. L. 2000. A note on robust variance estimation for

330

348

// cluster-correlated data. Biometrics 56: 64

331

349

332

333

350

// t ( y - yhat X ) %*% ( y - yhat) / nind - np

334

335

351

// = variance of residuals

336

337

// Variance of residuals

338

339

352

// j <- ( t( y- m %*% t(b) ) %*% ( y - m %*% t(b) ) ) / ( N - p )

340

353

// print( sqrt(kronecker( solve( t(m) %*% m ) , j ) ))

341

342

343

// Calcuate S = (XtX)^-1

344

345

346

354

355

356

////////////////////////////////////////////////

357

// OLS variance estimator = s^2 * ( X'X )^-1

358

// where s^2 = (1/(N-k)) \sum_i=1^N e_i^2

359

360

// 1. Calcuate S = (X'X)^-1

361

347

362

matrix_t Xt;

348

363

sizeMatrix(Xt, np, nind);

349

364

for (int i=0; i<nind; i++)

350

365

for (int j=0; j<np; j++)

351

366

Xt[j][i] = X[i][j];

352

353

354

355

367

matrix_t S0;

356

368

multMatrix(Xt,X,S0);

357

358

if (par::verbose)

369

flag = true;

370

S0 = svd_inverse(S0,flag);

371

if ( ! flag )

359

372

{

360

cout << "beta...\n";

361

display(coef);

362

cout << "Sigma(S0a)\n";

363

display(S0);

364

cout << "\n";

373

all_valid = false;

374

return;

365

375

}

366

376

367

S0 = svd_inverse(S0);

368

369

377

if (par::verbose)

370

378

{

371

379

cout << "beta...\n";

376

384

}

377

385

378

386

379

////////////////////

380

// Standard OLS s^2

387

////////////////////////

388

// Calculate s^2 (sigma)

381

389

382

390

if (!cluster)

383

391

{

384

385

392

double sigma= 0.0;

386

393

for (int i=0; i<nind; i++)

387

394

{

404

411

405

412

if (cluster)

406

413

{

407

408

vector<vector_t> sc(nc);

409

for (int i=0; i<nc; i++)

410

sc[i].resize(np,0);

411

412

for (int i=0; i<nind; i++)

413

{

414

double partial = 0.0;

415

for (int j=0; j<np; j++)

414

415

vector<vector_t> sc(nc);

416

for (int i=0; i<nc; i++)

417

sc[i].resize(np,0);

418

419

for (int i=0; i<nind; i++)

420

{

421

double partial = 0.0;

422

for (int j=0; j<np; j++)

416

423

partial += coef[j] * X[i][j];

417

partial -= Y[i];

418

419

for (int j=0; j<np; j++)

420

sc[clst[i]][j] += partial * X[i][j];

421

}

422

423

matrix_t meat;

424

sizeMatrix(meat, np, np);

425

for (int k=0; k<nc; k++)

426

{

427

424

partial -= Y[i];

425

426

for (int j=0; j<np; j++)

427

sc[clst[i]][j] += partial * X[i][j];

428

}

429

430

matrix_t meat;

431

sizeMatrix(meat, np, np);

432

for (int k=0; k<nc; k++)

433

{

428

434

for (int i=0; i<np; i++)

429

435

for (int j=0; j<np; j++)

430

436

meat[i][j] += sc[k][i] * sc[k][j];

431

437

432

438

}

433

439

434

440

matrix_t tmp1;

435

441

multMatrix( S0 , meat, tmp1);

436

442

multMatrix( tmp1 , S0, S);

437

443

438

444

}

439

445

440

446

441

447

if (par::verbose)

442

448

{

443

cout << "beta...\n";

449

cout << "coefficients:\n";

444

450

display(coef);

445

cout << "Sigma\n";

451

cout << "var-cov matrix:\n";

446

452

display(S);

447

453

cout << "\n";

448

454

}

524

530

525

531

if (cluster)

526

532

multiplier = (double)(nc)/((double)(nc-np));

527

533

534

multiplier = 1;

535

536

//cout << "Mult = " << multiplier << "\n";

537

528

538

vector_t var(np);

529

539

for (int i=0; i<np; i++)

530

540

var[i] = multiplier * S[i][i];

540

550

541

551

if (cluster)

542

552

multiplier = (double)(nc)/((double)(nc-np));

543

553

554

multiplier = 1;

555

544

556

vector_t var(np);

545

557

for (int i=0; i<np; i++)

546

558

var[i] = sqrt ( multiplier * S[i][i] ) ;

564

576

565

577

void LinearModel::displayResults(ofstream & OUT, Locus * loc)

566

578

{

567

568

579

569

580

vector_t var;

570

581

588

599

if (okay)

589

600

{

590

601

se = sqrt(var[p]);

591

Z = coef[p] / se;

602

Z = coef[p] / se;

592

603

pvalue = pT(Z,Y.size()-np);

593

604

}

594

605

740

751

testParameter = tmp;

741

752

return res;

742

753

}

754

755

void LinearModel::HuberWhite()

756

{

757

758

}

Older »