266
266
/// Separate the file in data blocks and parse them to sort tags, loops and comments.
267
267
/// All is stored in the original strings.
268
void Parse(std::stringstream &in);
269
/// Returns the name of the next data block
270
void Parse(std::istream &in);
269
271
/// The data blocks, after parsing. The key is the name of the data block
270
272
std::map<std::string,CIFData> mvData;
271
273
/// Global comments, outside any data block
311
313
if(loop->second.find("_atom_site_Cartn_y")!=loop->second.end()) empty_iucrjournal_block=false;
312
314
if(loop->second.find("_atom_site_Cartn_z")!=loop->second.end()) empty_iucrjournal_block=false;
314
if(empty_iucrjournal_block)
316
if(empty_iucrjournal_block)
317
319
ss << "CIF WARNING: found en empty 'data_global' block - SKIPPING\n"
369
371
mvLatticePar[3] = static_cast<float> (mvLatticePar[3] * DEG_TO_RAD);// pi/180
370
372
mvLatticePar[4] = static_cast<float> (mvLatticePar[4] * DEG_TO_RAD);
371
373
mvLatticePar[5] = static_cast<float> (mvLatticePar[5] * DEG_TO_RAD);
373
375
// Fill values depending on spacegroup, *only* when missing
374
376
if((spgid>2)&&(spgid<=15))
375
377
{// :TODO: monoclinic spg, depending on unique axis....
397
399
if(mvLatticePar[0]==0) mvLatticePar[0]=a;
398
400
if(mvLatticePar[1]==0) mvLatticePar[1]=a;
399
401
if(mvLatticePar[2]==0) mvLatticePar[2]=a;
402
404
if(mvLatticePar[3]>alpha) alpha=mvLatticePar[3];
403
405
if(mvLatticePar[4]>alpha) alpha=mvLatticePar[4];
542
544
if(verbose) cout<<"Found spacegroup Hermann-Mauguin symbol (with OBSOLETE CIF #1.0 TAG):"<<mSpacegroupHermannMauguin<<endl;
547
// DDL2 tag is "_space_group.IT_coordinate_system_code", converted by the cif reader to "_space_group_IT_coordinate_system_code"
548
positem=mvItem.find("_space_group_IT_coordinate_system_code");
549
if(positem!=mvItem.end())
551
if(verbose) cout<<"Found spacegroup IT_coordinate_system_code:"<<positem->second<<endl;
552
if((mSpacegroupHermannMauguin.length()>0) && (positem->second=="1" || positem->second=="2"))
554
// this is a HACK which will work as long as the HM symbols in spacegroups.txt have the ":1" or ":2" extension listed, when needed
555
mSpacegroupHermannMauguin=mSpacegroupHermannMauguin+string(":")+positem->second;
560
ss << "CIF Error: found DDL2 tag _space_group.IT_coordinate_system_code ("<<positem->second<<")"<<endl
561
<<" but could not interpret it ! Origin choice or axis may be incorrect.";
562
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
545
566
mSpaceGroup=NULL;
546
567
// be forgiving - if spg not found, try again
547
568
// Prefer Hall > HM == number, as Hall symbol is truly unique
563
584
if(mSpaceGroup == NULL) {
564
585
SpaceGroup *sg = new SpaceGroup();
565
positem=mvItem.find("_symmetry_equiv_pos_as_xyz");
586
positem=mvItem.find("_space_group_symop_operation_xyz");
587
if(positem==mvItem.end())
588
positem=mvItem.find("_symmetry_equiv_pos_as_xyz");
566
589
if(positem!=mvItem.end())
568
591
sg->AddTransform (positem->second);
575
598
map<ci_string,vector<string> >::const_iterator pos;
577
pos=loop->second.find("_symmetry_equiv_pos_as_xyz");
600
pos=loop->second.find("_space_group_symop_operation_xyz");
601
if (pos==loop->second.end())
602
pos=loop->second.find("_symmetry_equiv_pos_as_xyz");
578
603
if (pos!=loop->second.end())
580
605
nb=pos->second.size();
930
958
CIF::CIF(istream &is, const bool interpret,const bool verbose)
932
//Copy to an iostream so that we can put back characters if necessary
935
while(is.get(c))in.put(c);
937
// Extract structure from blocks
939
for(map<string,CIFData>::iterator posd=mvData.begin();posd!=mvData.end();++posd)
940
posd->second.ExtractAll(verbose);
960
bool found_atoms=false;
963
// :TODO: we don't need a vector of CIFData, since only one block is read at a time
966
// Extract structure from 1 block
968
for(map<string,CIFData>::iterator posd=mvData.begin();posd!=mvData.end();++posd)
970
posd->second.ExtractAll(verbose);
971
if(posd->second.mvAtom.size()>0) found_atoms=true;
943
976
/// Tell whether a character is an end-of-line marker (line feed or carriage return).
bool iseol(const char c)
{
  switch(c)
  {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
945
978
/// Read one value, whether it is numeric, string or text
946
string CIFReadValue(stringstream &in,char &lastc)
979
string CIFReadValue(istream &in,char &lastc)
948
981
bool vv=false;//very verbose ?
949
982
string value("");
1042
1075
if((in.peek()=='d') || (in.peek()=='D'))
1077
if(mvData.size()>0) return; // We want just a single data block
1046
1081
block=tmp.substr(5);
1047
1082
if(vv) cout<<endl<<endl<<"NEW BLOCK DATA: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ->"<<block<<endl<<endl<<endl;
1048
1085
mvData[block]=CIFData();
1049
1086
mvData[block].mDataBlockName=tmp;
1081
1118
map<ci_string,vector<string> > lp;
1121
std::ios::pos_type pos0=in.tellg();
1084
1122
while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
1085
1123
if(in.eof()) break;
1086
if(vv) cout<<"LOOP VALUES...: "<<(char)in.peek()<<" "<<endl;
1087
1124
if(in.peek()=='_') break;
1088
1125
if(in.peek()=='#')
1089
1126
{// Comment (in a loop ??)
1090
const std::ios::pos_type pos=in.tellg();
1127
//const std::ios::pos_type pos=in.tellg();
1092
1129
getline(in,tmp);
1093
1131
if(block=="") mvComment.push_back(tmp);
1094
1132
else mvData[block].mvComment.push_back(tmp);
1096
1134
if(vv) cout<<"Comment in a loop (?):"<<tmp<<endl;
1100
const std::ios::pos_type pos=in.tellg();
1102
if(vv) cout<<"WHATNEXT? "<<tmp;
1139
tmp=CIFReadValue(in,lastc);
1103
1140
if(ci_string(tmp.c_str())=="loop_")
1104
1141
{//go back and continue
1105
if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<endl;
1143
in.seekg(pos0,std::ios::beg);
1144
if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg()<<endl;
1109
1147
if(tmp.size()>=5)
1110
1148
if(ci_string(tmp.substr(0,5).c_str())=="data_")
1111
1149
{//go back and continue
1112
if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<endl;
1151
in.seekg(pos0,std::ios::beg);
1152
if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg()<<endl;
1118
1155
for(unsigned int i=0;i<tit.size();++i)
1119
1156
{//Read all values
1120
const string value=CIFReadValue(in,lastc);
1121
lp[tit[i]].push_back(value);
1122
if(vv) cout<<" #"<<i<<" : "<<value<<endl;
1157
if(i>0) tmp=CIFReadValue(in,lastc);
1158
lp[tit[i]].push_back(tmp);
1159
if(vv) cout<<" LOOP VALUE #"<<lp[tit[i]].size()<<","<<i<<" : "<<tmp<<endl;
1125
1162
// The key to the mvLoop map is the set of column titles
1340
1377
else tmpSymbol="C";//Something went wrong, no symbol ! Default to C ??
1342
1379
int atomicNum = etab.GetAtomicNum(tmpSymbol.c_str());
1380
// Test for some oxygens with subscripts
1381
if (atomicNum == 0 && tmpSymbol[0] == 'O') {
1382
atomicNum = 8; // e.g. Ob, OH, etc.
1343
1385
atom->SetAtomicNum(atomicNum); //set atomic number, or '0' if the atom type is not recognized
1344
1386
atom->SetType(tmpSymbol); //set atomic number, or '0' if the atom type is not recognized
1345
1387
atom->SetVector(posat->mCoordCart[0],posat->mCoordCart[1],posat->mCoordCart[2]);
1388
if(posat->mLabel.size()>0)
1390
OBPairData *label = new OBPairData;
1391
label->SetAttribute("_atom_site_label");
1392
label->SetValue(posat->mLabel);
1393
label->SetOrigin(fileformatInput);
1394
atom->SetData(label);
1347
1398
if (!pConv->IsOption("b",OBConversion::INOPTIONS))
1348
1399
pmol->ConnectTheDots();
1413
1464
const SpaceGroup* pSG = pUC->GetSpaceGroup();
1414
1465
if (pSG != NULL)
1416
ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName() << "'" << endl;
1467
// Do we have an extended HM symbol, with origin choice as ":1" or ":2" ? If so, remove it.
1468
size_t n=pSG->GetHMName().find(":");
1470
ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName() << "'" << endl;
1472
ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName().substr(0,n) << "'" << endl;
1417
1473
ofs << "_space_group_name_Hall '" << pSG->GetHallName() << "'" << endl;
1418
1474
ofs << "loop_" <<endl
1419
1475
<< " _symmetry_equiv_pos_as_xyz" << endl;
1437
1493
unsigned int i=0;
1438
1494
FOR_ATOMS_OF_MOL(atom, *pmol)
1440
snprintf(buffer, BUFF_SIZE, " %3s %3s%d %10.5f %10.5f %10.5f\n",
1441
etab.GetSymbol(atom->GetAtomicNum()),
1442
etab.GetSymbol(atom->GetAtomicNum()),
1496
if (atom->HasData("_atom_site_label"))
1498
OBPairData *label = dynamic_cast<OBPairData *> (atom->GetData("_atom_site_label"));
1499
snprintf(buffer, BUFF_SIZE, " %3s %3s %10.5f %10.5f %10.5f\n",
1500
etab.GetSymbol(atom->GetAtomicNum()),
1501
label->GetValue().c_str(),
1508
snprintf(buffer, BUFF_SIZE, " %3s %3s%d %10.5f %10.5f %10.5f\n",
1509
etab.GetSymbol(atom->GetAtomicNum()),
1510
etab.GetSymbol(atom->GetAtomicNum()),