~ubuntu-branches/debian/stretch/openbabel/stretch

« back to all changes in this revision

Viewing changes to src/formats/cifformat.cpp

  • Committer: Package Import Robot
  • Author(s): Daniel Leidert
  • Date: 2013-05-22 19:08:27 UTC
  • mfrom: (1.1.11) (7.1.9 sid)
  • Revision ID: package-import@ubuntu.com-20130522190827-72q0fnx5y2nm3bc0
Tags: 2.3.2+dfsg-1
* New upstream release.
* debian/control: Dropped DM-Upload-Allowed field.
  (Standards-Version): Bumped to 3.9.4.
* debian/copyright: Massive update.
* debian/upstream: Author name update.
* debian/get-orig-source.sh: Remove the windows-*/ directory too.
* debian/openbabel.install: Removed roundtrip manpage.
* debian/openbabel-gui.install: Fixed manpage name.
* debian/openbabel-gui.links: Removed unused file.
* debian/rules: Enable OpenMP. Disable tests on `nocheck'.
* debian/patches/gaussformat_nosym.patch: Dropped. Applied upstream.
* debian/patches/moldenformat_coordonly.patch: Ditto.
* debian/patches/obspectrophore_man.patch: Ditto.
* debian/patches/fix_ftbfs.patch: Added.
  - Fix several FTBFS issues in upstream build system.
* debian/patches/series: Adjusted.

Show diffs side-by-side

added added

removed removed

Lines of Context:
265
265
    //private:
266
266
    /// Separate the file in data blocks and parse them to sort tags, loops and comments.
267
267
    /// All is stored in the original strings.
268
 
    void Parse(std::stringstream &in);
 
268
    ///
 
269
    /// Returns the name of the next data block
 
270
    void Parse(std::istream &in);
269
271
    /// The data blocks, after parsing. The key is the name of the data block
270
272
    std::map<std::string,CIFData> mvData;
271
273
    /// Global comments, outside and data block
311
313
            if(loop->second.find("_atom_site_Cartn_y")!=loop->second.end()) empty_iucrjournal_block=false;
312
314
            if(loop->second.find("_atom_site_Cartn_z")!=loop->second.end()) empty_iucrjournal_block=false;
313
315
          }
314
 
        if(empty_iucrjournal_block) 
 
316
        if(empty_iucrjournal_block)
315
317
          {
316
318
            stringstream ss;
317
319
            ss << "CIF WARNING: found en empty 'data_global' block - SKIPPING\n"
369
371
        mvLatticePar[3] = static_cast<float> (mvLatticePar[3] * DEG_TO_RAD);// pi/180
370
372
        mvLatticePar[4] = static_cast<float> (mvLatticePar[4] * DEG_TO_RAD);
371
373
        mvLatticePar[5] = static_cast<float> (mvLatticePar[5] * DEG_TO_RAD);
372
 
        
 
374
 
373
375
        // Fill values depending on spacegroup, *only* when missing
374
376
        if((spgid>2)&&(spgid<=15))
375
377
        {// :TODO: monoclinic spg, depending on unique axis....
397
399
            if(mvLatticePar[0]==0) mvLatticePar[0]=a;
398
400
            if(mvLatticePar[1]==0) mvLatticePar[1]=a;
399
401
            if(mvLatticePar[2]==0) mvLatticePar[2]=a;
400
 
            
 
402
 
401
403
            float alpha=0;
402
404
            if(mvLatticePar[3]>alpha) alpha=mvLatticePar[3];
403
405
            if(mvLatticePar[4]>alpha) alpha=mvLatticePar[4];
542
544
            if(verbose) cout<<"Found spacegroup Hermann-Mauguin symbol (with OBSOLETE CIF #1.0 TAG):"<<mSpacegroupHermannMauguin<<endl;
543
545
          }
544
546
      }
 
547
    // DDL2 tag is "_space_group.IT_coordinate_system_code", converted by the cif reader to "_space_group_IT_coordinate_system_code"
 
548
    positem=mvItem.find("_space_group_IT_coordinate_system_code");
 
549
    if(positem!=mvItem.end())
 
550
      {
 
551
        if(verbose) cout<<"Found spacegroup IT_coordinate_system_code:"<<positem->second<<endl;
 
552
        if((mSpacegroupHermannMauguin.length()>0) && (positem->second=="1" || positem->second=="2"))
 
553
        {
 
554
          // this is a HACK which will work as long as the HM symbols in spacegroups.txt have the ":1" or ":2" extension listed, when needed
 
555
          mSpacegroupHermannMauguin=mSpacegroupHermannMauguin+string(":")+positem->second;
 
556
        }
 
557
        else
 
558
        {
 
559
          stringstream ss;
 
560
          ss << "CIF Error: found DDL2 tag _space_group.IT_coordinate_system_code ("<<positem->second<<")"<<endl
 
561
             <<"            but could not interpret it ! Origin choice or axis may be incorrect.";
 
562
          obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
 
563
        }
 
564
      }
 
565
 
545
566
    mSpaceGroup=NULL;
546
567
    // be forgiving - if spg not found, try again
547
568
    // Prefer Hall > HM == number, as Hall symbol is truly unique
562
583
    }
563
584
    if(mSpaceGroup == NULL) {
564
585
      SpaceGroup *sg = new SpaceGroup();
565
 
      positem=mvItem.find("_symmetry_equiv_pos_as_xyz");
 
586
      positem=mvItem.find("_space_group_symop_operation_xyz");
 
587
      if(positem==mvItem.end())
 
588
        positem=mvItem.find("_symmetry_equiv_pos_as_xyz");
566
589
      if(positem!=mvItem.end())
567
590
        {
568
591
          sg->AddTransform (positem->second);
574
597
          {
575
598
            map<ci_string,vector<string> >::const_iterator pos;
576
599
            unsigned i, nb;
577
 
            pos=loop->second.find("_symmetry_equiv_pos_as_xyz");
 
600
            pos=loop->second.find("_space_group_symop_operation_xyz");
 
601
            if (pos==loop->second.end())
 
602
              pos=loop->second.find("_symmetry_equiv_pos_as_xyz");
578
603
            if (pos!=loop->second.end())
579
604
              {
580
605
                nb=pos->second.size();
586
611
          }
587
612
        if (found)
588
613
          mSpaceGroup = SpaceGroup::Find(sg);
589
 
        delete sg;
 
614
        if (mSpaceGroup == NULL && sg->IsValid())
 
615
          mSpaceGroup = sg;
 
616
        else
 
617
          delete sg;
590
618
      }
591
619
    }
592
620
    if(mSpaceGroup == NULL)
929
957
 
930
958
  CIF::CIF(istream &is, const bool interpret,const bool verbose)
931
959
  {
932
 
    //Copy to an iostream so that we can put back characters if necessary
933
 
    stringstream in;
934
 
    char c;
935
 
    while(is.get(c))in.put(c);
936
 
    this->Parse(in);
937
 
    // Extract structure from blocks
938
 
    if(interpret)
939
 
      for(map<string,CIFData>::iterator posd=mvData.begin();posd!=mvData.end();++posd)
940
 
        posd->second.ExtractAll(verbose);
 
960
    bool found_atoms=false;
 
961
    while(!found_atoms)
 
962
    {
 
963
      // :TODO: we don't need a vector of CIFData, since only one block is read at a time
 
964
      mvData.clear();
 
965
      this->Parse(is);
 
966
      // Extract structure from 1 block
 
967
      if(interpret)
 
968
        for(map<string,CIFData>::iterator posd=mvData.begin();posd!=mvData.end();++posd)
 
969
        {
 
970
          posd->second.ExtractAll(verbose);
 
971
          if(posd->second.mvAtom.size()>0) found_atoms=true;
 
972
        }
 
973
    }
941
974
  }
942
975
 
943
976
  bool iseol(const char c) { return ((c=='\n')||(c=='\r'));}
944
977
 
945
978
  /// Read one value, whether it is numeric, string or text
946
 
  string CIFReadValue(stringstream &in,char &lastc)
 
979
  string CIFReadValue(istream &in,char &lastc)
947
980
  {
948
981
    bool vv=false;//very verbose ?
949
982
    string value("");
1009
1042
    return value;
1010
1043
  }
1011
1044
 
1012
 
  void CIF::Parse(stringstream &in)
 
1045
  void CIF::Parse(istream &in)
1013
1046
  {
1014
1047
    bool vv=false;//very verbose ?
1015
1048
    char lastc=' ';
1041
1074
          }
1042
1075
        if((in.peek()=='d') || (in.peek()=='D'))
1043
1076
          {// Data
 
1077
            if(mvData.size()>0) return; // We want just a single data block
 
1078
 
1044
1079
            string tmp;
1045
1080
            in>>tmp;
1046
1081
            block=tmp.substr(5);
1047
1082
            if(vv) cout<<endl<<endl<<"NEW BLOCK DATA: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ->"<<block<<endl<<endl<<endl;
 
1083
 
 
1084
 
1048
1085
            mvData[block]=CIFData();
1049
1086
            mvData[block].mDataBlockName=tmp;
1050
1087
            continue;
1081
1118
            map<ci_string,vector<string> > lp;
1082
1119
            while(true)
1083
1120
              {
 
1121
                std::ios::pos_type pos0=in.tellg();
1084
1122
                while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
1085
1123
                if(in.eof()) break;
1086
 
                if(vv) cout<<"LOOP VALUES...: "<<(char)in.peek()<<" "<<endl;
1087
1124
                if(in.peek()=='_') break;
1088
1125
                if(in.peek()=='#')
1089
1126
                  {// Comment (in a loop ??)
1090
 
                    const std::ios::pos_type pos=in.tellg();
 
1127
                    //const std::ios::pos_type pos=in.tellg();
1091
1128
                    string tmp;
1092
1129
                    getline(in,tmp);
 
1130
                    pos0=in.tellg();
1093
1131
                    if(block=="") mvComment.push_back(tmp);
1094
1132
                    else mvData[block].mvComment.push_back(tmp);
1095
1133
                    lastc='\r';
1096
1134
                    if(vv) cout<<"Comment in a loop (?):"<<tmp<<endl;
1097
 
                    in.seekg(pos);
 
1135
                    //in.seekg(pos);
1098
1136
                    break;
1099
1137
                  }
1100
 
                const std::ios::pos_type pos=in.tellg();
1101
 
                in>>tmp;
1102
 
                if(vv) cout<<"WHATNEXT? "<<tmp;
 
1138
                //in>>tmp;
 
1139
                tmp=CIFReadValue(in,lastc);
1103
1140
                if(ci_string(tmp.c_str())=="loop_")
1104
1141
                  {//go back and continue
1105
 
                    if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<endl;
1106
 
                    in.seekg(pos);
 
1142
                    in.clear();
 
1143
                    in.seekg(pos0,std::ios::beg);
 
1144
                    if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg()<<endl;
1107
1145
                    break;
1108
1146
                  }
1109
1147
                if(tmp.size()>=5)
1110
1148
                  if(ci_string(tmp.substr(0,5).c_str())=="data_")
1111
1149
                    {//go back and continue
1112
 
                      if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<endl;
1113
 
                      in.seekg(pos);
 
1150
                      in.clear();
 
1151
                      in.seekg(pos0,std::ios::beg);
 
1152
                      if(vv) cout<<endl<<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg()<<endl;
1114
1153
                      break;
1115
1154
                    }
1116
 
                // go back
1117
 
                in.seekg(pos);
1118
1155
                for(unsigned int i=0;i<tit.size();++i)
1119
1156
                  {//Read all values
1120
 
                    const string value=CIFReadValue(in,lastc);
1121
 
                    lp[tit[i]].push_back(value);
1122
 
                    if(vv) cout<<"     #"<<i<<" :  "<<value<<endl;
 
1157
                    if(i>0) tmp=CIFReadValue(in,lastc);
 
1158
                    lp[tit[i]].push_back(tmp);
 
1159
                    if(vv) cout<<" LOOP VALUE    #"<<lp[tit[i]].size()<<","<<i<<" :  "<<tmp<<endl;
1123
1160
                  }
1124
1161
              }
1125
1162
            // The key to the mvLoop map is the set of column titles
1340
1377
              else tmpSymbol="C";//Something went wrong, no symbol ! Default to C ??
1341
1378
 
1342
1379
              int atomicNum = etab.GetAtomicNum(tmpSymbol.c_str());
 
1380
              // Test for some oxygens with subscripts
 
1381
              if (atomicNum == 0 && tmpSymbol[0] == 'O') {
 
1382
                atomicNum = 8; // e.g. Ob, OH, etc.
 
1383
              }
 
1384
 
1343
1385
              atom->SetAtomicNum(atomicNum); //set atomic number, or '0' if the atom type is not recognized
1344
1386
              atom->SetType(tmpSymbol); //set atomic number, or '0' if the atom type is not recognized
1345
1387
              atom->SetVector(posat->mCoordCart[0],posat->mCoordCart[1],posat->mCoordCart[2]);
 
1388
              if(posat->mLabel.size()>0)
 
1389
                {
 
1390
                  OBPairData *label = new OBPairData;
 
1391
                  label->SetAttribute("_atom_site_label");
 
1392
                  label->SetValue(posat->mLabel);
 
1393
                  label->SetOrigin(fileformatInput);
 
1394
                  atom->SetData(label);
 
1395
                }
 
1396
 
1346
1397
            }
1347
1398
          if (!pConv->IsOption("b",OBConversion::INOPTIONS))
1348
1399
            pmol->ConnectTheDots();
1413
1464
        const SpaceGroup* pSG = pUC->GetSpaceGroup();
1414
1465
        if (pSG != NULL)
1415
1466
          {
1416
 
            ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName() << "'" << endl;
 
1467
            // Do we have an extended HM symbol, with origin choice as ":1" or ":2" ? If so, remove it.
 
1468
            size_t n=pSG->GetHMName().find(":");
 
1469
            if(n==string::npos)
 
1470
              ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName() << "'" << endl;
 
1471
            else
 
1472
              ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName().substr(0,n) << "'" << endl;
1417
1473
            ofs << "_space_group_name_Hall '" << pSG->GetHallName() << "'" << endl;
1418
1474
            ofs << "loop_" <<endl
1419
1475
                << "    _symmetry_equiv_pos_as_xyz" << endl;
1437
1493
    unsigned int i=0;
1438
1494
    FOR_ATOMS_OF_MOL(atom, *pmol)
1439
1495
      {
1440
 
        snprintf(buffer, BUFF_SIZE, "    %3s  %3s%d  %10.5f %10.5f %10.5f\n",
1441
 
                 etab.GetSymbol(atom->GetAtomicNum()),
1442
 
                 etab.GetSymbol(atom->GetAtomicNum()),
1443
 
                 ++i,
1444
 
                 atom->GetX(),
1445
 
                 atom->GetY(),
1446
 
                 atom->GetZ());
 
1496
        if (atom->HasData("_atom_site_label"))
 
1497
          {
 
1498
            OBPairData *label = dynamic_cast<OBPairData *> (atom->GetData("_atom_site_label"));
 
1499
            snprintf(buffer, BUFF_SIZE, "    %3s  %3s  %10.5f %10.5f %10.5f\n",
 
1500
                     etab.GetSymbol(atom->GetAtomicNum()),
 
1501
                     label->GetValue().c_str(),
 
1502
                     atom->GetX(),
 
1503
                     atom->GetY(),
 
1504
                     atom->GetZ());
 
1505
          }
 
1506
        else
 
1507
          {
 
1508
            snprintf(buffer, BUFF_SIZE, "    %3s  %3s%d  %10.5f %10.5f %10.5f\n",
 
1509
                     etab.GetSymbol(atom->GetAtomicNum()),
 
1510
                     etab.GetSymbol(atom->GetAtomicNum()),
 
1511
                     ++i,
 
1512
                     atom->GetX(),
 
1513
                     atom->GetY(),
 
1514
                     atom->GetZ());
 
1515
          }
1447
1516
        ofs << buffer;
1448
1517
      }
1449
1518
    return true;