2
* Created on April 01, 2007
3
* Updated on May 03, 2007
5
package net.sf.jabref.msbib;
6
import net.sf.jabref.*;
7
import net.sf.jabref.export.layout.format.*;
8
import net.sf.jabref.export.layout.*;
9
import net.sf.jabref.mods.*;
11
import javax.xml.parsers.*;
12
import javax.xml.transform.*;
13
import javax.xml.transform.dom.*;
14
import javax.xml.transform.stream.*;
18
import java.util.regex.*;
21
* @author S M Mahbub Murshed
22
* @email udvranto@yahoo.com
25
* @see http://mahbub.wordpress.com/2007/03/24/details-of-microsoft-office-2007-bibliographic-format-compared-to-bibtex/
26
* @see http://mahbub.wordpress.com/2007/03/22/deciphering-microsoft-office-2007-bibliography-format/
28
* Date: May 15, 2007; May 03, 2007
31
* May 03, 2007 - Added export functionality
32
* May 15, 2007 - Added import functionality
33
* May 16, 2007 - Changed all integer entries to strings,
34
* except LCID which must be an integer.
35
* To avoid exception during integer parsing
36
* the exception is caught and LCID is set to zero.
38
public class MSBibEntry {
39
// ---- Entry identification ----
// MSBib source type; defaults to "Misc" and is replaced by populateFromXml/populateFromBibtex.
protected String sourceType = "Misc";
40
// Original BibTeX entry type name, set by getMSBibSourceType for types that share an MSBib source type.
protected String bibTexEntry = null;
42
// Citation key / MSBib tag.
protected String tag = null;
43
// Never assigned in the code visible here; only written out by getDOMrepresentation.
protected String GUID = null;
44
// Locale identifier; -1 is the initial "not set" value.
protected int LCID = -1;
46
// ---- Contributors ----
// Each list stays null until populated; populated lists hold PersonName objects.
protected List authors = null;
47
protected List bookAuthors = null;
48
protected List editors = null;
49
protected List translators = null;
50
protected List producerNames = null;
51
protected List composers = null;
52
protected List conductors = null;
53
protected List performers = null;
54
protected List writers = null;
55
protected List directors = null;
56
protected List compilers = null;
57
protected List interviewers = null;
58
protected List interviewees = null;
59
protected List inventors = null;
60
protected List counsels = null;
62
// ---- Title and date ----
protected String title = null;
63
protected String year = null;
64
protected String month = null;
65
protected String day = null;
67
protected String shortTitle = null;
68
protected String comments = null;
70
// ---- Publication details ----
protected PageNumbers pages = null;
71
protected String volume = null;
72
protected String numberOfVolumes = null;
73
protected String edition = null;
74
// When built by populateFromBibtex this is a tagged concatenation such as ":ISBN:...:ISSN:...".
protected String standardNumber = null;
75
protected String publisher = null;
77
// When read from XML this is assembled from the City, StateProvince and CountryRegion elements.
protected String address = null;
78
protected String bookTitle = null;
79
protected String chapterNumber = null;
80
protected String journalName = null;
81
protected String issue = null;
82
protected String periodicalTitle = null;
83
protected String conferenceName = null;
84
protected String department = null;
85
protected String institution = null;
86
protected String thesisType = null;
87
protected String internetSiteTitle = null;
88
// When read from XML this is assembled from the MonthAccessed, DayAccessed and YearAccessed elements.
protected String dateAccessed = null;
89
protected String url = null;
90
protected String productionCompany = null;
91
protected String publicationTitle = null;
92
protected String medium = null;
93
protected String albumTitle = null;
94
protected String recordingNumber = null;
95
protected String theater = null;
96
protected String distributor = null;
97
protected String broadcastTitle = null;
98
protected String broadcaster = null;
99
protected String station = null;
100
protected String type = null;
101
protected String patentNumber = null;
102
protected String court = null;
103
protected String reporter = null;
104
protected String caseNumber = null;
105
protected String abbreviatedCaseNumber = null;
106
// ---- BibTeX-only data, stored in elements whose names carry the BIBTEX prefix ----
protected String bibTex_Series = null;
107
protected String bibTex_Abstract = null;
108
protected String bibTex_KeyWords = null;
109
protected String bibTex_CrossRef = null;
110
protected String bibTex_HowPublished = null;
111
protected String bibTex_Affiliation = null;
112
protected String bibTex_Contents = null;
113
protected String bibTex_Copyright = null;
114
protected String bibTex_Price = null;
115
protected String bibTex_Size = null;
117
// Prefix of the element names that carry BibTeX-only data (e.g. BIBTEX+"Series").
private final String BIBTEX = "BIBTEX_";
118
// Prefix of the BibTeX field names that carry MSBib-only data (e.g. MSBIB+"day").
private final String MSBIB = "msbib-";
120
// Prefix put in front of every element name this class creates when exporting.
private final String bcol = "b:";
122
// Not referenced by any code visible in this listing.
// NOTE(review): presumably switches XML-character formatting of field values on/off — confirm.
private final boolean FORMATXML = false;
124
// Creates an entry whose fields all keep their declared defaults.
public MSBibEntry() {
127
// Creates an entry from a BibTeX entry by delegating to populateFromBibtex.
public MSBibEntry(BibtexEntry bibtex) {
129
populateFromBibtex(bibtex);
132
// Creates an entry from an XML element by delegating to populateFromXml;
// _bcol is the prefix put in front of every element name that is looked up.
public MSBibEntry(Element entry, String _bcol) {
134
populateFromXml(entry,_bcol);
137
protected String getFromXml(String name, Element entry) {
139
NodeList nodeLst = entry.getElementsByTagName(name);
140
if(nodeLst.getLength()>0)
141
value = nodeLst.item(0).getTextContent();
146
// Fills this entry's fields from the child elements of an MSBib XML element.
// Every lookup goes through getFromXml with the element name prefixed by _bcol.
// NOTE(review): this listing skips some original lines (see the gaps in the
// bare line numbers), so declarations, guards and braces around some
// statements are not visible here — confirm against the full file.
protected void populateFromXml(Element entry, String _bcol) {
149
sourceType = getFromXml(_bcol+"SourceType", entry);
151
tag = getFromXml(_bcol+"Tag", entry);
153
// LCID is the only numeric field: the text is parsed as an int and any
// parsing failure is caught (handler body not visible in this listing).
temp = getFromXml(_bcol+"LCID", entry);
157
LCID = Integer.parseInt(temp); }
158
catch (Exception e) {
163
title = getFromXml(_bcol+"Title", entry);
164
year = getFromXml(_bcol+"Year", entry);
165
month = getFromXml(_bcol+"Month", entry);
166
day = getFromXml(_bcol+"Day", entry);
168
shortTitle = getFromXml(_bcol+"ShortTitle", entry);
169
comments = getFromXml(_bcol+"Comments", entry);
171
// The page range text is wrapped in a PageNumbers object.
temp = getFromXml(_bcol+"Pages", entry);
173
pages = new PageNumbers(temp);
175
volume = getFromXml(_bcol+"Volume", entry);
177
numberOfVolumes = getFromXml(_bcol+"NumberVolumes", entry);
179
edition = getFromXml(_bcol+"Edition", entry);
181
standardNumber = getFromXml(_bcol+"StandardNumber", entry);
183
publisher = getFromXml(_bcol+"Publisher", entry);
185
// The address is assembled as "<city>, <state> <country>", trimmed, and
// a result of "" or "," is special-cased (the action is not visible here).
String city = getFromXml(_bcol+"City", entry);
186
String state = getFromXml(_bcol+"StateProvince", entry);
187
String country = getFromXml(_bcol+"CountryRegion", entry);
190
address += city + ", ";
192
address += state + " ";
195
address = address.trim();
196
if(address.equals("") || address.equals(","))
199
bookTitle = getFromXml(_bcol+"BookTitle", entry);
201
chapterNumber = getFromXml(_bcol+"ChapterNumber", entry);
203
journalName = getFromXml(_bcol+"JournalName", entry);
205
issue = getFromXml(_bcol+"Issue", entry);
207
periodicalTitle = getFromXml(_bcol+"PeriodicalTitle", entry);
209
conferenceName = getFromXml(_bcol+"ConferenceName", entry);
210
department = getFromXml(_bcol+"Department", entry);
211
institution = getFromXml(_bcol+"Institution", entry);
213
thesisType = getFromXml(_bcol+"ThesisType", entry);
214
internetSiteTitle = getFromXml(_bcol+"InternetSiteTitle", entry);
// These three locals shadow the fields month, day and year; they hold the
// *accessed* date parts, which are joined as "<month> <day>, <year>".
215
String month = getFromXml(_bcol+"MonthAccessed", entry);
216
String day = getFromXml(_bcol+"DayAccessed", entry);
217
String year = getFromXml(_bcol+"YearAccessed", entry);
220
dateAccessed += month + " ";
222
dateAccessed += day + ", ";
224
dateAccessed += year;
225
dateAccessed = dateAccessed.trim();
226
if(dateAccessed.equals("") || dateAccessed.equals(","))
229
url = getFromXml(_bcol+"URL", entry);
230
productionCompany = getFromXml(_bcol+"ProductionCompany", entry);
232
publicationTitle = getFromXml(_bcol+"PublicationTitle", entry);
233
medium = getFromXml(_bcol+"Medium", entry);
234
albumTitle = getFromXml(_bcol+"AlbumTitle", entry);
235
recordingNumber = getFromXml(_bcol+"RecordingNumber", entry);
236
theater = getFromXml(_bcol+"Theater", entry);
237
distributor = getFromXml(_bcol+"Distributor", entry);
238
broadcastTitle = getFromXml(_bcol+"BroadcastTitle", entry);
239
broadcaster = getFromXml(_bcol+"Broadcaster", entry);
240
station = getFromXml(_bcol+"Station", entry);
241
type = getFromXml(_bcol+"Type", entry);
242
patentNumber = getFromXml(_bcol+"PatentNumber", entry);
243
court = getFromXml(_bcol+"Court", entry);
244
reporter = getFromXml(_bcol+"Reporter", entry);
245
caseNumber = getFromXml(_bcol+"CaseNumber", entry);
246
abbreviatedCaseNumber = getFromXml(_bcol+"AbbreviatedCaseNumber", entry);
// BibTeX-only values live in elements named <_bcol>BIBTEX_<Name>.
247
bibTex_Series = getFromXml(_bcol+BIBTEX+"Series", entry);
248
bibTex_Abstract = getFromXml(_bcol+BIBTEX+"Abstract", entry);
249
bibTex_KeyWords = getFromXml(_bcol+BIBTEX+"KeyWords", entry);
250
bibTex_CrossRef = getFromXml(_bcol+BIBTEX+"CrossRef", entry);
251
bibTex_HowPublished = getFromXml(_bcol+BIBTEX+"HowPublished", entry);
252
bibTex_Affiliation = getFromXml(_bcol+BIBTEX+"Affiliation", entry);
253
bibTex_Contents = getFromXml(_bcol+BIBTEX+"Contents", entry);
254
bibTex_Copyright = getFromXml(_bcol+BIBTEX+"Copyright", entry);
255
bibTex_Price = getFromXml(_bcol+BIBTEX+"Price", entry);
256
bibTex_Size = getFromXml(_bcol+BIBTEX+"Size", entry);
258
// Contributor lists are read from the first <_bcol>Author element, if any.
NodeList nodeLst = entry.getElementsByTagName(_bcol+"Author");
259
if(nodeLst.getLength()>0)
260
getAuthors((Element)(nodeLst.item(0)),_bcol);
263
// Fills this entry's fields from a BibTeX entry. Each BibTeX field is read
// with getField, and when it is non-null its toString() value is stored.
// MSBib-only data is read from BibTeX fields whose names start with MSBIB.
// NOTE(review): this listing skips some original lines (see the gaps in the
// bare line numbers), so braces and possibly guards are not visible here.
protected void populateFromBibtex(BibtexEntry bibtex) {
264
// date = getDate(bibtex);
265
// The MSBib source type is derived from the BibTeX entry type; the
// title-copying conditions further down depend on it.
sourceType = getMSBibSourceType(bibtex);
267
if (bibtex.getField("bibtexkey") != null)
268
tag = bibtex.getField("bibtexkey").toString();
270
if (bibtex.getField("language") != null)
271
LCID = getLCID(bibtex.getField("language").toString());
273
if (bibtex.getField("title") != null)
274
title = bibtex.getField("title").toString();
275
if (bibtex.getField("year") != null)
276
year = bibtex.getField("year").toString();
277
if (bibtex.getField("month") != null)
278
month = bibtex.getField("month").toString();
279
if (bibtex.getField(MSBIB+"day") != null)
280
day = bibtex.getField(MSBIB+"day").toString();
282
if (bibtex.getField(MSBIB+"shorttitle") != null)
283
shortTitle = bibtex.getField(MSBIB+"shorttitle").toString();
284
// The BibTeX "note" field becomes the MSBib comments.
if (bibtex.getField("note") != null)
285
comments = bibtex.getField("note").toString();
287
if (bibtex.getField("pages") != null)
288
pages = new PageNumbers(bibtex.getField("pages").toString());
290
if (bibtex.getField("volume") != null)
291
volume = bibtex.getField("volume").toString();
293
if (bibtex.getField(MSBIB+"numberofvolume") != null)
294
numberOfVolumes = bibtex.getField(MSBIB+"numberofvolume").toString();
296
if (bibtex.getField("edition") != null)
297
edition = bibtex.getField("edition").toString();
299
// standardNumber is a tagged concatenation of every identifier present,
// e.g. ":ISBN:<isbn>:ISSN:<issn>"; it is reset to null when none is present.
// NOTE(review): the upper-case keys "ISBN", "ISSN", "LCCN" (and "URL" below)
// assume getField finds fields under these exact names — confirm.
standardNumber = new String();
300
if (bibtex.getField("ISBN") != null)
301
standardNumber += ":ISBN:" + bibtex.getField("ISBN").toString();
302
if (bibtex.getField("ISSN") != null)
303
standardNumber += ":ISSN:"+ bibtex.getField("ISSN").toString();
304
if (bibtex.getField("LCCN") != null)
305
standardNumber += ":LCCN:"+ bibtex.getField("LCCN").toString();
306
if (bibtex.getField("mrnumber") != null)
307
standardNumber += ":MRN:"+ bibtex.getField("mrnumber").toString();
308
if(standardNumber.equals(""))
309
standardNumber = null;
311
if (bibtex.getField("publisher") != null)
312
publisher = bibtex.getField("publisher").toString();
314
if (bibtex.getField("address") != null)
315
address = bibtex.getField("address").toString();
317
if (bibtex.getField("booktitle") != null)
318
bookTitle = bibtex.getField("booktitle").toString();
320
if (bibtex.getField("chapter") != null)
321
chapterNumber = bibtex.getField("chapter").toString();
323
if (bibtex.getField("journal") != null)
324
journalName = bibtex.getField("journal").toString();
326
if (bibtex.getField("issue") != null)
327
issue = bibtex.getField("issue").toString();
329
if (bibtex.getField(MSBIB+"periodical") != null)
330
periodicalTitle = bibtex.getField(MSBIB+"periodical").toString();
332
// Name mapping is not one-to-one here: "organization" feeds
// conferenceName and "school" feeds department.
if (bibtex.getField("organization") != null)
333
conferenceName = bibtex.getField("organization").toString();
334
if (bibtex.getField("school") != null)
335
department = bibtex.getField("school").toString();
336
if (bibtex.getField("institution") != null)
337
institution = bibtex.getField("institution").toString();
339
if (bibtex.getField("type") != null)
340
thesisType = bibtex.getField("type").toString();
// For internet source types the BibTeX title doubles as the site title.
341
if ( (sourceType.equals("InternetSite")==true || sourceType.equals("DocumentFromInternetSite")==true)
342
&& bibtex.getField("title") != null)
343
internetSiteTitle = bibtex.getField("title").toString();
344
if (bibtex.getField(MSBIB+"accessed") != null)
345
dateAccessed = bibtex.getField(MSBIB+"accessed").toString();
346
if (bibtex.getField("URL") != null)
347
url = bibtex.getField("URL").toString();
348
if (bibtex.getField(MSBIB+"productioncompany") != null)
349
productionCompany = bibtex.getField(MSBIB+"productioncompany").toString();
351
// For ElectronicSource, Art and Misc the title doubles as the publication title.
if ( (sourceType.equals("ElectronicSource")==true
352
|| sourceType.equals("Art")==true
353
|| sourceType.equals("Misc")==true)
354
&& bibtex.getField("title") != null)
355
publicationTitle = bibtex.getField("title").toString();
356
if (bibtex.getField(MSBIB+"medium") != null)
357
medium = bibtex.getField(MSBIB+"medium").toString();
// For SoundRecording the title doubles as the album title.
358
if (sourceType.equals("SoundRecording")==true && bibtex.getField("title") != null)
359
albumTitle = bibtex.getField("title").toString();
360
if (bibtex.getField(MSBIB+"recordingnumber") != null)
361
recordingNumber = bibtex.getField(MSBIB+"recordingnumber").toString();
362
if (bibtex.getField(MSBIB+"theater") != null)
363
theater = bibtex.getField(MSBIB+"theater").toString();
364
if (bibtex.getField(MSBIB+"distributor") != null)
365
distributor = bibtex.getField(MSBIB+"distributor").toString();
// For Interview the title doubles as the broadcast title.
366
if (sourceType.equals("Interview")==true && bibtex.getField("title") != null)
367
broadcastTitle = bibtex.getField("title").toString();
368
if (bibtex.getField(MSBIB+"broadcaster") != null)
369
broadcaster = bibtex.getField(MSBIB+"broadcaster").toString();
370
if (bibtex.getField(MSBIB+"station") != null)
371
station = bibtex.getField(MSBIB+"station").toString();
372
if (bibtex.getField(MSBIB+"type") != null)
373
type = bibtex.getField(MSBIB+"type").toString();
374
if (bibtex.getField(MSBIB+"patentnumber") != null)
375
patentNumber = bibtex.getField(MSBIB+"patentnumber").toString();
376
if (bibtex.getField(MSBIB+"court") != null)
377
court = bibtex.getField(MSBIB+"court").toString();
378
if (bibtex.getField(MSBIB+"reporter") != null)
379
reporter = bibtex.getField(MSBIB+"reporter").toString();
380
if (bibtex.getField(MSBIB+"casenumber") != null)
381
caseNumber = bibtex.getField(MSBIB+"casenumber").toString();
382
if (bibtex.getField(MSBIB+"abbreviatedcasenumber") != null)
383
abbreviatedCaseNumber = bibtex.getField(MSBIB+"abbreviatedcasenumber").toString();
// BibTeX fields without an MSBib counterpart are kept in the bibTex_* fields.
384
if (bibtex.getField("series") != null)
385
bibTex_Series = bibtex.getField("series").toString();
386
if (bibtex.getField("abstract") != null)
387
bibTex_Abstract = bibtex.getField("abstract").toString();
388
if (bibtex.getField("keywords") != null)
389
bibTex_KeyWords = bibtex.getField("keywords").toString();
390
if (bibtex.getField("crossref") != null)
391
bibTex_CrossRef = bibtex.getField("crossref").toString();
392
if (bibtex.getField("howpublished") != null)
393
bibTex_HowPublished = bibtex.getField("howpublished").toString();
394
if (bibtex.getField("affiliation") != null)
395
bibTex_Affiliation = bibtex.getField("affiliation").toString();
396
if (bibtex.getField("contents") != null)
397
bibTex_Contents = bibtex.getField("contents").toString();
398
if (bibtex.getField("copyright") != null)
399
bibTex_Copyright = bibtex.getField("copyright").toString();
400
if (bibtex.getField("price") != null)
401
bibTex_Price = bibtex.getField("price").toString();
402
if (bibtex.getField("size") != null)
403
bibTex_Size = bibtex.getField("size").toString();
406
// The author string is split into PersonName objects by getAuthors(String).
if (bibtex.getField("author") != null)
407
authors = getAuthors(bibtex.getField("author").toString());
412
// Only title and abstract are passed through format(); the same call for
// the other fields is commented out below.
// NOTE(review): original lines 408-411 are not in this listing, so whether
// these calls sit inside a condition cannot be told from here — confirm.
title = format(title);
413
// shortTitle = format(shortTitle);
414
// publisher = format(publisher);
415
// conferenceName = format(conferenceName);
416
// department = format(department);
417
// institution = format(institution);
418
// internetSiteTitle = format(internetSiteTitle);
419
// publicationTitle = format(publicationTitle);
420
// albumTitle = format(albumTitle);
421
// theater = format(theater);
422
// distributor = format(distributor);
423
// broadcastTitle = format(broadcastTitle);
424
// broadcaster = format(broadcaster);
425
// station = format(station);
426
// court = format(court);
427
// reporter = format(reporter);
428
// bibTex_Series = format(bibTex_Series);
429
bibTex_Abstract = format(bibTex_Abstract);
433
// Runs value through an XMLChars layout formatter and keeps the formatted text in result.
// NOTE(review): original lines 434-436 and 440-442 are not in this listing;
// presumably they hold a null guard and the return of result — confirm.
private String format(String value)
437
String result = null;
438
LayoutFormatter chars = new XMLChars();
439
result = chars.format(value);
443
// http://www.microsoft.com/globaldev/reference/lcid-all.mspx
444
// Maps a language name to an LCID value.
// The mapping itself is still a TODO; the body is not visible in this listing.
protected int getLCID(String language)
447
// TODO: add language to LCID mapping
452
// http://www.microsoft.com/globaldev/reference/lcid-all.mspx
453
// Maps an LCID value to a language name; the local starts out as "english"
// and the actual mapping is still a TODO.
protected String getLanguage(int LCID)
455
String language = "english";
456
// TODO: add language to LCID mapping
461
protected List getSpecificAuthors(String type, Element authors, String _bcol) {
463
NodeList nodeLst = authors.getElementsByTagName(_bcol+type);
464
if(nodeLst.getLength()<=0)
466
nodeLst = ((Element)(nodeLst.item(0))).getElementsByTagName(_bcol+"NameList");
467
if(nodeLst.getLength()<=0)
469
NodeList person = ((Element)(nodeLst.item(0))).getElementsByTagName(_bcol+"Person");
470
if(person.getLength()<=0)
473
result = new LinkedList();
474
for(int i=0;i<person.getLength();i++)
476
NodeList firstName = ((Element)(person.item(i))).getElementsByTagName(_bcol+"First");
477
NodeList lastName = ((Element)(person.item(i))).getElementsByTagName(_bcol+"Last");
478
NodeList middleName = ((Element)(person.item(i))).getElementsByTagName(_bcol+"Middle");
479
PersonName name = new PersonName();
480
if(firstName.getLength()>0)
481
name.setFirstname(firstName.item(0).getTextContent());
482
if(middleName.getLength()>0)
483
name.setMiddlename(middleName.item(0).getTextContent());
484
if(lastName.getLength()>0)
485
name.setSurname(lastName.item(0).getTextContent());
492
// Fills every contributor list from authorsElem, one getSpecificAuthors call
// per role; a list becomes null when getSpecificAuthors returns null for it.
protected void getAuthors(Element authorsElem, String _bcol) {
493
authors = getSpecificAuthors("Author",authorsElem,_bcol);
494
bookAuthors = getSpecificAuthors("BookAuthor",authorsElem,_bcol);
495
editors = getSpecificAuthors("Editor",authorsElem,_bcol);
496
translators = getSpecificAuthors("Translator",authorsElem,_bcol);
497
producerNames = getSpecificAuthors("ProducerName",authorsElem,_bcol);
498
composers = getSpecificAuthors("Composer",authorsElem,_bcol);
499
conductors = getSpecificAuthors("Conductor",authorsElem,_bcol);
500
performers = getSpecificAuthors("Performer",authorsElem,_bcol);
501
writers = getSpecificAuthors("Writer",authorsElem,_bcol);
502
directors = getSpecificAuthors("Director",authorsElem,_bcol);
503
compilers = getSpecificAuthors("Compiler",authorsElem,_bcol);
504
interviewers = getSpecificAuthors("Interviewer",authorsElem,_bcol);
505
interviewees = getSpecificAuthors("Interviewee",authorsElem,_bcol);
506
inventors = getSpecificAuthors("Inventor",authorsElem,_bcol);
507
counsels = getSpecificAuthors("Counsel",authorsElem,_bcol);
510
// Turns a BibTeX author string into a list of PersonName objects.
// A string without " and " yields one PersonName; otherwise the string is
// split on " and " and each part yields one PersonName.
// NOTE(review): the lines between the two branches (braces / else) and the
// final return are not in this listing — confirm against the full file.
protected List getAuthors(String authors) {
511
List result = new LinkedList();
512
// Only used by the commented-out variants below that format each name first.
LayoutFormatter chars = new XMLChars();
514
if (authors.indexOf(" and ") == -1)
517
// result.add(new PersonName(chars.format(authors)));
519
result.add(new PersonName(authors));
523
String[] names = authors.split(" and ");
524
for (int i=0; i<names.length; i++)
527
// result.add(new PersonName(chars.format(names[i])));
529
result.add(new PersonName(names[i]));
535
/* construct a MSBib date object */
536
// Builds a date string from the BibTeX "year" field followed, when present,
// by "-" and the "month" field. Its only call site visible here is commented out.
// NOTE(review): the declaration and return of result are not in this listing.
protected String getDate(BibtexEntry bibtex) {
538
if (bibtex.getField("year") != null)
539
result += (bibtex.getField("year").toString());
540
if (bibtex.getField("month") != null)
541
result += "-" + bibtex.getField("month").toString();
546
// Maps the BibTeX entry type name (compared case-insensitively) to an MSBib
// source type, starting from the default "Misc".
// Side effect: for BibTeX types that share a source type with another type,
// the original type name is remembered in the bibTexEntry field.
// NOTE(review): the assignments for "book", "techreport", "patent" and
// "misc" and the final return are not in this listing — confirm.
protected String getMSBibSourceType(BibtexEntry bibtex) {
547
String bibtexType = bibtex.getType().getName();
549
String result = "Misc";
550
// Book-like types.
if (bibtexType.equalsIgnoreCase("book"))
552
else if(bibtexType.equalsIgnoreCase("inbook"))
553
result = "BookSection";
554
else if(bibtexType.equalsIgnoreCase("booklet"))
555
{ result = "BookSection"; bibTexEntry = "booklet"; }
556
else if(bibtexType.equalsIgnoreCase("incollection"))
557
{ result = "BookSection"; bibTexEntry = "incollection"; }
559
// Journal articles.
else if(bibtexType.equalsIgnoreCase("article"))
560
result = "JournalArticle";
562
// Conference-like types.
else if(bibtexType.equalsIgnoreCase("inproceedings"))
563
result = "ConferenceProceedings";
564
else if(bibtexType.equalsIgnoreCase("conference"))
565
{ result = "ConferenceProceedings"; bibTexEntry = "conference"; }
566
else if(bibtexType.equalsIgnoreCase("proceedings"))
567
{ result = "ConferenceProceedings"; bibTexEntry = "proceedings"; }
568
else if(bibtexType.equalsIgnoreCase("collection"))
569
{ result = "ConferenceProceedings"; bibTexEntry = "collection"; }
571
// Report-like types.
else if(bibtexType.equalsIgnoreCase("techreport"))
573
else if(bibtexType.equalsIgnoreCase("manual"))
574
{ result = "Report"; bibTexEntry = "manual"; }
575
else if(bibtexType.equalsIgnoreCase("mastersthesis"))
576
{ result = "Report"; bibTexEntry = "mastersthesis"; }
577
else if(bibtexType.equalsIgnoreCase("phdthesis"))
578
{ result = "Report"; bibTexEntry = "phdthesis"; }
579
else if(bibtexType.equalsIgnoreCase("unpublished"))
580
{ result = "Report"; bibTexEntry = "unpublished"; }
582
else if(bibtexType.equalsIgnoreCase("patent"))
585
else if(bibtexType.equalsIgnoreCase("misc"))
591
// Creates a DocumentBuilder via DocumentBuilderFactory; the call that would
// fill result from it is commented out in the visible code.
// NOTE(review): the try/catch and return around these lines are not in this
// listing, so what is finally returned cannot be told from here.
public Document getDOMrepresentation() {
592
Document result = null;
594
DocumentBuilder d = DocumentBuilderFactory.newInstance().newDocumentBuilder();
596
// result = getDOMrepresentation(d);
605
// private String healXML(String value)
607
// String healedValue = value;
609
//// if(value.contains("A net energy gain"))
610
//// System.out.println(value);
611
//// restore converted html-char
612
// Pattern p = Pattern.compile("&#([0-9A-Fa-f]{2,4});");
613
// // Pattern p = Pattern.compile("&#(\\d{1,4});");
614
// Matcher m = p.matcher(healedValue);
617
// int n = Integer.parseInt(m.group(1),16);
618
// char ch = Character.forDigit(n,10);
619
// System.out.println(m.group(1));
620
// System.out.println(""+n);
621
// System.out.println(""+ch);
622
// healedValue = healedValue.replaceAll("\\&#"+m.group(1)+";",""+ch);
625
// return healedValue;
628
// Appends a child element named bcol+name to parent, containing value as a
// text node after it has passed through stripNonValidXMLCharacters.
// NOTE(review): original lines 629-630 are not in this listing; presumably
// they skip null values (callers pass fields that may be null) — confirm.
public void addField(Document d,Element parent, String name, String value) {
631
Element elem = d.createElement(bcol+name);
632
// elem.appendChild(d.createTextNode(healXML(value)));
633
// Text txt = d.createTextNode(value);
634
// if(!txt.getTextContent().equals(value))
635
// System.out.println("Values dont match!");
636
// // throw new Exception("Values dont match!");
637
// elem.appendChild(txt);
638
elem.appendChild(d.createTextNode(stripNonValidXMLCharacters(value)));
639
parent.appendChild(elem);
642
public void addAuthor(Document d, Element allAuthors, String entryName, List authorsLst) {
643
if(authorsLst == null)
645
Element authorTop = d.createElement(bcol+entryName);
646
Element nameList = d.createElement(bcol+"NameList");
647
for(Iterator iter = authorsLst.iterator(); iter.hasNext();) {
648
PersonName name = (PersonName) iter.next();
649
Element person = d.createElement(bcol+"Person");
650
addField(d, person,"Last",name.getSurname());
651
addField(d, person,"Middle",name.getMiddlename());
652
addField(d, person,"First",name.getFirstname());
653
nameList.appendChild(person);
655
authorTop.appendChild(nameList);
657
allAuthors.appendChild(authorTop);
660
public void addAdrress(Document d,Element parent, String address) {
665
// See documentation here http://regexlib.com/REDetails.aspx?regexp_id=472
666
// Pattern p = Pattern.compile("^(?n:(((?<address1>(\\d{1,5}(\\ 1\\/[234])?(\\x20[A-Z]([a-z])+)+ )|(P\\.O\\.\\ Box\\ \\d{1,5}))\\s{1,2}(?i:(?<address2>(((APT|B LDG|DEPT|FL|HNGR|LOT|PIER|RM|S(LIP|PC|T(E|OP))|TRLR|UNIT)\\x20\\w{1,5})|(BSMT|FRNT|LBBY|LOWR|OFC|PH|REAR|SIDE|UPPR)\\.?)\\s{1,2})?))?)(?<city>[A-Z]([a-z])+(\\.?)(\\x20[A-Z]([a-z])+){0,2})([,\\x20]+?)(?<state>A[LKSZRAP]|C[AOT]|D[EC]|F[LM]|G[AU]|HI|I[ADL N]|K[SY]|LA|M[ADEHINOPST]|N[CDEHJMVY]|O[HKR]|P[ARW]|RI|S[CD] |T[NX]|UT|V[AIT]|W[AIVY])([,\\x20]+?)(?<zipcode>(?!0{5})\\d{5}(-\\d {4})?)((([,\\x20]+?)(?<country>[A-Z]([a-z])+(\\.?)(\\x20[A-Z]([a-z])+){0,2}))?))$");
667
// the pattern above is for C#, may not work with java. Never tested though.
669
// reduced subset, supports only "CITY , STATE, COUNTRY"
670
// \b(\w+)\s?[,]?\s?(\w+)\s?[,]?\s?(\w+)\b
671
// WORD SPACE , SPACE WORD SPACE , SPACE WORD
672
// tested using http://www.javaregex.com/test.html
673
Pattern p = Pattern.compile("\\b(\\w+)\\s*[,]?\\s*(\\w+)\\s*[,]?\\s*(\\w+)\\b");
674
Matcher m = p.matcher(address);
675
if (m.matches() && m.groupCount()>3)
677
addField(d, parent,"City",m.group(1));
678
addField(d, parent,"StateProvince",m.group(2));
679
addField(d, parent,"CountryRegion",m.group(3));
683
public void addDate(Document d,Element parent, String date, String extra) {
687
// Allows 20.3-2007|||20/3- 2007 etc.
688
// (\d{1,2})\s?[.,-/]\s?(\d{1,2})\s?[.,-/]\s?(\d{2,4})
689
// 1-2 DIGITS SPACE SEPERATOR SPACE 1-2 DIGITS SPACE SEPERATOR SPACE 2-4 DIGITS
690
// tested using http://www.javaregex.com/test.html
691
Pattern p = Pattern.compile("(\\d{1,2})\\s*[.,-/]\\s*(\\d{1,2})\\s*[.,-/]\\s*(\\d{2,4})");
692
Matcher m = p.matcher(date);
693
if (m.matches() && m.groupCount()>3)
695
addField(d, parent,"Month"+extra,m.group(1));
696
addField(d, parent,"Day"+extra,m.group(2));
697
addField(d, parent,"Year"+extra,m.group(3));
701
// Builds the <bcol>Source element for this entry in document d. Plain fields
// are written with addField, contributors with addAuthor (all collected under
// one <bcol>Author element), the address with addAdrress and the access date
// with addDate.
// NOTE(review): this listing skips some original lines (see the gaps in the
// bare line numbers): the try block opening, possible guards in front of the
// LCID and Pages lines, and the return statement are not visible here.
public Element getDOMrepresentation(Document d) {
704
Element msbibEntry = d.createElement(bcol+"Source");
706
addField(d,msbibEntry,"SourceType",sourceType);
707
addField(d,msbibEntry,BIBTEX+"Entry",bibTexEntry);
709
addField(d,msbibEntry,"Tag",tag);
710
addField(d,msbibEntry,"GUID",GUID);
712
addField(d,msbibEntry,"LCID",Integer.toString(LCID));
713
// The month and day fields are not written out here.
addField(d,msbibEntry,"Title",title);
714
addField(d,msbibEntry,"Year",year);
715
addField(d,msbibEntry,"ShortTitle",shortTitle);
716
addField(d,msbibEntry,"Comments",comments);
718
// One container element holds every contributor role; addAuthor skips
// roles whose list is null.
Element allAuthors = d.createElement(bcol+"Author");
720
addAuthor(d,allAuthors,"Author",authors);
721
addAuthor(d,allAuthors,"BookAuthor",bookAuthors);
722
addAuthor(d,allAuthors,"Editor",editors);
723
addAuthor(d,allAuthors,"Translator",translators);
724
addAuthor(d,allAuthors,"ProducerName",producerNames);
725
addAuthor(d,allAuthors,"Composer",composers);
726
addAuthor(d,allAuthors,"Conductor",conductors);
727
addAuthor(d,allAuthors,"Performer",performers);
728
addAuthor(d,allAuthors,"Writer",writers);
729
addAuthor(d,allAuthors,"Director",directors);
730
addAuthor(d,allAuthors,"Compiler",compilers);
731
addAuthor(d,allAuthors,"Interviewer",interviewers);
732
addAuthor(d,allAuthors,"Interviewee",interviewees);
733
addAuthor(d,allAuthors,"Inventor",inventors);
734
addAuthor(d,allAuthors,"Counsel",counsels);
736
msbibEntry.appendChild(allAuthors);
739
// pages is dereferenced here, so it must be non-null at this point.
// NOTE(review): original line 738 is not in this listing; presumably it is
// a null check on pages — confirm.
addField(d,msbibEntry,"Pages",pages.toString("-"));
740
addField(d,msbibEntry,"Volume",volume);
741
addField(d,msbibEntry,"NumberVolumes",numberOfVolumes);
742
addField(d,msbibEntry,"Edition",edition);
743
addField(d,msbibEntry,"StandardNumber",standardNumber);
744
addField(d,msbibEntry,"Publisher",publisher);
746
addAdrress(d,msbibEntry,address);
748
addField(d,msbibEntry,"BookTitle",bookTitle);
749
addField(d,msbibEntry,"ChapterNumber",chapterNumber);
751
addField(d,msbibEntry,"JournalName",journalName);
752
addField(d,msbibEntry,"Issue",issue);
753
addField(d,msbibEntry,"PeriodicalTitle",periodicalTitle);
754
addField(d,msbibEntry,"ConferenceName",conferenceName);
756
addField(d,msbibEntry,"Department",department);
757
addField(d,msbibEntry,"Institution",institution);
758
addField(d,msbibEntry,"ThesisType",thesisType);
759
addField(d,msbibEntry,"InternetSiteTitle",internetSiteTitle);
761
// Written as MonthAccessed / DayAccessed / YearAccessed by addDate.
addDate(d,msbibEntry, dateAccessed, "Accessed");
763
addField(d,msbibEntry,"URL",url);
764
addField(d,msbibEntry,"ProductionCompany",productionCompany);
765
addField(d,msbibEntry,"PublicationTitle",publicationTitle);
766
addField(d,msbibEntry,"Medium",medium);
767
addField(d,msbibEntry,"AlbumTitle",albumTitle);
768
addField(d,msbibEntry,"RecordingNumber",recordingNumber);
769
addField(d,msbibEntry,"Theater",theater);
770
addField(d,msbibEntry,"Distributor",distributor);
771
addField(d,msbibEntry,"BroadcastTitle",broadcastTitle);
772
addField(d,msbibEntry,"Broadcaster",broadcaster);
773
addField(d,msbibEntry,"Station",station);
774
addField(d,msbibEntry,"Type",type);
775
addField(d,msbibEntry,"PatentNumber",patentNumber);
776
addField(d,msbibEntry,"Court",court);
777
addField(d,msbibEntry,"Reporter",reporter);
778
addField(d,msbibEntry,"CaseNumber",caseNumber);
779
addField(d,msbibEntry,"AbbreviatedCaseNumber",abbreviatedCaseNumber);
781
// BibTeX-only values go into elements named <bcol>BIBTEX_<Name>.
addField(d,msbibEntry,BIBTEX+"Series",bibTex_Series);
782
addField(d,msbibEntry,BIBTEX+"Abstract",bibTex_Abstract);
783
addField(d,msbibEntry,BIBTEX+"KeyWords",bibTex_KeyWords);
784
addField(d,msbibEntry,BIBTEX+"CrossRef",bibTex_CrossRef);
785
addField(d,msbibEntry,BIBTEX+"HowPublished",bibTex_HowPublished);
786
addField(d,msbibEntry,BIBTEX+"Affiliation",bibTex_Affiliation);
787
addField(d,msbibEntry,BIBTEX+"Contents",bibTex_Contents);
788
addField(d,msbibEntry,BIBTEX+"Copyright",bibTex_Copyright);
789
addField(d,msbibEntry,BIBTEX+"Price",bibTex_Price);
790
addField(d,msbibEntry,BIBTEX+"Size",bibTex_Size);
796
// Part of an exception handler whose surrounding lines are not in this
// listing; the caught exception is reported on standard output.
System.out.println("Exception caught..." + e);
803
// Looks for ":<type>:<value>" in standardNum and stores the captured value
// in hm under the key bibtype. The captured value is one arbitrary character
// followed by one or more characters other than ':'.
// NOTE(review): original line 807 is not in this listing; presumably it is
// the matches()/find() check that guards the m.group(1) call — confirm.
protected void parseSingleStandardNumber(String type,String bibtype, String standardNum, HashMap hm) {
804
// tested using http://www.javaregex.com/test.html
805
Pattern p = Pattern.compile(":"+type+":(.[^:]+)");
806
Matcher m = p.matcher(standardNum);
808
hm.put(bibtype,m.group(1));
811
protected void parseStandardNumber(String standardNum, HashMap hm) {
812
if(standardNumber == null)
814
parseSingleStandardNumber("ISBN","ISBN",standardNum,hm);
815
parseSingleStandardNumber("ISSN","ISSN",standardNum,hm);
816
parseSingleStandardNumber("LCCN","LCCN",standardNum,hm);
817
parseSingleStandardNumber("MRN","mrnumber",standardNum,hm);
820
public void addAuthor(HashMap hm, String type, List authorsLst) {
821
if(authorsLst == null)
823
String allAuthors = "";
824
boolean First = true;
825
for(Iterator iter = authorsLst.iterator(); iter.hasNext();) {
826
PersonName name = (PersonName) iter.next();
828
allAuthors += " and ";
829
allAuthors += name.getFullname();
832
hm.put(type,allAuthors);
835
// public String mapMSBibToBibtexTypeString(String msbib) {
836
// String bibtex = "other";
837
// if(msbib.equals("Book"))
839
// else if(msbib.equals("BookSection"))
840
// bibtex = "inbook";
841
// else if(msbib.equals("JournalArticle"))
842
// bibtex = "article";
843
// else if(msbib.equals("ArticleInAPeriodical"))
844
// bibtex = "article";
845
// else if(msbib.equals("ConferenceProceedings"))
846
// bibtex = "conference";
847
// else if(msbib.equals("Report"))
848
// bibtex = "techreport";
849
// else if(msbib.equals("InternetSite"))
851
// else if(msbib.equals("DocumentFromInternetSite"))
853
// else if(msbib.equals("DocumentFromInternetSite"))
855
// else if(msbib.equals("ElectronicSource"))
857
// else if(msbib.equals("Art"))
859
// else if(msbib.equals("SoundRecording"))
861
// else if(msbib.equals("Performance"))
863
// else if(msbib.equals("Film"))
865
// else if(msbib.equals("Interview"))
867
// else if(msbib.equals("Patent"))
869
// else if(msbib.equals("Case"))
871
// else if(msbib.equals("Misc"))
879
public BibtexEntryType mapMSBibToBibtexType(String msbib)
881
BibtexEntryType bibtex = BibtexEntryType.OTHER;
882
if(msbib.equals("Book"))
883
bibtex = BibtexEntryType.BOOK;
884
else if(msbib.equals("BookSection"))
885
bibtex = BibtexEntryType.INBOOK;
886
else if(msbib.equals("JournalArticle"))
887
bibtex = BibtexEntryType.ARTICLE;
888
else if(msbib.equals("ArticleInAPeriodical"))
889
bibtex = BibtexEntryType.ARTICLE;
890
else if(msbib.equals("ConferenceProceedings"))
891
bibtex = BibtexEntryType.CONFERENCE;
892
else if(msbib.equals("Report"))
893
bibtex = BibtexEntryType.TECHREPORT;
894
else if(msbib.equals("InternetSite"))
895
bibtex = BibtexEntryType.OTHER;
896
else if(msbib.equals("DocumentFromInternetSite"))
897
bibtex = BibtexEntryType.OTHER;
898
else if(msbib.equals("DocumentFromInternetSite"))
899
bibtex = BibtexEntryType.OTHER;
900
else if(msbib.equals("ElectronicSource"))
901
bibtex = BibtexEntryType.OTHER;
902
else if(msbib.equals("Art"))
903
bibtex = BibtexEntryType.OTHER;
904
else if(msbib.equals("SoundRecording"))
905
bibtex = BibtexEntryType.OTHER;
906
else if(msbib.equals("Performance"))
907
bibtex = BibtexEntryType.OTHER;
908
else if(msbib.equals("Film"))
909
bibtex = BibtexEntryType.OTHER;
910
else if(msbib.equals("Interview"))
911
bibtex = BibtexEntryType.OTHER;
912
else if(msbib.equals("Patent"))
913
bibtex = BibtexEntryType.OTHER;
914
else if(msbib.equals("Case"))
915
bibtex = BibtexEntryType.OTHER;
916
else if(msbib.equals("Misc"))
917
bibtex = BibtexEntryType.MISC;
919
bibtex = BibtexEntryType.MISC;
923
public BibtexEntry getBibtexRepresentation() {
924
// BibtexEntry entry = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID,
925
// Globals.getEntryType(mapMSBibToBibtexTypeString(sourceType)));
927
// BibtexEntry entry = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID,
928
// mapMSBibToBibtexType(sourceType));
930
BibtexEntry entry = null;
932
entry = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID,
933
mapMSBibToBibtexType(sourceType));
935
entry = new BibtexEntry(tag,
936
mapMSBibToBibtexType(sourceType)); // id assumes an existing database so don't
939
// Todo: add check for BibTexEntry types
940
// BibtexEntry entry = new BibtexEntry();
941
// if(sourceType.equals("Book"))
942
// entry.setType(BibtexEntryType.BOOK);
943
// else if(sourceType.equals("BookSection"))
944
// entry.setType(BibtexEntryType.INBOOK);
945
// else if(sourceType.equals("JournalArticle"))
946
// entry.setType(BibtexEntryType.ARTICLE);
947
// else if(sourceType.equals("ArticleInAPeriodical"))
948
// entry.setType(BibtexEntryType.ARTICLE);
949
// else if(sourceType.equals("ConferenceProceedings"))
950
// entry.setType(BibtexEntryType.CONFERENCE);
951
// else if(sourceType.equals("Report"))
952
// entry.setType(BibtexEntryType.TECHREPORT);
953
// else if(sourceType.equals("InternetSite"))
954
// entry.setType(BibtexEntryType.OTHER);
955
// else if(sourceType.equals("DocumentFromInternetSite"))
956
// entry.setType(BibtexEntryType.OTHER);
957
// else if(sourceType.equals("DocumentFromInternetSite"))
958
// entry.setType(BibtexEntryType.OTHER);
959
// else if(sourceType.equals("ElectronicSource"))
960
// entry.setType(BibtexEntryType.OTHER);
961
// else if(sourceType.equals("Art"))
962
// entry.setType(BibtexEntryType.OTHER);
963
// else if(sourceType.equals("SoundRecording"))
964
// entry.setType(BibtexEntryType.OTHER);
965
// else if(sourceType.equals("Performance"))
966
// entry.setType(BibtexEntryType.OTHER);
967
// else if(sourceType.equals("Film"))
968
// entry.setType(BibtexEntryType.OTHER);
969
// else if(sourceType.equals("Interview"))
970
// entry.setType(BibtexEntryType.OTHER);
971
// else if(sourceType.equals("Patent"))
972
// entry.setType(BibtexEntryType.OTHER);
973
// else if(sourceType.equals("Case"))
974
// entry.setType(BibtexEntryType.OTHER);
975
// else if(sourceType.equals("Misc"))
976
// entry.setType(BibtexEntryType.MISC);
978
// entry.setType(BibtexEntryType.MISC);
980
HashMap hm = new HashMap();
983
hm.put("bibtexkey",tag);
985
// hm.put("GUID",GUID);
987
hm.put("language",getLanguage(LCID));
989
hm.put("title",title);
992
if(shortTitle != null)
993
hm.put(MSBIB+"shorttitle",shortTitle);
995
hm.put("note",comments);
997
addAuthor(hm,"author",authors);
998
addAuthor(hm,MSBIB+"bookauthor",bookAuthors);
999
addAuthor(hm,"editor",editors);
1000
addAuthor(hm,MSBIB+"translator",translators);
1001
addAuthor(hm,MSBIB+"producername",producerNames);
1002
addAuthor(hm,MSBIB+"composer",composers);
1003
addAuthor(hm,MSBIB+"conductor",conductors);
1004
addAuthor(hm,MSBIB+"performer",performers);
1005
addAuthor(hm,MSBIB+"writer",writers);
1006
addAuthor(hm,MSBIB+"director",directors);
1007
addAuthor(hm,MSBIB+"compiler",compilers);
1008
addAuthor(hm,MSBIB+"interviewer",interviewers);
1009
addAuthor(hm,MSBIB+"interviewee",interviewees);
1010
addAuthor(hm,MSBIB+"inventor",inventors);
1011
addAuthor(hm,MSBIB+"counsel",counsels);
1014
hm.put("pages",pages.toString("--"));
1016
hm.put("volume",volume);
1017
if(numberOfVolumes !=null )
1018
hm.put(MSBIB+"numberofvolume",numberOfVolumes);
1020
hm.put("edition",edition);
1022
hm.put("edition",edition);
1023
parseStandardNumber(standardNumber,hm);
1025
if(publisher !=null )
1026
hm.put("publisher",publisher);
1027
if(publisher !=null )
1028
hm.put("publisher",publisher);
1030
hm.put("address",address);
1031
if(bookTitle !=null )
1032
hm.put("booktitle",bookTitle);
1033
if(chapterNumber !=null )
1034
hm.put("chapter",chapterNumber);
1035
if(journalName !=null )
1036
hm.put("journal",journalName);
1038
hm.put("number",issue);
1039
if(periodicalTitle !=null )
1040
hm.put("organization",periodicalTitle);
1041
if(conferenceName !=null )
1042
hm.put("organization",conferenceName);
1043
if(department !=null )
1044
hm.put("school",department);
1045
if(institution !=null )
1046
hm.put("institution",institution);
1047
// if(thesisType !=null )
1048
// hm.put("type",thesisType);
1049
// if(internetSiteTitle !=null )
1050
// hm.put("title",internetSiteTitle);
1051
if(dateAccessed !=null )
1052
hm.put(MSBIB+"accessed",dateAccessed);
1055
if(productionCompany !=null )
1056
hm.put(MSBIB+"productioncompany",productionCompany);
1057
// if(publicationTitle !=null )
1058
// hm.put("title",publicationTitle);
1060
hm.put(MSBIB+"medium",medium);
1061
// if(albumTitle !=null )
1062
// hm.put("title",albumTitle);
1063
if(recordingNumber !=null )
1064
hm.put(MSBIB+"recordingnumber",recordingNumber);
1066
hm.put(MSBIB+"theater",theater);
1067
if(distributor !=null )
1068
hm.put(MSBIB+"distributor",distributor);
1069
// if(broadcastTitle !=null )
1070
// hm.put("title",broadcastTitle);
1071
if(broadcaster !=null )
1072
hm.put(MSBIB+"broadcaster",broadcaster);
1074
hm.put(MSBIB+"station",station);
1076
hm.put(MSBIB+"type",type);
1077
if(patentNumber !=null )
1078
hm.put(MSBIB+"patentnumber",patentNumber);
1080
hm.put(MSBIB+"court",court);
1081
if(reporter !=null )
1082
hm.put(MSBIB+"reporter",reporter);
1083
if(caseNumber !=null )
1084
hm.put(MSBIB+"casenumber",caseNumber);
1085
if(abbreviatedCaseNumber !=null )
1086
hm.put(MSBIB+"abbreviatedcasenumber",abbreviatedCaseNumber);
1088
if(bibTex_Series !=null )
1089
hm.put("series",bibTex_Series);
1090
if(bibTex_Abstract !=null )
1091
hm.put("abstract",bibTex_Abstract);
1092
if(bibTex_KeyWords !=null )
1093
hm.put("keywords",bibTex_KeyWords);
1094
if(bibTex_CrossRef !=null )
1095
hm.put("crossref",bibTex_CrossRef);
1096
if(bibTex_HowPublished !=null )
1097
hm.put("howpublished",bibTex_HowPublished);
1098
if(bibTex_Affiliation !=null )
1099
hm.put("affiliation",bibTex_Affiliation);
1100
if(bibTex_Contents !=null )
1101
hm.put("contents",bibTex_Contents);
1102
if(bibTex_Copyright !=null )
1103
hm.put("copyright",bibTex_Copyright);
1104
if(bibTex_Price !=null )
1105
hm.put("price",bibTex_Price);
1106
if(bibTex_Size !=null )
1107
hm.put("size",bibTex_Size);
1114
* This method ensures that the output String has only
1115
* valid XML unicode characters as specified by the
1116
* XML 1.0 standard. For reference, please see
1117
* <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
1118
* standard</a>. This method will return an empty
1119
* String if the input is null or empty.
1121
* URL: http://cse-mjmcl.cse.bris.ac.uk/blog/2007/02/14/1171465494443.html
1123
* @param in The String whose non-valid characters we want to remove.
1124
* @return The in String, stripped of non-valid characters.
1126
public String stripNonValidXMLCharacters(String in) {
1127
StringBuffer out = new StringBuffer(); // Used to hold the output.
1128
char current; // Used to reference the current character.
1130
if (in == null || ("".equals(in))) return ""; // vacancy test.
1131
for (int i = 0; i < in.length(); i++) {
1132
current = in.charAt(i); // NOTE: No IndexOutOfBoundsException caught here; it should not happen.
1133
if ((current == 0x9) ||
1136
((current >= 0x20) && (current <= 0xD7FF)) ||
1137
((current >= 0xE000) && (current <= 0xFFFD)) ||
1138
((current >= 0x10000) && (current <= 0x10FFFF)))
1139
out.append(current);
1141
return out.toString();
1147
public String toString() {
1148
StringWriter sresult = new StringWriter();
1150
DOMSource source = new DOMSource(getDOMrepresentation());
1151
StreamResult result = new StreamResult(sresult);
1152
Transformer trans = TransformerFactory.newInstance().newTransformer();
1153
trans.setOutputProperty(OutputKeys.INDENT, "yes");
1154
trans.transform(source, result);
1156
catch (Exception e) {
1159
return sresult.toString();