# ~ubuntu-branches/debian/jessie/bibus/jessie : contents of Import/RIS.py at revision 6

# ~ubuntu-branches/debian/jessie/bibus/jessie : (revision 6)
# Copyright 2004,2005 Pierre Martineau <pmartino@users.sourceforge.net>
# This file is part of Bibus, a bibliographic database that can
# work together with OpenOffice.org to generate bibliographic indexes.
#
# Bibus is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Bibus is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bibus; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA.
#
# RIS format
# from the Reference Manager web site
#
from __future__ import generators		# to be removed in python 2.3
import BIB

DEFAULT_ENCODING = 'latin_1'

class importRef(object):
    """Iterable importer for RIS files.

    Iterating over an instance yields one converted record per
    "TY  - " ... "ER  - " block found in the input stream.  Every record is a
    list whose first item (the database id) is None; see __convertRecord for
    the order of the remaining fields.
    """
    # RIS publication type -> OpenOffice.org bibliographic type name.
    # RIS types missing from this table are imported as ARTICLE.
    Type = {
        'ABST': 'MISC',
        'ADVS': 'MISC',
        'ART': 'MISC',
        'BILL': 'MISC',
        'BOOK': 'BOOK',
        'CASE': 'MISC',
        'CHAP': 'INBOOK',
        'COMP': 'MISC',
        'CONF': 'PROCEEDINGS',
        'CTLG': 'MISC',
        'DATA': 'MISC',
        'ELEC': 'MISC',
        'GEN': 'MISC',
        'HEAR': 'UNPUBLISHED',
        'ICOMM': 'UNPUBLISHED',
        'INPR': 'ARTICLE',
        'JFULL': 'ARTICLE',
        'JOUR': 'ARTICLE',
        'MAP': 'MISC',
        'MGZN': 'JOURNAL',
        'MPCT': 'MISC',
        'MUSIC': 'MISC',
        'NEWS': 'JOURNAL',
        'PAMP': 'MISC',
        'PAT': 'MISC',
        'PCOMM': 'UNPUBLISHED',
        'RPRT': 'TECHREPORT',
        'SER': 'INCOLLECTION',
        'SLIDE': 'MISC',
        'SOUND': 'MISC',
        'STAT': 'MISC',
        'THES': 'PHDTHESIS',
        'UNBILL': 'MISC',
        'UNPB': 'UNPUBLISHED',
        'VIDEO': 'MISC'
    }
    # RIS publication type -> human readable label, stored in HowPublished so
    # that the original kind of document (patent, map, ...) is not lost.
    TypeFull = {
        'ABST': 'Abstract',
        'ADVS': 'Audiovisual material',
        'ART': 'Art Work',
        'BILL': 'Bill/Resolution',
        'BOOK': 'Book',
        'CASE': 'Case',
        'CHAP': 'Book chapter',
        'COMP': 'Computer program',
        'CONF': 'Conference proceeding',
        'CTLG': 'Catalog',
        'DATA': 'Data file',
        'ELEC': 'Electronic citation',
        'GEN': 'Generic',
        'HEAR': 'Hearing',
        'ICOMM': 'Internet Communication',
        'INPR': 'In Press',
        'JFULL': 'Journal',
        'JOUR': 'Journal',
        'MAP': 'Map',
        'MGZN': 'Magazine article',
        'MPCT': 'Motion picture',
        'MUSIC': 'Music score',
        'NEWS': 'Newspaper',
        'PAMP': 'Pamphlet',
        'PAT': 'Patent',
        'PCOMM': 'Personal communication',
        'RPRT': 'Report',
        'SER': 'Serial',
        'SLIDE': 'Slide',
        'SOUND': 'Sound recording',
        'STAT': 'Statute',
        'THES': 'Thesis/Dissertation',
        'UNBILL': 'Unenacted bill/resolution',
        'UNPB': 'Unpublished work',
        'VIDEO': 'Video recording'
    }

    def __init__(self, infile):
        """Store infile and position it on the first 'T' of the stream.

        infile must be a seekable file-like object providing read(),
        readline(), tell() and seek().  Leading characters are skipped
        because EndNote8 export files start with strange characters before
        the first "TY" tag.  If the stream holds no 'T' at all it is simply
        left at end of file, so iterating yields nothing.
        """
        self.infile = infile
        while True:
            # Remember where the next character starts so that we can step
            # back to it with an absolute seek: relative seeks are refused
            # by text-mode streams and assume one byte per character.
            position = infile.tell()
            char = infile.read(1)
            if char == 'T':
                infile.seek(position)
                break
            if not char:    # end of file reached without finding a 'T'
                break

    def __iter__(self):
        """Generator of records. for record in <instance>: ...

        A RIS record starts with "TY  - " and ends with "ER  - ".  Lines that
        do not carry a tag are continuations of the previous field and are
        appended to it, separated by one blank.  A record that is not closed
        by an "ER" line before end of file is not yielded.
        """
        record = []
        while True:
            line = self.infile.readline()
            if not line:            # end of file
                break
            text = line.strip()
            if not text:            # blank lines carry no information
                continue
            # A tag line is "XX  - value".  The test is made on the line
            # without trailing whitespace so that "ER  -" or "AB  -", whose
            # final blank was stripped by an editor, is still recognised.
            body = line.rstrip()
            isTag = body[2:5] == '  -' and body[5:6] in ('', ' ')
            if isTag and body[:2] == 'ER':
                if record:
                    yield self.__convertRecord(record)
                    record = []
            elif isTag:
                record.append(text)
            elif record:
                record[-1] = ' '.join([record[-1], text])
            # else: continuation text before any field; nothing to attach it to

    def _datePart(self, Record, index):
        """Return item `index` of the '/'-separated date of Record, or ''.

        The date is taken from 'Y1' when that tag is present, otherwise from
        'PY'.  index 0 is the year and index 1 the month.
        """
        if 'Y1' in Record:
            parts = Record['Y1'].split('/')
        elif 'PY' in Record:
            parts = Record['PY'].split('/')
        else:
            return ''
        if index < len(parts):
            return parts[index]
        return ''

    def __convertRecord(self, record):
        """Convert the tagged lines of one RIS record into a Bibus record.

        record is a list of "XX  - value" strings.  Values of a tag that
        occurs several times are joined with BIB.SEP and a final dot is
        removed from every value.

        Return a list made of None (the id) followed by
        ('Identifier', 'Bibliographic_Type', 'Address', 'Annote', 'Author', 'Booktitle', 'Chapter', 'Edition', 'Editor','HowPublished', 'Institution', 'Journal', 'Month', 'Note', 'Number', 'Organizations', 'Pages', 'Publisher', 'School', 'Series', 'Title', 'Report_Type', 'Volume', 'Year', 'URL', 'Custom1', 'Custom2', 'Custom3', 'Custom4', 'Custom5', 'ISBN','Abstract')
        Fields without a RIS equivalent are returned as ''.
        """
        Record = {}
        for line in record:
            refKey, tmpline = line[:2].strip(), line[6:].strip()
            if tmpline.endswith('.'):
                tmpline = tmpline[:-1].rstrip()     # remove final dot if present
            if refKey in Record:                    # add to a previous key or create a new one
                Record[refKey] = BIB.SEP.join([Record[refKey], tmpline])
            else:
                Record[refKey] = tmpline
        #
        Identifier = Record.get('ID', '')
        #
        # Only the first TY value is used if the tag was repeated.
        risType = ''
        if 'TY' in Record:
            risType = Record['TY'].split(BIB.SEP)[0]
        try:
            Bibliographic_Type = BIB.BIBLIOGRAPHIC_TYPE[importRef.Type[risType]]
        except KeyError:
            Bibliographic_Type = BIB.BIBLIOGRAPHIC_TYPE['ARTICLE']     # default type = ARTICLE
        # NOTE(review): Bibliographic_Type is whatever BIB.BIBLIOGRAPHIC_TYPE
        # stores.  If those values are not the type names themselves this
        # comparison is never true -- confirm against BIB.  Kept as it was.
        isBook = (Bibliographic_Type == 'BOOK')
        #
        Address = Record.get('AD', '')
        Annote = ''
        # First author tag wins; all the dots (initials) are removed.
        Author = Record.get('A1', Record.get('AU', '')).replace('.', '')
        #
        if isBook:
            Booktitle = ''
        else:
            Booktitle = Record.get('BT', '')
        #
        Chapter = ''
        Edition = ''
        Editor = Record.get('A3', '').replace('.', '')      # remove all the dots
        # We put the record type in clear, so we know it is a patent, etc...
        # A missing or unknown type gives '' instead of raising KeyError.
        HowPublished = importRef.TypeFull.get(risType, '')
        Institution = ''
        # Abbreviated name first (without its dots), then the other journal tags.
        if 'JA' in Record:
            Journal = Record['JA'].replace('.', '')
        else:
            Journal = Record.get('JO', Record.get('JF', ''))
        #
        Month = self._datePart(Record, 1)
        Note = Record.get('N1', '')
        Number = Record.get('IS', '')
        Organizations = ''
        # "start-end" when both pages are known, otherwise whichever exists.
        if 'EP' in Record:
            if 'SP' in Record:
                Pages = '-'.join((Record['SP'], Record['EP']))
            else:
                Pages = Record['EP']
        else:
            Pages = Record.get('SP', '')
        # "Publisher, Address" when both are known, otherwise whichever exists.
        publiAddress = Record.get('CY', '')
        if publiAddress and 'PB' in Record:
            Publisher = ', '.join((Record['PB'], publiAddress))
        elif publiAddress:
            Publisher = publiAddress
        else:
            Publisher = Record.get('PB', '')
        #
        School = ''
        Series = Record.get('T3', '')
        #
        if 'T1' in Record:
            Title = Record['T1']
        elif 'TI' in Record:
            Title = Record['TI']
        elif not isBook:
            Title = Record.get('BT', '')
        else:
            Title = ''
        #
        Report_Type = ''
        Volume = Record.get('VL', '')
        Year = self._datePart(Record, 0)
        URL = Record.get('UR', '')
        Custom1, Custom2, Custom3, Custom4, Custom5 = [Record.get(key, '') for key in ('U1', 'U2', 'U3', 'U4', 'U5')]
        ISBN = Record.get('SN', '')
        # ScienceDirect uses 'AB' for Abstracts instead of 'N2'
        Abstract = Record.get('N2', Record.get('AB', ''))
        #
        return [None, Identifier, Bibliographic_Type, Address, Annote, Author, Booktitle, Chapter, Edition, Editor, HowPublished, Institution, Journal, Month, Note, Number, Organizations, Pages, Publisher, School, Series, Title, Report_Type, Volume, Year, URL, Custom1, Custom2, Custom3, Custom4, Custom5, ISBN, Abstract]