~stefanor/ibid/apt-1020751

« back to all changes in this revision

Viewing changes to ibid/plugins/conversions.py

  • Committer: Tarmac
  • Author(s): Stefano Rivera
  • Date: 2011-10-23 20:30:55 UTC
  • mfrom: (1031.1.37 exchange-825217)
  • Revision ID: ibid-lp-lander@rivera.za.net-20111023203055-n80azn66wh2utl1v
XE.com no longer has a nice helpful country to currency list, so we build our own, based on ISO 4217. This means many heuristics, so we include a reasonable test suite.
Author: Stefano Rivera
Merge Request: http://code.launchpad.net/~stefanor/ibid/exchange-825217/+merge/71367
Approved by: Max Rabkin
Fixes LP: #825217

Show diffs side-by-side

added added

removed removed

Lines of Context:
9
9
 
10
10
import ibid
11
11
from ibid.plugins import Processor, handler, match
12
 
from ibid.compat import any, defaultdict
 
12
from ibid.compat import any, defaultdict, ElementTree
13
13
from ibid.config import Option
14
 
from ibid.utils import file_in_path, get_country_codes, human_join, \
15
 
                       unicode_output, generic_webservice
 
14
from ibid.utils import (cacheable_download, file_in_path, get_country_codes,
 
15
                        human_join, unicode_output, generic_webservice)
16
16
from ibid.utils.html import get_html_parse_tree
17
17
 
18
18
features = {}
312
312
    country_codes = {}
313
313
 
314
314
    def _load_currencies(self):
315
 
        etree = get_html_parse_tree(
316
 
                'http://www.xe.com/iso4217.php', headers = {
317
 
                    'User-Agent': 'Mozilla/5.0',
318
 
                    'Referer': 'http://www.xe.com/',
319
 
                }, treetype='etree')
320
 
 
321
 
        tbl_main = [x for x in etree.getiterator('table') if x.get('class') == 'tbl_main'][0]
322
 
 
 
315
        iso4127_file = cacheable_download(
 
316
                'http://www.currency-iso.org/dl_iso_table_a1.xml',
 
317
                'conversions/iso4217.xml')
 
318
        document = ElementTree.parse(iso4127_file)
 
319
        # Code -> [Countries..., Currency Name]
323
320
        self.currencies = {}
324
 
        for tbl_sub in tbl_main.getiterator('table'):
325
 
            if tbl_sub.get('class') == 'tbl_sub':
326
 
                for tr in tbl_sub.getiterator('tr'):
327
 
                    code, place = [x.text for x in tr.getchildren()]
328
 
                    name = u''
329
 
                    if not place:
330
 
                        place = u''
331
 
                    if u',' in place[1:-1]:
332
 
                        place, name = place.split(u',', 1)
333
 
                    place = place.strip()
334
 
                    if code in self.currencies:
335
 
                        currency = self.currencies[code]
336
 
                        # Are we using another country's currency?
337
 
                        if place != u'' and name != u'' and (currency[1] == u'' or currency[1].rsplit(None, 1)[0] in place
338
 
                                or (u'(also called' in currency[1] and currency[1].split(u'(', 1)[0].rsplit(None, 1)[0] in place)):
339
 
                            currency[0].insert(0, place)
340
 
                            currency[1] = name.strip()
341
 
                        else:
342
 
                            currency[0].append(place)
343
 
                    else:
344
 
                        self.currencies[code] = [[place], name.strip()]
 
321
        # Country -> Code
 
322
        self.country_currencies = {}
 
323
        self.country_codes = get_country_codes()
 
324
        # Non-currencies:
 
325
        non_currencies = set(('BOV CLF COU MXV '
 
326
                              'UYI XSU XUA '     # Various Fund codes
 
327
                              'CHE CHW '         # Swiss WIR currencies
 
328
                              'USN USS '         # US Dollar fund codes
 
329
                              'XAG XAU XPD XPT ' # Metals
 
330
                              'XBA XBB XBC XBD ' # Euro Bond Market
 
331
                              'XDR XTS XXX '     # Other specials
 
332
                             ).split())
 
333
        no_country_codes = set(('Saint Martin',
 
334
                                'Virgin Islands (Us)',
 
335
                                'Virgin Islands (British)',))
 
336
        accociated_all_countries = True
 
337
        for currency in document.getiterator('ISO_CURRENCY'):
 
338
            code = currency.findtext('ALPHABETIC_CODE').strip()
 
339
            name = currency.findtext('CURRENCY').strip()
 
340
            place = currency.findtext('ENTITY').strip().title()
 
341
            if code == '' or code in non_currencies:
 
342
                continue
 
343
            # Fund codes
 
344
            if re.match(r'^Zz[0-9]{2}', place, re.UNICODE):
 
345
                continue
 
346
            if code in self.currencies:
 
347
                self.currencies[code][0].append(place)
 
348
            else:
 
349
                self.currencies[code] = [[place], name]
 
350
            if place in no_country_codes:
 
351
                continue
 
352
            if (code[:2] in self.country_codes
 
353
                        and code[:2] not in self.country_currencies):
 
354
                    self.country_currencies[code[:2]] = code
 
355
                    continue
 
356
            ascii_place = (unicodedata.normalize('NFD', unicode(place))
 
357
                           .encode('ASCII', 'ignore')
 
358
                           .replace('-', ' ')
 
359
                           .replace('Sint', 'Saint'))
 
360
 
 
361
            # Countries with (alternative names)
 
362
            swapped_place = None
 
363
            m = re.match(r'^(.+?)\s+\((.+)\)$', ascii_place)
 
364
            if m is not None:
 
365
                swapped_place = '%s (%s)' % (m.group(2), m.group(1))
 
366
 
 
367
            for ccode, country in self.country_codes.iteritems():
 
368
                country = country.title()
 
369
                ascii_country = (unicodedata.normalize('NFD', country)
 
370
                                 .encode('ASCII', 'ignore')
 
371
                                 .replace('-', ' ')
 
372
                                 .replace('Sint', 'Saint'))
 
373
                if ascii_country in (ascii_place, swapped_place):
 
374
                    if ccode not in self.country_currencies:
 
375
                        self.country_currencies[ccode] = code
 
376
                    break
 
377
            else:
 
378
                log.info(u"ISO4127 parsing: Can't identify %s as a known "
 
379
                         u"country", place)
 
380
                accociated_all_countries = False
345
381
 
346
382
        # Special cases for shared currencies:
347
 
        self.currencies['EUR'][0].insert(0, u'Euro Member Countries')
348
 
        self.currencies['XOF'][0].insert(0, u'Communaut\xe9 Financi\xe8re Africaine')
349
 
        self.currencies['XOF'][1] = u'Francs'
 
383
        self.currencies['EUR'][0].append(u'Euro Member Countries')
 
384
        self.currencies['XAF'][0].append(u"Communaut\xe9 financi\xe8re d'Afrique")
 
385
        self.currencies['XCD'][0].append(u'Organisation of Eastern Caribbean States')
 
386
        self.currencies['XOF'][0].append(u'Coop\xe9ration financi\xe8re en Afrique centrale')
 
387
        self.currencies['XPF'][0].append(u'Comptoirs Fran\xe7ais du Pacifique')
 
388
        return accociated_all_countries
350
389
 
351
 
    def _resolve_currency(self, name, rough=True):
 
390
    def resolve_currency(self, name, rough=True, plural_recursion=False):
352
391
        "Return the canonical name for a currency"
353
392
 
 
393
        if not self.currencies:
 
394
            self._load_currencies()
 
395
 
354
396
        if name.upper() in self.currencies:
355
397
            return name.upper()
356
398
 
357
 
        strip_currency_re = re.compile(r'^[\.\s]*([\w\s]+?)s?$', re.UNICODE)
358
 
        m = strip_currency_re.match(name)
359
 
 
 
399
        # Strip leading dots (.TLD)
 
400
        m = re.match(r'^[\.\s]*(.+)$', name, re.UNICODE)
360
401
        if m is None:
361
402
            return False
362
 
 
363
403
        name = m.group(1).lower()
364
404
 
365
 
        # TLD -> country name
366
 
        if rough and len(name) == 2 and name.upper() in self.country_codes:
367
 
           name = self.country_codes[name.upper()].lower()
 
405
        # TLD:
 
406
        if rough and len(name) == 2 and name.upper() in self.country_currencies:
 
407
            return self.country_currencies[name.upper()]
368
408
 
369
409
        # Currency Name
370
410
        if name == u'dollar':
371
411
            return "USD"
372
 
 
373
 
        name_re = re.compile(r'^(.+\s+)?\(?%ss?\)?(\s+.+)?$' % name, re.I | re.UNICODE)
 
412
        if name == u'pound':
 
413
            return "GBP"
374
414
        for code, (places, currency) in self.currencies.iteritems():
375
 
            if name_re.match(currency) or [True for place in places if name_re.match(place)]:
376
 
                return code
377
 
 
 
415
            if name == currency.lower():
 
416
                return code
 
417
            if name.title() in places:
 
418
                return code
 
419
 
 
420
        # There are also country names in country_codes:
 
421
        for code, place in self.country_codes.iteritems():
 
422
            if name == place.lower() and code in self.country_currencies:
 
423
                return self.country_currencies[code]
 
424
 
 
425
        # Second pass, not requiring exact match:
 
426
        if rough:
 
427
            for code, (places, currency) in self.currencies.iteritems():
 
428
                if name in currency.lower():
 
429
                    return code
 
430
                if any(name in place.lower() for place in places):
 
431
                    return code
 
432
 
 
433
            for code, place in self.country_codes.iteritems():
 
434
                if name in place.lower() and code in self.country_currencies:
 
435
                    return self.country_currencies[code]
 
436
 
 
437
        # Maybe it's a plural?
 
438
        if name.endswith('s') and not plural_recursion:
 
439
            return self.resolve_currency(name[:-1], rough, True)
378
440
        return False
379
441
 
380
442
    @match(r'^(exchange|convert)\s+([0-9.]+)\s+(.+)\s+(?:for|to|into)\s+(.+)$')
381
443
    def exchange(self, event, command, amount, frm, to):
382
 
        if not self.currencies:
383
 
            self._load_currencies()
384
 
 
385
 
        if not self.country_codes:
386
 
            self.country_codes = get_country_codes()
387
 
 
388
444
        rough = command.lower() == 'exchange'
389
445
 
390
 
        canonical_frm = self._resolve_currency(frm, rough)
391
 
        canonical_to = self._resolve_currency(to, rough)
 
446
        canonical_frm = self.resolve_currency(frm, rough)
 
447
        canonical_to = self.resolve_currency(to, rough)
392
448
        if not canonical_frm or not canonical_to:
393
449
            if rough:
394
 
                event.addresponse(u"Sorry, I don't know about a currency for %s", (not canonical_frm and frm or to))
 
450
                event.addresponse(
 
451
                    u"Sorry, I don't know about a currency for %s",
 
452
                    (not canonical_frm and frm or to))
 
453
            return
 
454
        if canonical_frm == canonical_to:
 
455
            event.addresponse(
 
456
                u"Um, that's the same currency. Tell you what, "
 
457
                u"I can offer you my special rate of 0.5 %(currency)s for "
 
458
                u"each %(code)s you sell me.", {
 
459
                    'currency': self.currencies[canonical_frm][1],
 
460
                    'code': canonical_frm,
 
461
            })
395
462
            return
396
463
 
397
464
        data = generic_webservice(
407
474
            return
408
475
 
409
476
        event.addresponse(
410
 
            u'%(fresult)s %(fcode)s (%(fcountry)s %(fcurrency)s) = '
411
 
            u'%(tresult)0.2f %(tcode)s (%(tcountry)s %(tcurrency)s) '
 
477
            u'%(fresult)s %(fcode)s (%(fcurrency)s) = '
 
478
            u'%(tresult)0.2f %(tcode)s (%(tcurrency)s) '
412
479
            u'(Last trade rate: %(rate)s, Bid: %(bid)s, Ask: %(ask)s)', {
413
480
                'fresult': amount,
414
481
                'tresult': float(amount) * float(last_trade_rate),
415
 
                'fcountry': self.currencies[canonical_frm][0][0],
416
482
                'fcurrency': self.currencies[canonical_frm][1],
417
 
                'tcountry': self.currencies[canonical_to][0][0],
418
483
                'tcurrency': self.currencies[canonical_to][1],
419
484
                'fcode': canonical_frm,
420
485
                'tcode': canonical_to,
423
488
                'ask': ask,
424
489
            })
425
490
 
 
491
    @match(r'^(exchange|convert)\s+(.+)\s+([0-9.]+)\s+(?:for|to|into)\s+(.+)$')
 
492
    def exchange_reversed(self, event, command, amount, frm, to):
 
493
        self.exchange(event, command, frm, amount, to)
 
494
 
 
495
 
426
496
    @match(r'^(?:currency|currencies)\s+for\s+(?:the\s+)?(.+)$')
427
497
    def currency(self, event, place):
428
498
        if not self.currencies:
429
499
            self._load_currencies()
430
500
 
431
 
        search = re.compile(place, re.I)
432
 
        results = []
433
 
        for code, (places, name) in self.currencies.iteritems():
434
 
            for place in places:
435
 
                if search.search(place):
436
 
                    results.append(u'%s uses %s (%s)' % (place, name, code))
 
501
        results = defaultdict(list)
 
502
        for code, (c_places, name) in self.currencies.iteritems():
 
503
            for c_place in c_places:
 
504
                if re.search(place, c_place, re.I):
 
505
                    results[c_place].append(u'%s (%s)' % (name, code))
437
506
                    break
438
507
 
439
508
        if results:
440
 
            event.addresponse(human_join(results))
 
509
            event.addresponse(human_join(
 
510
                u'%s uses %s' % (place, human_join(currencies))
 
511
                for place, currencies in results.iteritems()
 
512
            ))
441
513
        else:
442
514
            event.addresponse(u'No currencies found')
443
515