~onboard/onboard/1.1

« back to all changes in this revision

Viewing changes to Onboard/TextDomain.py

  • Committer: marmuta
  • Date: 2015-08-19 11:41:16 UTC
  • Revision ID: marmvta@gmail.com-20150819114116-gxx4sfq2rcjfdmxt
Merge trunk rev. 1960: Insert auto-separator "/" after top-level domain "co". Entering "co.uk" requires slightly more effort now. "co" is ambiguous, unfortunately.

Show diffs side-by-side

added added

removed removed

Lines of Context:
757
757
        '.'
758
758
        >>> p.get_auto_separator("http://www.domain.org")
759
759
        '/'
760
 
        >>> p.get_auto_separator("http://www.domain.co")
761
 
        '.'
 
760
        >>> p.get_auto_separator("http://www.domain.co") # ambiguous co/ or co.uk/
 
761
        '/'
762
762
        >>> p.get_auto_separator("http://www.domain.co.uk")
763
763
        '/'
 
764
        >>> p.get_auto_separator("http://www.domain.co.uk/home")
 
765
        '/'
 
766
        >>> p.get_auto_separator("http://www.domain.co/home")
 
767
        '/'
764
768
        >>> p.get_auto_separator("http://www.domain.org/home")
765
769
        '/'
766
770
        >>> p.get_auto_separator("http://www.domain.org/home/index.html")
802
806
        SCHEME, PROTOCOL, DOMAIN, PATH = range(4)
803
807
        component = SCHEME
804
808
        last_septok = ""
805
 
        matches = self.iter_url(context)
806
 
        for match in matches:
 
809
        matches = tuple(self.iter_url(context))
 
810
        for index, match in enumerate(matches):
807
811
            groups = match.groups()
808
812
            token  = groups[0]
809
813
            septok = groups[1]
810
814
 
811
815
            if septok:
812
816
                last_septok = septok
 
817
            if index < len(matches)-1:
 
818
                next_septok = matches[index+1].groups()[1]
 
819
            else:
 
820
                next_septok = ""
813
821
 
814
822
            if component == SCHEME:
815
823
                if token:
832
840
                if token:
833
841
                    separator = "."
834
842
                    if last_septok == "." and \
835
 
                       token in self._TLDs and \
836
 
                       token != "co":  # special case for co.uk TLD
 
843
                       next_septok != "." and \
 
844
                       token in self._TLDs:
837
845
                        separator = "/"
838
846
                        component = PATH
839
847
                        continue