3
# $Id: utf8_fixups,v 1.1 2009/08/09 00:18:05 knowledgejunkie Exp $
5
# This file contains fixups for mis-encoded UTF-8 characters that are
6
# frequently seen in the source data from the Radio Times.
8
# Each entry comprises two pipe-separated fields (a literal pipe inside a
# field is written as its hex escape, \x7C):
10
# i) the bytes of the mis-encoded character(s), written as \xNN hex escapes, and
11
# ii) the bytes of the replacement character(s) to substitute, written as \xNN hex escapes
15
\xC3\xA2\xC2\xC2\xA6|\xE2\x80\xA6
17
\xC3\xA9\xC2\xAF\xC2\xAE|\xC3\xA9\x6F\x6E
19
\xC3\xA7\xC2\xAF\xC2\xA9\x73|\xC3\xA7\x6F\x69\x73
21
\x4A\x65\x74\xC3\xA9\xC2\xA5\xC2\xAC|\x4A\x65\x74\xC3\xA9\x65\x2C
23
\x64\xC3\xA9\xC2\xA3\xC2\xAF\x6C|\x64\xC3\xA9\x63\x6F\x6C
25
\x50\x65\xC3\xB1\xC2\xA1\xC2\xBC\xC2\x83\x61|\x50\x65\xC3\xB1\x61\x7C\x43\x61
27
\x52\x65\x6E\xC3\xA9\xC2\xA0\xC2\x84\x65|\x52\x65\x6E\xC3\xA9\x20\x44\x65
29
\x63\x6C\x69\x63\x68\xC3\xA9\xC2\xA4\xC2\xA0|\x63\x6C\x69\x63\x68\xC3\xA9\x64\x20
31
\x63\x6C\x69\x63\x68\xC3\xAF\xC2\xBF\xC2\xBD\x73|\x63\x6C\x69\x63\x68\xC3\xA9\x73
33
\x4E\x6F\xC3\xAB\xC2\xAC\xC2\xA0|\x4E\x6F\xC3\xAB\x6C\x20