1
# Copyright (C) 2006-2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Black-box tests for bzr handling non-ascii characters."""
27
from bzrlib.tests import EncodingAdapter
30
def load_tests(standard_tests, module, loader):
31
return tests.multiply_tests(standard_tests,
32
EncodingAdapter.encoding_scenarios,
36
class TestNonAscii(tests.TestCaseWithTransport):
37
"""Test that bzr handles files/committers/etc which are non-ascii."""
40
super(TestNonAscii, self).setUp()
41
self._check_can_encode_paths()
43
self.addCleanup(setattr, osutils, "_cached_user_encoding",
44
osutils._cached_user_encoding)
45
osutils._cached_user_encoding = self.encoding
46
email = self.info['committer'] + ' <joe@foo.com>'
47
os.environ['BZR_EMAIL'] = email.encode(osutils.get_user_encoding())
50
def run_bzr_decode(self, args, encoding=None, fail=False, retcode=None,
52
"""Run bzr and decode the output into a particular encoding.
54
Returns a string containing the stdout output from bzr.
56
:param fail: If true, the operation is expected to fail with
60
encoding = osutils.get_user_encoding()
62
out = self.run_bzr(args,
63
output_encoding=encoding, encoding=encoding,
64
retcode=retcode, working_dir=working_dir)[0]
65
return out.decode(encoding)
66
except UnicodeError, e:
70
# This command, run from the regular command line, will give a
71
# traceback to the user. That's not really good for a situation
72
# that can be provoked just by the interaction of their input data
73
# and locale, as some of these are. What would be better?
75
self.fail("Expected UnicodeError not raised")
77
def _check_OSX_can_roundtrip(self, path, fs_enc=None):
78
"""Stop the test if it's about to fail or errors out.
80
Until we get proper support on OSX for accented paths (in fact, any
81
path whose NFD decomposition is different than the NFC one), this is
82
the best way to keep test active (as opposed to disabling them
83
completely). This is a stop gap. The tests should at least be rewritten
84
so that the failing ones are clearly separated from the passing ones.
87
fs_enc = osutils._fs_enc
88
if sys.platform == 'darwin':
89
encoded = path.encode(fs_enc)
91
normal_thing = unicodedata.normalize('NFD', path)
92
mac_encoded = normal_thing.encode(fs_enc)
93
if mac_encoded != encoded:
94
raise tests.KnownFailure(
95
'Unable to roundtrip path %r on OSX filesystem'
96
' using encoding "%s"'
99
def _check_can_encode_paths(self):
100
fs_enc = osutils._fs_enc
101
terminal_enc = osutils.get_terminal_encoding()
102
fname = self.info['filename']
103
dir_name = self.info['directory']
104
for thing in [fname, dir_name]:
107
except UnicodeEncodeError:
108
raise tests.TestSkipped(
109
'Unable to represent path %r in filesystem encoding "%s"'
112
thing.encode(terminal_enc)
113
except UnicodeEncodeError:
114
raise tests.TestSkipped(
115
'Unable to represent path %r in terminal encoding "%s"'
116
' (even though it is valid in filesystem encoding "%s")'
117
% (thing, terminal_enc, fs_enc))
119
def create_base(self):
120
wt = self.make_branch_and_tree('.')
121
self.build_tree_contents([('a', 'foo\n')])
123
wt.commit('adding a')
125
self.build_tree_contents(
126
[('b', 'non-ascii \xFF\xFF\xFC\xFB\x00 in b\n')])
128
wt.commit(self.info['message'])
130
fname = self.info['filename']
131
self.build_tree_contents([(fname, 'unicode filename\n')])
133
wt.commit(u'And a unicode file\n')
135
# FIXME: We don't check that the add went well; in fact, it doesn't on
# OSX (when LC_ALL is set correctly) because the added path doesn't
# match the one used on OSX. Checking here would require more
# invasive changes than adding _check_OSX_can_roundtrip(), so I
# punt for now -- vila 20090702
141
def test_status(self):
    """Check ``bzr status`` output for a modified non-ascii path.

    The modified file must be listed verbatim in the default encoding,
    and with 'replace' substitution when ascii output is requested.
    """
    filename = self.info['filename']
    self.build_tree_contents([(filename, 'changed something\n')])

    # Default encoding: the path is expected to come back unchanged.
    native_out = self.run_bzr_decode('status')
    self._check_OSX_can_roundtrip(filename)
    # NOTE(review): whitespace inside the expected strings may have been
    # collapsed when this listing was extracted -- confirm the indent
    # before '%s' against real ``bzr status`` output.
    self.assertEqual(u'modified:\n %s\n' % (filename,), native_out)

    # ascii output: unencodable characters are replaced, not fatal.
    ascii_out = self.run_bzr_decode('status', encoding='ascii')
    replaced = filename.encode('ascii', 'replace')
    self.assertEqual(u'modified:\n %s\n' % (replaced,), ascii_out)
154
# bzr cat shouldn't change the contents
155
# using run_bzr since that doesn't decode
156
txt = self.run_bzr('cat b')[0]
157
self.assertEqual('non-ascii \xFF\xFF\xFC\xFB\x00 in b\n', txt)
159
self._check_OSX_can_roundtrip(self.info['filename'])
160
txt = self.run_bzr(['cat', self.info['filename']])[0]
161
self.assertEqual('unicode filename\n', txt)
163
def test_cat_revision(self):
    """Check that ``bzr cat-revision`` shows non-ascii revision metadata.

    Revision 1 must mention the committer and revision 2 must contain
    the scenario's commit message.
    """
    committer = self.info['committer']
    txt = self.run_bzr_decode('cat-revision -r 1')
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(committer in txt,
                    'failed to find %r in %r' % (committer, txt))

    msg = self.info['message']
    txt = self.run_bzr_decode('cat-revision -r 2')
    self.assertTrue(msg in txt, 'failed to find %r in %r' % (msg, txt))
173
def test_mkdir(self):
174
txt = self.run_bzr_decode(['mkdir', self.info['directory']])
175
self.assertEqual(u'added %s\n' % self.info['directory'], txt)
177
# The text should be garbled, but the command should succeed
178
txt = self.run_bzr_decode(['mkdir', self.info['directory'] + '2'],
180
expected = u'added %s2\n' % (self.info['directory'],)
181
expected = expected.encode('ascii', 'replace')
182
self.assertEqual(expected, txt)
184
def test_relpath(self):
    """Check ``bzr relpath`` on a non-ascii filename.

    The path is echoed back followed by a newline in the default
    encoding; with ascii output the command is expected to fail.
    """
    filename = self.info['filename']
    out = self.run_bzr_decode(['relpath', filename])
    self.assertEqual(filename + '\n', out)

    # The exact path is the whole point of the output, so an encoding
    # that cannot represent it must be reported as a failure.
    self.run_bzr_decode(['relpath', filename], encoding='ascii', fail=True)
191
def test_inventory(self):
192
txt = self.run_bzr_decode('inventory')
193
self._check_OSX_can_roundtrip(self.info['filename'])
194
self.assertEqual(['a', 'b', self.info['filename']],
197
# inventory should fail if unable to encode
198
self.run_bzr_decode('inventory', encoding='ascii', fail=True)
200
# We don't really care about the ids themselves,
201
# but the command shouldn't fail
202
txt = self.run_bzr_decode('inventory --show-ids')
204
def test_revno(self):
    """Check that ``bzr revno`` prints '3' whatever the output encoding."""
    # There isn't a lot to test here: the output is a plain number, so
    # the default encoding and ascii must give the same text.
    for extra in ({}, {'encoding': 'ascii'}):
        self.assertEqual('3\n', self.run_bzr_decode('revno', **extra))
210
def test_revision_info(self):
    """Check that ``bzr revision-info`` runs in default and ascii encodings.

    Only successful completion is checked; the output is not inspected.
    """
    command = 'revision-info -r 1'
    self.run_bzr_decode(command)

    # TODO: jam 20060105 If we support revisions with non-ascii characters,
    #       this should be strict and fail.
    self.run_bzr_decode(command, encoding='ascii')
218
fname1 = self.info['filename']
219
fname2 = self.info['filename'] + '2'
220
dirname = self.info['directory']
222
# fname1 already exists
223
self.run_bzr_decode(['mv', 'a', fname1], fail=True)
225
txt = self.run_bzr_decode(['mv', 'a', fname2])
226
self.assertEqual(u'a => %s\n' % fname2, txt)
227
self.failIfExists('a')
228
self.failUnlessExists(fname2)
230
# After 'mv' we need to re-open the working tree
231
self.wt = self.wt.bzrdir.open_workingtree()
232
self.wt.commit('renamed to non-ascii')
236
txt = self.run_bzr_decode(['mv', fname1, fname2, dirname])
237
self._check_OSX_can_roundtrip(self.info['filename'])
238
self.assertEqual([u'%s => %s/%s' % (fname1, dirname, fname1),
239
u'%s => %s/%s' % (fname2, dirname, fname2)]
242
# The rename should still succeed
243
newpath = u'%s/%s' % (dirname, fname2)
244
txt = self.run_bzr_decode(['mv', newpath, 'a'], encoding='ascii')
245
self.failUnlessExists('a')
246
self.assertEqual(newpath.encode('ascii', 'replace') + ' => a\n', txt)
248
def test_branch(self):
249
# We should be able to branch into a directory that
250
# has a unicode name, even if we can't display the name
251
self.run_bzr_decode(['branch', u'.', self.info['directory']])
252
self.run_bzr_decode(['branch', u'.', self.info['directory'] + '2'],
256
# Make sure we can pull from paths that can't be encoded
257
dirname1 = self.info['directory']
258
dirname2 = self.info['directory'] + '2'
259
url1 = urlutils.local_path_to_url(dirname1)
260
url2 = urlutils.local_path_to_url(dirname2)
261
out_bzrdir = self.wt.bzrdir.sprout(url1)
262
out_bzrdir.sprout(url2)
264
self.build_tree_contents(
265
[(osutils.pathjoin(dirname1, "a"), 'different text\n')])
266
self.wt.commit('mod a')
268
txt = self.run_bzr_decode('pull', working_dir=dirname2)
270
expected = osutils.pathjoin(osutils.getcwd(), dirname1)
271
self.assertEqual(u'Using saved parent location: %s/\n'
272
'No revisions to pull.\n' % (expected,), txt)
274
self.build_tree_contents(
275
[(osutils.pathjoin(dirname1, 'a'), 'and yet more\n')])
276
self.wt.commit(u'modifying a by ' + self.info['committer'])
278
# We should be able to pull, even if our encoding is bad
279
self.run_bzr_decode('pull --verbose', encoding='ascii',
280
working_dir=dirname2)
283
# TODO: Test push to an SFTP location
284
# Make sure we can pull from paths that can't be encoded
285
# TODO: jam 20060427 For drastically improving performance, we probably
286
# could create a local repository, so it wouldn't have to copy
287
# the files around as much.
288
# Note that the tests don't actually fail, but if we don't set this
289
# flag, we end up getting "Lock was not Unlocked" warnings
291
dirname = self.info['directory']
292
self.run_bzr_decode(['push', dirname])
294
self.build_tree_contents([('a', 'adding more text\n')])
295
self.wt.commit('added some stuff')
297
# TODO: check the output text is properly encoded
298
self.run_bzr_decode('push')
300
self.build_tree_contents(
301
[('a', 'and a bit more: \n%s\n' % (dirname.encode('utf-8'),))])
303
self.wt.commit('Added some ' + dirname)
304
self.run_bzr_decode('push --verbose', encoding='ascii')
306
self.run_bzr_decode(['push', '--verbose', dirname + '2'])
308
self.run_bzr_decode(['push', '--verbose', dirname + '3'],
311
self.run_bzr_decode(['push', '--verbose', '--create-prefix',
312
dirname + '4/' + dirname + '5'])
313
self.run_bzr_decode(['push', '--verbose', '--create-prefix',
314
dirname + '6/' + dirname + '7'], encoding='ascii')
316
def test_renames(self):
    """Check ``bzr renames`` after renaming 'a' to a non-ascii name.

    The rename is listed as ``a => <new name>`` in the default encoding
    and the command is expected to fail when forced to ascii.
    """
    new_name = self.info['filename'] + '2'
    self.wt.rename_one('a', new_name)

    out = self.run_bzr_decode('renames')
    self.assertEqual(u'a => %s\n' % (new_name,), out)

    self.run_bzr_decode('renames', fail=True, encoding='ascii')
324
def test_remove(self):
    """Check that ``bzr remove`` accepts a non-ascii path with ascii output."""
    fname = self.info['filename']
    # Only successful completion matters; the output is not inspected,
    # so it is no longer bound to an unused local.
    self.run_bzr_decode(['remove', fname], encoding='ascii')
328
def test_remove_verbose(self):
329
fname = self.info['filename']
330
txt = self.run_bzr_decode(['remove', '--verbose', fname],
333
def test_file_id(self):
    """Check that ``bzr file-id`` runs for a non-ascii path.

    The command is run in the default encoding and again with ascii
    output; only successful completion is checked.
    """
    fname = self.info['filename']
    # The output is not inspected, so it is not kept.
    self.run_bzr_decode(['file-id', fname])

    # TODO: jam 20060106 We don't support non-ascii file ids yet,
    #       so there is nothing which would fail in ascii encoding
    #       This *should* be retcode=3
    self.run_bzr_decode(['file-id', fname], encoding='ascii')
342
def test_file_path(self):
343
# Create a directory structure
344
fname = self.info['filename']
345
dirname = self.info['directory']
346
self.build_tree_contents([
348
(osutils.pathjoin('base', '%s/' % (dirname,)), )])
350
self.wt.add('base/'+dirname)
351
path = osutils.pathjoin('base', dirname, fname)
352
self._check_OSX_can_roundtrip(self.info['filename'])
353
self.wt.rename_one(fname, path)
354
self.wt.commit('moving things around')
356
txt = self.run_bzr_decode(['file-path', path])
358
# TODO: jam 20060106 We don't support non-ascii file ids yet,
359
# so there is nothing which would fail in ascii encoding
360
# This *should* be retcode=3
361
txt = self.run_bzr_decode(['file-path', path], encoding='ascii')
363
def test_revision_history(self):
    """Check that ``bzr revision-history`` runs in the scenario encoding."""
    # TODO: jam 20060106 We don't support non-ascii revision ids yet,
    #       so there is nothing which would fail in ascii encoding
    # The output is not inspected, so it is not kept.
    self.run_bzr_decode('revision-history')
368
def test_ancestry(self):
    """Check that ``bzr ancestry`` runs in the scenario encoding."""
    # TODO: jam 20060106 We don't support non-ascii revision ids yet,
    #       so there is nothing which would fail in ascii encoding
    # The output is not inspected, so it is not kept.
    self.run_bzr_decode('ancestry')
374
self._check_OSX_can_roundtrip(self.info['filename'])
375
# TODO: jam 20060106 diff is a difficult one to test, because it
376
# shouldn't encode the file contents, but it needs some sort
377
# of encoding for the paths, etc which are displayed.
378
self.build_tree_contents([(self.info['filename'], 'newline\n')])
379
txt = self.run_bzr('diff', retcode=1)[0]
381
def test_deleted(self):
    """Check ``bzr deleted`` output for a removed non-ascii path.

    The path must be listed exactly in the default encoding, must lead
    the ``--show-ids`` output, and the command is expected to fail when
    forced to ascii.
    """
    self._check_OSX_can_roundtrip(self.info['filename'])
    fname = self.info['filename']
    # NOTE(review): the source listing skips a line just before this
    # call -- confirm no statement (e.g. deleting the file on disk) was
    # lost there.
    self.wt.remove(fname)

    txt = self.run_bzr_decode('deleted')
    self.assertEqual(fname + '\n', txt)

    txt = self.run_bzr_decode('deleted --show-ids')
    # assertTrue replaces the deprecated failUnless alias.
    self.assertTrue(txt.startswith(fname))

    # Deleted should fail if cannot decode
    # Because it is giving the exact paths
    # which might be used by a front end
    self.run_bzr_decode('deleted', encoding='ascii', fail=True)
398
def test_modified(self):
    """Check ``bzr modified`` output for a changed non-ascii path.

    The path is printed inside double quotes in the default encoding;
    with ascii output the command is expected to fail.
    """
    filename = self.info['filename']
    self.build_tree_contents([(filename, 'modified\n')])

    out = self.run_bzr_decode('modified')
    self._check_OSX_can_roundtrip(self.info['filename'])
    self.assertEqual('"%s"\n' % (filename,), out)

    self.run_bzr_decode('modified', encoding='ascii', fail=True)
408
def test_added(self):
409
fname = self.info['filename'] + '2'
410
self.build_tree_contents([(fname, 'added\n')])
413
txt = self.run_bzr_decode('added')
414
self.assertEqual('"'+fname+'"'+'\n', txt)
416
self.run_bzr_decode('added', encoding='ascii', fail=True)
419
dirname = self.info['directory']
420
url = urlutils.local_path_to_url(dirname)
421
self.run_bzr_decode('root')
423
self.wt.bzrdir.sprout(url)
425
txt = self.run_bzr_decode('root', working_dir=dirname)
426
self.failUnless(txt.endswith(dirname+'\n'))
428
txt = self.run_bzr_decode('root', encoding='ascii', fail=True,
432
fname = self.info['filename']
434
txt = self.run_bzr_decode('log')
435
self.assertNotEqual(-1, txt.find(self.info['committer']))
436
self.assertNotEqual(-1, txt.find(self.info['message']))
438
txt = self.run_bzr_decode('log --verbose')
439
# FIXME: iso-8859-2 test shouldn't be skipped here --vila 20090702
440
self._check_OSX_can_roundtrip(self.info['filename'])
441
self.assertNotEqual(-1, txt.find(fname))
443
# Make sure log doesn't fail even if we can't write out
444
txt = self.run_bzr_decode('log --verbose', encoding='ascii')
445
self.assertEqual(-1, txt.find(fname))
446
self.assertNotEqual(-1, txt.find(fname.encode('ascii', 'replace')))
448
def test_touching_revisions(self):
449
fname = self.info['filename']
450
txt = self.run_bzr_decode(['touching-revisions', fname])
451
self._check_OSX_can_roundtrip(self.info['filename'])
452
self.assertEqual(u' 3 added %s\n' % (fname,), txt)
454
fname2 = self.info['filename'] + '2'
455
self.wt.rename_one(fname, fname2)
456
self.wt.commit(u'Renamed %s => %s' % (fname, fname2))
458
txt = self.run_bzr_decode(['touching-revisions', fname2])
459
expected_txt = (u' 3 added %s\n'
460
u' 4 renamed %s => %s\n'
461
% (fname, fname, fname2))
462
self.assertEqual(expected_txt, txt)
464
self.run_bzr_decode(['touching-revisions', fname2], encoding='ascii',
468
txt = self.run_bzr_decode('ls')
469
self.assertEqual(sorted(['a', 'b', self.info['filename']]),
470
sorted(txt.splitlines()))
471
txt = self.run_bzr_decode('ls --null')
472
self.assertEqual(sorted(['', 'a', 'b', self.info['filename']]),
473
sorted(txt.split('\0')))
475
txt = self.run_bzr_decode('ls', encoding='ascii', fail=True)
476
txt = self.run_bzr_decode('ls --null', encoding='ascii', fail=True)
478
def test_unknowns(self):
    """Check ``bzr unknowns`` output for an unversioned non-ascii path.

    The path is printed inside double quotes in the default encoding;
    with ascii output the command is expected to fail.
    """
    unknown_name = self.info['filename'] + '2'
    self.build_tree_contents([(unknown_name, 'unknown\n')])

    # TODO: jam 20060112 bzr unknowns is the only one which
    #       quotes paths do we really want it to?
    #       awilkins 20080521 added and modified do it now as well
    out = self.run_bzr_decode('unknowns')
    self._check_OSX_can_roundtrip(self.info['filename'])
    self.assertEqual(u'"%s"\n' % (unknown_name,), out)

    self.run_bzr_decode('unknowns', encoding='ascii', fail=True)
491
def test_ignore(self):
492
fname2 = self.info['filename'] + '2.txt'
493
self.build_tree_contents([(fname2, 'ignored\n')])
495
def check_unknowns(expected):
496
self.assertEqual(expected, list(self.wt.unknowns()))
498
self._check_OSX_can_roundtrip(self.info['filename'])
499
check_unknowns([fname2])
501
self.run_bzr_decode(['ignore', './' + fname2])
504
fname3 = self.info['filename'] + '3.txt'
505
self.build_tree_contents([(fname3, 'unknown 3\n')])
506
check_unknowns([fname3])
508
# Ignore should not care what the encoding is
509
# (right now it doesn't print anything)
510
self.run_bzr_decode(['ignore', fname3], encoding='ascii')
513
# Now try a wildcard match
514
fname4 = self.info['filename'] + '4.txt'
515
self.build_tree_contents([(fname4, 'unknown 4\n')])
516
self.run_bzr_decode('ignore *.txt')
519
# and a different wildcard that matches everything
520
os.remove('.bzrignore')
521
self.run_bzr_decode(['ignore', self.info['filename'] + '*'])
524
def test_missing(self):
    """Check ``bzr missing`` output against an empty reference tree.

    Committer and commit message must appear in the default encoding.
    With ascii output the command must still succeed, showing the
    message only in its 'replace'-encoded form.
    """
    # An empty tree serves as the branch everything is "missing" from.
    self.make_branch_and_tree('empty-tree')

    message = self.info['message']

    out = self.run_bzr_decode('missing empty-tree')
    for needle in (self.info['committer'], message):
        self.assertNotEqual(-1, out.find(needle))

    # Make sure missing doesn't fail even if we can't write out
    ascii_out = self.run_bzr_decode('missing empty-tree', encoding='ascii')
    self.assertEqual(-1, ascii_out.find(message))
    replaced = message.encode('ascii', 'replace')
    self.assertNotEqual(-1, ascii_out.find(replaced))
540
self.run_bzr_decode(['branch', u'.', self.info['directory']])
541
self.run_bzr_decode(['info', self.info['directory']])
542
self.run_bzr_decode(['info', self.info['directory']],
545
def test_ignored(self):
    """Check ``bzr ignored`` output for an ignored non-ascii path.

    After ignoring the file by its exact name, the output must be the
    path left-justified in a 50-character column followed by the
    matching pattern (here the same name), both in the default encoding
    and, 'replace'-encoded, with ascii output.
    """
    fname = self.info['filename'] + '1.txt'
    self.build_tree_contents([(fname, 'ignored\n')])
    self.run_bzr(['ignore', fname])

    # Arguments are (expected, actual), as elsewhere in this file, so
    # failure messages label the two values correctly.
    txt = self.run_bzr_decode(['ignored'])
    self.assertEqual('%-50s %s\n' % (fname, fname), txt)

    txt = self.run_bzr_decode(['ignored'], encoding='ascii')
    # A separate name keeps the original unicode name intact.
    replaced = fname.encode('ascii', 'replace')
    self.assertEqual('%-50s %s\n' % (replaced, replaced), txt)