3
# Delete unwanted article types from the wikipedia article space.
4
# We won't be parsing these; they (mostly) don't contain any valid
5
# French-language sentences.
7
# Copyright (c) 2008, 2013 Linas Vepstas <linas@linas.org>
15
# Report how many files match one unwanted article-name pattern.
#
# Usage: count_matching PATTERN
#   PATTERN - a find(1) -name glob. Always pass it single-quoted so the
#             shell hands it to find unexpanded.
# Output: the wc(1) summary (lines, words, bytes) of the matching paths
#         under the current directory; the line count is the number of
#         matching paths (one path is printed per line).
count_matching() {
  find . -name "$1" -print | wc
}

# NOTE(review): these are the English namespace prefixes, while the file
# header talks about French-language text -- confirm that the dump being
# scrubbed really uses these names.
count_matching 'Category:*'
count_matching 'MediaWiki:*'
count_matching 'Help:*'
count_matching 'File:*'
count_matching 'Image:*'
count_matching 'Template:*'
count_matching 'Wikipedia:*'
# The single quotes already protect the embedded space. Do not add double
# quotes inside them: they would be literal characters of the pattern, and
# it would then only match names that start with a '"'.
count_matching 'List of *'
count_matching 'Lists of *'
27
# Must use "find" to accomplish this, since using "rm Category:*"
# leads to an overflow of the command line.
#
# Delete every file under the current directory whose name matches one
# unwanted article-name pattern, and report how long that took.
#
# Usage: delete_matching PATTERN
#   PATTERN - a find(1) -name glob. Always pass it single-quoted so the
#             shell hands it to find unexpanded.
# "-exec ... {} +" lets find pack as many paths as fit into each rm
# invocation, so the command line still cannot overflow, but rm is not
# forked once per file. The "--" keeps rm from treating a path as an option.
delete_matching() {
  time find . -name "$1" -exec rm -- {} +
}

# NOTE(review): these are the English namespace prefixes, while the file
# header talks about French-language text -- confirm that the dump being
# scrubbed really uses these names.
delete_matching 'Category:*'
delete_matching 'MediaWiki:*'
delete_matching 'Help:*'
# File: includes mp3's, ogg's, many different image types
delete_matching 'File:*'
delete_matching 'Image:*'
delete_matching 'Template:*'
delete_matching 'Wikipedia:*'
# The single quotes already protect the embedded space. Do not add double
# quotes inside them: they would be literal characters of the pattern, and
# it would then only match names that start with a '"'.
delete_matching 'List of *'
delete_matching 'Lists of *'