4
* A parser extension that adds two tags, <ref> and <references> for adding
7
* @addtogroup Extensions
9
* @link http://meta.wikimedia.org/wiki/Cite/Cite.php Documentation
10
* @link http://www.w3.org/TR/html4/struct/text.html#edef-CITE <cite> definition in HTML
11
* @link http://www.w3.org/TR/2005/WD-xhtml2-20050527/mod-text.html#edef_text_cite <cite> definition in XHTML 2.0
15
* @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
16
* @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
17
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
26
* Datastructure representing <ref> input, in the format of:
29
* 'user supplied' => array(
30
* 'text' => 'user supplied reference & key',
31
* 'count' => 1, // occurs twice
32
* 'number' => 1, // The first reference, we want
33
* // all occurrences of it to
34
* // use the same number
36
* 0 => 'Anonymous reference',
37
* 1 => 'Another anonymous reference',
38
* 'some key' => array(
39
* 'text' => 'this one occurs once'
48
* * PHP's datastructures are guaranteed to be returned in the
49
* order that things are inserted into them (unless you mess
51
* * User supplied keys can't be integers, therefore avoiding
52
* conflict with anonymous keys
59
* Count for user displayed output (ref[1], ref[2], ...)
64
var $mGroupCnt = array();
67
* Internal counter for anonymous references, separate from
68
* $mOutCnt because named references won't increment it,
69
* but will increment $mOutCnt
76
* The backlinks, in order, to pass as $3 to
77
* 'cite_references_link_many_format', defined in
78
* 'cite_references_link_many_format_backlink_labels'
90
* True when a <ref> or <references> tag is being processed.
91
* Used to avoid infinite recursion
106
/**#@+ @access private */
109
/**
 * Callback function for <ref>
 *
 * While another <ref> or <references> tag is already being processed
 * ($this->mInCite is set) the tag is not handled again: its source is
 * returned HTML-escaped instead, which avoids infinite recursion.
 *
 * @param string $str Input
 * @param array $argv Arguments
 * @param object $parser Parser instance, handed through to guardedRef()
 * @return string Output for the tag
 */
function ref( $str, $argv, $parser ) {
	wfLoadExtensionMessages( 'Cite' );

	// Nested tag: show it literally rather than recursing.
	if ( $this->mInCite ) {
		return htmlspecialchars( "<ref>$str</ref>" );
	}

	$this->mInCite = true;
	$ret = $this->guardedRef( $str, $argv, $parser );
	$this->mInCite = false;

	return $ret;
}
127
// Validate one <ref> tag and, when it is acceptable, record it via stack().
// Every rejected input returns the markup built by error() for the matching
// message key instead of a reference link.
// NOTE(review): this extract skips several original lines (the numbering
// jumps, e.g. 131 -> 134, 138 -> 143 and 158 -> 163), so some of the guarding
// conditions are not visible here — confirm against the full file before
// changing the control flow.
function guardedRef( $str, $argv, $parser, $default_group=CITE_DEFAULT_GROUP ) {
128
$this->mParser = $parser;
130
# The key here is the "name" attribute.
131
list($key,$group) = $this->refArg( $argv );
134
# <ref ...></ref>. This construct is invalid if
135
# it's a contentful ref, but OK if it's a named duplicate and should
136
# be equivalent <ref ... />, for compatibility with #tag.
138
return $this->error( 'cite_error_ref_no_input' );
143
// refArg() hands back a false key when it rejects the attributes: it returns
// array(false) when a group is given while $wgAllowCiteGroups is off, and
// array(false,false) on its other visible rejection path.
if( $key === false ) {
144
# TODO: Comment this case; what does this condition mean?
145
return $this->error( 'cite_error_ref_too_many_keys' );
148
if( $str === null and $key === null ) {
149
# Something like <ref />; this makes no sense.
150
return $this->error( 'cite_error_ref_no_key' );
153
// NOTE(review): $key may still be null here (only the case where $str is
// null as well was rejected above), so preg_match() can receive a null
// subject — newer PHP versions deprecate that; confirm whether a cast is wanted.
if( preg_match( '/^[0-9]+$/', $key ) ) {
154
# Numeric names mess up the resulting id's, potentially produ-
155
# cing duplicate id's in the XHTML. The Right Thing To Do
156
# would be to mangle them, but it's not really high-priority
157
# (and would produce weird id's anyway).
158
return $this->error( 'cite_error_ref_numeric_key' );
163
// The call below strips paired tags and HTML comments from $str; the test
// applied to its result is not visible in this extract.
preg_replace( '#<([^ ]+?).*?>.*?</\\1 *>|<!--.*?-->#', '', $str )
165
# (bug 6199) This most likely implies that someone left off the
166
# closing </ref> tag, which will cause the entire article to be
167
# eaten up until the next <ref>. So we bail out early instead.
168
# The fancy regex above first tries chopping out anything that
169
# looks like a comment or SGML tag, which is a crude way to avoid
170
# false alarms for <nowiki>, <pre>, etc.
172
# Possible improvement: print the warning, followed by the contents
173
# of the <ref> tag. This way no part of the article will be eaten
175
return $this->error( 'cite_error_included_ref' );
178
# Split these into groups.
179
if( $group === null ) {
180
$group = $default_group;
183
if( is_string( $key ) or is_string( $str ) ) {
184
# We don't care about the content: if the key exists, the ref
185
# is presumptively valid. Either it stores a new ref, or re-
186
# fers to an existing one. If it refers to a nonexistent ref,
187
# we'll figure that out later. Likewise it's definitely valid
188
# if there's any content, regardless of key.
189
return $this->stack( $str, $key, $group );
192
# Not clear how we could get here, but something is probably
193
# wrong with the types. Let's fail fast.
194
$this->croak( 'cite_error_key_str_invalid', serialize( "$str; $key" ) );
198
* Parse the arguments to the <ref> tag
202
* @param array $argv The argument vector
203
* @return mixed false on invalid input, a string on valid
204
* input and null on no input
206
// Read the 'name' and 'group' attributes of a <ref> tag.
// Return shapes visible here: array($key,$group), array(false,false),
// array(null,$group) and — on one path — the one-element array(false).
// NOTE(review): several original lines are absent from this extract (the
// numbering jumps 208 -> 213, 219 -> 222 and 226 -> 231); the initial values
// of $key/$group and the conditions choosing between the returns are not shown.
function refArg( $argv ) {
207
global $wgAllowCiteGroups;
208
$cnt = count( $argv );
213
// There should only be one key and one group
215
else if ( $cnt >= 1 ) {
216
if ( isset( $argv['name'] ) ) {
218
// The name is passed through Sanitizer::escapeId() before it is used as a key.
$key = Sanitizer::escapeId( $argv['name'], 'noninitial' );
219
unset( $argv['name']);
222
if ( isset( $argv['group'] ) ){
223
// NOTE(review): this returns a ONE-element array, while guardedRef()
// destructures the result with list($key,$group) — offset 1 does not exist
// on this path. array(false,false), as returned further down, would match
// the other return shapes.
if (! $wgAllowCiteGroups ) return array(false); //remove when groups are fully tested.
225
$group = $argv['group'];
226
unset( $argv['group']);
231
return array ($key,$group);
234
return array(false,false);
238
return array(null,$group);
242
* Populate $this->mRefs based on input and arguments to <ref>
244
* @param string $str Input from the <ref> tag
245
* @param mixed $key Argument to the <ref> tag as returned by $this->refArg()
248
// Record one reference in $this->mRefs[$group] and return the output of
// linkRef() for it.
//  - $key === null: an anonymous entry is appended with 'count' => -1.
//  - string $key: the first use creates the entry; later uses bump its 'count'
//    and supply the text if none was stored before.
//  - anything else ends in croak().
// NOTE(review): the optional $key is declared before the required $group;
// PHP 8 deprecates that ordering. The signature is relied on by guardedRef(),
// so any change needs to stay call-compatible.
// NOTE(review): parts of the array literal and of the linkRef() calls are
// absent from this extract (numbering gaps 264 -> 267, 268 -> 275, 283 -> 289).
function stack( $str, $key = null, $group ) {
249
// Lazily create the per-group containers.
if (! isset($this->mRefs[$group]))
250
$this->mRefs[$group]=array();
251
if (! isset($this->mGroupCnt[$group]))
252
$this->mGroupCnt[$group]=0;
254
if ( $key === null ) {
256
//$this->mRefs[$group][] = $str;
257
// Anonymous reference: 'key' takes the next value of the $mOutCnt counter.
$this->mRefs[$group][] = array('count'=>-1, 'text'=>$str, 'key'=>++$this->mOutCnt);
259
// $mInCnt is post-incremented, so the current value identifies this entry.
return $this->linkRef( $group, $this->mInCnt++ );
260
} else if ( is_string( $key ) ) {
262
if ( ! isset( $this->mRefs[$group][$key] ) || ! is_array( $this->mRefs[$group][$key] ) ) {
264
$this->mRefs[$group][$key] = array(
267
'key' => ++$this->mOutCnt,
268
'number' => ++$this->mGroupCnt[$group]
275
$this->mRefs[$group][$key]['key']."-".$this->mRefs[$group][$key]['count'],
276
$this->mRefs[$group][$key]['number'],
277
"-".$this->mRefs[$group][$key]['key']
280
// We've been here before
281
if ( $this->mRefs[$group][$key]['text'] === null && $str !== '' ) {
282
// If no text found before, use this text
283
$this->mRefs[$group][$key]['text'] = $str;
289
// 'count' is pre-incremented here, so each reuse gets a distinct suffix.
$this->mRefs[$group][$key]['key']."-".++$this->mRefs[$group][$key]['count'],
290
$this->mRefs[$group][$key]['number'],
291
"-".$this->mRefs[$group][$key]['key']
296
$this->croak( 'cite_error_stack_invalid_input', serialize( array( $key, $str ) ) );
300
/**
 * Callback function for <references>
 *
 * While another <ref> or <references> tag is already being processed
 * ($this->mInCite is set) the tag is not handled again: its source is
 * returned HTML-escaped instead, which avoids infinite recursion.
 *
 * @param string $str Input
 * @param array $argv Arguments
 * @param object $parser Parser instance, handed through to guardedReferences()
 * @return string Output for the tag
 */
function references( $str, $argv, $parser ) {
	wfLoadExtensionMessages( 'Cite' );

	if ( $this->mInCite ) {
		// Nested tag: echo it back literally, keeping the self-closing form
		// when there was no content at all.
		if ( is_null( $str ) ) {
			return htmlspecialchars( "<references/>" );
		}
		return htmlspecialchars( "<references>$str</references>" );
	}

	$this->mInCite = true;
	$ret = $this->guardedReferences( $str, $argv, $parser );
	$this->mInCite = false;

	return $ret;
}
322
/**
 * Handle a <references> tag once the recursion guard in references() has
 * been passed.
 *
 * @param string $str Tag content; anything but an empty value is rejected
 * @param array $argv Tag attributes; only 'group' is consumed, and only
 *                    while $wgAllowCiteGroups is enabled
 * @param object $parser Parser instance, stored in $this->mParser
 * @param string $group Group to list when no 'group' attribute is consumed
 * @return string Output of referencesFormat(), or an error message
 */
function guardedReferences( $str, $argv, $parser, $group = CITE_DEFAULT_GROUP ) {
	global $wgAllowCiteGroups;

	$this->mParser = $parser;

	if ( strval( $str ) !== '' ) {
		return $this->error( 'cite_error_references_invalid_input' );
	}

	if ( isset( $argv['group'] ) and $wgAllowCiteGroups ) {
		$group = $argv['group'];
		unset( $argv['group'] );
	}

	// Whatever is still left in $argv is an attribute this tag does not
	// accept; the message differs depending on whether groups are enabled.
	if ( count( $argv ) ) {
		return $this->error(
			$wgAllowCiteGroups
				? 'cite_error_references_invalid_parameters_group'
				: 'cite_error_references_invalid_parameters'
		);
	}

	return $this->referencesFormat( $group );
}
346
* Make output to be returned from the references() function
348
* @return string XHTML ready for output
350
// Build the output for one group's reference list: format every stored entry,
// wrap the result in the prefix/suffix messages, parse it (optionally going
// through the $wgMemc cache when $wgCiteCacheReferences is on) and finally
// drop the group's stored refs and counter so the group can be reused.
// NOTE(review): several original lines are absent from this extract (numbering
// gaps after 351, 355, 365, 375, 386, 389, 391 and 398): the early-return
// value, the initialisation of $ent, the declaration of $wgMemc, the else
// branch around the unserialise call and the final return are not visible.
function referencesFormat($group) {
351
// Nothing recorded at all, or nothing for this group.
if (( count( $this->mRefs ) == 0 ) or (empty( $this->mRefs[$group] ) ))
354
wfProfileIn( __METHOD__ );
355
wfProfileIn( __METHOD__ .'-entries' );
357
foreach ( $this->mRefs[$group] as $k => $v )
358
$ent[] = $this->referencesFormatEntry( $k, $v );
360
$prefix = wfMsgForContentNoTrans( 'cite_references_prefix' );
361
$suffix = wfMsgForContentNoTrans( 'cite_references_suffix' );
362
$content = implode( "\n", $ent );
364
// Let's try to cache it.
365
$parserInput = $prefix . $content . $suffix;
367
// The cache key combines a hash of the exact text to parse with the article id.
// It is computed whether or not caching is enabled.
$cacheKey = wfMemcKey( 'citeref', md5($parserInput), $this->mParser->Title()->getArticleID() );
369
wfProfileOut( __METHOD__ .'-entries' );
371
global $wgCiteCacheReferences;
372
if ( $wgCiteCacheReferences ) {
373
wfProfileIn( __METHOD__.'-cache-get' );
374
$data = $wgMemc->get( $cacheKey );
375
wfProfileOut( __METHOD__.'-cache-get' );
378
// $data is only assigned inside the caching branch above; empty() also
// covers the case where it was never set.
if ( empty($data) ) {
379
wfProfileIn( __METHOD__ .'-parse' );
381
// Live hack: parse() adds two newlines on WM, can't reproduce it locally -ævar
382
$ret = rtrim( $this->parse( $parserInput ), "\n" );
384
if ( $wgCiteCacheReferences ) {
385
$serData = $this->mParser->serialiseHalfParsedText( $ret );
386
// 86400 is the expiry passed to the cache — presumably seconds (one day); TODO confirm.
$wgMemc->set( $cacheKey, $serData, 86400 );
389
wfProfileOut( __METHOD__ .'-parse' );
391
$ret = $this->mParser->unserialiseHalfParsedText( $data );
394
wfProfileOut( __METHOD__ );
396
//done, clean up so we can reuse the group
397
unset ($this->mRefs[$group]);
398
unset($this->mGroupCnt[$group]);
404
* Format a single entry for the referencesFormat() function
406
* @param string $key The key of the reference
407
* @param mixed $val The value of the reference, string for anonymous
408
* references, array for user-supplied
409
* @return string Wikitext
411
// Format one stored reference as wikitext for the reference list.
// Branches visible here, in order:
//   1. $val is not an array            -> 'cite_references_link_one'
//   2. $val['text'] is empty           -> same message, with the "no text" error as content
//   3. $val['count'] < 0 (anonymous)   -> same message, keyed on $val['key']
//   4. $val['count'] === 0             -> same message, keyed on "$key-$val[key]"
//   5. otherwise (reused named ref)    -> one backlink per use, joined by
//      listToText(), inside 'cite_references_link_many'
// NOTE(review): the `return` keywords and some message arguments fall on lines
// that are absent from this extract (numbering gaps such as 413 -> 415,
// 418 -> 421, 434 -> 436 and 468 -> 470) — check the full file before editing.
function referencesFormatEntry( $key, $val ) {
412
// Anonymous reference
413
if ( ! is_array( $val ) )
415
wfMsgForContentNoTrans(
416
'cite_references_link_one',
417
$this->referencesKey( $key ),
418
$this->refKey( $key ),
421
// Loose comparison: a null text counts as empty here too.
else if ($val['text']=='') return
422
wfMsgForContentNoTrans(
423
'cite_references_link_one',
424
$this->referencesKey( $key ),
425
$this->refKey( $key, $val['count'] ),
426
$this->error( 'cite_error_references_no_text', $key )
428
// stack() stores anonymous references with 'count' => -1.
if ( $val['count'] < 0 )
430
wfMsgForContentNoTrans(
431
'cite_references_link_one',
432
$this->referencesKey( $val['key'] ),
433
#$this->refKey( $val['key'], $val['count'] ),
434
$this->refKey( $val['key'] ),
436
( $val['text'] != '' ? $val['text'] : $this->error( 'cite_error_references_no_text', $key ) )
438
// Standalone named reference, I want to format this like an
439
// anonymous reference because displaying "1. 1.1 Ref text" is
440
// overkill and users frequently use named references when they
441
// don't need them for convenience
442
else if ( $val['count'] === 0 )
444
wfMsgForContentNoTrans(
445
'cite_references_link_one',
446
$this->referencesKey( $key ."-" . $val['key'] ),
447
#$this->refKey( $key, $val['count'] ),
448
$this->refKey( $key, $val['key']."-".$val['count'] ),
449
( $val['text'] != '' ? $val['text'] : $this->error( 'cite_error_references_no_text', $key ) )
451
// Named references with >1 occurrences
454
//for group handling, we have an extra key here.
455
// 'count' is zero-based, hence the inclusive upper bound.
for ( $i = 0; $i <= $val['count']; ++$i ) {
456
$links[] = wfMsgForContentNoTrans(
457
'cite_references_link_many_format',
458
$this->refKey( $key, $val['key']."-$i" ),
459
$this->referencesFormatEntryNumericBacklinkLabel( $val['number'], $i, $val['count'] ),
460
$this->referencesFormatEntryAlternateBacklinkLabel( $i )
464
$list = $this->listToText( $links );
467
wfMsgForContentNoTrans( 'cite_references_link_many',
468
$this->referencesKey( $key ."-" . $val['key'] ),
470
( $val['text'] != '' ? $val['text'] : $this->error( 'cite_error_references_no_text', $key ) )
476
/**
 * Generate a numeric backlink given a base number and an
 * offset, e.g. $base = 1, $offset = 2; = 1.2
 * Since bug #5525, it correctly does 1.9 -> 1.10 as well as 1.099 -> 1.100
 *
 * The offset is zero-padded to the number of digits in $max, and the
 * combined "base.offset" string is passed through the content language's
 * formatNum().
 *
 * @param int $base The base
 * @param int $offset The offset
 * @param int $max Maximum value expected.
 * @return string
 */
function referencesFormatEntryNumericBacklinkLabel( $base, $offset, $max ) {
	global $wgContLang;

	// Pad width = number of digits of the largest offset expected.
	$width = strlen( $max );
	$label = sprintf( "%s.%0{$width}s", $base, $offset );

	return $wgContLang->formatNum( $label );
}
497
/**
 * Generate a custom format backlink given an offset, e.g.
 * $offset = 2; = c if $this->mBacklinkLabels = array( 'a',
 * 'b', 'c', ...). Return an error if no label exists at that offset.
 *
 * The label list is generated on first use via genBacklinkLabels().
 *
 * @param int $offset The offset
 * @return string The label, or an error message
 */
function referencesFormatEntryAlternateBacklinkLabel( $offset ) {
	if ( !isset( $this->mBacklinkLabels ) ) {
		$this->genBacklinkLabels();
	}

	if ( isset( $this->mBacklinkLabels[$offset] ) ) {
		return $this->mBacklinkLabels[$offset];
	}

	// More uses of the reference than labels available.
	return $this->error( 'cite_error_references_no_backlink_label' );
}
519
* Return an id for use in wikitext output based on a key and
520
* optionally the number of it, used in <references>, not <ref>
521
* (since otherwise it would link to itself)
525
* @param string $key The key
526
* @param int $num The number of the key
527
* @return string A key for use in wikitext
529
// Build an id string from $key (and optionally $num), wrapped in the
// 'cite_reference_link_prefix' / 'cite_reference_link_suffix' messages.
// NOTE(review): the original numbering jumps from 531 to 533 right before the
// key-with-num assignment, so a line — possibly a condition on $num — is not
// visible here. Confirm whether that assignment is unconditional.
function refKey( $key, $num = null ) {
530
$prefix = wfMsgForContent( 'cite_reference_link_prefix' );
531
$suffix = wfMsgForContent( 'cite_reference_link_suffix' );
533
$key = wfMsgForContentNoTrans( 'cite_reference_link_key_with_num', $key, $num );
535
return $prefix . $key . $suffix;
539
* Return an id for use in wikitext output based on a key and
540
* optionally the number of it, used in <ref>, not <references>
541
* (since otherwise it would link to itself)
545
* @param string $key The key
546
* @param int $num The number of the key
547
* @return string A key for use in wikitext
549
// Build an id string from $key (and optionally $num), wrapped in the
// 'cite_references_link_prefix' / 'cite_references_link_suffix' messages.
// The key-with-num message is the same one refKey() uses; only the
// prefix/suffix messages differ.
// NOTE(review): the original numbering jumps from 551 to 553 right before the
// key-with-num assignment, so a line — possibly a condition on $num — is not
// visible here. Confirm whether that assignment is unconditional.
function referencesKey( $key, $num = null ) {
550
$prefix = wfMsgForContent( 'cite_references_link_prefix' );
551
$suffix = wfMsgForContent( 'cite_references_link_suffix' );
553
$key = wfMsgForContentNoTrans( 'cite_reference_link_key_with_num', $key, $num );
555
return $prefix . $key . $suffix;
559
* Generate a link (<sup ...) for the <ref> element from a key
560
* and return XHTML ready for output
562
* @param string $key The key for the link
563
* @param int $count The index of the key, used for distinguishing
564
* multiple occurrences of the same key
565
* @param int $label The label to use for the link, I want to
566
* use the same label for all occurrences of
567
* the same named reference.
570
// Produce the in-text link for one <ref> from the 'cite_reference_link'
// message, given the id of the ref itself, the id of its list entry and the
// visible label.
// NOTE(review): the lines between the signature and the message call
// (original 571-573) and after the last argument are absent from this
// extract; $wgContLang is used below but its declaration is not visible, and
// what is done with the message result cannot be seen here.
function linkRef( $group, $key, $count = null, $label = null, $subkey = '' ) {
574
wfMsgForContentNoTrans(
575
'cite_reference_link',
576
$this->refKey( $key, $count ),
577
$this->referencesKey( $key . $subkey ),
578
// Label: the group name plus a space unless it is the default group, followed
// by the number. Without an explicit $label the group's counter is
// pre-incremented and used.
// NOTE(review): the group test is a loose ==; confirm that is intended for
// group names that compare equal to the default only after type juggling.
(($group == CITE_DEFAULT_GROUP)?'':"$group ").$wgContLang->formatNum( is_null( $label ) ? ++$this->mGroupCnt[$group] : $label )
584
/**
 * This does approximately the same thing as
 * Language::listToText() but due to this being used for a
 * slightly different purpose (people might not want , as the
 * first separator and not 'and' as the second, and this has to
 * use messages from the content language) I'm rolling my own.
 *
 * All items but the last are joined with the
 * 'cite_references_link_many_sep' message; the last one is attached with
 * 'cite_references_link_many_and'.
 *
 * @param array $arr The array to format (zero-based list)
 * @return string Empty string for an empty list
 */
function listToText( $arr ) {
	$cnt = count( $arr );

	// An empty list has no last element to attach; without this guard the
	// code below would read $arr[-1].
	if ( $cnt == 0 ) {
		return '';
	}

	$sep = wfMsgForContentNoTrans( 'cite_references_link_many_sep' );
	$and = wfMsgForContentNoTrans( 'cite_references_link_many_and' );

	if ( $cnt == 1 ) {
		// Enforce always returning a string
		return (string)$arr[0];
	}

	$t = array_slice( $arr, 0, $cnt - 1 );
	return implode( $sep, $t ) . $and . $arr[$cnt - 1];
}
611
* Parse a given fragment and fix up Tidy's trail of blood on
614
* @param string $in The text to parse
615
* @return string The parsed text
617
// Parse a wikitext fragment with the stored parser.
// When the parser object offers recursiveTagParse() that result is returned
// directly; otherwise the parser's parse() is called and the text of its
// result is passed through fixTidy().
// NOTE(review): the first argument and the trailing arguments of the fallback
// parse() call sit on lines that are absent from this extract (numbering gaps
// 623 -> 625 and 631 -> 635), as does the end of the comment below.
function parse( $in ) {
618
if ( method_exists( $this->mParser, 'recursiveTagParse' ) ) {
620
return $this->mParser->recursiveTagParse( $in );
623
$ret = $this->mParser->parse(
625
$this->mParser->mTitle,
626
$this->mParser->mOptions,
627
// Avoid whitespace buildup
629
// Important, otherwise $this->clearState()
630
// would get run every time <ref> or
631
// <references> is called, fucking the whole
635
$text = $ret->getText();
637
return $this->fixTidy( $text );
642
* Tidy treats all input as a block, it will e.g. wrap most
643
* input in <p> if it isn't already, fix that and return the fixed text
647
* @param string $text The text to fix
648
* @return string The fixed text
650
// Undo paragraph wrapping around a parsed fragment: strip a leading <p>,
// every </p> together with the whitespace around it, and one trailing newline.
// NOTE(review): original lines 651-655 and everything after 658 are absent
// from this extract, so any condition guarding these replacements and the
// return statement are not visible here.
function fixTidy( $text ) {
656
// Opening <p> at the very start, plus whitespace following it.
$text = preg_replace( '~^<p>\s*~', '', $text );
657
// Not anchored: removes each closing </p> and its surrounding whitespace.
$text = preg_replace( '~\s*</p>\s*~', '', $text );
658
$text = preg_replace( '~\n$~', '', $text );
665
/**
 * Generate the labels to pass to the
 * 'cite_references_link_many_format' message, the format is an
 * arbitrary number of tokens separated by [\t\n ]
 *
 * Runs of separators, and separators at either end of the message, do not
 * produce empty labels. The result is stored in $this->mBacklinkLabels.
 */
function genBacklinkLabels() {
	wfProfileIn( __METHOD__ );
	$text = wfMsgForContentNoTrans( 'cite_references_link_many_format_backlink_labels' );

	// "+" collapses consecutive separators; PREG_SPLIT_NO_EMPTY drops the
	// empty pieces a leading or trailing separator would otherwise create.
	$labels = preg_split( '#[\n\t ]+#', $text, -1, PREG_SPLIT_NO_EMPTY );

	// preg_split() signals failure with false; keep the property an array so
	// that later offset lookups simply miss.
	$this->mBacklinkLabels = ( $labels === false ) ? array() : $labels;
	wfProfileOut( __METHOD__ );
}
677
* Gets run when Parser::clearState() gets run, since we don't
678
* want the counts to transcend pages and other instances
680
// Reset the per-page bookkeeping; registered by setHooks() on the
// 'ParserClearState' hook.
// NOTE(review): original lines 682-685, 687-688 and everything after 689 are
// absent from this extract — the check announced by the comment below, any
// other counters being reset and the return value are not visible here.
function clearState() {
681
# Don't clear state when we're in the middle of parsing
686
$this->mGroupCnt = array();
689
$this->mRefs = array();
695
* Called at the end of page processing to append an error if refs were
696
* used without a references tag.
698
// Hook handler (registered by setHooks() on 'ParserBeforeTidy'): for every
// group that still holds refs, append an error message to $text. Section
// previews are skipped.
// NOTE(review): the else keyword between the two appends and the final
// return are on lines absent from this extract (numbering gap 705 -> 707 and
// after 707).
function checkRefsNoReferences(&$parser, &$text){
699
if ( $parser->getOptions()->getIsSectionPreview() ) return true;
701
foreach ( $this->mRefs as $group => $refs ) {
702
if ( count( $refs ) == 0 ) continue;
704
// NOTE(review): loose ==; array keys that look like integers arrive here as
// ints — confirm the comparison with CITE_DEFAULT_GROUP behaves as intended
// for such group names.
if ( $group == CITE_DEFAULT_GROUP ) {
705
$text .= $this->error( 'cite_error_refs_without_references' );
707
// The group name is escaped before being placed into the message.
$text .= $this->error( 'cite_error_group_refs_without_references', htmlspecialchars( $group ) );
714
/**
 * Initialize the parser hooks
 *
 * Registers the <ref> and <references> tag callbacks on $wgParser and
 * attaches clearState() / checkRefsNoReferences() to the
 * 'ParserClearState' and 'ParserBeforeTidy' hooks.
 */
function setHooks() {
	global $wgParser, $wgHooks;

	// Each tag is handled by the method of the same name.
	foreach ( array( 'ref', 'references' ) as $tag ) {
		$wgParser->setHook( $tag, array( &$this, $tag ) );
	}

	$wgHooks['ParserClearState'][] = array( &$this, 'clearState' );
	$wgHooks['ParserBeforeTidy'][] = array( &$this, 'checkRefsNoReferences' );
}
727
* Return an error message based on an error ID
729
* @param string $key Message name for the error
730
* @param string $param Parameter to pass to the message
731
* @return string XHTML ready for output
733
// Build the error markup for message $key: the message (with the optional
// $param) is embedded in the 'cite_error' message, which in turn follows an
// opening <strong class="error"> tag.
// NOTE(review): original lines 737-738 and everything after 740 are absent
// from this extract, so the start of the return expression, anything wrapping
// it, and the closing tag are not visible here.
function error( $key, $param=null ) {
734
# We rely on the fact that PHP is okay with passing unused argu-
735
# ments to functions. If $1 is not used in the message, wfMsg will
736
# just ignore the extra parameter.
739
'<strong class="error">' .
740
wfMsgNoTrans( 'cite_error', wfMsgNoTrans( $key, $param ) ) .
746
/**
 * Die with a backtrace when the code reaches a state it cannot handle
 * (used on the fall-through paths of guardedRef() and stack()).
 *
 * @param string $error Message name for the error, as accepted by error()
 * @param string $data Serialized error data
 */
function croak( $error, $data ) {
	$formattedError = $this->error( $error );
	$message = wfMsgForContent( 'cite_croak', $formattedError, $data );

	wfDebugDieBacktrace( $message );
}