1
package Glib::ParseXSDoc;
7
use Storable qw(store_fd);
11
our @ISA = qw(Exporter);
16
our $VERSION = '1.003';
18
our $NOISY = $ENV{NOISYDOC};
22
Glib::ParseXSDoc - Parse POD and XSub declarations from XS files.
26
This is the heart of an automatic API reference documentation system for
27
XS-based Perl modules. FIXME more info here!!
29
FIXME document recognized POD directives and the output data structures
35
=item xsdocparse (@filenames)
37
Parse xs files for xsub signatures and pod. Writes to standard output a
38
data structure suitable for eval'ing in another Perl script, describing
39
all the stuff found. The output contains three variables:
43
=item $xspods = ARRAYREF
45
array of pods found in the verbatim C portion of the XS file, listed in the
46
order found. These are assumed to pertain to the XS/C api, not the Perl api.
47
Any C<=for apidoc> paragraphs following an C<=object> paragraphs in the
48
verbatim sections are stripped (as are the C<=object> paragraphs), and will
49
appear instead in C<< $data->{$package}{pods} >>.
53
big hash keyed by package name (as found in the MODULE line), containing under
54
each key a hash with all the xsubs and pods in that package, in the order
55
found. Packages are consolidated across multiple files.
59
FYI, this creates a new parser and calls C<parse_file> on it for each
60
input filename; then calls C<swizzle_pods> to ensure that any
61
C<=for apidoc name> pods are matched up with their target xsubs; and
62
finally calls Data::Dumper to write the data to stdout. So, if you want
63
to get finer control over how the output is created, or keep all the data
64
in-process, now you know how. :-)
71
my $parser = Glib::ParseXSDoc->new;
72
foreach my $filename (@filenames) {
73
$parser->parse_file ($filename);
75
$parser->canonicalize_xsubs;
76
$parser->swizzle_pods;
77
$parser->preprocess_pods;
78
$parser->clean_out_empty_pods;
80
print "# THIS FILE IS AUTOMATICALLY GENERATED - ANY CHANGES WILL BE LOST\n";
81
print "# generated by $0 ".scalar (localtime)."\n";
82
print "# input files:\n";
83
map { print "# $_\n" } @filenames;
86
# Data::Dumper converts the whole output to a string, and consequently
87
# uses an obscene amount of ram on Gtk2's nearly 200 xs files. Use
88
# Storable unless the user really really wants to force us to fall back
89
# to Data::Dumper. Storable doesn't seem to work well on win32, so
90
# always use Data::Dumper there.
91
my $use_dd = $ENV{FORCE_DATA_DUMPER} || $^O eq 'MSWin32';
93
$Data::Dumper::Purity = 1;
94
print Data::Dumper->Dump([$parser->{xspods}, $parser->{data}],
98
print "use Storable qw(fd_retrieve);\n";
99
print "\$xspods = fd_retrieve \\*DATA;\n";
100
print "\$data = fd_retrieve \\*DATA;\n";
105
# NOTE: don't assume STDOUT, because other code may have select'd
106
# a different file handle.
107
store_fd $parser->{xspods}, select;
108
store_fd $parser->{data}, select;
111
return [ keys %{$parser->{data}} ];
119
# =========================================================================
125
=item $Glib::ParseXSDoc::verbose
127
If true, this causes the parser to be verbose.
131
our $verbose = undef;
134
=item $parser = Glib::ParseXSDoc->new
136
Create a new xsub parser.
148
xspods => [], #pods for the exported xs interface, e.g. the C stuff
149
data => {}, # all the shizzle, by package name
153
=item string = $parser->package
155
Get the current package name. Falls back to the module name. Will be undef
156
if the parser hasn't reached the first MODULE line.
162
return ($self->{package} || $self->{module})
165
=item HASHREF = $parser->pkgdata
167
The data hash corresponding to the current package, honoring the most recently
168
encountered C<=for object> directive. Ensures that it exists.
169
Returns a reference to the member of the main data structure, so modifications
170
are permanent and useful.
176
my $pkg = $self->{object} || $self->package;
177
my $pkgdata = $self->{data}{$pkg};
178
if (not defined $pkgdata) {
180
$self->{data}{$pkg} = $pkgdata;
186
=item $parser->parse_file (filename)
188
Parse one xs file. Stores all the collected data in I<$parser>'s internal
195
my $filename = shift;
198
open IN, $filename or die "can't open $filename: $!\n";
199
print STDERR "scanning $filename\n" if $verbose;
200
$self->{filehandle} = \*IN;
201
$self->{filename} = $filename;
203
# there was once a single state machine to parse an entire
204
# file, but it turned into a bi-level state machine because
205
# of the two-part nature of XS files. that's silly, so i've
206
# broken it into two loops: the part that scans up to the
207
# first MODULE line, and the part that scans the rest of the
210
my $lastpod = undef; # most recently-read pod (for next xsub)
211
my @thesepackages = (); # packages seen in this file
213
# In the verbatim C portion of the file:
214
# seek the first MODULE line *outside* comments.
215
# collect any pod we encounter; only certain ones are
216
# precious to us... my... preciousssss... ahem.
217
$self->{module} = undef;
218
$self->{package} = undef;
219
$self->{prefix} = undef;
220
$self->{object} = undef;
223
# in the verbatim C section before the first MODULE line,
224
# we need to be on the lookout for a few things...
225
# we need the first MODULE line, of course...
226
if ($self->is_module_line ($_)) {
227
last; # go to the next state machine.
229
# mostly we want pods.
231
my $thispod = $self->slurp_pod_paragraph ($_);
232
# we're only interested in certain pod directives here.
233
if (/^=for\s+(apidoc|object)\b/) {
235
warn "$filename:".($.-@{$thispod->{lines}}+1).":"
236
. " =for $which found before "
237
. "MODULE directive\n";
239
push @{ $self->{xspods} }, $thispod;
241
## # we also need to track whether we're in a C comment, because
242
## # MODULE directives are ignore in multiline comments.
243
## } elsif (m{/\*}) {
244
## # there was an open comment marker on this line.
245
## # see if it's alone.
248
## # look for the end...
255
# preprocessor conditionals
261
# we're seeking xsubs and pods to document the Perl interface.
263
if ($self->is_module_line ($_)) {
264
# xsubs cannot steal pods across MODULE lines.
268
# ignore blank lines; but a blank line after a pod
269
# means it can't be associated with an xsub.
272
} elsif (/^\s*#\s*(if|ifdef|ifndef)\s*(\s.*)$/) {
273
#warn "conditional $1 $2\n";
275
#print Dumper(\@cond);
276
} elsif (/^\s*#\s*else\s*(\s.*)?$/) {
277
#warn "else $cond[-1]\n";
278
if (exists $cond[$#cond]) {
279
$cond[$#cond] = '!' . $cond[$#cond];
281
} elsif (/^\s*#\s*endif\s*(\s.*)?$/) {
282
#warn "endif $cond[-1]\n";
285
# ignore comments. we've already determined that
286
# this isn't a preprocessor directive (or at least
287
# not one in which we're interested).
289
} elsif (/^(BOOT|PROTOTYPES)/) {
290
# ignore keyword lines in which we aren't interested
293
# slurp in pod, up to and including the next =cut.
294
# put it in $lastpod so that the next-discovered
296
$lastpod = $self->slurp_pod_paragraph ($_);
298
# we're interested in certain pod directives at
300
if (/^=for\s+object(?:\s+([\w\:]*))?(.*)/) {
301
$self->{object} = $1;
303
$self->pkgdata->{blurb} = $2;
304
$self->pkgdata->{blurb} =~ s/^\s*-\s*//;
306
# If the line has the special form
307
# "=for object Foo (Bar)", we take this
308
# to mean: document the object Bar in
310
if ($self->pkgdata->{blurb} =~ s/\s*\((.*)\)//)
312
print STDERR "Documenting object $1 in file "
313
.$self->{object}."\n";
314
$self->pkgdata->{object} = $1;
315
if ('' eq $self->pkgdata->{blurb})
317
delete $self->pkgdata->{blurb};
321
} elsif (/^=for\s+(enum|flags)\s+([\w:]+)/) {
322
push @{ $self->pkgdata->{enums} }, {
327
# claim this pod now!
329
} elsif (/^=for\s+see_also\s+(.+)$/) {
330
push @{ $self->pkgdata->{see_alsos} }, $1;
331
# claim this pod now!
333
} elsif (/^=for\s+deprecated_by\s+([\w:]+)$/) {
334
push @{ $self->pkgdata->{deprecated_bys} }, $1;
337
push @{ $self->pkgdata->{pods} }, $lastpod
341
# there's something at the beginning of the line!
342
# we've ruled out everything else, so this must be
343
# an xsub. slurp in everything up to the next
344
# blank line (or end of file). i know that's not
345
# *really* an entire XSUB body, but we don't care
346
# -- we only need the return value, name, arg types,
347
# and body type, and there aren't supposed to be
348
# blank lines in all of that.
355
my $xsub = $self->parse_xsub (\@thisxsub);
357
# aha! we'll lay claim to that...
358
pop @{ $self->pkgdata->{pods} };
359
$xsub->{pod} = $lastpod;
362
$xsub->{preprocessor_conditionals} = [ @cond ];
363
push @{ $self->pkgdata->{xsubs} }, $xsub;
366
# this is probably xsub function body, comment, or
367
# some other stuff we don't care about.
371
# that's it for this file...
373
delete $self->{filehandle};
374
delete $self->{filename};
378
=item $parser->swizzle_pods
380
Match C<=for apidoc> pods to xsubs.
386
foreach my $package (keys %{$self->{data}}) {
387
my $pkgdata = $self->{data}{$package};
388
next unless $pkgdata->{pods};
389
next unless $pkgdata->{xsubs};
390
my $pods = $pkgdata->{pods};
391
for (my $i = @$pods-1 ; $i >= 0 ; $i--) {
392
my $firstline = $pods->[$i]{lines}[0];
393
next unless $firstline =~ /=for\s+apidoc\s+([:\w]+)\s*/;
395
foreach my $xsub (@{ $pkgdata->{xsubs} }) {
396
if ($name eq $xsub->{symname}) {
397
$xsub->{pod} = $pods->[$i];
398
splice @$pods, $i, 1;
407
=item $parser->preprocess_pods
409
Honor the C<__hide__> and C<__function__> directives in C<=for apidoc> lines.
411
We look for the strings anywhere, but you'll typically have it at the end of
414
=for apidoc symname __hide__ for detached blocks
415
=for apidoc __hide__ for attached blocks
417
=for apidoc symname __function__ for functions rather than methods
418
=for apidoc __function__ for functions rather than methods
422
sub preprocess_pods {
424
foreach my $package (keys %{$self->{data}}) {
425
my $pkgdata = $self->{data}{$package};
427
foreach (@{$pkgdata->{pods}})
429
my $firstline = $_->{lines}[0];
431
$_->{position} = $1 if ($firstline =~ /=for\s+position\s+(\w+)/);
435
next unless $pkgdata->{xsubs};
437
# look for magic keywords in the =for apidoc
438
foreach (@{$pkgdata->{xsubs}})
440
my $firstline = $_->{pod}{lines}[0];
442
$_->{function} = ($firstline =~ /__function__/);
443
$_->{hidden} = ($firstline =~ /__hide__/);
444
$_->{deprecated} = ($firstline =~ /__deprecated__/);
445
$_->{gerror} = ($firstline =~ /__gerror__/);
452
# ===============================================================
454
=item bool = $parser->is_module_line ($line)
456
Analyze I<$line> to see if it contains an XS MODULE directive. If so, returns
457
true after setting the I<$parser>'s I<module>, I<package>, and I<prefix>
465
if ($l =~ /^MODULE\s*=\s*([:\w]+)
466
(?:\s+PACKAGE\s*=\s*([:\w]+)
467
(?:\s+PREFIX\s*=\s*([:\w]+))?)?
469
$self->{module} = $1;
470
$self->{package} = $2 || $self->{module};
471
$self->{prefix} = $3;
472
$self->{object} = undef;
480
=item $pod = $parser->slurp_pod_paragraph ($firstline, $term_regex=/^=cut\s*/)
482
Slurp up POD lines from I<$filehandle> from here to the next
483
I<$term_regex> or EOF. Since you probably already read a
484
line to determine that we needed to start a pod, you can pass
485
that first line to be included.
489
sub slurp_pod_paragraph {
491
my $firstline = shift;
492
my $term_regex = shift || qr/^=cut\s*/o;
493
my $filehandle = $parser->{filehandle};
498
my @lines = $firstline ? ($firstline) : ();
499
while (my $line = <$filehandle>) {
502
last if $line =~ m/$term_regex/;
506
filename => $parser->{filename},
513
=item $xsub = $parser->parse_xsub (\@lines)
515
=item $xsub = $parser->parse_xsub (@lines)
517
Parse an xsub header, in the form of a list of lines,
518
into a data structure describing the xsub. That includes
519
pulling out the argument types, aliases, and code type.
521
Without artificial intelligence, we cannot reliably
522
determine anything about the types or number of parameters
523
returned from xsubs with PPCODE bodies.
525
OUTLIST parameters are pulled from the args list and put
526
into an "outlist" key. IN_OUTLIST parameters are put into
529
Data type names are not mangled at all.
531
Note that the method can take either a list of lines or a reference to a
532
list of lines. The flat list form is provided for compatibility; the
533
reference form is preferred, to avoid duplicating a potentially large list
539
my ($self, @thisxsub) = @_;
541
# allow for pass-by-reference.
542
@thisxsub = @{ $thisxsub[0] }
543
if @thisxsub == 1 && 'ARRAY' eq ref $thisxsub[0];
545
map { s/#.*$// } @thisxsub;
547
my $filename = $self->{filename};
548
my $oldwarn = $SIG{__WARN__};
549
#$SIG{__WARN__} = sub {
550
# warn "$self->{filename}:$.: "
551
# . join(" / ", $self->{module}||"", $self->{package}||"")
552
# . "\n $_[0]\n ".Dumper(\@thisxsub)
555
my $lineno = $. - @thisxsub;
557
'filename' => $filename,
558
'line' => ($.-@thisxsub),
559
'module' => $self->{module},
560
'package' => $self->package, # to be overwritten as needed
564
#warn Dumper(\@thisxsub);
566
# merge continuation lines. xsubpp allows continuation lines in the
567
# xsub arguments list and barfs on them in other spots, but with xsubpp
568
# providing such validation, we'll just cheat and merge any that we find.
569
# this will bork the line counting logic we have below, but i don't see
570
# a fix for it without major tearup of the code here.
572
@thisxsub = shift @foo;
573
while (my $s = shift @foo) {
574
if ($thisxsub[$#thisxsub] =~ s/\\$//) {
575
chomp $thisxsub[$#thisxsub];
576
$thisxsub[$#thisxsub] .= $s;
582
if ($thisxsub[0] =~ /^([^(]+\s+\*?) # return type, possibly with a *
583
\b([:\w]+)\s* # symbol name
585
(.*) # whatever's inside, if anything
586
\) # close paren, maybe with space
587
\s*;?\s*$/x) { # and maybe other junk at the end
592
$xsub{return_type} = [$r]
593
unless $r =~ /^void\s*$/;
594
shift @thisxsub; $lineno++;
596
} elsif ($thisxsub[1] =~ /^(\S+)\s*\((.*)\);?\s*$/) {
600
# return type is on line 0
601
$thisxsub[0] =~ s/\s*$//;
602
$xsub{return_type} = [$thisxsub[0]]
603
unless $thisxsub[0] =~ /^void\s*$/;
604
shift @thisxsub; $lineno++;
605
shift @thisxsub; $lineno++;
608
# eat padding spaces from the arg string. i tried several ways of
609
# building this into the regexen above, but found nothing that still
610
# allowed the arg string to be empty, which we'll have for functions
611
# (not methods) without resorting to extremely arcane negatory
612
# lookbeside assertiveness operators.
616
# we can get empty arg strings on non-methods.
617
#warn "$filename:$lineno: WTF : args string is empty\n"
618
# if not defined $args;
621
my @argstr = split /\s*,\s*/, $args;
622
#warn Dumper([$args, \%args, \@argstr]);
623
for (my $i = 0 ; $i < @argstr ; $i++) {
624
# the last one can be an ellipsis, let's handle that specially
625
if ($i == $#argstr and $argstr[$i] eq '...') {
626
$args{'...'} = { name => '...', };
627
push @{ $xsub{args} }, $args{'...'};
631
/^(?:(IN_OUTLIST|OUTLIST)\s+)? # OUTLIST would be 1st
632
([^=]+(?:\b|\s))? # arg type is optional, too
634
(?:\s*=\s*(.+))? # possibly a default value
637
push @{ $xsub{outlist} }, {
641
if ($1 eq 'IN_OUTLIST') {
647
$args{$3}{default} = $4 if defined $4;
648
push @{ $xsub{args} }, $args{$3};
656
$args{$3}{default} = $4 if defined $4;
657
push @{ $xsub{args} }, $args{$3};
659
} elsif ($argstr[$i] =~ /^g?int\s+length\((\w+)\)$/) {
660
#warn " ******* $i is string length of $1 *****\n";
662
warn "$filename:$lineno: ($xsub{symname}) don't know how to"
663
. " parse arg $i, '$argstr[$i]'\n";
670
while ($_ = shift @thisxsub) {
673
} elsif (/\s*(PREINIT|CLEANUP|OUTPUT|C_ARGS):/) {
675
} elsif (/\s*(PPCODE|CODE):/) {
676
$xsub{codetype} = $1;
678
} elsif ($xstate eq 'alias') {
679
/^\s*([:\w]+)\s*=\s*(\d+)\s*$/;
681
$xsub{alias}{$1} = $2;
683
warn "$filename:$lineno: WTF : seeking alias on line $_\n";
685
} elsif ($xstate eq 'args') {
687
(.+(?:\b|\s)) # datatype
689
;? # optional trailing semicolon
692
if (exists $args{$2}) {
695
warn "$filename:$lineno: unused arg $2\n";
696
warn " line was '$_'\n";
699
# must've stripped a comment.
701
warn "$filename:$lineno: WTF : seeking args on line $_\n";
707
# mangle the symbol name from an xsub into its actual perl name.
708
$xsub{original_name} = $xsub{symname};
709
if (defined $self->{prefix}) {
710
my $pkg = $self->package;
711
$xsub{symname} =~ s/^($self->{prefix})?/$pkg\::/;
713
$xsub{symname} = ($self->package)."::".$xsub{symname};
716
# sanitize all the C type declarations, which we have
717
# collected in the arguments, outlist, and return types.
719
foreach my $a (@{ $xsub{args} }) {
720
$a->{type} = sanitize_type ($a->{type})
721
if defined $a->{type};
724
if ($xsub{outlist}) {
725
foreach my $a (@{ $xsub{outlist} }) {
726
$a->{type} = sanitize_type ($a->{type})
727
if defined $a->{type};
730
if ($xsub{return_type}) {
731
for (my $i = 0 ; $i < @{ $xsub{return_type} } ; $i++) {
732
$xsub{return_type}[$i] =
733
sanitize_type ($xsub{return_type}[$i]);
737
$SIG{__WARN__} = $oldwarn;
746
s/\s+/ /g; # squash all whitespace
747
s/^\s//; # zap leading space
748
s/\s$//; # zap trailing space
749
s/(?<=\S)\*$/ */; # stars may not be glued to the name
754
sub canonicalize_xsubs {
757
return undef unless 'HASH' eq ref $self->{data};
759
# make sure that each package contains an xsub hash for each
760
# xsub, whether an alias or not.
761
foreach my $package (keys %{$self->{data}}) {
762
my $pkgdata = $self->{data}{$package};
763
next unless $pkgdata or $pkgdata->{xsubs};
764
my $xsubs = $pkgdata->{xsubs};
765
@$xsubs = map { split_aliases ($_) } @$xsubs;
771
return $xsub unless exists $xsub->{alias};
772
return $xsub unless 'HASH' eq ref $xsub->{alias};
773
my %aliases = %{ $xsub->{alias} };
776
foreach my $a (sort { $aliases{$a} <=> $aliases{$b} } keys %aliases) {
781
# we do a deep copy on the args, so that changes to one do not
782
# affect another. in particular, adding docs or hiding an arg
783
# in one xsub shouldn't affect another.
784
args => deep_copy_ref ($xsub->{args}),
786
$seen{ $aliases{$a} }++;
789
unshift @xsubs, $xsub;
798
return undef if not $ref;
799
my $reftype = ref $ref;
800
if ('ARRAY' eq $reftype) {
801
my @newary = map { deep_copy_ref ($_) } @$ref;
803
} elsif ('HASH' eq $reftype) {
804
my %newhash = map { $_, deep_copy_ref ($ref->{$_}) } keys %$ref;
811
=item $parser->clean_out_empty_pods
813
Looks through the data member of the parser and removes any keys (and
814
associated values) when no pod, enums, and xsubs exist for the package.
818
sub clean_out_empty_pods
821
return unless (exists ($data->{data}));
822
$data = $data->{data};
826
foreach (keys %$data)
829
next if ((exists $pod->{pods} and scalar @{$pod->{pods}}) or
830
exists $pod->{enums} or
831
scalar (grep (!/DESTROY/,
836
#print STDERR "Deleting $_ from doc.pl's \$data\n";
850
muppet E<lt>scott at asofyet dot orgE<gt>
852
=head1 COPYRIGHT AND LICENSE
854
Copyright (C) 2003, 2004 by muppet
856
This library is free software; you can redistribute it and/or modify it under
857
the terms of the GNU Library General Public License as published by the Free
858
Software Foundation; either version 2.1 of the License, or (at your option) any
861
This library is distributed in the hope that it will be useful, but WITHOUT ANY
862
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
863
PARTICULAR PURPOSE. See the GNU Library General Public License for more
866
You should have received a copy of the GNU Library General Public License along
867
with this library; if not, write to the Free Software Foundation, Inc., 59
868
Temple Place - Suite 330, Boston, MA 02111-1307 USA.