~ubuntu-branches/ubuntu/hardy/php5/hardy-updates

« back to all changes in this revision

Viewing changes to ext/tidy/examples/urlgrab5.php

  • Committer: Bazaar Package Importer
  • Author(s): Adam Conrad
  • Date: 2005-10-09 03:14:32 UTC
  • Revision ID: james.westby@ubuntu.com-20051009031432-kspik3lobxstafv9
Tags: upstream-5.0.5
Import upstream version 5.0.5

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
<?php
 
2
    /*
 
3
     * urlgrab5.php
 
4
     *
 
5
     * A simple command-line utility to extract all of the URLs contained
 
6
     * within <A HREF> tags from a document.
 
7
     *
 
8
     * NOTE: Only works with tidy for PHP 5, please see urlgrab.php for tidy for PHP 4.3.x
 
9
     *
 
10
     * By: John Coggeshall <john@php.net>
 
11
     *
 
12
     * Usage: php urlgrab5.php <file>
 
13
     *
 
14
     */
 
15
    function dump_nodes(tidy_node $node, &$urls = NULL) {
 
16
 
 
17
        $urls = (is_array($urls)) ? $urls : array();
 
18
        
 
19
        if(isset($node->id)) {
 
20
            if($node->id == TIDY_TAG_A) {
 
21
                $urls[] = $node->attribute['href'];
 
22
            }
 
23
        }
 
24
                    
 
25
        if($node->hasChildren()) {
 
26
 
 
27
            foreach($node->child as $c) {
 
28
                dump_nodes($c, $urls);
 
29
            }
 
30
 
 
31
        }
 
32
        
 
33
        return $urls;
 
34
    }
 
35
 
 
36
    $a = tidy_parse_file($_SERVER['argv'][1]);
 
37
    $a->cleanRepair();
 
38
    print_r(dump_nodes($a->html()));
 
39
?>