1
-----------------------------------------------------------------------------
2
-- Little program that checks links in HTML files, using coroutines and
3
-- non-blocking I/O via the dispatcher module.
4
-- LuaSocket sample files
7
-----------------------------------------------------------------------------
8
local url = require("socket.url")
9
local dispatch = require("dispatch")
10
local http = require("socket.http")
13
-- make sure the user knows how to invoke us
-- NOTE(review): this copy looks damaged by extraction: bare numbers are
-- interleaved with the code and the `end` closing the usage check below is
-- not visible. Restore from the upstream sample before running.
-- table.getn is the pre-5.1 way of taking a table's length (removed in 5.2).
15
if table.getn(arg) < 1 then
16
print("Usage:\n luasocket check-links.lua [-n] {<url>}")
20
-- '-n' means we are running in non-blocking mode
21
if arg[1] == "-n" then
22
-- if non-blocking I/O was requested, use real dispatcher interface
-- (`handler` is assigned without a visible `local`, so it is a global here)
24
handler = dispatch.newhandler("coroutine")
26
-- if using blocking I/O, use fake dispatcher interface
-- NOTE(review): no `else` is visible before this assignment; as shown it
-- would simply overwrite the handler chosen above -- confirm against upstream.
27
handler = dispatch.newhandler("sequential")
32
-- get the status of a URL using the dispatcher
-- Parses `link` with "file" as the default scheme. For "http" links it
-- increments `nthreads`, starts a handler task that calls http.request, and
-- writes the link to stdout: the link alone when the status code is 200,
-- otherwise the link followed by the code. No branch for other schemes is
-- visible here.
-- NOTE(review): the argument table passed to http.request and the closing
-- `end`s are not visible in this copy (bare numbers are interleaved instead),
-- and no declaration of `nthreads` is visible either -- restore from upstream.
33
function getstatus(link)
34
local parsed = url.parse(link, {scheme = "file"})
35
if parsed.scheme == "http" then
36
-- one more outstanding check; the task below decrements it when it finishes
nthreads = nthreads + 1
37
handler:start(function()
38
local r, c, h, s = http.request{
43
-- a truthy first result with code 200 prints just the link
if r and c == 200 then io.write('\t', link, '\n')
44
-- anything else prints the link together with the code (or error) in `c`
else io.write('\t', link, ': ', tostring(c), '\n') end
45
nthreads = nthreads - 1
50
-- Reads a whole local file and returns its contents.
-- path: URL-escaped filesystem path; it is unescaped before being opened.
-- Returns the file contents as a string on success, or nil followed by the
-- message reported by io.open when the file cannot be opened.
function readfile(path)
    path = url.unescape(path)
    -- named `err` rather than `error` so the builtin error() is not shadowed
    local file, err = io.open(path, "r")
    if not file then return nil, err end
    local body = file:read("*a")
    -- release the handle right away instead of waiting for the collector
    file:close()
    return body
end
61
-- NOTE(review): the header of this function is not visible in this copy; the
-- call `load(address)` in checklinks suggests it is `function load(u)` --
-- confirm against upstream. The body fetches the document named by `u` and
-- returns base, body, error (see the final return statement).
local parsed = url.parse(u, { scheme = "file" })
62
-- note: the local `error` below shadows the builtin error() in this scope
local body, headers, code, error
64
if parsed.scheme == "http" then
65
-- results of http.request are taken as body, status code, headers
body, code, headers = http.request(u)
67
-- if there was a redirect, update base to reflect it
-- NOTE(review): no initialisation of `base` is visible before this line, and
-- `headers` is indexed without a visible guard -- lines appear to be missing.
68
base = headers.location or base
73
elseif parsed.scheme == "file" then
74
-- local documents are read through readfile using the parsed path component
body, error = readfile(parsed.path)
75
else error = string.format("unhandled scheme '%s'", parsed.scheme) end
76
return base, body, error
79
-- Extracts every HREF target from an HTML document.
-- body: the HTML text to scan.
-- base: URL that relative links are resolved against (via url.absolute).
-- Returns an array of absolute link URLs, in the order they were collected:
-- double-quoted hrefs first, then single-quoted, then unquoted ones.
function getlinks(body, base)
    -- get rid of comments
    body = string.gsub(body, "%<%!%-%-.-%-%-%>", "")
    local links = {}
    -- Records one link and returns "" so the matched attribute is removed from
    -- the text. From Lua 5.1 on, a callback returning nil keeps the match in
    -- place, which would let the unquoted pass below pick up the quoted hrefs
    -- a second time (with their quotes still attached).
    local function collect(href)
        table.insert(links, url.absolute(base, href))
        return ""
    end
    -- double-quoted, then single-quoted attribute values
    body = string.gsub(body, '[Hh][Rr][Ee][Ff]%s*=%s*"([^"]*)"', collect)
    body = string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*'([^']*)'", collect)
    -- whatever is left is unquoted: take everything up to the closing '>'
    string.gsub(body, "[Hh][Rr][Ee][Ff]%s*=%s*(.-)>", collect)
    return links
end
96
-- Loads the document at `address`, prints its base URL and requests a status
-- check for every link found in it.
-- address: URL of the document to scan, passed straight to load().
-- Returns nothing; when the document cannot be loaded the error is printed
-- and no links are checked.
function checklinks(address)
    -- named `err` rather than `error` so the builtin error() is not shadowed
    local base, body, err = load(address)
    if not body then
        print(err)
        return
    end
    print("Checking ", base)
    local links = getlinks(body, base)
    for _, link in ipairs(links) do
        -- getstatus reports each link on stdout (asynchronously for http)
        getstatus(link)
    end
end
106
-- check every address given on the command line; each one is resolved against
-- "file:" first (presumably so that plain paths become file URLs -- verify
-- against the socket.url documentation)
-- NOTE(review): the `end` closing this loop is not visible in this copy.
for _, address in ipairs(arg) do
107
checklinks(url.absolute("file:", address))
110
-- keep going while checks counted in `nthreads` are still outstanding
-- NOTE(review): the loop body and its `end` lie beyond the visible source.
while nthreads > 0 do