2
# linkcheck.rb: tDiary filter for checking link to my site in TrackBack source site.
5
# * if the source site has no URI of my site's top page, it's spam!
6
# * reads only the first 100KB of the source site.
7
# * if there is no response within 10 sec, it may be spam.
9
# Copyright (C) 2007 by TADA Tadashi <sho@spc.gr.jp>
10
# Distributed under GPL2.
17
class SpamlinkcheckFilter < Filter
18
def initialize( *args )
20
@filter_mode = @conf['spamfilter.filter_mode']
21
@filter_mode = true if @filter_mode == nil
24
def comment_filter( diary, comment )
25
if @conf['spamfilter.linkcheck'] == 0 then
26
debug( "No linkcheck to TrackBacks.", DEBUG_FULL )
30
# check only TrackBack
31
return true unless comment.name == 'TrackBack'
33
dest_uri = @conf.index.dup
34
dest_uri[0, 0] = @conf.base_url if %r|^https?://|i !~ @conf.index
35
dest_uri.gsub!( %r|/\./|, '/' )
37
# TrackBack URI is the 1st line of comment.body.
38
src_uri, = comment.body.split( /\n/ )
39
unless %r|^https?://|i =~ src_uri then
40
debug( "TrackBack has bad source URI." )
44
if src_uri.index( dest_uri ) == 0 then
45
debug( "TrackBack was sent to myself.", DEBUG_FULL )
50
Timeout::timeout( 10 ) do
51
open( src_uri ) do |f|
52
if f.read( 100 * 1024 ).include?( dest_uri ) then
53
debug( "TrackBack has links to me.", DEBUG_FULL )
56
debug( "TrackBack dose not have links to me." )
63
debug( "TrackBack source was no response." )
67
debug( "Cannot access to TrackBack source (#{$!})." )