~semi-hallikas/sspamm/3.0-devel

« back to all changes in this revision

Viewing changes to sspamm.py

  • Committer: Sami-Pekka Hallikas
  • Date: 2010-11-24 10:21:50 UTC
  • Revision ID: semi@hallikas.com-20101124102150-ey73bv22ypi3wb47
Again, lots of bug hunting and fixing.

Added 'big' features are:
 - RRD
   Keeps RRD database of messages, used to generate graphics
 - CRC
   Blocking by message body SHA1 cheksum. Save all SHA1 CRC's of messages.
   Expire from database last seen after >12h (configurable).
   Keeps CRC in file, so keeps database even on restart.
   If message is blocked or flagged 1 to 5 times in 'crc hours'. Flag it.
   If message is blocked or flagged > 5 times in 'crc hours', do 'crc action'.

Show diffs side-by-side

added added

removed removed

Lines of Context:
17
17
import re
18
18
import thread
19
19
import formatter, htmllib, urllib
20
 
 
 
20
UseSHA=0
 
21
try:
 
22
# For python > 2.4
 
23
        UseSHA=25
 
24
        import hashlib
 
25
        
 
26
except:
 
27
# For Python >= 2.4
 
28
        UseSHA=24
 
29
        import sha
21
30
 
22
31
from email import message_from_file, message_from_string
23
32
from email.Header import decode_header
39
48
        LOG_EMERG, LOG_ALERT, LOG_CRIT, LOG_ERR, \
40
49
        LOG_WARNING, LOG_NOTICE, LOG_INFO, LOG_DEBUG
41
50
 
 
51
## rpmforge repository:
 
52
import rrdtool
 
53
 
42
54
## http://sourceforge.net/projects/pydns
43
55
import DNS
44
56
from DNS import Base
72
84
                "timeme":               True,
73
85
                "tmpdir":               "/dev/shm",
74
86
                "savedir":              None,
 
87
                "nonspamonly":          False,
 
88
                "crcsave":              False,
 
89
                "crchours":             12,
75
90
                "watchmode":            False,
76
91
        },
77
92
        "filter": {
92
107
                "bayesian":             "Flag",
93
108
                "rbl":                  "Flag",
94
109
                "charset":              "Flag",
 
110
                "crc":                  "Flag",
95
111
        },
96
112
        "settings": {
97
113
                "ipservers":            ["dnsbl-2.uceprotect.net"],
124
140
        "conffile":     None,
125
141
        "offline":      False,
126
142
        "rrd":          {
127
 
                "blocked":      0,
 
143
                "ham":          0,
128
144
                "unsure":       0,
129
 
                "delivered":    0,
 
145
                "spam":         0,
130
146
        },
131
147
}
132
148
 
 
149
msgbase = {
 
150
        0: {
 
151
                'seen': 0,
 
152
                'block': 0,
 
153
                'pass': 0,
 
154
                'flag': 0,
 
155
        }
 
156
}
133
157
globaltmp = {}
134
158
loglines = []
135
159
 
244
268
                                                tmpconf[s][k] = t
245
269
                                        elif "%s/%s" % (s,k) in [ "filter/defaulttests", "settings/ipservers" ]:
246
270
                                                tmpconf[s][k] = cp.getlist(s, k)
247
 
                                        elif "%s/%s" % (s,k) in [ "settings/maxbodysize" ]:
248
 
                                                tmpconf[s][k] = cp.getint(s, k)
249
 
                                        elif "%s/%s" % (s,k) in [ "main/verbose" ]:
 
271
                                        elif "%s/%s" % (s,k) in [ "main/verbose", "main/crchours", "settings/maxbodysize" ]:
250
272
                                                tmpconf[s][k] = cp.getint(s, k)
251
273
                                        elif "%s/%s" % (s,k) in [ "main/name", "main/port", "main/pid", "main/sspammdir", "main/tmpdir", "main/logfile", "main/savedir" ]:
252
274
                                                tmpconf[s][k] = cp.get(s, k)
262
284
                                                for t in tmpconf[s][k]:
263
285
                                                        if dumbregtest(t): testconf.append(t)
264
286
                                                tmpconf[s][k] = testconf
265
 
                                        elif k in ["verbose", "enabled", "timeme", "watchmode", "childs"]:
 
287
                                        elif k in ["verbose", "enabled", "timeme", "watchmode", "childs", "crcsave", "nonspamonly"]:
266
288
                                                tmpconf[s][k] = cp.getvalue(s, k)
267
289
                                        elif type(tmpconf[s][k]) is int:
268
290
                                                debug("CONFIG INT %s / %s" % (s, k), LOG_ALERT)
1386
1408
        if conf["main"]["timeme"] is True: mail["timer"]["charset"] = str("%.4f") % timeme(timer, id=mail["id"])
1387
1409
        return (res, mail)
1388
1410
 
 
1411
###
 
1412
###
 
1413
###
 
1414
def test_crc(mail):
 
1415
        if not conf["main"]["crcsave"]: return (None, mail)
 
1416
        debug("*test_crc()", LOG_INFO, id=mail["id"])
 
1417
        if conf["main"]["timeme"] is True: timer = timeme()
 
1418
        res = None
 
1419
        mail["tests"]["crc"] = 0
 
1420
 
 
1421
        if mail.has_key("checksum") and msgbase.has_key(mail["checksum"]):
 
1422
                if msgbase[mail["checksum"]]['block'] > 5:
 
1423
                        res = (conf["actions"]["crc"], ">5 block/%dh" % (conf["main"]["crchours"]))
 
1424
                elif msgbase[mail["checksum"]]['block'] > 1:
 
1425
                        res = ('flag', ">1 block/%dh" % (conf["main"]["crchours"]))
 
1426
                elif msgbase[mail["checksum"]]['flag'] > 5:
 
1427
                        res = (conf["actions"]["crc"], ">5 flag/%dh" % (conf["main"]["crchours"]))
 
1428
                elif msgbase[mail["checksum"]]['flag'] > 1:
 
1429
                        res = ('flag', ">1 flag/%dh" % (conf["main"]["crchours"]))
 
1430
 
 
1431
        if res and res[0] == True:
 
1432
                tmp = res[1]
 
1433
                res = (conf["actions"]["crc"], tmp)
 
1434
        if conf["main"]["timeme"] is True: mail["timer"]["crc"] = str("%.4f") % timeme(timer, id=mail["id"])
 
1435
        return (res, mail)
 
1436
 
 
1437
###
 
1438
###
 
1439
###
1389
1440
def test_bayesian(mail):
1390
1441
        debug("*test_bayesian()", LOG_INFO, id=mail["id"])
1391
1442
        if conf["main"]["timeme"] is True: timer = timeme()
1451
1502
## connects.
1452
1503
        def _cleanup(self):
1453
1504
                debug("SpamMilter._cleanup()", LOG_DEBUG, id=self.id)
 
1505
# Update statistics
1454
1506
                if self.tmp:
1455
1507
                        self.tmp.close()
1456
1508
                        rm(self.tmp.name, id=self.id)
1457
 
                if self.mail:
1458
 
                        if conf["main"]["timeme"] and self.mail["timer"].has_key("timepass"): self.mail["timer"]["timepass"] = str("%.4f") % timeme(self.mail["timer"]["timepass"], id=self.id, title="TTimer")
 
1509
                if not self.mail:
 
1510
                        return
 
1511
                if not self.mail["tests"]:
 
1512
                        del self.mail
 
1513
                        self.mail = None
 
1514
                        return
 
1515
 
 
1516
                if conf["main"]["crcsave"] and self.mail.has_key("checksum"):
 
1517
                        crc = self.mail["checksum"]
 
1518
                        if not msgbase.has_key(crc):
 
1519
                                msgbase[crc] = {
 
1520
                                        'seen':         0,
 
1521
                                        'block':        0,
 
1522
                                        'pass':         0,
 
1523
                                        'flag':         0,
 
1524
                                }
 
1525
                        msgbase[crc]['seen'] = int(time.time())
 
1526
                        if self.mail["action"][0] in ['reject','delete','discard','block']:
 
1527
                                msgbase[crc]['block'] += 1
 
1528
                        elif self.mail["action"][0] in ['flag','warn']:
 
1529
                                msgbase[crc]['flag'] += 1
 
1530
                        else:
 
1531
                                msgbase[crc]['pass'] += 1
 
1532
 
 
1533
                if conf["main"]["timeme"] and self.mail["timer"].has_key("timepass"): self.mail["timer"]["timepass"] = str("%.4f") % timeme(self.mail["timer"]["timepass"], id=self.id, title="TTimer")
1459
1534
 
1460
1535
###
1461
1536
### USED FOR DEBUG!
1462
1537
###
1463
 
                if self.mail and self.mail["size"] > 0:
 
1538
                if self.mail["size"] > 0:
1464
1539
                        if conf["main"]["verbose"] == 5 or conf["runtime"]["offline"]:
1465
1540
                                print
 
1541
#                               if conf["main"]["singleview"]:
 
1542
#                                       print " ",
 
1543
#                               else:
1466
1544
                                print "#############################################################################"
1467
 
                                print "Received:"
 
1545
                                print "Received:",
1468
1546
                                for rec in self.mail["received"]:
1469
 
                                        print "\t",
 
1547
                                        print "\n\t",
1470
1548
                                        if self.mail["received"][rec].has_key("dns"):
1471
1549
                                                print "%s" % (self.mail["received"][rec]["dns"]),
1472
1550
                                        print " [%s]" % (self.mail["received"][rec]["ip"]),
1474
1552
                                                print " (%s)" % (self.mail["received"][rec]["helo"]),
1475
1553
                                print """
1476
1554
 
 
1555
ID:\t\t%s
1477
1556
Mail From:\t%s
1478
1557
From:\t\t%s
1479
1558
Rcpt To:\t%s
1480
1559
To:\t\t%s
1481
1560
Subject:\t%s
1482
1561
Size:\t\t%d
1483
 
""" % (
 
1562
Checksum:\t%s""" % (
 
1563
self.mail["id"],
1484
1564
self.mail["from"][0],
1485
1565
self.mail["header"]["From"],
1486
1566
self.mail["to"],
1487
1567
self.mail["header"]["To"],
1488
1568
self.mail["subject"][0:80],
1489
 
self.mail["size"]
 
1569
self.mail["size"],
 
1570
self.mail["checksum"]
1490
1571
)
 
1572
                                if conf["main"]["crcsave"]:
 
1573
                                        print "\t\t", msgbase[self.mail["checksum"]]
 
1574
                                print
1491
1575
                                tc = 0
1492
1576
                                print "%s\t\t%s\t%s\t%s" % ("Test", "Time", "Tests", "Action and why")
1493
1577
                                print "-----------------------------------------------------------------------------"
1514
1598
###
1515
1599
### /USED FOR DEBUG!
1516
1600
###
1517
 
                        if conf["main"]["savedir"]:
 
1601
                if conf["main"]["savedir"]:
 
1602
                        if not (conf["main"]["nonspamonly"] and self.mail["action"][0][:2] in ['re','de','di','bl']):
1518
1603
                                save_vars(self.mail, "%s/%08d.var" % (conf["main"]["savedir"], self.id), id=self.id)
1519
 
                        for a in ['raw']:
1520
 
                                if self.mail.has_key(a): del self.mail[a]
1521
 
                        for a in ['smtpcmds','tmpfile','rules','ipfromto','id']:
1522
 
                                if self.mail.has_key(a): del self.mail[a]
1523
 
                        for a in ['mime','header','type','from','charset','todomain','rawsubject','subject','my','id','size']:
1524
 
                                if self.mail.has_key(a): del self.mail[a]
 
1604
                if not conf["main"]["childs"]: Tcrc()
 
1605
                for a in ['raw']:
 
1606
                        if self.mail.has_key(a): del self.mail[a]
 
1607
                for a in ['smtpcmds','tmpfile','rules','ipfromto','id']:
 
1608
                        if self.mail.has_key(a): del self.mail[a]
 
1609
                for a in ['mime','header','type','from','charset','todomain','rawsubject','subject','my','id','size']:
 
1610
                        if self.mail.has_key(a): del self.mail[a]
1525
1611
                del self.mail
1526
1612
                self.mail = None
1527
1613
                return
1682
1768
                return CONTINUE
1683
1769
 
1684
1770
        def body(self,chunk):
 
1771
                global UseSHA
1685
1772
                if conf["main"]["timeme"]: timer = timeme()
1686
1773
                debug("SpamMilter.body() (chunk size: %d)" % len(chunk), LOG_DEBUG, id=self.id)
1687
1774
 
1688
1775
                if self.mail: self.mail["smtpcmds"].append("body")
1689
1776
                if self.tmp and self.mail["size"] == 0:
1690
1777
                        self.tmp.write("\n")
 
1778
# Make SHA checksum only for first chunk of message, maximum size of chunk
 
1779
# seems to be 65535. So if first 64K bytes of message is same, so is
 
1780
# checksum. But usually this is not needed.
 
1781
# For python > 2.4
 
1782
                        if UseSHA > 24:
 
1783
                                self.mail["checksum"] = hashlib.sha1(chunk).hexdigest()
 
1784
# For Python >= 2.4
 
1785
                        elif UseSHA > 0:
 
1786
                                self.mail["checksum"] = sha.new(chunk).hexdigest()
 
1787
 
1691
1788
                self.mail["size"] += len(chunk)
1692
1789
                if self.tmp: self.tmp.write(chunk)
1693
1790
 
1789
1886
 
1790
1887
                for to in self.mail["to"]:
1791
1888
                        log = "%s" % time.strftime('%Y%m%d %H:%M:%S')
 
1889
                        if conf["main"]["savedir"] and not (conf["main"]["nonspamonly"] and self.mail["action"][0][:2] in ['re','de','di','bl']):
 
1890
                                log += " [%s]" % (self.mail["id"])
 
1891
                        else:
 
1892
                                log += " (%s)" % (self.mail["id"])
1792
1893
                        if ret:
1793
1894
                                log += " %s (%s: %s)" % (ret[0].lower(), test.lower(), ret[1][:254])
1794
1895
                        else:
1813
1914
                        debug("Current Action: %s" % (action), LOG_DEBUG, id=self.mail["id"])
1814
1915
                        if action in ['reject']:
1815
1916
                                if not (domainrule(self.mail, 'watch', conf["main"]["watchmode"]) or domainrule(self.mail, 'flagall')):
 
1917
                                        conf["runtime"]["rrd"]["spam"] += 1
1816
1918
                                        debug("Milter.REJECT (500 5.7.1 - %s)" % (reason), LOG_DEBUG, id=self.mail["id"])
1817
1919
                                        self.setreply("550", "5.7.1", reason)
1818
 
                                        conf["runtime"]["rrd"]["blocked"] += 1
1819
1920
                                        return REJECT
1820
1921
 
1821
1922
                        elif action in ['delete','discard','block']:
1822
1923
                                if not (domainrule(self.mail, 'watch', conf["main"]["watchmode"]) or domainrule(self.mail, 'flagall')):
 
1924
                                        conf["runtime"]["rrd"]["spam"] += 1
1823
1925
                                        debug("Milter.DISCARD (%s)" % (reason), LOG_DEBUG, id=self.mail["id"])
1824
 
                                        conf["runtime"]["rrd"]["blocked"] += 1
1825
1926
                                        return DISCARD
1826
1927
 
1827
1928
                        if not domainrule(self.mail, 'watch', conf["main"]["watchmode"]):
1847
1948
                                self.mail["action"].insert(0, "%s in WATCH MODE" % (self.mail["action"][0]))
1848
1949
 
1849
1950
                        if action in ['reject','delete','discard','block']:
1850
 
                                conf["runtime"]["rrd"]["blocked"] += 1
1851
 
                        if action in ['flag','warn']:
 
1951
                                conf["runtime"]["rrd"]["spam"] += 1
 
1952
                        elif action in ['flag','warn']:
1852
1953
                                conf["runtime"]["rrd"]["unsure"] += 1
1853
1954
                        elif action not in ['ignore'] :
1854
 
                                conf["runtime"]["rrd"]["delivered"] += 1
 
1955
                                conf["runtime"]["rrd"]["ham"] += 1
1855
1956
#                       return ACCEPT
1856
1957
                return CONTINUE
1857
1958
 
1872
1973
### CHILD THREADS
1873
1974
###
1874
1975
def Tconfig(childname=None):
1875
 
        global conffile, conf
 
1976
        global conffile, conf, msgbase
1876
1977
        
1877
1978
        debug("Tconfig")
1878
1979
        if conf["runtime"]["conffile"] == None:
1892
1993
                        debug("FATAL: Can't find or read %s." % (cf), LOG_EMERG)
1893
1994
                        return
1894
1995
 
1895
 
                debug("Config file %s found." % conffile)
 
1996
                debug("Config file %s found." % conffile, LOG_NOTICE)
1896
1997
 
1897
1998
        if childname: debug("Tconfig loop started", LOG_INFO)
1898
1999
        while conf["runtime"]["endtime"] == 0:
1970
2071
        if childname: debug("Logger thread quited", LOG_INFO)
1971
2072
        return
1972
2073
 
 
2074
## Thread to keep CRC database clean
 
2075
def Tcrc(childname=None):
 
2076
        global msgbase
 
2077
        debug("Create CRC child thread", LOG_DEBUG)
 
2078
 
 
2079
        expire = time.time()-(60*60*conf["main"]["crchours"])
 
2080
        while conf["runtime"]["endtime"] == 0:
 
2081
                if childname and not (conf["main"]["pid"] and os.path.exists(conf["main"]["pid"])): break
 
2082
### DO SOMETHING
 
2083
                if conf["main"]["crcsave"]:
 
2084
                        tmpmb = {};
 
2085
## Keep messages seen in last 12 hours
 
2086
                        for crc in msgbase:
 
2087
                                if not (msgbase[crc]["seen"] < expire):
 
2088
                                        tmpmb[crc] = msgbase[crc].copy()
 
2089
 
 
2090
                        msgbase = tmpmb.copy()
 
2091
                        tmpmb.clear()
 
2092
 
 
2093
                        save_vars(msgbase, "crc-db.var")
 
2094
### / DO SOMETHING
 
2095
                if childname != "CRC" or not conf["main"]["childs"]: return
 
2096
## Loop every 10 minutes
 
2097
                if conf["main"]["crcsave"]:
 
2098
                        time.sleep(600)
 
2099
                else:
 
2100
                        time.sleep(5)
 
2101
        debug("CRC thread quited", LOG_INFO)
 
2102
        return
 
2103
 
 
2104
## Thread that keeps RRD information updated, should update RRD info every 5 minutes or so.
 
2105
def Trrd(childname=None):
 
2106
        if childname != "RRD": return
 
2107
        debug("Create RRD child thread", LOG_DEBUG)
 
2108
        conf["runtime"]["rrd"] = { "ham": 0, "unsure": 0, "spam": 0, }
 
2109
        if not os.path.exists("%s/sspamm.rrd" % conf["runtime"]["confpath"]):
 
2110
                debug("RRD Create: %s/sspamm.rrd" % conf["runtime"]["confpath"], LOG_NOTICE)
 
2111
                rrdtool.create("%s/sspamm.rrd" % conf["runtime"]["confpath"], "-s 300", 
 
2112
                        'DS:pass:GAUGE:600:-1:65535',
 
2113
                        'DS:flag:GAUGE:600:-1:65535',
 
2114
                        'DS:block:GAUGE:600:-1:65535',
 
2115
                        'RRA:AVERAGE:0.5:1:288',
 
2116
                        'RRA:AVERAGE:0.5:12:336',
 
2117
                        'RRA:AVERAGE:0.5:288:365',
 
2118
                )
 
2119
 
 
2120
# Clock specific things need to do, because vmware clock skew/jumps ?
 
2121
        while conf["runtime"]["endtime"] == 0:
 
2122
                if childname and not (conf["main"]["pid"] and os.path.exists(conf["main"]["pid"])): break
 
2123
                if (time.strftime('%M')[-1] in ["0", "5"]) and (int(time.strftime('%S')) < 10):
 
2124
                        timestamp=int(time.time())
 
2125
                        tmp=conf["runtime"]["rrd"].copy()
 
2126
                        conf["runtime"]["rrd"] = { "ham": 0, "unsure": 0, "spam": 0, }
 
2127
                        try:
 
2128
                                rrdtool.update("%s/sspamm.rrd" % conf["runtime"]["confpath"], "%s:%d:%d:%d" % (timestamp, tmp["ham"], tmp["unsure"], tmp["spam"]))
 
2129
                                debug("RRD Update %s: Ham: %s, Unsure: %s, Spam: %s" % (timestamp, tmp["ham"], tmp["unsure"], tmp["spam"]), LOG_ERR)
 
2130
                        except:
 
2131
                                debug("RRD Update: %s: %s" % (sys.exc_type, sys.exc_value), LOG_ERR)
 
2132
# Statistics updated, sleep for about 4 minutes.
 
2133
                        time.sleep(300-60)
 
2134
                elif int(time.strftime('%S')) >= 58:
 
2135
                        time.sleep(0.2)
 
2136
                elif int(time.strftime('%S')) >= 40:
 
2137
                        time.sleep(1)
 
2138
                else:
 
2139
                        time.sleep(10)
 
2140
        debug("RRD thread quited", LOG_INFO)
 
2141
        return
 
2142
 
1973
2143
###
1974
2144
### Main Function
1975
2145
###
1993
2163
        global conf
1994
2164
 
1995
2165
        Tconfig()
 
2166
        if os.path.exists("crc-db.var") and os.access("crc-db.var", os.R_OK):
 
2167
                debug("Loading CRC database", LOG_NOTICE)
 
2168
#               try:
 
2169
                msgbase = load_vars("crc-db.var")
 
2170
#               except:
 
2171
#                       pass
 
2172
 
1996
2173
        if conf["main"]["childs"]:
1997
2174
                thread.start_new_thread(Tconfig,("Configuration Loader",))
1998
2175
        else:
2024
2201
        if conf["main"]["childs"]:
2025
2202
#               thread.start_new_thread(Tclock,("Clock",))
2026
2203
                thread.start_new_thread(Tlogger,("Logger",))
2027
 
#               thread.start_new_thread(Trrd,("RRD",))
 
2204
                thread.start_new_thread(Trrd,("RRD",))
 
2205
                thread.start_new_thread(Tcrc,("CRC",))
2028
2206
#               thread.start_new_thread(Tstats,("Stats",))
2029
2207
#       else:
2030
2208
#               debug("Logging, RRD, etc. are disabled in foreground mode.", LOG_ALERT)
 
2209
 
2031
2210
        try:
2032
2211
                Milter.runmilter(conf["main"]["name"],conf["main"]["port"],300)
2033
2212
        except SystemExit:
2119
2298
                # Now we have done everything as in 'online'
2120
2299
                m.eom()
2121
2300
                m.close()
2122
 
#               if m.mail.has_key("raw"): del m.mail["raw"]
2123
 
                #if m.mail.has_key("received"): del m.mail["received"]
2124
 
                #if m.mail.has_key("from"): del m.mail["from"]
2125
 
                #if m.mail.has_key("smtpcmds"): del m.mail["smtpcmds"]
2126
 
                #if m.mail.has_key("timer"): del m.mail["timer"]
2127
 
                #if m.mail.has_key("my"): del m.mail["my"]
2128
 
                #if m.mail.has_key("mime"): del m.mail["mime"]
2129
 
                #if m.mail.has_key("subject"): del m.mail["subject"]
2130
 
        #       print show_vars(m.mail)
2131
2301
                if self.tmp:
2132
2302
                        self.tmp.close()
2133
2303
                        rm(self.tmp.name)