1
/* dns_answer.c - Receive and process incoming dns queries.
3
Copyright (C) 2000, 2001 Thomas Moestl
4
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009 Paul A. Rombouts
6
This file is part of the pdnsd package.
8
pdnsd is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 3 of the License, or
11
(at your option) any later version.
13
pdnsd is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with pdnsd; see the file COPYING. If not, see
20
<http://www.gnu.org/licenses/>.
26
* There are several standard conformity issues noted in the comments.
27
* Some additional comments:
29
* I always set RA but I ignore RD largely (in everything but CNAME recursion),
30
* not because it is not supported, but because I _always_ do a recursive
31
* resolve in order to be able to cache the results.
38
#include <sys/types.h>
39
#ifdef HAVE_SYS_POLL_H
42
#include <sys/param.h>
55
#include "dns_answer.h"
56
#include "dns_query.h"
62
#if !defined(lint) && !defined(NO_RCSIDS)
63
static char rcsid[]="$Id: dns_answer.c,v 1.60 2002/08/07 08:55:33 tmm Exp $";
67
* This is for error handling to prevent spewing the log files.
68
* Maximums of different message types are set.
69
* Races do not really matter here, so no locks.
71
#define TCP_MAX_ERRS 10
72
#define UDP_MAX_ERRS 10
73
#define MEM_MAX_ERRS 10
74
#define THRD_MAX_ERRS 10
75
#define MISC_MAX_ERRS 10
76
static volatile unsigned long da_tcp_errs=0;
77
static volatile unsigned long da_udp_errs=0;
78
static volatile unsigned long da_mem_errs=0;
79
static volatile unsigned long da_thrd_errs=0;
81
static volatile unsigned long da_misc_errs=0;
83
static volatile int procs=0; /* active query processes */
84
static volatile int qprocs=0; /* queued query processes */
85
static volatile unsigned long dropped=0,spawned=0;
86
static volatile unsigned thrid_cnt=0;
87
static pthread_mutex_t proc_lock = PTHREAD_MUTEX_INITIALIZER;
90
static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER;
95
# if (TARGET==TARGET_LINUX)
96
struct in_pktinfo pi4;
102
struct in6_pktinfo pi6;
106
#define udp_buf_len 512
111
struct sockaddr_in sin4;
114
struct sockaddr_in6 sin6;
123
unsigned char buf[udp_buf_len];
128
#ifndef NO_TCP_SERVER
132
} __attribute__((packed)) dns_ans_t;
141
unsigned short qtype;
142
unsigned short qclass;
143
unsigned char query[0];
148
#define S_AUTHORITY 2
149
#define S_ADDITIONAL 3
152
unsigned short tp,dlen;
154
/* unsigned char data[0]; */
159
* Mark an additional record as added to avoid double records.
161
static int sva_add(dlist *sva, const unsigned char *rhn, unsigned short tp, unsigned short dlen, void* data)
164
size_t rlen=rhnlen(rhn);
166
if (!(*sva=dlist_grow(*sva,sizeof(sva_t)+rlen+dlen))) {
172
memcpy(mempcpy(st->nm,rhn,rlen),data,dlen);
177
/* ans_ttl computes the ttl value to return to the client.
178
This is the ttl value stored in the cache entry minus the time
179
the cache entry has lived in the cache.
180
Local cache entries are an exception, they never "age".
182
inline static time_t ans_ttl(rr_set_t *rrset, time_t queryts)
184
time_t ttl= rrset->ttl;
186
if (!(rrset->flags&CF_LOCAL)) {
187
time_t tpassed= queryts - rrset->ts;
188
if(tpassed<0) tpassed=0;
195
/* follow_cname_chain takes a cache entry and a buffer (must be at least 256 bytes),
196
and copies the name indicated by the first cname record in the cache entry.
197
The name is returned in length-byte string notation.
198
follow_cname_chain returns 1 if a cname record is found, otherwise 0.
200
inline static int follow_cname_chain(dns_cent_t *c, unsigned char *name)
202
rr_set_t *rrset=c->rr[T_CNAME-T_MIN];
204
if (!rrset || !(rr=rrset->rrs))
206
PDNSD_ASSERT(rr->rdlen <= 256, "follow_cname_chain: record too long");
207
memcpy(name,rr->data,rr->rdlen);
213
* Add data from a rr_bucket_t (as in cache) into a dns message in ans. Ans is grown
214
* to fit, sz is the old size of the packet (it is modified so at the end of the procedure
215
* it is the new size), type is the rr type and ltime is the time in seconds the record is
217
* cb is the buffer used for message compression. *cb should be NULL when you call compress_name
218
* or add_to_response the first time.
219
* It gets filled with a pointer to compression information that can be reused in subsequent calls
220
* to add_to_response.
221
* sect is the section (S_ANSWER, S_AUTHORITY or S_ADDITIONAL) in which the record
222
* belongs logically. Note that you still have to add the rrs in the right order (answer rrs first,
223
* then authority and last additional).
225
static int add_rr(dns_ans_t **ans, long *sz, unsigned char *rrn, unsigned short type, uint32_t ttl,
226
int dlen, void *data, char section, char udp, dlist *cb)
228
int ilen,blen,osz,rdlen;
234
unsigned char nbuf[256];
236
if (!(nlen=compress_name(rrn,nbuf,*sz,cb)))
239
/* This buffer is over-allocated usually due to compression. Never mind, just a few bytes,
240
* and the buffer is freed soon*/
242
dns_ans_t *nans=(dns_ans_t *)pdnsd_realloc(*ans,ansoffset+*sz+nlen+sizeof_rr_hdr_t+dlen);
247
memcpy((unsigned char *)(&(*ans)->hdr)+*sz,nbuf,nlen);
251
/* the rr header will be filled in later. Just reserve some space for it. */
252
rrht=((unsigned char *)(&(*ans)->hdr))+(*sz);
253
*sz+=sizeof_rr_hdr_t;
264
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
266
PDNSD_ASSERT(rdlen <= dlen, "T_CNAME/T_MB/...: got longer");
273
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
276
ilen=rhnlen((unsigned char *)data);
277
PDNSD_ASSERT(rdlen <= ilen, "T_MINFO/T_RP: got longer");
278
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
281
PDNSD_ASSERT(rdlen <= dlen, "T_MINFO/T_RP: got longer");
290
PDNSD_ASSERT(dlen > 2, "T_MX/T_AFSDB/...: rr botch");
291
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,2);
293
if (!(blen=compress_name(((unsigned char *)data)+2, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
296
PDNSD_ASSERT(rdlen <= dlen, "T_MX/T_AFSDB/...: got longer");
300
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
303
ilen=rhnlen((unsigned char *)data);
304
PDNSD_ASSERT(rdlen <= ilen, "T_SOA: got longer");
305
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
309
ilen+=rhnlen(((unsigned char *)data)+ilen);
310
PDNSD_ASSERT(rdlen <= ilen, "T_SOA: got longer");
311
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data)+ilen,20);
313
PDNSD_ASSERT(rdlen <= dlen, "T_SOA: rr botch");
318
PDNSD_ASSERT(dlen > 2, "T_PX: rr botch");
319
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,2);
322
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
326
ilen+=rhnlen(((unsigned char *)data)+ilen);
327
PDNSD_ASSERT(rdlen <= ilen, "T_PX: got longer");
328
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
331
PDNSD_ASSERT(rdlen <= dlen, "T_PX: got longer");
335
PDNSD_ASSERT(dlen > 6, "T_SRV: rr botch");
336
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,6);
338
if (!(blen=compress_name(((unsigned char *)data)+6, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
341
PDNSD_ASSERT(rdlen <= dlen, "T_SRV: got longer");
345
if (!(blen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
349
ilen=rhnlen((unsigned char *)data);
350
PDNSD_ASSERT(rdlen <= ilen, "T_NXT: got longer");
351
PDNSD_ASSERT(dlen >= ilen, "T_NXT: rr botch");
353
int wlen = dlen - ilen;
354
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data)+ilen,wlen);
360
PDNSD_ASSERT(dlen > 4, "T_NAPTR: rr botch");
365
ilen += ((int)*(((unsigned char *)data)+ilen)) + 1;
366
PDNSD_ASSERT(dlen > ilen, "T_NAPTR: rr botch 2");
369
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data),ilen);
372
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
375
PDNSD_ASSERT(rdlen <= dlen, "T_NAPTR: got longer");
380
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data),dlen);
385
if (udp && (*sz)>512 && section==S_ADDITIONAL) /* only add the record if we do not increase the length over 512 */
386
*sz=osz; /* in additionals for udp answer*/
391
PUTINT16(rdlen,rrht);
395
(*ans)->hdr.ancount=htons(ntohs((*ans)->hdr.ancount)+1);
398
(*ans)->hdr.nscount=htons(ntohs((*ans)->hdr.nscount)+1);
401
(*ans)->hdr.arcount=htons(ntohs((*ans)->hdr.arcount)+1);
409
typedef struct rre_s {
411
unsigned short tsz; /* Size of tnm field */
412
uint32_t ttl; /* ttl of the record in the answer (if tp==T_NS or T_SOA) */
413
unsigned char tnm[0]; /* Name for the domain a record refers to */
414
/* unsigned char nm[0]; */ /* Name of the domain the record is for (if tp==T_NS or T_SOA) */
418
/* types for the tp field */
419
/* #define RRETP_NS T_NS */ /* For name server: add to authority, add address to additional. */
420
/* #define RRETP_SOA T_SOA */ /* For SOA record: add to authority. */
421
#define RRETP_ADD 0 /* For other records: add the address of buf to additional */
423
static int add_ar(dlist *ar,unsigned short tp, unsigned short tsz,void *tnm,unsigned char *nm, uint32_t ttl)
427
size_t nmsz=0,size=sizeof(rr_ext_t)+tsz;
428
if(tp==T_NS || tp==T_SOA) {
432
if (!(*ar=dlist_grow(*ar,size)))
438
p=mempcpy(re->tnm,tnm,tsz);
439
if(tp==T_NS || tp==T_SOA) {
446
/* Select a random rr record from a list. */
447
inline static rr_bucket_t *randrr(rr_bucket_t *rrb)
452
/* In order to have an equal chance for each record to be selected, we have to count first. */
453
for(rr=rrb; rr; rr=rr->next) ++cnt;
455
/* We do not use the pdnsd random functions (these might use /dev/urandom if the user is paranoid,
456
* and we do not need any good PRNG here). */
457
if(cnt) for(cnt=random()%cnt; cnt; --cnt) rrb=rrb->next;
467
static const int ar_recs[AR_NUM]={T_NS, T_MD, T_MF, T_MB, T_MX
472
/* offsets from record data start to server name */
473
static const int ar_offs[AR_NUM]={0,0,0,0,2
479
/* This adds an rrset, optionally randomizing the first element it adds.
480
* If that is done, all rrs after the randomized one appear in order, starting from
481
* that one and wrapping over if needed. */
482
static int add_rrset(dns_ans_t **ans, long *sz, unsigned char *rrn, unsigned tp, time_t queryts,
483
dns_cent_t *cached, char udp, dlist *cb, dlist *sva, dlist *ar)
485
rr_set_t *crrset=cached->rr[tp-T_MIN];
487
if (crrset && crrset->rrs) {
489
rr_bucket_t *first=NULL; /* Initialized to inhibit compiler warning */
491
int rnd_recs=global.rnd_recs;
494
if (rnd_recs) b=first=randrr(crrset->rrs);
497
if (!add_rr(ans, sz, rrn, tp, ans_ttl(crrset,queryts),
498
b->rdlen, b->data, S_ANSWER, udp, cb))
500
if (tp==T_NS || tp==T_A || tp==T_AAAA) {
501
/* mark it as added */
502
if (!sva_add(sva,rrn,tp,b->rdlen,b->data))
505
/* Mark for additional address records. XXX: this should be a more effective algorithm; at least the list is small */
506
for (i=0;i<AR_NUM;i++) {
507
if (ar_recs[i]==tp) {
508
if (!add_ar(ar, RRETP_ADD,b->rdlen-ar_offs[i],((unsigned char *)(b->data))+ar_offs[i],
516
if(!b) b=crrset->rrs; /* wraparound */
525
* Add the fitting elements of the cached record to the message in ans, where ans
526
* is grown to fit, sz is the size of the packet and is modified to be the new size.
527
* The query is in qe.
528
* cb is the buffer used for message compression. *cb should be NULL if you call add_to_response
529
* the first time. It gets filled with a pointer to compression information that can be
530
* reused in subsequent calls to add_to_response.
532
static int add_to_response(dns_ans_t **ans, long *sz, unsigned char *rrn, unsigned qtype, time_t queryts,
533
dns_cent_t *cached, char udp, dlist *cb, dlist *sva, dlist *ar)
535
/* First of all, unless we have records of qtype, add cnames.
536
Well, actually, there should be at max one cname. */
537
if (qtype!=T_CNAME && qtype!=QT_ALL && !(qtype>=T_MIN && qtype<=T_MAX && have_rr(cached,qtype)))
538
if (!add_rrset(ans, sz, rrn, T_CNAME, queryts, cached, udp, cb, sva, ar))
541
/* We need no switch for qclass, since we already have filtered packets we cannot understand */
542
if (qtype==QT_AXFR || qtype==QT_IXFR) {
543
/* I do not know what to do in this case. Since we do not maintain zones (and since we are
544
no master server, so it is not our task), I just return an error message. If anyone
545
knows how to do this better, please notify me.
546
Anyway, this feature is rarely used in client communication, and there is no need for
547
other name servers to ask pdnsd. Btw: many bind servers reject an ?XFR query for security
550
} else if (qtype==QT_MAILB) {
551
if (!add_rrset(ans, sz, rrn, T_MB, queryts, cached, udp, cb, sva, ar))
553
if (!add_rrset(ans, sz, rrn, T_MG, queryts, cached, udp, cb, sva, ar))
555
if (!add_rrset(ans, sz, rrn, T_MR, queryts, cached, udp, cb, sva, ar))
557
} else if (qtype==QT_MAILA) {
558
if (!add_rrset(ans, sz, rrn, T_MD, queryts, cached, udp, cb, sva, ar))
560
if (!add_rrset(ans, sz, rrn, T_MF, queryts, cached, udp, cb, sva, ar))
562
} else if (qtype==QT_ALL) {
564
for (i=T_MIN;i<=T_MAX;i++) {
565
if (!add_rrset(ans, sz, rrn, i, queryts, cached, udp, cb, sva, ar))
568
} else if (qtype>=T_MIN && qtype<=T_MAX) {
569
if (!add_rrset(ans, sz, rrn, qtype, queryts, cached, udp, cb, sva, ar))
571
} else /* Shouldn't get here. */
574
if (!ntohs((*ans)->hdr.ancount)) {
575
/* Add a SOA if we have one and no other records are present in the answer.
576
* This is to aid caches so that they have a ttl. */
577
if (!add_rrset(ans, sz, rrn, T_SOA , queryts, cached, udp, cb, sva, ar))
587
static int add_additional_rr(dns_ans_t **ans, long *rlen, unsigned char *rhn, unsigned tp, time_t ttl,
588
unsigned dlen, void *data, int sect, char udp, dlist *cb, dlist *sva)
592
/* Check if already added; no double additionals */
593
for (st=dlist_first(*sva); st; st=dlist_next(st)) {
594
if (st->tp==tp && rhnicmp(st->nm,rhn) && st->dlen==dlen &&
595
(memcmp(skiprhn(st->nm),data, dlen)==0))
600
/* add_rr will do nothing when sz>512 bytes. */
601
if(!add_rr(ans, rlen, rhn, tp, ttl, dlen, data, sect, udp, cb))
603
/* mark it as added */
604
if (!sva_add(sva,rhn,tp,dlen,data))
611
* Add one or more additionals from an rr bucket.
613
static int add_additional_rrs(dns_ans_t **ans, long *rlen, unsigned char *rhn, unsigned tp, time_t ttl,
614
rr_bucket_t *rrb, int sect, char udp, dlist *cb, dlist *sva)
617
rr_bucket_t *first=NULL; /* Initialized to inhibit compiler warning */
618
int rnd_recs=global.rnd_recs;
621
if (rnd_recs) rr=first=randrr(rrb);
624
if (!add_additional_rr(ans, rlen, rhn, tp, ttl, rr->rdlen,rr->data, sect, udp, cb, sva))
628
if(!rr) rr=rrb; /* wraparound */
636
* The code below actually handles A and AAAA additionals.
638
static int add_additional_a(dns_ans_t **ans, long *rlen, unsigned char *rhn, time_t queryts,
639
char udp, dlist *cb, dlist *sva)
644
if ((ae=lookup_cache(rhn,NULL))) {
645
rr_set_t *rrset; rr_bucket_t *rr;
646
rrset=ae->rr[T_A-T_MIN];
647
if (rrset && (rr=rrset->rrs))
649
if (!add_additional_rrs(ans, rlen, rhn, T_A, ans_ttl(rrset,queryts),
650
rr, S_ADDITIONAL, udp, cb, sva))
655
rrset=ae->rr[T_AAAA-T_MIN];
656
if (rrset && (rr=rrset->rrs))
657
if (!add_additional_rrs(ans, rlen, rhn, T_AAAA, ans_ttl(rrset,queryts),
658
rr, S_ADDITIONAL, udp, cb, sva))
669
* Compose an answer message for the decoded query in q, hdr is the header of the dns request
670
* rlen is set to be the answer length.
672
static dns_ans_t *compose_answer(dlist q, dns_hdr_t *hdr, long *rlen, char udp)
678
time_t queryts=time(NULL);
683
ans=(dns_ans_t *)pdnsd_malloc(sizeof(dns_ans_t));
688
ans->hdr.opcode=OP_QUERY;
690
ans->hdr.tc=0; /* If tc is needed, it is set when the response is sent in udp_answer_thread. */
696
ans->hdr.rcode=RC_OK;
697
ans->hdr.qdcount=0; /* this is first filled in and will be modified */
702
*rlen=sizeof(dns_hdr_t);
703
/* first, add the query to the response */
704
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
706
dns_ans_t *nans=(dns_ans_t *)pdnsd_realloc(ans,ansoffset+*rlen+rhnlen(qe->query)+4);
711
unsigned char *p = ((unsigned char *)&ans->hdr) + *rlen;
712
/* the first name occurrence will not be compressed,
713
but the offset needs to be stored for future compressions */
714
if (!(qclen=compress_name(qe->query,p,*rlen,&cb)))
717
PUTINT16(qe->qtype,p);
718
PUTINT16(qe->qclass,p);
721
ans->hdr.qdcount=htons(ntohs(ans->hdr.qdcount)+1);
724
/* Barf if we get a query we cannot answer */
725
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
726
if (((qe->qtype<T_MIN || qe->qtype>T_MAX) &&
727
(qe->qtype!=QT_MAILB && qe->qtype!=QT_MAILA && qe->qtype!=QT_ALL)) ||
728
(qe->qclass!=C_IN && qe->qclass!=QC_ALL))
730
DEBUG_MSG("Unsupported QTYPE or QCLASS.\n");
731
ans->hdr.rcode=RC_NOTSUPP;
736
/* second, the answer section */
737
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
739
unsigned char qname[256];
741
rhncpy(qname,qe->query);
742
/* look if we have a cached copy. otherwise, perform a nameserver query. Same with timeout */
746
unsigned char c_soa=cundef;
747
if ((rc=dns_cached_resolve(qname,qe->qtype, &cached, MAX_HOPS,queryts,&c_soa))!=RC_OK) {
749
if(rc==RC_NAMEERR && c_soa!=cundef) {
750
/* Try to add a SOA record to the authority section. */
751
unsigned scnt=rhnsegcnt(qname);
752
if(c_soa<scnt && (cached=lookup_cache(skipsegs(qname,scnt-c_soa),NULL))) {
753
rr_set_t *rrset=cached->rr[T_SOA-T_MIN];
754
if (rrset && !(rrset->flags&CF_NEGATIVE)) {
756
for(rr=rrset->rrs; rr; rr=rr->next) {
757
if (!add_rr(&ans,rlen,cached->qname,T_SOA,ans_ttl(rrset,queryts),
758
rr->rdlen,rr->data,S_AUTHORITY,udp,&cb))
762
free_cent(cached DBG1);
768
if(!(cached->flags&DF_LOCAL))
771
if (!add_to_response(&ans,rlen,qname,qe->qtype,queryts,cached,udp,&cb,&sva,&ar))
773
if (hdr->rd && qe->qtype!=T_CNAME && qe->qtype!=QT_ALL &&
774
!(qe->qtype>=T_MIN && qe->qtype<=T_MAX && have_rr(cached,qe->qtype)) &&
775
follow_cname_chain(cached,qname))
776
/* The rd bit is set and the response does not contain records of the requested type,
777
* but the response does contain a cname, so repeat the inquiry with the cname.
778
* add_to_response() has already added the cname to the response.
779
* Because of follow_cname_chain(), qname now contains the last cname in the chain. */
782
/* maintain a list (ar) for authority records: We will add every name server that was
783
listed as authoritative in a reply we received (and only those) to this list.
784
This list will be used to fill the authority and additional sections of our own reply.
785
We only do this for the last record in a cname chain, to prevent answer bloat. */
788
if((qe->qtype>=T_MIN && qe->qtype<=T_MAX && !have_rr(cached,qe->qtype)) ||
789
(qe->qtype==QT_MAILB && !have_rr(cached,T_MB) && !have_rr(cached,T_MG) && !have_rr(cached,T_MR)) ||
790
(qe->qtype==QT_MAILA && !have_rr(cached,T_MD) && !have_rr(cached,T_MF)))
792
/* no record of requested type in the answer section. */
795
rrset=cached->rr[rretp-T_MIN];
796
if(rrset && (rrset->flags&CF_NEGATIVE))
799
/* Try to find a name server higher up the hierarchy .
801
dns_cent_t *prev=cached;
802
unsigned scnt=rhnsegcnt(prev->qname);
803
unsigned tcnt=(rretp==T_NS?prev->c_ns:prev->c_soa);
804
if((cached=lookup_cache((tcnt!=cundef && tcnt<scnt)?skipsegs(prev->qname,scnt-tcnt):prev->qname,NULL))) {
805
rrset=cached->rr[rretp-T_MIN];
806
if(rrset && (rrset->flags&CF_NEGATIVE))
809
if(!rrset && (prev->flags&DF_LOCAL)) {
810
unsigned char *nm=getlocalowner(prev->qname,rretp);
813
free_cent(cached DBG1);
816
if((cached=lookup_cache(nm,NULL)))
817
rrset=cached->rr[rretp-T_MIN];
820
free_cent(prev DBG1);
825
for (rr=rrset->rrs; rr; rr=rr->next) {
826
if (!add_ar(&ar, rretp, rr->rdlen,rr->data, cached->qname,
827
ans_ttl(rrset,queryts)))
831
hops=0; /* this will break the loop */
834
free_cent(cached DBG1);
842
/* Add the authority section */
843
for (rre=dlist_first(ar); rre; rre=dlist_next(rre)) {
844
if (rre->tp == T_NS || rre->tp == T_SOA) {
845
unsigned char *nm = rre->tnm + rre->tsz;
846
if (!add_additional_rr(&ans, rlen, nm, rre->tp, rre->ttl, rre->tsz, rre->tnm,
847
S_AUTHORITY, udp, &cb, &sva))
854
/* now add the name server addresses */
855
for (rre=dlist_first(ar); rre; rre=dlist_next(rre)) {
856
if (rre->tp == T_NS || rre->tp == RRETP_ADD) {
857
if (!add_additional_a(&ans, rlen, rre->tnm, queryts, udp, &cb, &sva))
866
/* You may not like goto's, but here we avoid lots of code duplication. */
868
free_cent(cached DBG1);
881
* Decode the query (the query message is in data and rlen bytes long) into q
882
* XXX: data needs to be aligned
884
static int decode_query(unsigned char *data, long rlen, dlist *qp)
887
dns_hdr_t *hdr=(dns_hdr_t *)data; /* aligned, so no prob. */
888
unsigned char *ptr=(unsigned char *)(hdr+1);
889
long sz=rlen-sizeof(dns_hdr_t);
891
uint16_t qdcount=ntohs(hdr->qdcount);
894
for (i=0;i<qdcount;i++) {
897
unsigned char qbuf[256];
898
res=decompress_name(data,rlen,&ptr,&sz,qbuf,&qlen);
901
if (i==0) /*not even one complete query*/
902
goto return_rc_format;
906
goto return_rc_format;
911
/* truncated in qtype or qclass */
912
DEBUG_MSG("decode_query: query truncated in qtype or qclass.\n");
914
if (i==0) /*not even one complete query*/
915
goto return_rc_format;
919
goto return_rc_format;
921
if(!(q=dlist_grow(q,sizeof(dns_queryel_t)+qlen)))
924
GETINT16(qe->qtype,ptr);
925
GETINT16(qe->qclass,ptr);
927
memcpy(qe->query,qbuf,qlen);
939
/* Make a dns error reply message
940
* Id is the query id and still in network order.
941
* opcode is the opcode to fill in, rescode is the result code to fill in.
943
static void mk_error_reply(unsigned short id, unsigned short opcode,unsigned short rescode,dns_hdr_t *rep)
963
* Analyze and answer the query in data. The answer is returned. rlen is at call the query length and at
964
* return the length of the answer. You have to free the answer after sending it.
966
static dns_ans_t *process_query(unsigned char *data, long *rlenp, char udp)
971
dlist q=NULL; /* Initialized to inhibit compiler warning. */
974
DEBUG_MSG("Received query.\n");
975
DEBUG_DUMP_DNS_MSG(NULL, data, rlen);
978
* We will ignore all records that come with a query, except for the actual query records.
979
* We will send back the query in the response. We will reject all non-queries, and
980
* some not supported thingies.
981
* If anyone notices behaviour that is not in standard conformance, please notify me!
983
hdr=(dns_hdr_t *)data;
985
DEBUG_MSG("Message too short.\n");
986
return NULL; /* message too short: no id provided. */
988
if (rlen<sizeof(dns_hdr_t)) {
989
DEBUG_MSG("Message too short.\n");
993
if (hdr->qr!=QR_QUERY) {
994
DEBUG_MSG("The QR bit indicates this is a response, not a query.\n");
995
return NULL; /* RFC says: discard */
997
if (hdr->opcode!=OP_QUERY) {
998
DEBUG_MSG("Not a standard query (opcode=%u).\n",hdr->opcode);
1003
DEBUG_MSG("Malformed query (nonzero Z bit).\n");
1007
if (!global.ignore_cd && hdr->cd!=0) {
1008
DEBUG_MSG("Malformed query (nonzero CD bit and ignore_cd=off).\n");
1012
if (hdr->rcode!=RC_OK) {
1013
DEBUG_MSG("Bad rcode(%u).\n",hdr->rcode);
1014
return NULL; /* discard (may cause error storms) */
1018
DEBUG_MSG("Query has a non-empty answer section!\n");
1024
DEBUG_MSG("Query has a non-empty authority section!\n");
1030
DEBUG_MSG("Query has a non-empty additional section!\n");
1035
res=decode_query(data,rlen,&q);
1044
DEBUG_MSG("Questions are:\n");
1045
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
1046
DEBUG_RHN_MSG("\tqc=%s (%u), qt=%s (%u), query=\"%s\"\n",get_cname(qe->qclass),qe->qclass,get_tname(qe->qtype),qe->qtype,RHN2STR(qe->query));
1050
DEBUG_MSG("Query contains no questions.\n");
1059
if (!(ans=compose_answer(q, hdr, rlenp, udp))) {
1060
/* An out of memory condition or similar could cause NULL output. Send failure notification */
1070
*rlenp=sizeof(dns_hdr_t);
1071
ans=pdnsd_malloc(sizeof(dns_ans_t));
1073
mk_error_reply(hdr->id,rlen>=3?hdr->opcode:OP_QUERY,res,&ans->hdr);
1075
else if (++da_mem_errs<=MEM_MAX_ERRS) {
1076
log_error("Out of memory in query processing.");
1082
* Called by *_answer_thread exit handler to clean up process count.
1084
inline static void decrease_procs()
1087
pthread_mutex_lock(&proc_lock);
1090
pthread_mutex_unlock(&proc_lock);
1093
static void udp_answer_thread_cleanup(void *data)
1100
* A thread opened to answer a query transmitted via udp. Data is a pointer to the structure udp_buf_t that
1101
* contains the received data and various other parameters.
1102
* After the query is answered, the thread terminates
1103
* XXX: data must point to a correctly aligned buffer
1105
static void *udp_answer_thread(void *data)
1109
struct cmsghdr *cmsg;
1110
#if defined(SRC_ADDR_DISC)
1111
char ctrl[CMSG_SPACE(sizeof(pkt_info_t))];
1113
long rlen=((udp_buf_t *)data)->len;
1114
/* XXX: process_query is assigned to this, this mallocs, so this points to aligned memory */
1117
pthread_cleanup_push(udp_answer_thread_cleanup, data);
1120
if (!global.strict_suid) {
1121
if (!run_as(global.run_as)) {
1127
pthread_mutex_lock(&proc_lock);
1128
if (procs<global.proc_limit)
1130
pthread_mutex_unlock(&proc_lock);
1135
pthread_mutex_unlock(&proc_lock);
1140
if ((err=pthread_setspecific(thrid_key, &thrid)) != 0) {
1141
if(++da_misc_errs<=MISC_MAX_ERRS)
1142
log_error("pthread_setspecific failed: %s",strerror(err));
1148
if (!(resp=process_query(((udp_buf_t *)data)->buf,&rlen,1))) {
1150
* A return value of NULL is a fatal error that prohibits even the sending of an error message.
1151
* logging is already done. Just exit the thread now.
1153
pthread_exit(NULL); /* data freed by cleanup handler */
1155
pthread_cleanup_push(free, resp);
1158
resp->hdr.tc=1; /*set truncated bit*/
1160
DEBUG_MSG("Outbound msg len %li, tc=%u, rc=\"%s\"\n",rlen,resp->hdr.tc,get_ename(resp->hdr.rcode));
1162
v.iov_base=(char *)&resp->hdr;
1166
#if (TARGET!=TARGET_CYGWIN)
1167
#if defined(SRC_ADDR_DISC)
1168
msg.msg_control=ctrl;
1169
msg.msg_controllen=sizeof(ctrl);
1171
msg.msg_control=NULL;
1172
msg.msg_controllen=0;
1174
msg.msg_flags=0; /* to avoid warning message by Valgrind */
1180
msg.msg_name=&((udp_buf_t *)data)->addr.sin4;
1181
msg.msg_namelen=sizeof(struct sockaddr_in);
1182
# if defined(SRC_ADDR_DISC)
1183
# if (TARGET==TARGET_LINUX)
1184
((udp_buf_t *)data)->pi.pi4.ipi_spec_dst=((udp_buf_t *)data)->pi.pi4.ipi_addr;
1185
cmsg=CMSG_FIRSTHDR(&msg);
1186
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
1187
cmsg->cmsg_level=SOL_IP;
1188
cmsg->cmsg_type=IP_PKTINFO;
1189
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.pi4,sizeof(struct in_pktinfo));
1190
msg.msg_controllen=CMSG_SPACE(sizeof(struct in_pktinfo));
1192
cmsg=CMSG_FIRSTHDR(&msg);
1193
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
1194
cmsg->cmsg_level=IPPROTO_IP;
1195
cmsg->cmsg_type=IP_RECVDSTADDR;
1196
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.ai4,sizeof(struct in_addr));
1197
msg.msg_controllen=CMSG_SPACE(sizeof(struct in_addr));
1202
char buf[ADDRSTR_MAXLEN];
1204
DEBUG_MSG("Answering to: %s", inet_ntop(AF_INET,&((udp_buf_t *)data)->addr.sin4.sin_addr,buf,ADDRSTR_MAXLEN));
1205
# if defined(SRC_ADDR_DISC)
1206
# if (TARGET==TARGET_LINUX)
1207
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET,&((udp_buf_t *)data)->pi.pi4.ipi_spec_dst,buf,ADDRSTR_MAXLEN));
1209
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET,&((udp_buf_t *)data)->pi.ai4,buf,ADDRSTR_MAXLEN));
1221
msg.msg_name=&((udp_buf_t *)data)->addr.sin6;
1222
msg.msg_namelen=sizeof(struct sockaddr_in6);
1223
# if defined(SRC_ADDR_DISC)
1224
cmsg=CMSG_FIRSTHDR(&msg);
1225
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in6_pktinfo));
1226
cmsg->cmsg_level=SOL_IPV6;
1227
cmsg->cmsg_type=IPV6_PKTINFO;
1228
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.pi6,sizeof(struct in6_pktinfo));
1229
msg.msg_controllen=CMSG_SPACE(sizeof(struct in6_pktinfo));
1233
char buf[ADDRSTR_MAXLEN];
1235
DEBUG_MSG("Answering to: %s", inet_ntop(AF_INET6,&((udp_buf_t *)data)->addr.sin6.sin6_addr,buf,ADDRSTR_MAXLEN));
1236
# if defined(SRC_ADDR_DISC)
1237
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET6,&((udp_buf_t *)data)->pi.pi6.ipi6_addr,buf,ADDRSTR_MAXLEN));
1246
/* Lock the socket, and clear the error flag before dropping the lock */
1247
#ifdef SOCKET_LOCKING
1248
pthread_mutex_lock(&s_lock);
1250
if (sendmsg(((udp_buf_t *)data)->sock,&msg,0)<0) {
1251
#ifdef SOCKET_LOCKING
1252
pthread_mutex_unlock(&s_lock);
1254
if (++da_udp_errs<=UDP_MAX_ERRS) {
1255
log_error("Error in udp send: %s",strerror(errno));
1259
socklen_t sl=sizeof(tmp);
1260
getsockopt(((udp_buf_t *)data)->sock, SOL_SOCKET, SO_ERROR, &tmp, &sl);
1261
#ifdef SOCKET_LOCKING
1262
pthread_mutex_unlock(&s_lock);
1266
pthread_cleanup_pop(1); /* free(resp) */
1267
pthread_cleanup_pop(1); /* free(data) */
1271
int init_udp_socket()
1277
struct sockaddr_in sin4;
1280
struct sockaddr_in6 sin6;
1287
if ((sock=socket(PF_INET,SOCK_DGRAM,IPPROTO_UDP))==-1) {
1288
log_error("Could not open udp socket: %s",strerror(errno));
1291
memset(&sin.sin4,0,sizeof(struct sockaddr_in));
1292
sin.sin4.sin_family=AF_INET;
1293
sin.sin4.sin_port=htons(global.port);
1294
sin.sin4.sin_addr=global.a.ipv4;
1295
SET_SOCKA_LEN4(sin.sin4);
1296
sinl=sizeof(struct sockaddr_in);
1301
if ((sock=socket(PF_INET6,SOCK_DGRAM,IPPROTO_UDP))==-1) {
1302
log_error("Could not open udp socket: %s",strerror(errno));
1305
memset(&sin.sin6,0,sizeof(struct sockaddr_in6));
1306
sin.sin6.sin6_family=AF_INET6;
1307
sin.sin6.sin6_port=htons(global.port);
1308
sin.sin6.sin6_flowinfo=IPV6_FLOWINFO;
1309
sin.sin6.sin6_addr=global.a.ipv6;
1310
SET_SOCKA_LEN6(sin.sin6);
1311
sinl=sizeof(struct sockaddr_in6);
1315
#ifdef SRC_ADDR_DISC
1316
# if (TARGET!=TARGET_LINUX)
1319
/* The following must be set on any case because it also applies for IPv4 packets sent to
1320
* ipv6 addresses. */
1321
# if (TARGET==TARGET_LINUX )
1322
if (setsockopt(sock,SOL_IP,IP_PKTINFO,&so,sizeof(so))!=0) {
1324
if (setsockopt(sock,IPPROTO_IP,IP_RECVDSTADDR,&so,sizeof(so))!=0) {
1326
log_error("Could not set options on udp socket: %s",strerror(errno));
1330
# if (TARGET!=TARGET_LINUX)
1336
if (setsockopt(sock,SOL_IPV6,IPV6_RECVPKTINFO,&so,sizeof(so))!=0) {
1338
#ifdef IPv6_RECVPKTINFO
1339
rc = setsockopt(sock,SOL_IPV6,IPV6_RECVPKTINFO,&so,sizeof(so));
1340
if(rc < 0 && errno == ENOPROTOOPT)
1342
#ifdef IPV6_2292PKTINFO
1343
rc = setsockopt(sock,SOL_IPV6,IPV6_2292PKTINFO,&so,sizeof(so));
1344
if(rc < 0 && errno == ENOPROTOOPT)
1346
rc = setsockopt(sock,SOL_IPV6,IPV6_PKTINFO,&so,sizeof(so));
1348
log_error("Could not set options on udp socket: %s",strerror(errno));
1355
if (bind(sock,(struct sockaddr *)&sin,sinl)!=0) {
1356
log_error("Could not bind to udp socket: %s",strerror(errno));
1364
* Listen on the specified port for udp packets and answer them (each in a new thread to be nonblocking)
1365
* This was changed to support sending UDP packets with exactly the same source address as they were coming
1366
* to us, as required by rfc2181. Although this is a sensible requirement, it is slightly more difficult
1367
* and may introduce portability issues.
1369
/*
 * Entry point of the UDP server thread.
 *
 * Receives datagrams with recvmsg() into a freshly allocated udp_buf_t and
 * hands each received buffer to a detached udp_answer_thread. When
 * SRC_ADDR_DISC is defined, the ancillary (control) data of every datagram is
 * inspected for the address the packet was sent to, which is stored in
 * buf->pi. The argument is not used.
 */
void *udp_server_thread(void *dummy)
1377
struct cmsghdr *cmsg;
1379
#if defined(ENABLE_IPV6) && (TARGET==TARGET_LINUX)
1380
/* Scratch copy of an IPv4 pktinfo control message; converted into buf->pi.pi6 below. */
struct in_pktinfo sip;
1382
/* (void)dummy; */ /* To inhibit "unused variable" warning */
1387
/* Without strict_suid, this thread calls run_as() itself (presumably to switch
 * to the configured user -- confirm against run_as). */
if (!global.strict_suid) {
1388
if (!run_as(global.run_as)) {
1396
/* One zero-initialized buffer per datagram; it is later passed to the answer thread. */
if (!(buf=(udp_buf_t *)pdnsd_calloc(1,sizeof(udp_buf_t)))) {
1397
/* Log only the first MEM_MAX_ERRS failures to avoid flooding the log. */
if (++da_mem_errs<=MEM_MAX_ERRS) {
1398
log_error("Out of memory in request handling.");
1405
/* Point the I/O vector at the payload area of the buffer. */
v.iov_base=(char *)buf->buf;
1406
v.iov_len=udp_buf_len;
1409
#if (TARGET!=TARGET_CYGWIN)
1410
/* Space for the ancillary data that carries the destination address information. */
msg.msg_control=ctrl;
1411
msg.msg_controllen=sizeof(ctrl);
1414
#if defined(SRC_ADDR_DISC)
1417
/* IPv4 receive path: the sender address is written to buf->addr.sin4. */
msg.msg_name=&buf->addr.sin4;
1418
msg.msg_namelen=sizeof(struct sockaddr_in);
1419
if ((qlen=recvmsg(sock,&msg,0))>=0) {
1420
/* Walk the control messages looking for the packet's destination address. */
cmsg=CMSG_FIRSTHDR(&msg);
1422
# if (TARGET==TARGET_LINUX)
1423
if (cmsg->cmsg_level==SOL_IP && cmsg->cmsg_type==IP_PKTINFO) {
1424
memcpy(&buf->pi.pi4,CMSG_DATA(cmsg),sizeof(struct in_pktinfo));
1428
if (cmsg->cmsg_level==IPPROTO_IP && cmsg->cmsg_type==IP_RECVDSTADDR) {
1429
memcpy(&buf->pi.ai4,CMSG_DATA(cmsg),sizeof(buf->pi.ai4));
1433
cmsg=CMSG_NXTHDR(&msg,cmsg);
1436
/* No matching control message was found: log (rate-limited) and drop the packet. */
if (++da_udp_errs<=UDP_MAX_ERRS) {
1437
log_error("Could not discover udp destination address");
1439
goto free_buf_continue;
1441
/* recvmsg() failed; an interrupted call (EINTR) is not reported. */
} else if (errno!=EINTR) {
1442
if (++da_udp_errs<=UDP_MAX_ERRS) {
1443
log_error("error in UDP recv: %s", strerror(errno));
1450
/* IPv6 receive path: the sender address is written to buf->addr.sin6. */
msg.msg_name=&buf->addr.sin6;
1451
msg.msg_namelen=sizeof(struct sockaddr_in6);
1452
if ((qlen=recvmsg(sock,&msg,0))>=0) {
1453
cmsg=CMSG_FIRSTHDR(&msg);
1455
if (cmsg->cmsg_level==SOL_IPV6 && cmsg->cmsg_type==IPV6_PKTINFO) {
1456
memcpy(&buf->pi.pi6,CMSG_DATA(cmsg),sizeof(struct in6_pktinfo));
1459
cmsg=CMSG_NXTHDR(&msg,cmsg);
1462
/* We might have an IPv4 Packet incoming on our IPv6 port, so we also have to
1463
* check for IPv4 sender addresses */
1464
cmsg=CMSG_FIRSTHDR(&msg);
1466
# if (TARGET==TARGET_LINUX)
1467
if (cmsg->cmsg_level==SOL_IP && cmsg->cmsg_type==IP_PKTINFO) {
1468
memcpy(&sip,CMSG_DATA(cmsg),sizeof(sip));
1469
/* Store the IPv4 address in the IPv6 pktinfo via IPV6_MAPIPV4 and carry over the interface index. */
IPV6_MAPIPV4(&sip.ipi_addr,&buf->pi.pi6.ipi6_addr);
1470
buf->pi.pi6.ipi6_ifindex=sip.ipi_ifindex;
1473
/* FIXME: What about BSD? probably ok, but... */
1475
cmsg=CMSG_NXTHDR(&msg,cmsg);
1478
if (++da_udp_errs<=UDP_MAX_ERRS) {
1479
log_error("Could not discover udp destination address");
1481
goto free_buf_continue;
1484
} else if (errno!=EINTR) {
1485
if (++da_udp_errs<=UDP_MAX_ERRS) {
1486
log_error("error in UDP recv: %s", strerror(errno));
1491
#else /* !SRC_ADDR_DISC */
1494
/* Without SRC_ADDR_DISC the datagram is simply received; control messages are not examined. */
msg.msg_name=&buf->addr.sin4;
1495
msg.msg_namelen=sizeof(struct sockaddr_in);
1496
qlen=recvmsg(sock,&msg,0);
1497
if (qlen<0 && errno!=EINTR) {
1498
if (++da_udp_errs<=UDP_MAX_ERRS) {
1499
log_error("error in UDP recv: %s", strerror(errno));
1506
msg.msg_name=&buf->addr.sin6;
1507
msg.msg_namelen=sizeof(struct sockaddr_in6);
1508
qlen=recvmsg(sock,&msg,0);
1509
if (qlen<0 && errno!=EINTR) {
1510
if (++da_udp_errs<=UDP_MAX_ERRS) {
1511
log_error("error in UDP recv: %s", strerror(errno));
1519
/* The thread counters are shared with other threads and are only touched under proc_lock. */
pthread_mutex_lock(&proc_lock);
1520
/* Spawn an answer thread only while running plus queued threads stay below the configured limits. */
if (qprocs<global.proc_limit+global.procq_limit) {
1522
++qprocs; ++spawned;
1523
pthread_mutex_unlock(&proc_lock);
1525
/* buf is passed to the detached answer thread as its argument. */
err=pthread_create(&pt,&attr_detached,udp_answer_thread,(void *)buf);
1528
if(++da_thrd_errs<=THRD_MAX_ERRS)
1529
log_warn("pthread_create failed: %s",strerror(err));
1530
/* If thread creation failed, free resources associated with it. */
1531
pthread_mutex_lock(&proc_lock);
1532
/* Undo the accounting done before the failed pthread_create(). */
--qprocs; --spawned;
1535
pthread_mutex_unlock(&proc_lock);
1544
/* report_thread_stat() treats a thread id equal to main_thrid as "not running". */
udps_thrid=main_thrid;
1550
#ifndef NO_TCP_SERVER
1552
/*
 * Cleanup handler for tcp_answer_thread (installed there with
 * pthread_cleanup_push). csock points to an int holding the client socket
 * descriptor, which is closed here.
 */
static void tcp_answer_thread_cleanup(void *csock)
1554
close(*((int *)csock));
1560
* Process a dns query via tcp. The argument is a pointer to the socket.
1562
/*
 * Thread body that serves one accepted TCP connection.
 *
 * csock points to an int holding the connected socket descriptor. The query
 * is read as a length prefix followed by that many bytes, passed to
 * process_query(), and the response is written back with write_all().
 * Resources (socket, query buffer, response) are released through pthread
 * cleanup handlers, so every pthread_exit() below also runs them.
 */
static void *tcp_answer_thread(void *csock)
1564
/* XXX: This should be OK, the original must be (and is) aligned */
1565
int sock=*((int *)csock);
1568
/* From here on the socket is closed by the cleanup handler on every exit path. */
pthread_cleanup_push(tcp_answer_thread_cleanup, csock);
1571
/* Without strict_suid, this thread calls run_as() itself (presumably to switch
 * to the configured user -- confirm against run_as). */
if (!global.strict_suid) {
1572
if (!run_as(global.run_as)) {
1578
/* The count of active query threads (procs) is checked against proc_limit under proc_lock. */
pthread_mutex_lock(&proc_lock);
1579
if (procs<global.proc_limit)
1581
pthread_mutex_unlock(&proc_lock);
1586
pthread_mutex_unlock(&proc_lock);
1591
/* Attach the thread-specific value stored under thrid_key to this thread. */
if ((err=pthread_setspecific(thrid_key, &thrid)) != 0) {
1592
if(++da_misc_errs<=MISC_MAX_ERRS)
1593
log_error("pthread_setspecific failed: %s",strerror(err));
1599
/* rfc1035 says we should process multiple queries in succession, so we are looping until
1600
* the socket is closed by the other side or by tcp timeout.
1601
* This in fact makes DoSing easier. If that is your concern, you should disable pdnsd's
1613
PDNSD_ASSERT(sock<FD_SETSIZE,"socket file descriptor exceeds FD_SETSIZE.");
1616
tv.tv_sec=global.tcp_qtimeout;
1617
if (select(sock+1,&fds,NULL,NULL,&tv)<=0)
1618
pthread_exit(NULL); /* socket is closed by cleanup handler */
1623
/* poll() variant of the same wait; the timeout is converted from seconds to milliseconds. */
if (poll(&pfd,1,global.tcp_qtimeout*1000)<=0)
1624
pthread_exit(NULL); /* socket is closed by cleanup handler */
1629
/* Read the length prefix of the query; it arrives in network byte order. */
if ((err=read(sock,&rlen_net,sizeof(rlen_net)))!=sizeof(rlen_net)) {
1630
DEBUG_MSG("Error while reading from TCP client: %s\n",err==-1?strerror(errno):"incomplete data");
1632
* If the socket timed or was closed before we even received the
1633
* query length, we cannot return an error. So exit silently.
1635
pthread_exit(NULL); /* socket is closed by cleanup handler */
1637
rlen=ntohs(rlen_net);
1640
log_error("TCP zero size query received.\n");
1643
/* Buffer for exactly the announced number of query bytes. */
buf=(unsigned char *)pdnsd_malloc(rlen);
1645
if (++da_mem_errs<=MEM_MAX_ERRS) {
1646
log_error("Out of memory in request handling.");
1648
pthread_exit(NULL); /* socket is closed by cleanup handler */
1650
/* Second cleanup handler: buf is freed on every exit path until it is popped below. */
pthread_cleanup_push(free, buf);
1659
tv.tv_sec=global.tcp_qtimeout;
1660
if (select(sock+1,&fds,NULL,NULL,&tv)<=0)
1661
pthread_exit(NULL); /* buf freed and socket closed by cleanup handlers */
1665
if (poll(&pfd,1,global.tcp_qtimeout*1000)<=0)
1666
pthread_exit(NULL); /* buf freed and socket closed by cleanup handlers */
1668
/* Append to the bytes already received (olen) until rlen bytes are present. */
rv=read(sock,buf+olen,rlen-olen);
1670
DEBUG_MSG("Error while reading from TCP client: %s\n",rv==-1?strerror(errno):"incomplete data");
1672
* If the promised length was not sent, we should return an error message,
1673
* but if read fails that way, it is unlikely that it will arrive. Nevertheless...
1675
if (olen>=2) { /* We need the id to send a valid reply. */
1677
/* The opcode is taken from the partial header only if enough bytes arrived; otherwise OP_QUERY is used. */
mk_error_reply(((dns_hdr_t*)buf)->id,
1678
olen>=3?((dns_hdr_t*)buf)->opcode:OP_QUERY,
1681
/* The error reply consists of a header only, so its length field is the header size. */
err.len=htons(sizeof(dns_hdr_t));
1682
write_all(sock,&err,sizeof(err)); /* error anyway. */
1684
pthread_exit(NULL); /* buf freed and socket closed by cleanup handlers */
1689
/* Build the answer; nlen receives the length of the response message. */
if (!(resp=process_query(buf,&nlen,0))) {
1691
* A return value of NULL is a fatal error that prohibits even the sending of an error message.
1692
* logging is already done. Just exit the thread now.
1696
pthread_cleanup_pop(1); /* free(buf) */
1697
/* The response replaces the query buffer as the resource guarded by the cleanup handler. */
pthread_cleanup_push(free,resp);
1700
/* Fill in the length prefix in network byte order; rsize is the total number of bytes to send. */
resp->len=htons(nlen);
1701
rsize=ansoffset+nlen;
1702
if ((err=write_all(sock,resp,rsize))!=rsize) {
1703
DEBUG_MSG("Error while writing to TCP client: %s\n",err==-1?strerror(errno):"unknown error");
1704
pthread_exit(NULL); /* resp is freed and socket is closed by cleanup handlers */
1707
pthread_cleanup_pop(1); /* free(resp) */
1709
/* Do not allow multiple queries in one sequence.*/
1714
/* socket is closed by cleanup handler */
1715
pthread_cleanup_pop(1);
1719
/*
 * Create the TCP server socket and bind it to the configured address.
 *
 * A PF_INET or PF_INET6 stream socket is opened and the bind address is built
 * from global.port and global.a.ipv4 / global.a.ipv6. SO_REUSEADDR is
 * requested before bind(); a failure to set it is only logged as a warning,
 * while socket() and bind() failures are logged as errors.
 * NOTE(review): the return statements are not visible in this part of the
 * file -- confirm the returned value against the callers.
 */
int init_tcp_socket()
1724
struct sockaddr_in sin4;
1727
struct sockaddr_in6 sin6;
1734
/* IPv4 variant. */
if ((sock=socket(PF_INET,SOCK_STREAM,IPPROTO_TCP))==-1) {
1735
log_error("Could not open tcp socket: %s",strerror(errno));
1738
/* Start from an all-zero address structure, then fill in family, port (network byte order) and address. */
memset(&sin.sin4,0,sizeof(struct sockaddr_in));
1739
sin.sin4.sin_family=AF_INET;
1740
sin.sin4.sin_port=htons(global.port);
1741
sin.sin4.sin_addr=global.a.ipv4;
1742
SET_SOCKA_LEN4(sin.sin4);
1743
/* sinl is the address length later passed to bind(). */
sinl=sizeof(struct sockaddr_in);
1748
/* IPv6 variant. */
if ((sock=socket(PF_INET6,SOCK_STREAM,IPPROTO_TCP))==-1) {
1749
log_error("Could not open tcp socket: %s",strerror(errno));
1752
memset(&sin.sin6,0,sizeof(struct sockaddr_in6));
1753
sin.sin6.sin6_family=AF_INET6;
1754
sin.sin6.sin6_port=htons(global.port);
1755
sin.sin6.sin6_flowinfo=IPV6_FLOWINFO;
1756
sin.sin6.sin6_addr=global.a.ipv6;
1757
SET_SOCKA_LEN6(sin.sin6);
1758
sinl=sizeof(struct sockaddr_in6);
1763
/* The SO_REUSEADDR socket option tells the kernel that even if this port
1764
is busy (in the TIME_WAIT state), go ahead and reuse it anyway. If it
1765
is busy, but with another state, we should get an address already in
1766
use error. It is useful if pdnsd is shut down, and then restarted right
1767
away while sockets are still active on its port. There is a slight risk
1768
though. If unexpected data comes in, it may confuse pdnsd, but while
1769
this is possible, it is not likely.
1771
if(setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&so,sizeof(so)))
1772
log_warn("Could not set options on tcp socket: %s",strerror(errno));
1774
if (bind(sock,(struct sockaddr *)&sin,sinl)) {
1775
log_error("Could not bind tcp socket: %s",strerror(errno));
1783
* Listen on the specified port for tcp connects and answer them (each in a new thread to be nonblocking)
1785
/*
 * Entry point of the TCP server thread.
 *
 * Puts the server socket into listening state, accepts connections and hands
 * each accepted socket to a detached tcp_answer_thread. The argument is not
 * used.
 */
void *tcp_server_thread(void *p)
1791
/* (void)p; */ /* To inhibit "unused variable" warning */
1795
/* Without strict_suid, this thread calls run_as() itself (presumably to switch
 * to the configured user -- confirm against run_as). */
if (!global.strict_suid) {
1796
if (!run_as(global.run_as)) {
1803
/* Start listening with a backlog of 5 pending connections. */
if (listen(sock,5)) {
1804
/* Log only the first TCP_MAX_ERRS failures to avoid flooding the log. */
if (++da_tcp_errs<=TCP_MAX_ERRS) {
1805
log_error("Could not listen on tcp socket: %s",strerror(errno));
1807
goto close_sock_return;
1811
/* The client descriptor lives on the heap because a pointer to it is handed to the answer thread. */
if (!(csock=(int *)pdnsd_malloc(sizeof(int)))) {
1812
if (++da_mem_errs<=MEM_MAX_ERRS) {
1813
log_error("Out of memory in request handling.");
1817
/* The peer address is not requested from accept(). */
if ((*csock=accept(sock,NULL,0))==-1) {
1818
/* An interrupted accept (EINTR) is not reported. */
if (errno!=EINTR && ++da_tcp_errs<=TCP_MAX_ERRS) {
1819
log_error("tcp accept failed: %s",strerror(errno));
1823
* With creating a new thread, we follow recommendations
1824
* in rfc1035 not to block
1826
/* The thread counters are shared with other threads and are only touched under proc_lock. */
pthread_mutex_lock(&proc_lock);
1827
/* Spawn an answer thread only while running plus queued threads stay below the configured limits. */
if (qprocs<global.proc_limit+global.procq_limit) {
1829
++qprocs; ++spawned;
1830
pthread_mutex_unlock(&proc_lock);
1831
err=pthread_create(&pt,&attr_detached,tcp_answer_thread,(void *)csock);
1834
if(++da_thrd_errs<=THRD_MAX_ERRS)
1835
log_warn("pthread_create failed: %s",strerror(err));
1836
/* If thread creation failed, free resources associated with it. */
1837
pthread_mutex_lock(&proc_lock);
1838
/* Undo the accounting done before the failed pthread_create(). */
--qprocs; --spawned;
1841
pthread_mutex_unlock(&proc_lock);
1850
/* report_thread_stat() treats a thread id equal to main_thrid as "not running". */
tcps_thrid=main_thrid;
1858
* Starts the tcp server thread and the udp server thread. Both threads
1859
* are not terminated, so only a signal can interrupt the server.
1861
/*
 * Start the server threads.
 *
 * For each server socket that is available (not -1), a detached thread running
 * tcp_server_thread or udp_server_thread is created. A creation failure is
 * logged as an error; success is logged at info level 2. The TCP part is
 * compiled only when NO_TCP_SERVER is not defined.
 */
void start_dns_servers()
1864
#ifndef NO_TCP_SERVER
1865
/* Skip the TCP server if no TCP socket is available. */
if (tcp_socket!=-1) {
1868
if (pthread_create(&tcps,&attr_detached,tcp_server_thread,NULL)) {
1869
log_error("Could not create tcp server thread. Exiting.");
1873
log_info(2,"tcp server thread started.");
1878
/* Skip the UDP server if no UDP socket is available. */
if (udp_socket!=-1) {
1881
if (pthread_create(&udps,&attr_detached,udp_server_thread,NULL)) {
1882
log_error("Could not create udp server thread. Exiting.");
1886
log_info(2,"udp server thread started.");
1892
/* Report the thread status to the file descriptor f, for the status fifo (see status.c) */
1893
int report_thread_stat(int f)
1895
unsigned long nspawned,ndropped;
1896
int nactive,ncurrent,nqueued;
1898
/* The thread counters are volatile, so we will make copies
1899
under locked conditions to make sure we get consistent data.
1901
pthread_mutex_lock(&proc_lock);
1902
nspawned=spawned; ndropped=dropped;
1903
nactive=procs; ncurrent=qprocs;
1904
nqueued=ncurrent-nactive;
1905
pthread_mutex_unlock(&proc_lock);
1907
fsprintf_or_return(f,"\nThread status:\n==============\n");
1908
if(!pthread_equal(servstat_thrid,main_thrid))
1909
fsprintf_or_return(f,"server status thread is running.\n");
1910
if(!pthread_equal(statsock_thrid,main_thrid))
1911
fsprintf_or_return(f,"pdnsd control thread is running.\n");
1912
if(!pthread_equal(tcps_thrid,main_thrid))
1913
fsprintf_or_return(f,"tcp server thread is running.\n");
1914
if(!pthread_equal(udps_thrid,main_thrid))
1915
fsprintf_or_return(f,"udp server thread is running.\n");
1916
fsprintf_or_return(f,"%lu query threads spawned in total (%lu queries dropped).\n",
1918
fsprintf_or_return(f,"%i running query threads (%i active, %i queued).\n",
1919
ncurrent,nactive,nqueued);