1
/* dns_answer.c - Receive and process incoming dns queries.
3
Copyright (C) 2000, 2001 Thomas Moestl
4
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009 Paul A. Rombouts
6
This file is part of the pdnsd package.
8
pdnsd is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 3 of the License, or
11
(at your option) any later version.
13
pdnsd is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with pdnsd; see the file COPYING. If not, see
20
<http://www.gnu.org/licenses/>.
26
* There are several standard conformity issues noted in the comments.
27
* Some additional comments:
29
* I always set RA but I ignore RD largely (in everything but CNAME recursion),
30
* not because it is not supported, but because I _always_ do a recursive
31
* resolve in order to be able to cache the results.
38
#include <sys/types.h>
39
#ifdef HAVE_SYS_POLL_H
42
#include <sys/param.h>
55
#include "dns_answer.h"
56
#include "dns_query.h"
62
#if !defined(lint) && !defined(NO_RCSIDS)
63
static char rcsid[]="$Id: dns_answer.c,v 1.60 2002/08/07 08:55:33 tmm Exp $";
67
* This is for error handling to prevent spewing the log files.
68
* Maximums of different message types are set.
69
* Races do not really matter here, so no locks.
71
/* Logging caps, one per message category. In the visible uses (udp, mem, misc)
 * an error is logged only while ++da_X_errs<=X_MAX_ERRS, so at most X_MAX_ERRS
 * messages of a category reach the log. */
#define TCP_MAX_ERRS 10
72
#define UDP_MAX_ERRS 10
73
#define MEM_MAX_ERRS 10
74
#define THRD_MAX_ERRS 10
75
#define MISC_MAX_ERRS 10
76
static volatile unsigned long da_tcp_errs=0; /* NOTE(review): presumably paired with TCP_MAX_ERRS; no use visible here */
77
static volatile unsigned long da_udp_errs=0; /* bumped on udp send/recv errors and on failure to discover the destination address */
78
static volatile unsigned long da_mem_errs=0; /* bumped on out-of-memory conditions */
79
static volatile unsigned long da_thrd_errs=0; /* NOTE(review): presumably paired with THRD_MAX_ERRS; no use visible here */
81
static volatile unsigned long da_misc_errs=0; /* bumped e.g. when pthread_setspecific fails */
83
static volatile int procs=0; /* active query processes */
84
static volatile int qprocs=0; /* queued query processes */
85
static volatile unsigned long dropped=0,spawned=0; /* NOTE(review): presumably totals of dropped and spawned query threads - confirm */
86
static volatile unsigned thrid_cnt=0; /* NOTE(review): presumably the source of per-thread ids (cf. thrid_key) - confirm */
87
static pthread_mutex_t proc_lock = PTHREAD_MUTEX_INITIALIZER; /* held while procs is compared against global.proc_limit and in decrease_procs() */
90
static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER; /* with SOCKET_LOCKING: held around sendmsg()/getsockopt() on the udp socket */
95
# if (TARGET==TARGET_LINUX)
96
struct in_pktinfo pi4;
102
struct in6_pktinfo pi6;
106
#define udp_buf_len 512
111
struct sockaddr_in sin4;
114
struct sockaddr_in6 sin6;
123
unsigned char buf[udp_buf_len];
128
#ifndef NO_TCP_SERVER
132
} __attribute__((packed)) dns_ans_t;
141
unsigned short qtype;
142
unsigned short qclass;
143
unsigned char query[0];
148
#define S_AUTHORITY 2
149
#define S_ADDITIONAL 3
152
unsigned short tp,dlen;
154
/* unsigned char data[0]; */
159
* Mark an additional record as added to avoid double records.
161
static int sva_add(dlist *sva, const unsigned char *rhn, unsigned short tp, unsigned short dlen, void* data)
164
size_t rlen=rhnlen(rhn);
166
if (!(*sva=dlist_grow(*sva,sizeof(sva_t)+rlen+dlen))) {
172
memcpy(mempcpy(st->nm,rhn,rlen),data,dlen);
177
/* ans_ttl computes the ttl value to return to the client.
178
This is the ttl value stored in the cache entry minus the time
179
the cache entry has lived in the cache.
180
Local cache entries are an exception, they never "age".
182
inline static time_t ans_ttl(rr_set_t *rrset, time_t queryts)
184
time_t ttl= rrset->ttl;
186
if (!(rrset->flags&CF_LOCAL)) {
187
time_t tpassed= queryts - rrset->ts;
188
if(tpassed<0) tpassed=0;
195
/* follow_cname_chain takes a cache entry and a buffer (must be at least 256 bytes),
196
and copies the name indicated by the first cname record in the cache entry.
197
The name is returned in length-byte string notation.
198
follow_cname_chain returns 1 if a cname record is found, otherwise 0.
200
inline static int follow_cname_chain(dns_cent_t *c, unsigned char *name)
202
rr_set_t *rrset=c->rr[T_CNAME-T_MIN];
204
if (!rrset || !(rr=rrset->rrs))
206
PDNSD_ASSERT(rr->rdlen <= 256, "follow_cname_chain: record too long");
207
memcpy(name,rr->data,rr->rdlen);
213
* Add data from a rr_bucket_t (as in cache) into a dns message in ans. Ans is grown
214
* to fit, sz is the old size of the packet (it is modified so at the end of the procedure
215
* it is the new size), type is the rr type and ltime is the time in seconds the record is
217
* cb is the buffer used for message compression. *cb should be NULL when you call compress_name
218
* or add_to_response the first time.
219
* It gets filled with a pointer to compression information that can be reused in subsequent calls
220
* to add_to_response.
221
* sect is the section (S_ANSWER, S_AUTHORITY or S_ADDITIONAL) in which the record
222
* belongs logically. Note that you still have to add the rrs in the right order (answer rrs first,
223
* then authority and last additional).
225
static int add_rr(dns_ans_t **ans, long *sz, unsigned char *rrn, unsigned short type, uint32_t ttl,
226
int dlen, void *data, char section, char udp, dlist *cb)
228
int ilen,blen,osz,rdlen;
234
unsigned char nbuf[256];
236
if (!(nlen=compress_name(rrn,nbuf,*sz,cb)))
239
/* This buffer is over-allocated usually due to compression. Never mind, just a few bytes,
240
* and the buffer is freed soon*/
242
dns_ans_t *nans=(dns_ans_t *)pdnsd_realloc(*ans,ansoffset+*sz+nlen+sizeof_rr_hdr_t+dlen);
247
memcpy((unsigned char *)(&(*ans)->hdr)+*sz,nbuf,nlen);
251
/* the rr header will be filled in later. Just reserve some space for it. */
252
rrht=((unsigned char *)(&(*ans)->hdr))+(*sz);
253
*sz+=sizeof_rr_hdr_t;
264
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
266
PDNSD_ASSERT(rdlen <= dlen, "T_CNAME/T_MB/...: got longer");
273
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
276
ilen=rhnlen((unsigned char *)data);
277
PDNSD_ASSERT(rdlen <= ilen, "T_MINFO/T_RP: got longer");
278
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
281
PDNSD_ASSERT(rdlen <= dlen, "T_MINFO/T_RP: got longer");
290
PDNSD_ASSERT(dlen > 2, "T_MX/T_AFSDB/...: rr botch");
291
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,2);
293
if (!(blen=compress_name(((unsigned char *)data)+2, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
296
PDNSD_ASSERT(rdlen <= dlen, "T_MX/T_AFSDB/...: got longer");
300
if (!(rdlen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
303
ilen=rhnlen((unsigned char *)data);
304
PDNSD_ASSERT(rdlen <= ilen, "T_SOA: got longer");
305
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
309
ilen+=rhnlen(((unsigned char *)data)+ilen);
310
PDNSD_ASSERT(rdlen <= ilen, "T_SOA: got longer");
311
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data)+ilen,20);
313
PDNSD_ASSERT(rdlen <= dlen, "T_SOA: rr botch");
318
PDNSD_ASSERT(dlen > 2, "T_PX: rr botch");
319
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,2);
322
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
326
ilen+=rhnlen(((unsigned char *)data)+ilen);
327
PDNSD_ASSERT(rdlen <= ilen, "T_PX: got longer");
328
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
331
PDNSD_ASSERT(rdlen <= dlen, "T_PX: got longer");
335
PDNSD_ASSERT(dlen > 6, "T_SRV: rr botch");
336
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),(unsigned char *)data,6);
338
if (!(blen=compress_name(((unsigned char *)data)+6, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
341
PDNSD_ASSERT(rdlen <= dlen, "T_SRV: got longer");
345
if (!(blen=compress_name(((unsigned char *)data), ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
349
ilen=rhnlen((unsigned char *)data);
350
PDNSD_ASSERT(rdlen <= ilen, "T_NXT: got longer");
351
PDNSD_ASSERT(dlen >= ilen, "T_NXT: rr botch");
353
int wlen = dlen - ilen;
354
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data)+ilen,wlen);
360
PDNSD_ASSERT(dlen > 4, "T_NAPTR: rr botch");
365
ilen += ((int)*(((unsigned char *)data)+ilen)) + 1;
366
PDNSD_ASSERT(dlen > ilen, "T_NAPTR: rr botch 2");
369
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data),ilen);
372
if (!(blen=compress_name(((unsigned char *)data)+ilen, ((unsigned char *)(&(*ans)->hdr))+(*sz),*sz,cb)))
375
PDNSD_ASSERT(rdlen <= dlen, "T_NAPTR: got longer");
380
memcpy(((unsigned char *)(&(*ans)->hdr))+(*sz),((unsigned char *)data),dlen);
385
if (udp && (*sz)>512 && section==S_ADDITIONAL) /* only add the record if we do not increase the length over 512 */
386
*sz=osz; /* in additionals for udp answer*/
391
PUTINT16(rdlen,rrht);
395
(*ans)->hdr.ancount=htons(ntohs((*ans)->hdr.ancount)+1);
398
(*ans)->hdr.nscount=htons(ntohs((*ans)->hdr.nscount)+1);
401
(*ans)->hdr.arcount=htons(ntohs((*ans)->hdr.arcount)+1);
409
typedef struct rre_s {
411
unsigned short tsz; /* Size of tnm field */
412
uint32_t ttl; /* ttl of the record in the answer (if tp==T_NS or T_SOA) */
413
unsigned char tnm[0]; /* Name for the domain a record refers to */
414
/* unsigned char nm[0]; */ /* Name of the domain the record is for (if tp==T_NS or T_SOA) */
418
/* types for the tp field */
419
/* #define RRETP_NS T_NS */ /* For name server: add to authority, add address to additional. */
420
/* #define RRETP_SOA T_SOA */ /* For SOA record: add to authority. */
421
#define RRETP_ADD 0 /* For other records: add the address of buf to additional */
423
static int add_ar(dlist *ar,unsigned short tp, unsigned short tsz,void *tnm,unsigned char *nm, uint32_t ttl)
427
size_t nmsz=0,size=sizeof(rr_ext_t)+tsz;
428
if(tp==T_NS || tp==T_SOA) {
432
if (!(*ar=dlist_grow(*ar,size)))
438
p=mempcpy(re->tnm,tnm,tsz);
439
if(tp==T_NS || tp==T_SOA) {
446
/* Select a random rr record from a list. */
447
inline static rr_bucket_t *randrr(rr_bucket_t *rrb)
452
/* In order to have an equal chance for each record to be selected, we have to count first. */
453
for(rr=rrb; rr; rr=rr->next) ++cnt;
455
/* We do not use the pdnsd random functions (these might use /dev/urandom if the user is paranoid,
456
* and we do not need any good PRNG here). */
457
if(cnt) for(cnt=random()%cnt; cnt; --cnt) rrb=rrb->next;
467
static const int ar_recs[AR_NUM]={T_NS, T_MD, T_MF, T_MB, T_MX
472
/* offsets from record data start to server name */
473
static const int ar_offs[AR_NUM]={0,0,0,0,2
479
/* This adds an rrset, optionally randomizing the first element it adds.
480
* If that is done, all rrs after the randomized one appear in order, starting from
481
* that one and wrapping over if needed. */
482
static int add_rrset(dns_ans_t **ans, long *sz, unsigned char *rrn, unsigned tp, time_t queryts,
483
dns_cent_t *cached, char udp, dlist *cb, dlist *sva, dlist *ar)
485
rr_set_t *crrset=cached->rr[tp-T_MIN];
487
if (crrset && crrset->rrs) {
489
rr_bucket_t *first=NULL; /* Initialized to inhibit compiler warning */
491
int rnd_recs=global.rnd_recs;
494
if (rnd_recs) b=first=randrr(crrset->rrs);
497
if (!add_rr(ans, sz, rrn, tp, ans_ttl(crrset,queryts),
498
b->rdlen, b->data, S_ANSWER, udp, cb))
500
if (tp==T_NS || tp==T_A || tp==T_AAAA) {
501
/* mark it as added */
502
if (!sva_add(sva,rrn,tp,b->rdlen,b->data))
505
/* Mark for additional address records. XXX: this should be a more effective algorithm; at least the list is small */
506
for (i=0;i<AR_NUM;i++) {
507
if (ar_recs[i]==tp) {
508
if (!add_ar(ar, RRETP_ADD,b->rdlen-ar_offs[i],((unsigned char *)(b->data))+ar_offs[i],
516
if(!b) b=crrset->rrs; /* wraparound */
525
* Add the fitting elements of the cached record to the message in ans, where ans
526
* is grown to fit, sz is the size of the packet and is modified to be the new size.
527
* The name to answer for is in rrn and the query type in qtype.
528
* cb is the buffer used for message compression. *cb should be NULL if you call add_to_response
529
* the first time. It gets filled with a pointer to compression information that can be
530
* reused in subsequent calls to add_to_response.
532
static int add_to_response(dns_ans_t **ans, long *sz, unsigned char *rrn, unsigned qtype, time_t queryts,
533
dns_cent_t *cached, char udp, dlist *cb, dlist *sva, dlist *ar)
535
/* First of all, unless we have records of qtype, add cnames.
536
Well, actually, there should be at most one cname. */
537
if (qtype!=T_CNAME && qtype!=QT_ALL && !(qtype>=T_MIN && qtype<=T_MAX && have_rr(cached,qtype)))
538
if (!add_rrset(ans, sz, rrn, T_CNAME, queryts, cached, udp, cb, sva, ar))
541
/* We need no switch for qclass, since we already have filtered packets we cannot understand */
542
if (qtype==QT_AXFR || qtype==QT_IXFR) {
543
/* I do not know what to do in this case. Since we do not maintain zones (and since we are
544
no master server, so it is not our task), I just return an error message. If anyone
545
knows how to do this better, please notify me.
546
Anyway, this feature is rarely used in client communication, and there is no need for
547
other name servers to ask pdnsd. Btw: many bind servers reject an ?XFR query for security
550
} else if (qtype==QT_MAILB) {
551
if (!add_rrset(ans, sz, rrn, T_MB, queryts, cached, udp, cb, sva, ar))
553
if (!add_rrset(ans, sz, rrn, T_MG, queryts, cached, udp, cb, sva, ar))
555
if (!add_rrset(ans, sz, rrn, T_MR, queryts, cached, udp, cb, sva, ar))
557
} else if (qtype==QT_MAILA) {
558
if (!add_rrset(ans, sz, rrn, T_MD, queryts, cached, udp, cb, sva, ar))
560
if (!add_rrset(ans, sz, rrn, T_MF, queryts, cached, udp, cb, sva, ar))
562
} else if (qtype==QT_ALL) {
564
for (i=T_MIN;i<=T_MAX;i++) {
565
if (!add_rrset(ans, sz, rrn, i, queryts, cached, udp, cb, sva, ar))
568
} else if (qtype>=T_MIN && qtype<=T_MAX) {
569
if (!add_rrset(ans, sz, rrn, qtype, queryts, cached, udp, cb, sva, ar))
571
} else /* Shouldn't get here. */
574
if (!ntohs((*ans)->hdr.ancount)) {
575
/* Add a SOA if we have one and no other records are present in the answer.
576
* This is to aid caches so that they have a ttl. */
577
if (!add_rrset(ans, sz, rrn, T_SOA , queryts, cached, udp, cb, sva, ar))
587
static int add_additional_rr(dns_ans_t **ans, long *rlen, unsigned char *rhn, unsigned tp, time_t ttl,
588
unsigned dlen, void *data, int sect, char udp, dlist *cb, dlist *sva)
592
/* Check if already added; no double additionals */
593
for (st=dlist_first(*sva); st; st=dlist_next(st)) {
594
if (st->tp==tp && rhnicmp(st->nm,rhn) && st->dlen==dlen &&
595
(memcmp(skiprhn(st->nm),data, dlen)==0))
600
/* add_rr will do nothing when sz>512 bytes. */
601
if(!add_rr(ans, rlen, rhn, tp, ttl, dlen, data, sect, udp, cb))
603
/* mark it as added */
604
if (!sva_add(sva,rhn,tp,dlen,data))
611
* Add one or more additionals from an rr bucket.
613
static int add_additional_rrs(dns_ans_t **ans, long *rlen, unsigned char *rhn, unsigned tp, time_t ttl,
614
rr_bucket_t *rrb, int sect, char udp, dlist *cb, dlist *sva)
617
rr_bucket_t *first=NULL; /* Initialized to inhibit compiler warning */
618
int rnd_recs=global.rnd_recs;
621
if (rnd_recs) rr=first=randrr(rrb);
624
if (!add_additional_rr(ans, rlen, rhn, tp, ttl, rr->rdlen,rr->data, sect, udp, cb, sva))
628
if(!rr) rr=rrb; /* wraparound */
636
* The code below actually handles A and AAAA additionals.
638
static int add_additional_a(dns_ans_t **ans, long *rlen, unsigned char *rhn, time_t queryts,
639
char udp, dlist *cb, dlist *sva)
644
if ((ae=lookup_cache(rhn,NULL))) {
645
rr_set_t *rrset; rr_bucket_t *rr;
646
rrset=ae->rr[T_A-T_MIN];
647
if (rrset && (rr=rrset->rrs))
649
if (!add_additional_rrs(ans, rlen, rhn, T_A, ans_ttl(rrset,queryts),
650
rr, S_ADDITIONAL, udp, cb, sva))
655
rrset=ae->rr[T_AAAA-T_MIN];
656
if (rrset && (rr=rrset->rrs))
657
if (!add_additional_rrs(ans, rlen, rhn, T_AAAA, ans_ttl(rrset,queryts),
658
rr, S_ADDITIONAL, udp, cb, sva))
669
* Compose an answer message for the decoded query in q, hdr is the header of the dns request
670
* rlen is set to be the answer length.
672
static dns_ans_t *compose_answer(dlist q, dns_hdr_t *hdr, long *rlen, char udp)
678
time_t queryts=time(NULL);
683
ans=(dns_ans_t *)pdnsd_malloc(sizeof(dns_ans_t));
688
ans->hdr.opcode=OP_QUERY;
690
ans->hdr.tc=0; /* If tc is needed, it is set when the response is sent in udp_answer_thread. */
696
ans->hdr.rcode=RC_OK;
697
ans->hdr.qdcount=0; /* this is first filled in and will be modified */
702
*rlen=sizeof(dns_hdr_t);
703
/* first, add the query to the response */
704
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
706
dns_ans_t *nans=(dns_ans_t *)pdnsd_realloc(ans,ansoffset+*rlen+rhnlen(qe->query)+4);
711
unsigned char *p = ((unsigned char *)&ans->hdr) + *rlen;
712
/* the first name occurrence will not be compressed,
713
but the offset needs to be stored for future compressions */
714
if (!(qclen=compress_name(qe->query,p,*rlen,&cb)))
717
PUTINT16(qe->qtype,p);
718
PUTINT16(qe->qclass,p);
721
ans->hdr.qdcount=htons(ntohs(ans->hdr.qdcount)+1);
724
/* Barf if we get a query we cannot answer */
725
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
726
if (((qe->qtype<T_MIN || qe->qtype>T_MAX) &&
727
(qe->qtype!=QT_MAILB && qe->qtype!=QT_MAILA && qe->qtype!=QT_ALL)) ||
728
(qe->qclass!=C_IN && qe->qclass!=QC_ALL))
730
DEBUG_MSG("Unsupported QTYPE or QCLASS.\n");
731
ans->hdr.rcode=RC_NOTSUPP;
736
/* second, the answer section */
737
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
739
unsigned char qname[256];
741
rhncpy(qname,qe->query);
742
/* look if we have a cached copy. otherwise, perform a nameserver query. Same with timeout */
746
unsigned char c_soa=cundef;
747
if ((rc=dns_cached_resolve(qname,qe->qtype, &cached, MAX_HOPS,queryts,&c_soa))!=RC_OK) {
749
if(rc==RC_NAMEERR && c_soa!=cundef) {
750
/* Try to add a SOA record to the authority section. */
751
unsigned scnt=rhnsegcnt(qname);
752
if(c_soa<scnt && (cached=lookup_cache(skipsegs(qname,scnt-c_soa),NULL))) {
753
rr_set_t *rrset=cached->rr[T_SOA-T_MIN];
754
if (rrset && !(rrset->flags&CF_NEGATIVE)) {
756
for(rr=rrset->rrs; rr; rr=rr->next) {
757
if (!add_rr(&ans,rlen,cached->qname,T_SOA,ans_ttl(rrset,queryts),
758
rr->rdlen,rr->data,S_AUTHORITY,udp,&cb))
762
free_cent(cached DBG1);
768
if(!(cached->flags&DF_LOCAL))
771
if (!add_to_response(&ans,rlen,qname,qe->qtype,queryts,cached,udp,&cb,&sva,&ar))
773
if (hdr->rd && qe->qtype!=T_CNAME && qe->qtype!=QT_ALL &&
774
!(qe->qtype>=T_MIN && qe->qtype<=T_MAX && have_rr(cached,qe->qtype)) &&
775
follow_cname_chain(cached,qname))
776
/* The rd bit is set and the response does not contain records of the requested type,
777
* but the response does contain a cname, so repeat the inquiry with the cname.
778
* add_to_response() has already added the cname to the response.
779
* Because of follow_cname_chain(), qname now contains the last cname in the chain. */
782
/* maintain a list (ar) for authority records: We will add every name server that was
783
listed as authoritative in a reply we received (and only those) to this list.
784
This list will be used to fill the authority and additional sections of our own reply.
785
We only do this for the last record in a cname chain, to prevent answer bloat. */
788
if((qe->qtype>=T_MIN && qe->qtype<=T_MAX && !have_rr(cached,qe->qtype)) ||
789
(qe->qtype==QT_MAILB && !have_rr(cached,T_MB) && !have_rr(cached,T_MG) && !have_rr(cached,T_MR)) ||
790
(qe->qtype==QT_MAILA && !have_rr(cached,T_MD) && !have_rr(cached,T_MF)))
792
/* no record of requested type in the answer section. */
795
rrset=cached->rr[rretp-T_MIN];
796
if(rrset && (rrset->flags&CF_NEGATIVE))
799
/* Try to find a name server higher up the hierarchy.
801
dns_cent_t *prev=cached;
802
unsigned scnt=rhnsegcnt(prev->qname);
803
unsigned tcnt=(rretp==T_NS?prev->c_ns:prev->c_soa);
804
if((cached=lookup_cache((tcnt!=cundef && tcnt<scnt)?skipsegs(prev->qname,scnt-tcnt):prev->qname,NULL))) {
805
rrset=cached->rr[rretp-T_MIN];
806
if(rrset && (rrset->flags&CF_NEGATIVE))
809
if(!rrset && (prev->flags&DF_LOCAL)) {
810
unsigned char *nm=getlocalowner(prev->qname,rretp);
813
free_cent(cached DBG1);
816
if((cached=lookup_cache(nm,NULL)))
817
rrset=cached->rr[rretp-T_MIN];
820
free_cent(prev DBG1);
825
for (rr=rrset->rrs; rr; rr=rr->next) {
826
if (!add_ar(&ar, rretp, rr->rdlen,rr->data, cached->qname,
827
ans_ttl(rrset,queryts)))
831
hops=0; /* this will break the loop */
834
free_cent(cached DBG1);
842
/* Add the authority section */
843
for (rre=dlist_first(ar); rre; rre=dlist_next(rre)) {
844
if (rre->tp == T_NS || rre->tp == T_SOA) {
845
unsigned char *nm = rre->tnm + rre->tsz;
846
if (!add_additional_rr(&ans, rlen, nm, rre->tp, rre->ttl, rre->tsz, rre->tnm,
847
S_AUTHORITY, udp, &cb, &sva))
854
/* now add the name server addresses */
855
for (rre=dlist_first(ar); rre; rre=dlist_next(rre)) {
856
if (rre->tp == T_NS || rre->tp == RRETP_ADD) {
857
if (!add_additional_a(&ans, rlen, rre->tnm, queryts, udp, &cb, &sva))
866
/* You may not like goto's, but here we avoid lots of code duplication. */
868
free_cent(cached DBG1);
881
* Decode the query (the query message is in data and rlen bytes long) into q
882
* XXX: data needs to be aligned
884
static int decode_query(unsigned char *data, long rlen, dlist *qp)
887
dns_hdr_t *hdr=(dns_hdr_t *)data; /* aligned, so no prob. */
888
unsigned char *ptr=(unsigned char *)(hdr+1);
889
long sz=rlen-sizeof(dns_hdr_t);
891
uint16_t qdcount=ntohs(hdr->qdcount);
894
for (i=0;i<qdcount;i++) {
897
unsigned char qbuf[256];
898
res=decompress_name(data,rlen,&ptr,&sz,qbuf,&qlen);
901
if (i==0) /*not even one complete query*/
902
goto return_rc_format;
906
goto return_rc_format;
911
/* truncated in qtype or qclass */
912
DEBUG_MSG("decode_query: query truncated in qtype or qclass.\n");
914
if (i==0) /*not even one complete query*/
915
goto return_rc_format;
919
goto return_rc_format;
921
if(!(q=dlist_grow(q,sizeof(dns_queryel_t)+qlen)))
924
GETINT16(qe->qtype,ptr);
925
GETINT16(qe->qclass,ptr);
927
memcpy(qe->query,qbuf,qlen);
939
/* Make a dns error reply message
940
* Id is the query id and still in network order.
941
* opcode is the opcode to fill in, rescode - name says it all.
943
static void mk_error_reply(unsigned short id, unsigned short opcode,unsigned short rescode,dns_hdr_t *rep)
963
* Analyze and answer the query in data. The answer is returned. *rlenp is at call the query length and at
964
* return the length of the answer. You have to free the answer after sending it.
966
static dns_ans_t *process_query(unsigned char *data, long *rlenp, char udp)
971
dlist q=NULL; /* Initialized to inhibit compiler warning. */
974
DEBUG_MSG("Received query.\n");
975
DEBUG_DUMP_DNS_MSG(NULL, data, rlen);
978
* We will ignore all records that come with a query, except for the actual query records.
979
* We will send back the query in the response. We will reject all non-queries, and
980
* some not supported thingies.
981
* If anyone notices behaviour that is not in standard conformance, please notify me!
983
hdr=(dns_hdr_t *)data;
985
DEBUG_MSG("Message too short.\n");
986
return NULL; /* message too short: no id provided. */
988
if (rlen<sizeof(dns_hdr_t)) {
989
DEBUG_MSG("Message too short.\n");
993
if (hdr->qr!=QR_QUERY) {
994
DEBUG_MSG("The QR bit indicates this is a response, not a query.\n");
995
return NULL; /* RFC says: discard */
997
if (hdr->opcode!=OP_QUERY) {
998
DEBUG_MSG("Not a standard query (opcode=%u).\n",hdr->opcode);
1003
DEBUG_MSG("Malformed query (nonzero Z bit).\n");
1007
if (!global.ignore_cd && hdr->cd!=0) {
1008
DEBUG_MSG("Malformed query (nonzero CD bit and ignore_cd=off).\n");
1012
if (hdr->rcode!=RC_OK) {
1013
DEBUG_MSG("Bad rcode(%u).\n",hdr->rcode);
1014
return NULL; /* discard (may cause error storms) */
1018
DEBUG_MSG("Query has a non-empty answer section!\n");
1024
DEBUG_MSG("Query has a non-empty authority section!\n");
1030
DEBUG_MSG("Query has a non-empty additional section!\n");
1035
res=decode_query(data,rlen,&q);
1044
DEBUG_MSG("Questions are:\n");
1045
for (qe=dlist_first(q); qe; qe=dlist_next(qe)) {
1046
DEBUG_RHN_MSG("\tqc=%s (%u), qt=%s (%u), query=\"%s\"\n",get_cname(qe->qclass),qe->qclass,get_tname(qe->qtype),qe->qtype,RHN2STR(qe->query));
1050
DEBUG_MSG("Query contains no questions.\n");
1059
if (!(ans=compose_answer(q, hdr, rlenp, udp))) {
1060
/* An out of memory condition or similar could cause NULL output. Send failure notification */
1070
*rlenp=sizeof(dns_hdr_t);
1071
ans=pdnsd_malloc(sizeof(dns_ans_t));
1073
mk_error_reply(hdr->id,rlen>=3?hdr->opcode:OP_QUERY,res,&ans->hdr);
1075
else if (++da_mem_errs<=MEM_MAX_ERRS) {
1076
log_error("Out of memory in query processing.");
1082
* Called by *_answer_thread exit handler to clean up process count.
1084
inline static void decrease_procs()
1087
pthread_mutex_lock(&proc_lock);
1090
pthread_mutex_unlock(&proc_lock);
1093
static void udp_answer_thread_cleanup(void *data)
1100
* A thread opened to answer a query transmitted via udp. Data is a pointer to the structure udp_buf_t that
1101
* contains the received data and various other parameters.
1102
* After the query is answered, the thread terminates
1103
* XXX: data must point to a correctly aligned buffer
1105
static void *udp_answer_thread(void *data)
1109
struct cmsghdr *cmsg;
1110
#if defined(SRC_ADDR_DISC)
1111
char ctrl[CMSG_SPACE(sizeof(pkt_info_t))];
1113
long rlen=((udp_buf_t *)data)->len;
1114
/* XXX: process_query is assigned to this, this mallocs, so this points to aligned memory */
1117
pthread_cleanup_push(udp_answer_thread_cleanup, data);
1120
if (!global.strict_suid) {
1121
if (!run_as(global.run_as)) {
1127
pthread_mutex_lock(&proc_lock);
1128
if (procs<global.proc_limit)
1130
pthread_mutex_unlock(&proc_lock);
1135
pthread_mutex_unlock(&proc_lock);
1140
if ((err=pthread_setspecific(thrid_key, &thrid)) != 0) {
1141
if(++da_misc_errs<=MISC_MAX_ERRS)
1142
log_error("pthread_setspecific failed: %s",strerror(err));
1148
if (!(resp=process_query(((udp_buf_t *)data)->buf,&rlen,1))) {
1150
* A return value of NULL is a fatal error that prohibits even the sending of an error message.
1151
* logging is already done. Just exit the thread now.
1153
pthread_exit(NULL); /* data freed by cleanup handler */
1155
pthread_cleanup_push(free, resp);
1158
resp->hdr.tc=1; /*set truncated bit*/
1160
DEBUG_MSG("Outbound msg len %li, tc=%u, rc=\"%s\"\n",rlen,resp->hdr.tc,get_ename(resp->hdr.rcode));
1162
v.iov_base=(char *)&resp->hdr;
1166
#if (TARGET!=TARGET_CYGWIN)
1167
#if defined(SRC_ADDR_DISC)
1168
msg.msg_control=ctrl;
1169
msg.msg_controllen=sizeof(ctrl);
1171
msg.msg_control=NULL;
1172
msg.msg_controllen=0;
1174
msg.msg_flags=0; /* to avoid warning message by Valgrind */
1180
msg.msg_name=&((udp_buf_t *)data)->addr.sin4;
1181
msg.msg_namelen=sizeof(struct sockaddr_in);
1182
# if defined(SRC_ADDR_DISC)
1183
# if (TARGET==TARGET_LINUX)
1184
((udp_buf_t *)data)->pi.pi4.ipi_spec_dst=((udp_buf_t *)data)->pi.pi4.ipi_addr;
1185
cmsg=CMSG_FIRSTHDR(&msg);
1186
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
1187
cmsg->cmsg_level=SOL_IP;
1188
cmsg->cmsg_type=IP_PKTINFO;
1189
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.pi4,sizeof(struct in_pktinfo));
1190
msg.msg_controllen=CMSG_SPACE(sizeof(struct in_pktinfo));
1192
cmsg=CMSG_FIRSTHDR(&msg);
1193
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
1194
cmsg->cmsg_level=IPPROTO_IP;
1195
cmsg->cmsg_type=IP_RECVDSTADDR;
1196
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.ai4,sizeof(struct in_addr));
1197
msg.msg_controllen=CMSG_SPACE(sizeof(struct in_addr));
1202
char buf[ADDRSTR_MAXLEN];
1204
DEBUG_MSG("Answering to: %s", inet_ntop(AF_INET,&((udp_buf_t *)data)->addr.sin4.sin_addr,buf,ADDRSTR_MAXLEN));
1205
# if defined(SRC_ADDR_DISC)
1206
# if (TARGET==TARGET_LINUX)
1207
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET,&((udp_buf_t *)data)->pi.pi4.ipi_spec_dst,buf,ADDRSTR_MAXLEN));
1209
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET,&((udp_buf_t *)data)->pi.ai4,buf,ADDRSTR_MAXLEN));
1221
msg.msg_name=&((udp_buf_t *)data)->addr.sin6;
1222
msg.msg_namelen=sizeof(struct sockaddr_in6);
1223
# if defined(SRC_ADDR_DISC)
1224
cmsg=CMSG_FIRSTHDR(&msg);
1225
cmsg->cmsg_len=CMSG_LEN(sizeof(struct in6_pktinfo));
1226
cmsg->cmsg_level=SOL_IPV6;
1227
cmsg->cmsg_type=IPV6_PKTINFO;
1228
memcpy(CMSG_DATA(cmsg),&((udp_buf_t *)data)->pi.pi6,sizeof(struct in6_pktinfo));
1229
msg.msg_controllen=CMSG_SPACE(sizeof(struct in6_pktinfo));
1233
char buf[ADDRSTR_MAXLEN];
1235
DEBUG_MSG("Answering to: %s", inet_ntop(AF_INET6,&((udp_buf_t *)data)->addr.sin6.sin6_addr,buf,ADDRSTR_MAXLEN));
1236
# if defined(SRC_ADDR_DISC)
1237
DEBUG_MSGC(", source address: %s\n", inet_ntop(AF_INET6,&((udp_buf_t *)data)->pi.pi6.ipi6_addr,buf,ADDRSTR_MAXLEN));
1246
/* Lock the socket, and clear the error flag before dropping the lock */
1247
#ifdef SOCKET_LOCKING
1248
pthread_mutex_lock(&s_lock);
1250
if (sendmsg(((udp_buf_t *)data)->sock,&msg,0)<0) {
1251
#ifdef SOCKET_LOCKING
1252
pthread_mutex_unlock(&s_lock);
1254
if (++da_udp_errs<=UDP_MAX_ERRS) {
1255
log_error("Error in udp send: %s",strerror(errno));
1259
socklen_t sl=sizeof(tmp);
1260
getsockopt(((udp_buf_t *)data)->sock, SOL_SOCKET, SO_ERROR, &tmp, &sl);
1261
#ifdef SOCKET_LOCKING
1262
pthread_mutex_unlock(&s_lock);
1266
pthread_cleanup_pop(1); /* free(resp) */
1267
pthread_cleanup_pop(1); /* free(data) */
1271
int init_udp_socket()
1277
struct sockaddr_in sin4;
1280
struct sockaddr_in6 sin6;
1287
if ((sock=socket(PF_INET,SOCK_DGRAM,IPPROTO_UDP))==-1) {
1288
log_error("Could not open udp socket: %s",strerror(errno));
1291
memset(&sin.sin4,0,sizeof(struct sockaddr_in));
1292
sin.sin4.sin_family=AF_INET;
1293
sin.sin4.sin_port=htons(global.port);
1294
sin.sin4.sin_addr=global.a.ipv4;
1295
SET_SOCKA_LEN4(sin.sin4);
1296
sinl=sizeof(struct sockaddr_in);
1301
if ((sock=socket(PF_INET6,SOCK_DGRAM,IPPROTO_UDP))==-1) {
1302
log_error("Could not open udp socket: %s",strerror(errno));
1305
memset(&sin.sin6,0,sizeof(struct sockaddr_in6));
1306
sin.sin6.sin6_family=AF_INET6;
1307
sin.sin6.sin6_port=htons(global.port);
1308
sin.sin6.sin6_flowinfo=IPV6_FLOWINFO;
1309
sin.sin6.sin6_addr=global.a.ipv6;
1310
SET_SOCKA_LEN6(sin.sin6);
1311
sinl=sizeof(struct sockaddr_in6);
1315
#ifdef SRC_ADDR_DISC
1316
# if (TARGET!=TARGET_LINUX)
1319
/* The following must be set in any case because it also applies to IPv4 packets sent to
1320
* ipv6 addresses. */
1321
# if (TARGET==TARGET_LINUX )
1322
if (setsockopt(sock,SOL_IP,IP_PKTINFO,&so,sizeof(so))!=0) {
1324
if (setsockopt(sock,IPPROTO_IP,IP_RECVDSTADDR,&so,sizeof(so))!=0) {
1326
log_error("Could not set options on udp socket: %s",strerror(errno));
1330
# if (TARGET!=TARGET_LINUX)
1336
if (setsockopt(sock,SOL_IPV6,IPV6_RECVPKTINFO,&so,sizeof(so))!=0) {
1337
log_error("Could not set options on udp socket: %s",strerror(errno));
1344
if (bind(sock,(struct sockaddr *)&sin,sinl)!=0) {
1345
log_error("Could not bind to udp socket: %s",strerror(errno));
1353
* Listen on the specified port for udp packets and answer them (each in a new thread to be nonblocking)
1354
* This was changed to support sending UDP packets with exactly the same source address as they were coming
1355
* to us, as required by rfc2181. Although this is a sensible requirement, it is slightly more difficult
1356
* and may introduce portability issues.
1358
/*
 * Thread function of the UDP server. No visible statement uses the argument
 * dummy.
 * Visible flow: call run_as(global.run_as) unless global.strict_suid is set,
 * allocate a udp_buf_t, receive a datagram into it with recvmsg() and, when
 * SRC_ADDR_DISC is defined, search the control messages for the address the
 * datagram was sent to. A received datagram is handed to a detached
 * udp_answer_thread as long as qprocs is below
 * global.proc_limit+global.procq_limit.
 * NOTE(review): braces, the loop header and the #else/#endif lines are absent
 * from this excerpt, so the exact nesting has to be confirmed in the full
 * file.
 */
void *udp_server_thread(void *dummy)
1366
struct cmsghdr *cmsg;
1368
#if defined(ENABLE_IPV6) && (TARGET==TARGET_LINUX)
1369
/* Scratch copy of the IPv4 packet info, used further down in the IPv6 receive path. */
struct in_pktinfo sip;
1371
/* (void)dummy; */ /* To inhibit "unused variable" warning */
1376
/* Unless strict_suid is set, run_as() is called from this thread; the handling of a failure is not visible here. */
if (!global.strict_suid) {
1377
if (!run_as(global.run_as)) {
1385
/* One udp_buf_t is obtained through pdnsd_calloc; the out-of-memory message is throttled by da_mem_errs and MEM_MAX_ERRS. */
if (!(buf=(udp_buf_t *)pdnsd_calloc(1,sizeof(udp_buf_t)))) {
1386
if (++da_mem_errs<=MEM_MAX_ERRS) {
1387
log_error("Out of memory in request handling.");
1394
/* Point the iovec v at the data area of buf; udp_buf_len is the number of bytes offered to recvmsg(). */
v.iov_base=(char *)buf->buf;
1395
v.iov_len=udp_buf_len;
1398
/* The control message buffer ctrl is attached on every target except TARGET_CYGWIN. */
#if (TARGET!=TARGET_CYGWIN)
1399
msg.msg_control=ctrl;
1400
msg.msg_controllen=sizeof(ctrl);
1403
#if defined(SRC_ADDR_DISC)
1406
/* Receive with an IPv4 sized sender address slot (buf->addr.sin4). */
msg.msg_name=&buf->addr.sin4;
1407
msg.msg_namelen=sizeof(struct sockaddr_in);
1408
if ((qlen=recvmsg(sock,&msg,0))>=0) {
1409
/* Walk the control messages looking for the destination address information. */
cmsg=CMSG_FIRSTHDR(&msg);
1411
/*
 * On Linux the information arrives as IP_PKTINFO and is copied to pi.pi4.
 * The IP_RECVDSTADDR test below belongs to the other targets, presumably
 * behind an #else line that is absent from this excerpt.
 */
# if (TARGET==TARGET_LINUX)
1412
if (cmsg->cmsg_level==SOL_IP && cmsg->cmsg_type==IP_PKTINFO) {
1413
memcpy(&buf->pi.pi4,CMSG_DATA(cmsg),sizeof(struct in_pktinfo));
1417
if (cmsg->cmsg_level==IPPROTO_IP && cmsg->cmsg_type==IP_RECVDSTADDR) {
1418
memcpy(&buf->pi.ai4,CMSG_DATA(cmsg),sizeof(buf->pi.ai4));
1422
cmsg=CMSG_NXTHDR(&msg,cmsg);
1425
/* Presumably reached when no matching control message was found: log (throttled by UDP_MAX_ERRS) and jump to free_buf_continue. */
if (++da_udp_errs<=UDP_MAX_ERRS) {
1426
log_error("Could not discover udp destination address");
1428
goto free_buf_continue;
1430
/* recvmsg() failed; an interruption (EINTR) is not reported. */
} else if (errno!=EINTR) {
1431
if (++da_udp_errs<=UDP_MAX_ERRS) {
1432
log_error("error in UDP recv: %s", strerror(errno));
1439
/* Same procedure with an IPv6 sized sender address slot (buf->addr.sin6). */
msg.msg_name=&buf->addr.sin6;
1440
msg.msg_namelen=sizeof(struct sockaddr_in6);
1441
if ((qlen=recvmsg(sock,&msg,0))>=0) {
1442
cmsg=CMSG_FIRSTHDR(&msg);
1444
/* An IPV6_PKTINFO message is copied verbatim to pi.pi6. */
if (cmsg->cmsg_level==SOL_IPV6 && cmsg->cmsg_type==IPV6_PKTINFO) {
1445
memcpy(&buf->pi.pi6,CMSG_DATA(cmsg),sizeof(struct in6_pktinfo));
1448
cmsg=CMSG_NXTHDR(&msg,cmsg);
1451
/* We might have an IPv4 Packet incoming on our IPv6 port, so we also have to
1452
* check for IPv4 sender addresses */
1453
cmsg=CMSG_FIRSTHDR(&msg);
1455
# if (TARGET==TARGET_LINUX)
1456
if (cmsg->cmsg_level==SOL_IP && cmsg->cmsg_type==IP_PKTINFO) {
1457
memcpy(&sip,CMSG_DATA(cmsg),sizeof(sip));
1458
/* IPV6_MAPIPV4 presumably stores sip.ipi_addr in IPv4-mapped form in pi6 (confirm against the macro); the interface index is copied as is. */
IPV6_MAPIPV4(&sip.ipi_addr,&buf->pi.pi6.ipi6_addr);
1459
buf->pi.pi6.ipi6_ifindex=sip.ipi_ifindex;
1462
/* FIXME: What about BSD? probably ok, but... */
1464
cmsg=CMSG_NXTHDR(&msg,cmsg);
1467
if (++da_udp_errs<=UDP_MAX_ERRS) {
1468
log_error("Could not discover udp destination address");
1470
goto free_buf_continue;
1473
} else if (errno!=EINTR) {
1474
if (++da_udp_errs<=UDP_MAX_ERRS) {
1475
log_error("error in UDP recv: %s", strerror(errno));
1480
#else /* !SRC_ADDR_DISC */
1483
/* Without SRC_ADDR_DISC the datagram is received without looking at control messages; first the IPv4 sized variant, then the IPv6 sized one. */
msg.msg_name=&buf->addr.sin4;
1484
msg.msg_namelen=sizeof(struct sockaddr_in);
1485
qlen=recvmsg(sock,&msg,0);
1486
if (qlen<0 && errno!=EINTR) {
1487
if (++da_udp_errs<=UDP_MAX_ERRS) {
1488
log_error("error in UDP recv: %s", strerror(errno));
1495
msg.msg_name=&buf->addr.sin6;
1496
msg.msg_namelen=sizeof(struct sockaddr_in6);
1497
qlen=recvmsg(sock,&msg,0);
1498
if (qlen<0 && errno!=EINTR) {
1499
if (++da_udp_errs<=UDP_MAX_ERRS) {
1500
log_error("error in UDP recv: %s", strerror(errno));
1508
/* Admission check: qprocs and spawned are read and updated with proc_lock held. */
pthread_mutex_lock(&proc_lock);
1509
if (qprocs<global.proc_limit+global.procq_limit) {
1511
++qprocs; ++spawned;
1512
pthread_mutex_unlock(&proc_lock);
1514
/* The lock is released before the detached answer thread is created; buf is passed as the thread argument. */
err=pthread_create(&pt,&attr_detached,udp_answer_thread,(void *)buf);
1517
/* The warning is throttled by THRD_MAX_ERRS; the counters incremented above are decremented again under the lock. */
if(++da_thrd_errs<=THRD_MAX_ERRS)
1518
log_warn("pthread_create failed: %s",strerror(err));
1519
/* If thread creation failed, free resources associated with it. */
1520
pthread_mutex_lock(&proc_lock);
1521
--qprocs; --spawned;
1524
pthread_mutex_unlock(&proc_lock);
1533
/* Store main_thrid in udps_thrid; report_thread_stat() reports the udp server as running only while the two differ. */
udps_thrid=main_thrid;
1539
#ifndef NO_TCP_SERVER
1541
/*
 * Cleanup handler that tcp_answer_thread() registers with
 * pthread_cleanup_push(). csock points to an int holding the descriptor of
 * the connection socket, which is closed here.
 * NOTE(review): the remaining lines of the body are absent from this excerpt.
 */
static void tcp_answer_thread_cleanup(void *csock)
1543
close(*((int *)csock));
1549
* Process a dns query via tcp. The argument is a pointer to the socket.
1551
/*
 * Thread function serving one accepted TCP connection. csock points to an
 * int holding the connected socket; the same pointer is given to the cleanup
 * handler tcp_answer_thread_cleanup(), which closes the socket.
 * Visible flow: wait for input with select() or poll() for at most
 * global.tcp_qtimeout seconds, read the length prefix, read the query into a
 * buffer of that length, pass it to process_query() and send the length
 * prefixed response with write_all().
 * NOTE(review): braces, loop headers and preprocessor conditionals are absent
 * from this excerpt, so the exact nesting has to be confirmed in the full
 * file.
 */
static void *tcp_answer_thread(void *csock)
1553
/* XXX: This should be OK, the original must be (and is) aligned */
1554
int sock=*((int *)csock);
1557
/* Register the handler that closes the socket; it runs on pthread_exit() and on the final pthread_cleanup_pop(1). */
pthread_cleanup_push(tcp_answer_thread_cleanup, csock);
1560
if (!global.strict_suid) {
1561
if (!run_as(global.run_as)) {
1567
/* procs is compared against global.proc_limit with proc_lock held; the statements between the lock calls are not visible here. */
pthread_mutex_lock(&proc_lock);
1568
if (procs<global.proc_limit)
1570
pthread_mutex_unlock(&proc_lock);
1575
pthread_mutex_unlock(&proc_lock);
1580
/* Store a pointer to thrid under thrid_key; a failure is logged, throttled by MISC_MAX_ERRS. */
if ((err=pthread_setspecific(thrid_key, &thrid)) != 0) {
1581
if(++da_misc_errs<=MISC_MAX_ERRS)
1582
log_error("pthread_setspecific failed: %s",strerror(err));
1588
/* The wait that follows exists in a select() and a poll() form; presumably a preprocessor conditional that is absent here picks one of them. */
/* rfc1035 says we should process multiple queries in succession, so we are looping until
1589
* the socket is closed by the other side or by tcp timeout.
1590
* This in fact makes DoSing easier. If that is your concern, you should disable pdnsd's
1602
PDNSD_ASSERT(sock<FD_SETSIZE,"socket file descriptor exceeds FD_SETSIZE.");
1605
tv.tv_sec=global.tcp_qtimeout;
1606
if (select(sock+1,&fds,NULL,NULL,&tv)<=0)
1607
pthread_exit(NULL); /* socket is closed by cleanup handler */
1612
/* poll() takes milliseconds, hence the factor 1000 on the timeout given in seconds. */
if (poll(&pfd,1,global.tcp_qtimeout*1000)<=0)
1613
pthread_exit(NULL); /* socket is closed by cleanup handler */
1618
/* Read the length prefix; anything other than a complete read ends the thread. */
if ((err=read(sock,&rlen_net,sizeof(rlen_net)))!=sizeof(rlen_net)) {
1619
DEBUG_MSG("Error while reading from TCP client: %s\n",err==-1?strerror(errno):"incomplete data");
1621
* If the socket timed or was closed before we even received the
1622
* query length, we cannot return an error. So exit silently.
1624
pthread_exit(NULL); /* socket is closed by cleanup handler */
1626
/* The length arrives in network byte order. */
rlen=ntohs(rlen_net);
1629
log_error("TCP zero size query received.\n");
1632
/* Buffer for a query of rlen bytes. */
buf=(unsigned char *)pdnsd_malloc(rlen);
1634
if (++da_mem_errs<=MEM_MAX_ERRS) {
1635
log_error("Out of memory in request handling.");
1637
pthread_exit(NULL); /* socket is closed by cleanup handler */
1639
/* From here on buf is released by a cleanup handler. */
pthread_cleanup_push(free, buf);
1648
tv.tv_sec=global.tcp_qtimeout;
1649
if (select(sock+1,&fds,NULL,NULL,&tv)<=0)
1650
pthread_exit(NULL); /* buf freed and socket closed by cleanup handlers */
1654
if (poll(&pfd,1,global.tcp_qtimeout*1000)<=0)
1655
pthread_exit(NULL); /* buf freed and socket closed by cleanup handlers */
1657
/* olen apparently counts the bytes received so far, so this asks for the remainder of the query. */
rv=read(sock,buf+olen,rlen-olen);
1659
DEBUG_MSG("Error while reading from TCP client: %s\n",rv==-1?strerror(errno):"incomplete data");
1661
* If the promised length was not sent, we should return an error message,
1662
* but if read fails that way, it is unlikely that it will arrive. Nevertheless...
1664
if (olen>=2) { /* We need the id to send a valid reply. */
1666
/* The opcode is taken from the partial header only when at least three bytes arrived; otherwise OP_QUERY is used. */
mk_error_reply(((dns_hdr_t*)buf)->id,
1667
olen>=3?((dns_hdr_t*)buf)->opcode:OP_QUERY,
1670
/* The length field of the error reply is set to the size of a DNS header. */
err.len=htons(sizeof(dns_hdr_t));
1671
write_all(sock,&err,sizeof(err)); /* error anyway. */
1673
pthread_exit(NULL); /* buf freed and socket closed by cleanup handlers */
1678
/* process_query() returns the response and stores its length in nlen. */
if (!(resp=process_query(buf,&nlen,0))) {
1680
* A return value of NULL is a fatal error that prohibits even the sending of an error message.
1681
* logging is already done. Just exit the thread now.
1685
/* Swap the cleanup handlers: the query buffer is freed now and the response is registered instead. */
pthread_cleanup_pop(1); /* free(buf) */
1686
pthread_cleanup_push(free,resp);
1689
/* Fill in the length prefix; rsize is ansoffset plus the answer length. */
resp->len=htons(nlen);
1690
rsize=ansoffset+nlen;
1691
if ((err=write_all(sock,resp,rsize))!=rsize) {
1692
DEBUG_MSG("Error while writing to TCP client: %s\n",err==-1?strerror(errno):"unknown error");
1693
pthread_exit(NULL); /* resp is freed and socket is closed by cleanup handlers */
1696
pthread_cleanup_pop(1); /* free(resp) */
1698
/* Do not allow multiple queries in one sequence.*/
1703
/* socket is closed by cleanup handler */
1704
pthread_cleanup_pop(1);
1708
/*
 * Create and bind the TCP server socket.
 * Two variants are visible: an IPv4 one (PF_INET, address global.a.ipv4) and
 * an IPv6 one (PF_INET6, address global.a.ipv6); both use global.port.
 * SO_REUSEADDR is requested before bind(). A failure to set it only produces
 * a warning, while socket() and bind() failures are logged as errors.
 * NOTE(review): the code that selects between the variants and the return
 * statements are absent from this excerpt.
 */
int init_tcp_socket()
1713
/* The two address types are used below as sin.sin4 and sin.sin6. */
struct sockaddr_in sin4;
1716
struct sockaddr_in6 sin6;
1723
/* IPv4 variant. */
if ((sock=socket(PF_INET,SOCK_STREAM,IPPROTO_TCP))==-1) {
1724
log_error("Could not open tcp socket: %s",strerror(errno));
1727
/* Bind address: global.a.ipv4 with global.port converted to network byte order. */
memset(&sin.sin4,0,sizeof(struct sockaddr_in));
1728
sin.sin4.sin_family=AF_INET;
1729
sin.sin4.sin_port=htons(global.port);
1730
sin.sin4.sin_addr=global.a.ipv4;
1731
SET_SOCKA_LEN4(sin.sin4);
1732
sinl=sizeof(struct sockaddr_in);
1737
/* IPv6 variant. */
if ((sock=socket(PF_INET6,SOCK_STREAM,IPPROTO_TCP))==-1) {
1738
log_error("Could not open tcp socket: %s",strerror(errno));
1741
/* Bind address: global.a.ipv6 with global.port; the flow info field is set to IPV6_FLOWINFO. */
memset(&sin.sin6,0,sizeof(struct sockaddr_in6));
1742
sin.sin6.sin6_family=AF_INET6;
1743
sin.sin6.sin6_port=htons(global.port);
1744
sin.sin6.sin6_flowinfo=IPV6_FLOWINFO;
1745
sin.sin6.sin6_addr=global.a.ipv6;
1746
SET_SOCKA_LEN6(sin.sin6);
1747
sinl=sizeof(struct sockaddr_in6);
1752
/* The SO_REUSEADDR socket option tells the kernel that even if this port
1753
is busy (in the TIME_WAIT state), go ahead and reuse it anyway. If it
1754
is busy, but with another state, we should get an address already in
1755
use error. It is useful if pdnsd is shut down, and then restarted right
1756
away while sockets are still active on its port. There is a slight risk
1757
though. If unexpected data comes in, it may confuse pdnsd, but while
1758
this is possible, it is not likely.
1760
if(setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,&so,sizeof(so)))
1761
log_warn("Could not set options on tcp socket: %s",strerror(errno));
1763
if (bind(sock,(struct sockaddr *)&sin,sinl)) {
1764
log_error("Could not bind tcp socket: %s",strerror(errno));
1772
* Listen on the specified port for tcp connects and answer them (each in a new thread to be nonblocking)
1774
void *tcp_server_thread(void *p)
1780
/* (void)p; */ /* To inhibit "unused variable" warning */
/*
 * Purpose of this thread function: put sock into the listening state
 * (backlog 5), accept connections and hand each of them to a detached
 * tcp_answer_thread. The descriptor of an accepted connection is stored in
 * an int obtained from pdnsd_malloc (csock), which is passed as the thread
 * argument. No visible statement uses the argument p.
 * NOTE(review): the loop header, braces and parts of the error paths are
 * absent from this excerpt, so the exact nesting has to be confirmed in the
 * full file.
 */
1784
if (!global.strict_suid) {
1785
if (!run_as(global.run_as)) {
1792
/* A listen() failure is logged (throttled by TCP_MAX_ERRS) and leads to the close_sock_return label. */
if (listen(sock,5)) {
1793
if (++da_tcp_errs<=TCP_MAX_ERRS) {
1794
log_error("Could not listen on tcp socket: %s",strerror(errno));
1796
goto close_sock_return;
1800
/* One int per connection carries the descriptor to the answer thread. */
if (!(csock=(int *)pdnsd_malloc(sizeof(int)))) {
1801
if (++da_mem_errs<=MEM_MAX_ERRS) {
1802
log_error("Out of memory in request handling.");
1806
/* accept() failures other than EINTR are logged, throttled by TCP_MAX_ERRS. */
if ((*csock=accept(sock,NULL,0))==-1) {
1807
if (errno!=EINTR && ++da_tcp_errs<=TCP_MAX_ERRS) {
1808
log_error("tcp accept failed: %s",strerror(errno));
1812
* With creating a new thread, we follow recommendations
1813
* in rfc1035 not to block
1815
/* Admission check: qprocs and spawned are read and updated with proc_lock held. */
pthread_mutex_lock(&proc_lock);
1816
if (qprocs<global.proc_limit+global.procq_limit) {
1818
++qprocs; ++spawned;
1819
pthread_mutex_unlock(&proc_lock);
1820
/* The lock is released before the detached answer thread is created. */
err=pthread_create(&pt,&attr_detached,tcp_answer_thread,(void *)csock);
1823
/* The warning is throttled by THRD_MAX_ERRS; the counters incremented above are decremented again under the lock. */
if(++da_thrd_errs<=THRD_MAX_ERRS)
1824
log_warn("pthread_create failed: %s",strerror(err));
1825
/* If thread creation failed, free resources associated with it. */
1826
pthread_mutex_lock(&proc_lock);
1827
--qprocs; --spawned;
1830
pthread_mutex_unlock(&proc_lock);
1839
/* Store main_thrid in tcps_thrid; report_thread_stat() reports the tcp server as running only while the two differ. */
tcps_thrid=main_thrid;
1847
* Starts the tcp server thread and the udp server thread. Both threads
1848
* are not terminated, so only a signal can interrupt the server.
1850
/*
 * Start the server threads. A detached tcp_server_thread is created when
 * tcp_socket is not -1 (this part is compiled only without NO_TCP_SERVER),
 * and a detached udp_server_thread when udp_socket is not -1. A failing
 * pthread_create() is logged as an error and a success as an info message.
 * NOTE(review): what follows the error log calls, and the braces and
 * preprocessor lines closing each part, are absent from this excerpt.
 */
void start_dns_servers()
1853
#ifndef NO_TCP_SERVER
1854
/* TCP server: started only when a TCP socket is available. */
if (tcp_socket!=-1) {
1857
if (pthread_create(&tcps,&attr_detached,tcp_server_thread,NULL)) {
1858
log_error("Could not create tcp server thread. Exiting.");
1862
log_info(2,"tcp server thread started.");
1867
/* UDP server: same procedure with udp_socket and udp_server_thread. */
if (udp_socket!=-1) {
1870
if (pthread_create(&udps,&attr_detached,udp_server_thread,NULL)) {
1871
log_error("Could not create udp server thread. Exiting.");
1875
log_info(2,"udp server thread started.");
1881
/* Report the thread status to the file descriptor f, for the status fifo (see status.c) */
1882
int report_thread_stat(int f)
1884
unsigned long nspawned,ndropped;
1885
int nactive,ncurrent,nqueued;
1887
/* The thread counters are volatile, so we will make copies
1888
under locked conditions to make sure we get consistent data.
1890
pthread_mutex_lock(&proc_lock);
1891
nspawned=spawned; ndropped=dropped;
1892
nactive=procs; ncurrent=qprocs;
1893
nqueued=ncurrent-nactive;
1894
pthread_mutex_unlock(&proc_lock);
1896
fsprintf_or_return(f,"\nThread status:\n==============\n");
1897
if(!pthread_equal(servstat_thrid,main_thrid))
1898
fsprintf_or_return(f,"server status thread is running.\n");
1899
if(!pthread_equal(statsock_thrid,main_thrid))
1900
fsprintf_or_return(f,"pdnsd control thread is running.\n");
1901
if(!pthread_equal(tcps_thrid,main_thrid))
1902
fsprintf_or_return(f,"tcp server thread is running.\n");
1903
if(!pthread_equal(udps_thrid,main_thrid))
1904
fsprintf_or_return(f,"udp server thread is running.\n");
1905
fsprintf_or_return(f,"%lu query threads spawned in total (%lu queries dropped).\n",
1907
fsprintf_or_return(f,"%i running query threads (%i active, %i queued).\n",
1908
ncurrent,nactive,nqueued);