2
Copyright (C) 2003-2004 Douglas Thain and the University of Wisconsin
3
Copyright (C) 2005- The University of Notre Dame
4
This software is distributed under the GNU General Public License.
5
See the file COPYING for details.
8
#include "pfs_sysdeps.h"
9
#include "pfs_channel.h"
10
#include "pfs_channel_cache.h"
11
#include "pfs_process.h"
14
#include "pfs_service.h"
15
#include "pfs_dispatch.h"
19
#include "stringtools.h"
22
#include "int_sizes.h"
38
#include <sys/statfs.h>
43
#include <sys/utsname.h>
46
#include <linux/sockios.h>
48
#include <sys/types.h>
52
/* Shared tracer state; every symbol below is only declared here and defined elsewhere. */
extern struct pfs_process *pfs_current;
53
extern char *pfs_temp_dir;
54
extern char *pfs_false_uname;
58
extern pid_t trace_this_pid;
60
/* Running counters: pfs_syscall_count is incremented in decode_syscall; the
   read and write counters accumulate the results of successful transfers in
   decode_read and decode_write. */
extern INT64_T pfs_syscall_count;
61
extern INT64_T pfs_read_count;
62
extern INT64_T pfs_write_count;
64
extern int pfs_trap_after_fork;
66
/* When non-null, decode_execve hands this path to redirect_ldso. */
extern char *pfs_ldso_path;
67
/* When non-null, decode_syscall increments the slot indexed by the syscall number. */
extern int *pfs_syscall_totals32;
69
extern void handle_specific_process( pid_t pid );
72
Divert this incoming system call to a read or write on the I/O channel
75
/*
Rewrite the pending system call of process p so that it becomes `syscall`
(callers in this file pass SYSCALL32_pread or SYSCALL32_pwrite) against the
I/O channel descriptor.
  uaddr          - address in the traced process, passed through as argument 1
  length         - number of bytes being diverted; remembered in p->diverted_length
  channel_offset - offset inside the channel, split into low/high 32-bit halves
NOTE(review): the declaration of args and the assignment of args[2] are on
lines not visible in this listing (presumably args[2] = length) - confirm.
*/
static void divert_to_channel( struct pfs_process *p, int syscall, const void *uaddr, int length, pfs_size_t channel_offset )
78
args[0] = pfs_channel_fd();
79
args[1] = (UPTRINT_T)uaddr;
81
/* the 64-bit offset travels as two 32-bit arguments: low word, then high word */
args[3] = channel_offset&0xffffffff;
82
args[4] = (((UINT64_T)channel_offset) >> 32);
83
tracer_args_set(p->tracer,syscall,args,5);
84
/* flag that the original arguments were overwritten */
p->syscall_args_changed = 1;
85
p->diverted_length = length;
89
Divert this incoming system call to something harmless with the given result.
92
/*
Record `result` as the outcome of the current system call and replace the
call itself with getpid (no arguments), so the kernel performs nothing of
consequence on behalf of the traced process.
NOTE(review): one statement of this body is not visible in this listing.
*/
static void divert_to_dummy( struct pfs_process *p, int result )
95
p->syscall_result = result;
96
tracer_args_set(p->tracer,SYSCALL32_getpid,0,0);
99
/*
Return non-zero when the errno value e is one of EAGAIN, EALREADY or
EINPROGRESS, and zero otherwise.
*/
static int errno_in_progress( int e )
101
return (e==EAGAIN || e==EALREADY || e==EINPROGRESS);
105
The large majority of calls, given below in decode_syscall
106
have a simple transformation using tracer_copy_{in,out}.
107
Read, write, and similar syscalls need better performance
108
and thus have more specialized implementations shown here.
112
SYSCALL32_read and friends are implemented by loading the data
113
into the channel, and then redirecting the system call
114
to read from the channel fd.
117
/* Convert an integer system call argument into a void pointer, going through PTRINT_T. */
#define POINTER( i ) ((void*)(PTRINT_T)(i))
119
/*
Handle read, pread, recv and recvfrom for process p.
The data is first read by Parrot into a freshly allocated region of the I/O
channel, and the traced call is then diverted into a pread of that region.
  args[1] - destination address in the traced process
  args[2] - requested length
  args[3] - file offset for pread, flags for recv/recvfrom
NOTE(review): several lines of this body (the declarations of fd and
local_addr, the entering test and some else branches) are not visible in
this listing; comments below describe only what is shown.
*/
static void decode_read( struct pfs_process *p, int entering, int syscall, INT64_T *args )
122
void *uaddr = POINTER(args[1]);
123
pfs_size_t length = args[2];
124
pfs_off_t offset = args[3];
128
/* reserve channel space for the whole request; fail the call with ENOMEM if that is impossible */
if(!pfs_channel_alloc(length,&p->io_channel_offset)) {
129
divert_to_dummy(p,-ENOMEM);
132
local_addr = pfs_channel_base() + p->io_channel_offset;
134
/* perform the real operation into the channel buffer */
if(syscall==SYSCALL32_read) {
135
p->syscall_result = pfs_read(fd,local_addr,length);
136
} else if(syscall==SYSCALL32_pread) {
137
p->syscall_result = pfs_pread(fd,local_addr,length,offset);
138
} else if(syscall==SYS_RECV) {
139
p->syscall_result = pfs_recv(fd,local_addr,length,args[3]);
140
} else if(syscall==SYS_RECVFROM) {
141
p->syscall_result = pfs_recvfrom(fd,local_addr,length,args[3],(struct sockaddr*)POINTER(args[4]),(int*)POINTER(args[5]));
144
p->diverted_length = 0;
146
/* zero bytes: nothing to deliver, so the call is replaced by a dummy returning 0 */
if(p->syscall_result==0) {
147
divert_to_dummy(p,0);
148
/* data available: let the traced process pread exactly that many bytes from the channel */
} else if(p->syscall_result>0) {
149
divert_to_channel(p,SYSCALL32_pread,uaddr,p->syscall_result,p->io_channel_offset);
150
pfs_read_count += p->syscall_result;
151
/* EAGAIN: the visible outcomes are -EINTR, -EAGAIN for a non-blocking fd, or
   parking the process in WAITREAD; the test selecting -EINTR is not visible here */
} else if( errno==EAGAIN ) {
154
divert_to_dummy(p,-EINTR);
155
} else if(pfs_is_nonblocking(fd)) {
156
divert_to_dummy(p,-EAGAIN);
158
/* give back the channel space and ask to be woken when the real fd is readable */
pfs_channel_free(p->io_channel_offset);
159
p->state = PFS_PROCESS_STATE_WAITREAD;
160
int rfd = pfs_get_real_fd(fd);
161
if(rfd>=0) pfs_poll_wakeon(rfd,PFS_POLL_READ);
164
divert_to_dummy(p,-errno);
168
This is an ugly situation.
169
If we arrive here with EINTR, it means that we have copied
170
all of the data into the channel, taken any side effects
171
of accessing the remote storage device, but the system call
172
happened not to read it because of an incoming signal.
173
We have no way of re-trying the channel read, so we do
174
the ugly slow copy out instead.
177
/* copy the already-fetched bytes out by hand and report them as the result */
if( (p->syscall_result==-EINTR) && (p->diverted_length>0) ) {
178
tracer_copy_out(p->tracer,pfs_channel_base()+p->io_channel_offset,uaddr,p->diverted_length);
179
p->syscall_result = p->diverted_length;
180
tracer_result_set(p->tracer,p->syscall_result);
183
pfs_channel_free(p->io_channel_offset);
188
decode_write is much the same as read. We allocate space
189
in the channel, and then redirect the caller to write
190
to it. When the syscall completes, we write the data
191
to its destination and then set the result.
194
/*
Handle write, pwrite, send and sendto for process p.
Channel space is allocated and the traced call is diverted into a pwrite
onto the channel; afterwards the bytes that landed in the channel are
written to the real destination and the result is set.
  args[1] - source address in the traced process
  args[2] - requested length
  args[3] - file offset for pwrite, flags for send/sendto
NOTE(review): the entering test, the declaration of fd and several braces
and else branches are not visible in this listing.
*/
static void decode_write( struct pfs_process *p, int entering, int syscall, INT64_T *args )
197
void *uaddr = POINTER(args[1]);
198
INT64_T length = args[2];
199
if(!pfs_channel_alloc(length,&p->io_channel_offset)) {
200
divert_to_dummy(p,-ENOMEM);
203
divert_to_channel(p,SYSCALL32_pwrite,uaddr,length,p->io_channel_offset);
205
/* fetch what the diverted pwrite onto the channel actually returned */
INT64_T actual_result;
206
tracer_result_get(p->tracer,&actual_result);
208
if(actual_result!=args[2]) {
209
debug(D_NOTICE,"channel write returned %lld instead of %lld",actual_result,args[2]);
212
if(actual_result>0) {
214
pfs_off_t offset = args[3];
215
char *local_addr = pfs_channel_base() + p->io_channel_offset;
217
/* forward only the bytes that actually reached the channel */
if(syscall==SYSCALL32_write) {
218
p->syscall_result = pfs_write(fd,local_addr,actual_result);
219
} else if(syscall==SYSCALL32_pwrite) {
220
p->syscall_result = pfs_pwrite(fd,local_addr,actual_result,offset);
221
} else if(syscall==SYS_SEND) {
222
p->syscall_result = pfs_send(fd,local_addr,actual_result,args[3]);
223
} else if(syscall==SYS_SENDTO) {
224
p->syscall_result = pfs_sendto(fd,local_addr,actual_result,args[3],(struct sockaddr *)POINTER(args[4]),args[5]);
227
if(p->syscall_result>=0) {
228
if(p->syscall_result!=actual_result) {
229
debug(D_SYSCALL,"write returned %lld instead of %lld",p->syscall_result,actual_result);
231
tracer_result_set(p->tracer,p->syscall_result);
232
pfs_channel_free(p->io_channel_offset);
233
p->state = PFS_PROCESS_STATE_KERNEL;
235
pfs_write_count += p->syscall_result;
237
/* a blocking descriptor that would block: wait until the real fd becomes writable */
if(errno==EAGAIN && !pfs_is_nonblocking(fd)) {
238
p->state = PFS_PROCESS_STATE_WAITWRITE;
239
int rfd = pfs_get_real_fd(fd);
240
if(rfd>=0) pfs_poll_wakeon(rfd,PFS_POLL_WRITE);
242
/* any other failure is reported to the traced process as -errno */
p->syscall_result = -errno;
243
tracer_result_set(p->tracer,p->syscall_result);
244
pfs_channel_free(p->io_channel_offset);
245
/* a broken pipe additionally raises SIGPIPE in the traced process */
if(p->syscall_result==-EPIPE) {
246
// make sure that we are not in a wait state,
247
// otherwise pfs_process_raise will re-dispatch.
248
p->state = PFS_PROCESS_STATE_KERNEL;
249
pfs_process_raise(p->pid,SIGPIPE,1);
254
pfs_channel_free(p->io_channel_offset);
259
/*
Allocate a local array of `count` iovec entries with malloc and fill it by
copying from the address uv in the traced process.
NOTE(review): the byte size is computed in an int from a caller-supplied
count, and no check of the malloc result is visible in this listing;
the return statement is also not visible (presumably returns v) - confirm.
*/
static struct pfs_kernel_iovec * iovec_alloc_in( struct pfs_process *p, struct pfs_kernel_iovec *uv, int count )
261
struct pfs_kernel_iovec *v;
262
int size = sizeof(struct pfs_kernel_iovec)*count;
264
v = (struct pfs_kernel_iovec *) malloc(size);
266
tracer_copy_in(p->tracer,v,uv,size);
273
/*
Sum the iov_len fields of the first `count` entries of v.
NOTE(review): the declarations and the return are not visible in this
listing (presumably returns the running total) - confirm.
*/
static int iovec_size( struct pfs_process *p, struct pfs_kernel_iovec *v, int count )
276
for(i=0;i<count;i++) {
277
total += v[i].iov_len;
282
/*
Gather: for each of the `count` entries of v, copy iov_len bytes from the
traced process at iov_base into buf at position pos.
NOTE(review): the declaration and advancement of pos and the return value
are on lines not visible in this listing.
*/
static int iovec_copy_in( struct pfs_process *p, char *buf, struct pfs_kernel_iovec *v, int count )
285
for(i=0;i<count;i++) {
286
tracer_copy_in(p->tracer,&buf[pos],(void*)(UPTRINT_T)v[i].iov_base,v[i].iov_len);
292
/*
Scatter: for each of the `count` entries of v, copy iov_len bytes from buf
at position pos into the traced process at iov_base.
NOTE(review): the declaration and advancement of pos and the return value
are on lines not visible in this listing.
*/
static int iovec_copy_out( struct pfs_process *p, char *buf, struct pfs_kernel_iovec *v, int count )
295
for(i=0;i<count;i++) {
296
tracer_copy_out(p->tracer,&buf[pos],(void*)(UPTRINT_T)v[i].iov_base,v[i].iov_len);
303
Both readv and writev have a careful but inefficient implementation.
304
For each uio block, we examine the data structures, do a manual
305
read and write in our local buffer, and then copy the data over.
306
I assume that these are not heavily used system calls, although
307
they do seem to appear sporadically in X11, the dynamic linker,
308
and in networking utilities.
311
/*
Handle readv: copy in the caller's iovec array, read the combined length
into one local buffer with pfs_read, scatter the buffer back into the
traced process, and finish the call through divert_to_dummy.
  args[1] - address of the iovec array in the traced process
NOTE(review): the declarations of fd, count, size, result and buffer, the
result tests and the cleanup of v and buffer are not visible in this listing.
*/
static void decode_readv( struct pfs_process *p, int entering, int syscall, INT64_T *args )
315
struct pfs_kernel_iovec *uv = (struct pfs_kernel_iovec *) POINTER(args[1]);
318
struct pfs_kernel_iovec *v;
323
/* reject a null vector or a non-positive entry count */
if(!uv || count<=0) {
324
divert_to_dummy(p,-EINVAL);
328
v = iovec_alloc_in(p,uv,count);
330
size = iovec_size(p,v,count);
331
buffer = (char*) malloc(size);
333
result = pfs_read(fd,buffer,size);
335
iovec_copy_out(p,buffer,v,count);
336
divert_to_dummy(p,result);
338
/* blocking descriptor with nothing to read yet: wait for the real fd to become readable */
if(errno==EAGAIN && !pfs_is_nonblocking(fd)) {
339
p->state = PFS_PROCESS_STATE_WAITREAD;
340
int rfd = pfs_get_real_fd(fd);
341
if(rfd>=0) pfs_poll_wakeon(rfd,PFS_POLL_READ);
343
divert_to_dummy(p,-errno);
348
/* the two ENOMEM exits presumably correspond to the two allocations above - confirm */
divert_to_dummy(p,-ENOMEM);
352
divert_to_dummy(p,-ENOMEM);
357
/*
Handle writev: copy in the caller's iovec array, gather all segments into
one local buffer, write it with pfs_write, and finish the call through
divert_to_dummy.
  args[1] - address of the iovec array in the traced process
NOTE(review): the declarations of fd, count, size, result and buffer, some
result tests and the cleanup of v and buffer are not visible in this listing.
*/
static void decode_writev( struct pfs_process *p, int entering, int syscall, INT64_T *args )
361
struct pfs_kernel_iovec *uv = (struct pfs_kernel_iovec *)POINTER(args[1]);
364
struct pfs_kernel_iovec *v;
369
/* reject a null vector or a non-positive entry count */
if(!uv || count<=0) {
370
divert_to_dummy(p,-EINVAL);
374
v = iovec_alloc_in(p,uv,count);
376
size = iovec_size(p,v,count);
377
buffer = (char *) malloc(size);
379
iovec_copy_in(p,buffer,v,count);
380
result = pfs_write(fd,buffer,size);
382
divert_to_dummy(p,result);
383
} else if(result<0) {
384
if(errno==EAGAIN && !pfs_is_nonblocking(fd)) {
386
WAITREAD is correct here, because WAITWRITE
387
would cause us to be called again with entering=0
389
/* the state is WAITREAD on purpose (see the note above) even though the wake-up event is PFS_POLL_WRITE */
p->state = PFS_PROCESS_STATE_WAITREAD;
390
int rfd = pfs_get_real_fd(fd);
391
if(rfd>=0) pfs_poll_wakeon(rfd,PFS_POLL_WRITE);
393
divert_to_dummy(p,-errno);
398
/* the two ENOMEM exits presumably correspond to the two allocations above - confirm */
divert_to_dummy(p,-ENOMEM);
402
divert_to_dummy(p,-ENOMEM);
407
/*
Handle stat, lstat and fstat (and, per the sixty_four parameter, presumably
their 64-bit variants - confirm). The result is converted into the kernel
layout, placed in the I/O channel, and the traced call is diverted into a
pread of that region so the kernel delivers the structure to args[1].
NOTE(review): the entering test, the declarations of fd, local_addr and
bufsize, and the test choosing between kbuf64 and kbuf are not visible in
this listing.
*/
static void decode_stat( struct pfs_process *p, int entering, int syscall, INT64_T *args, int sixty_four )
410
void *uaddr = (void*) POINTER(args[1]);
412
struct pfs_stat lbuf;
413
struct pfs_kernel_stat kbuf;
414
struct pfs_kernel_stat64 kbuf64;
415
char path[PFS_PATH_MAX];
419
p->io_channel_offset = 0;
421
/* path-based variants read the name from the traced process; fstat uses the descriptor */
if(syscall==SYSCALL32_stat) {
422
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
423
p->syscall_result = pfs_stat(path,&lbuf);
424
} else if(syscall==SYSCALL32_lstat) {
425
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
426
p->syscall_result = pfs_lstat(path,&lbuf);
427
} else if(syscall==SYSCALL32_fstat) {
428
p->syscall_result = pfs_fstat(fd,&lbuf);
431
if(p->syscall_result>=0) {
432
/* the channel region is sized for the larger (64-bit) structure in both cases */
if(!pfs_channel_alloc(sizeof(kbuf64),&p->io_channel_offset)) {
433
divert_to_dummy(p,-ENOMEM);
435
local_addr = pfs_channel_base() + p->io_channel_offset;
438
COPY_STAT(lbuf,kbuf64);
439
/* Special case: Linux needs stat64.st_ino in two places. */
440
kbuf64.st_ino_extra = kbuf64.st_ino;
441
memcpy(local_addr,&kbuf64,sizeof(kbuf64));
442
bufsize = sizeof(kbuf64);
444
COPY_STAT(lbuf,kbuf);
445
memcpy(local_addr,&kbuf,sizeof(kbuf));
446
bufsize = sizeof(kbuf);
448
divert_to_channel(p,SYSCALL32_pread,uaddr,bufsize,p->io_channel_offset);
451
divert_to_dummy(p,-errno);
454
/* on success the channel region is released and the call reports 0
   (presumably on the exit pass - the guarding test is not visible) */
if(p->syscall_result>=0) {
455
pfs_channel_free(p->io_channel_offset);
456
divert_to_dummy(p,0);
461
/*
Handle statfs and fstatfs. The result from pfs_statfs/pfs_fstatfs is
converted to the kernel layout and copied directly into the traced process
with tracer_copy_out; the call itself is then replaced by a dummy.
The 64-bit layout is written to args[2], the 32-bit layout to args[1].
NOTE(review): the entering test and the test choosing between the two
layouts (presumably on sixty_four) are not visible in this listing.
*/
static void decode_statfs( struct pfs_process *p, int entering, int syscall, INT64_T *args, int sixty_four )
463
struct pfs_statfs lbuf;
464
struct pfs_kernel_statfs kbuf;
465
struct pfs_kernel_statfs64 kbuf64;
466
char path[PFS_PATH_MAX];
469
p->io_channel_offset = 0;
471
if(syscall==SYSCALL32_statfs) {
472
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
473
p->syscall_result = pfs_statfs(path,&lbuf);
474
} else if(syscall==SYSCALL32_fstatfs) {
475
p->syscall_result = pfs_fstatfs(args[0],&lbuf);
478
if(p->syscall_result>=0) {
480
COPY_STATFS(lbuf,kbuf64);
481
tracer_copy_out(p->tracer,&kbuf64,POINTER(args[2]),sizeof(kbuf64));
483
/* clamp block counts to 32 bits before filling the smaller structure */
if(lbuf.f_blocks > 0xffffffff) lbuf.f_blocks = 0xffffffff;
484
if(lbuf.f_bavail > 0xffffffff) lbuf.f_bavail = 0xffffffff;
485
if(lbuf.f_bfree > 0xffffffff) lbuf.f_bfree = 0xffffffff;
486
COPY_STATFS(lbuf,kbuf);
487
tracer_copy_out(p->tracer,&kbuf,POINTER(args[1]),sizeof(kbuf));
489
divert_to_dummy(p,p->syscall_result);
491
divert_to_dummy(p,-errno);
497
On Linux, all of the socket related system calls are
498
multiplexed through one system call.
501
/*
Dispatch one multiplexed socket operation for process p. `syscall` selects
the operation (the visible case labels are SYS_GETSOCKNAME and
SYS_GETPEERNAME; send/recv style operations are forwarded to decode_write
and decode_read) and `a` holds that operation's own argument array.
Most operations are executed by Parrot itself and completed through
divert_to_dummy.
NOTE(review): the switch statement, most case labels, local declarations
(r, x, length, savelength, fds, pname, msg, vec), result tests and the
frees of x are on lines not visible in this listing; comments below cover
only what is shown.
*/
void decode_socketcall( struct pfs_process *p, int entering, int syscall, INT64_T *a )
511
struct sockaddr_un *paddr;
512
struct sockaddr_un addr;
517
r = pfs_socket(a[0],a[1],a[2]);
519
divert_to_dummy(p,r);
522
/* bind: copy in the caller's address; AF_UNIX paths are resolved through
   pfs_resolve_name before binding */
x = xxmalloc(a[2]+2);
523
tracer_copy_in(p->tracer,x,POINTER(a[1]),a[2]);
524
paddr = (struct sockaddr_un *)x;
525
if(paddr->sun_family==AF_UNIX) {
527
/* terminate the copied path; the +2 in the allocation leaves room for this byte */
((char*)x)[a[2]] = 0;
528
if(pfs_resolve_name(paddr->sun_path,&pname)) {
529
addr.sun_family = AF_UNIX;
530
/* NOTE(review): unbounded strcpy into sun_path; pname.path may be longer
   than the fixed sun_path array - confirm the size of pname.path */
strcpy(addr.sun_path,pname.path);
531
r = pfs_bind(a[0],(struct sockaddr *)&addr,sizeof(addr));
536
r = pfs_bind(a[0],(struct sockaddr *)x,a[2]);
540
divert_to_dummy(p,r);
543
/* connect: same address handling as bind above */
x = xxmalloc(a[2]+2);
544
tracer_copy_in(p->tracer,x,POINTER(a[1]),a[2]);
545
paddr = (struct sockaddr_un *)x;
546
if(paddr->sun_family==AF_UNIX) {
548
((char*)x)[a[2]] = 0;
549
if(pfs_resolve_name(paddr->sun_path,&pname)) {
550
addr.sun_family = AF_UNIX;
551
/* NOTE(review): same unbounded strcpy concern as in the bind path */
strcpy(addr.sun_path,pname.path);
552
r = pfs_connect(a[0],(struct sockaddr *)&addr,sizeof(addr));
557
r = pfs_connect(a[0],(struct sockaddr*)x,a[2]);
560
divert_to_dummy(p,r);
561
/* connection still in progress: visible outcomes are -EINTR, -EINPROGRESS for a
   non-blocking socket, or waiting on the real fd; the test selecting -EINTR is not visible */
} else if(errno_in_progress(errno)) {
564
divert_to_dummy(p,-EINTR);
565
} else if(pfs_is_nonblocking(a[0])) {
566
divert_to_dummy(p,-EINPROGRESS);
568
p->state = PFS_PROCESS_STATE_WAITREAD;
569
int rfd = pfs_get_real_fd(a[0]);
570
if(rfd>=0) pfs_poll_wakeon(rfd,PFS_POLL_READ|PFS_POLL_WRITE|PFS_POLL_EXCEPT);
573
divert_to_dummy(p,-errno);
578
r = pfs_listen(a[0],a[1]);
580
divert_to_dummy(p,r);
584
/* accept: with an address buffer the length is copied in first; otherwise
   accept is called with no address */
tracer_copy_in(p->tracer,&length,POINTER(a[2]),sizeof(length));
585
x = xxmalloc(length);
586
r = pfs_accept(a[0],(struct sockaddr*)x,&length);
589
r = pfs_accept(a[0],0,0);
593
/* NOTE(review): copies out the length reported by pfs_accept; if that can
   exceed the allocated size this reads past x - compare the MIN() used for getsockname */
tracer_copy_out(p->tracer,x,POINTER(a[1]),length);
594
tracer_copy_out(p->tracer,&length,POINTER(a[2]),sizeof(length));
596
divert_to_dummy(p,r);
597
} else if(errno_in_progress(errno)) {
600
divert_to_dummy(p,-EINTR);
601
} else if(pfs_is_nonblocking(a[0])) {
602
divert_to_dummy(p,-EAGAIN);
604
p->state = PFS_PROCESS_STATE_WAITREAD;
605
int rfd = pfs_get_real_fd(a[0]);
606
if(rfd>=0) pfs_poll_wakeon(rfd,PFS_POLL_READ);
609
divert_to_dummy(p,-errno);
613
case SYS_GETSOCKNAME:
614
tracer_copy_in(p->tracer,&length,POINTER(a[2]),sizeof(length));
615
x = xxmalloc(length);
617
r = pfs_getsockname(a[0],(struct sockaddr *)x,&length);
621
/* copy out no more than the smaller of the returned length and savelength
   (presumably the caller's original length - its assignment is not visible) */
tracer_copy_out(p->tracer,x,POINTER(a[1]),MIN(length,savelength));
622
tracer_copy_out(p->tracer,&length,POINTER(a[2]),sizeof(length));
625
divert_to_dummy(p,r);
627
case SYS_GETPEERNAME:
628
tracer_copy_in(p->tracer,&length,POINTER(a[2]),sizeof(length));
629
x = xxmalloc(length);
630
r = pfs_getpeername(a[0],(struct sockaddr *)x,&length);
634
/* NOTE(review): unlike getsockname above, the copy-out length is not clamped */
tracer_copy_out(p->tracer,x,POINTER(a[1]),length);
635
tracer_copy_out(p->tracer,&length,POINTER(a[2]),sizeof(length));
638
divert_to_dummy(p,r);
641
r = pfs_socketpair(a[0],a[1],a[2],fds);
645
tracer_copy_out(p->tracer,fds,(void*)POINTER(a[3]),sizeof(fds));
647
divert_to_dummy(p,r);
650
/* send-style operations reuse the channel-based write path */
decode_write(p,entering,syscall,a);
655
tracer_copy_in(p->tracer,x,POINTER(a[4]),a[5]);
659
decode_write(p,entering,syscall,a);
666
/* receive-style operations reuse the channel-based read path */
decode_read(p,entering,syscall,a);
670
tracer_copy_in(p->tracer,&length,POINTER(a[5]),sizeof(length));
671
x = xxmalloc(length);
675
/* point the length argument at Parrot's local copy before handing off to decode_read */
a[5] = (PTRINT_T) &length;
677
decode_read(p,entering,syscall,a);
681
if(p->syscall_result>=0) {
682
tracer_copy_out(p->tracer,x,POINTER(a[4]),length);
683
tracer_copy_out(p->tracer,&length,POINTER(a[5]),sizeof(length));
689
r = pfs_shutdown(a[0],a[1]);
691
divert_to_dummy(p,r);
695
tracer_copy_in(p->tracer,x,(void*)POINTER(a[3]),a[4]);
696
r = pfs_setsockopt(a[0],a[1],a[2],x,a[4]);
699
divert_to_dummy(p,r);
702
tracer_copy_in(p->tracer,&length,POINTER(a[4]),sizeof(length));
703
x = xxmalloc(length);
704
r = pfs_getsockopt(a[0],a[1],a[2],x,&length);
708
tracer_copy_out(p->tracer,x,(void*)POINTER(a[3]),length);
709
tracer_copy_out(p->tracer,&length,POINTER(a[4]),sizeof(length));
712
divert_to_dummy(p,r);
716
In principle, sendmsg and recvmsg are quite simple,
717
but they require much copying in and out of pointers
718
to support the complex msghdr structure.
724
struct pfs_kernel_msghdr umsg;
725
struct pfs_kernel_iovec *uvec = NULL;
729
/* Copy in the msghdr structure */
730
tracer_copy_in(p->tracer,&umsg,POINTER(a[1]),sizeof(umsg));
732
/* Build a copy of all of the various sub-pointers */
734
if(umsg.msg_name && umsg.msg_namelen>0) {
735
msg.msg_name = xxmalloc(umsg.msg_namelen);
736
msg.msg_namelen = umsg.msg_namelen;
737
tracer_copy_in(p->tracer,msg.msg_name,POINTER(umsg.msg_name),umsg.msg_namelen);
744
uvec = iovec_alloc_in(p,(struct pfs_kernel_iovec *)POINTER(umsg.msg_iov),umsg.msg_iovlen);
747
/* the caller's segments are flattened into one local buffer of their total length */
vec.iov_len = iovec_size(p,uvec,umsg.msg_iovlen);
748
vec.iov_base = xxmalloc(vec.iov_len);
754
if(umsg.msg_control && umsg.msg_controllen>0) {
755
msg.msg_control = xxmalloc(umsg.msg_controllen);
756
msg.msg_controllen = umsg.msg_controllen;
757
tracer_copy_in(p->tracer,msg.msg_control,POINTER(umsg.msg_control),umsg.msg_controllen);
760
msg.msg_controllen = 0;
762
msg.msg_flags = umsg.msg_flags;
764
/* Do a sendmsg or recvmsg on the data, and copy out if needed */
766
if(syscall==SYS_SENDMSG) {
767
iovec_copy_in(p,(char*)vec.iov_base,uvec,umsg.msg_iovlen);
768
p->syscall_result = pfs_sendmsg(a[0],&msg,a[2]);
770
p->syscall_result = pfs_recvmsg(a[0],&msg,a[2]);
771
/* only a positive recvmsg result triggers the copy-out of data, name, control and header */
if(p->syscall_result>0) {
772
iovec_copy_out(p,(char*)vec.iov_base,uvec,umsg.msg_iovlen);
773
if(msg.msg_name && msg.msg_namelen>0) {
774
tracer_copy_out(p->tracer,msg.msg_name,POINTER(umsg.msg_name),msg.msg_namelen);
776
if(msg.msg_control && msg.msg_controllen>0) {
777
tracer_copy_out(p->tracer,msg.msg_control,POINTER(umsg.msg_control),msg.msg_controllen);
779
umsg.msg_namelen = msg.msg_namelen;
780
umsg.msg_controllen = msg.msg_controllen;
781
umsg.msg_flags = msg.msg_flags;
782
tracer_copy_out(p->tracer,&umsg,POINTER(a[1]),sizeof(umsg));
787
/* Delete the msghdr structure */
789
if(msg.msg_control) free(msg.msg_control);
790
if(msg.msg_iov) free(msg.msg_iov->iov_base);
792
if(msg.msg_name) free(msg.msg_name);
794
if(p->syscall_result>=0) {
795
divert_to_dummy(p,p->syscall_result);
797
divert_to_dummy(p,-errno);
803
divert_to_dummy(p,r);
807
/* Only these few have any action when exiting */
811
decode_write(p,entering,syscall,a);
816
tracer_copy_in(p->tracer,x,POINTER(a[4]),a[5]);
820
decode_write(p,entering,syscall,a);
828
decode_read(p,entering,syscall,a);
835
This function is an inexpensive test to see if a given
836
filename is executable. It is not all-inclusive, nor should
837
it be considered a reliable security device. This function is
838
over-optimistic in some cases, but if it falsely reports
839
true, the later real execve() may still fail.
842
/*
Cheap pre-check before execve: returns 0 when pfs_stat on path fails,
refuses setuid/setgid programs with a notice, and otherwise tests whether
any of the user/group/other execute bits is set.
NOTE(review): the declaration of buf, the errno assignments and the return
statements of the later branches are not visible in this listing.
*/
static int is_executable( const char *path )
846
if(pfs_stat(path,&buf)!=0) return 0;
848
if(buf.st_mode&S_ISUID || buf.st_mode&S_ISGID) {
849
debug(D_NOTICE,"cannot execute the program %s because it is setuid.",path);
854
if(buf.st_mode&S_IXUSR || buf.st_mode&S_IXGRP || buf.st_mode&S_IXOTH) {
862
/* Keep only the low 32 bits of an address, for storing it in a 32-bit argv slot. */
#define GET_PTR32(addr) ((PTRINT_T)(addr)&0xffffffff)
864
/*
Rewrite a pending execve so that the program is started through the
dynamic loader `ldso` instead of directly.
  p     - process whose new_logical_name/new_physical_name describe the target
  ldso  - logical path of the loader; resolved with pfs_get_local_name
  args  - execve argument registers; args[0] and args[1] are replaced
  start_of_available_scratch - first address in the traced process's scratch
          area that this function may overwrite
A child process first runs `ldso --verify` on the executable and the
rewrite is only performed if that check exits with status 0 (the early
returns themselves are on lines not visible here).
NOTE(review): declarations of i, argc, ext_argv, child_pid and child_status,
the fork call and several returns are not visible in this listing.
*/
static void redirect_ldso( struct pfs_process *p, const char *ldso, INT64_T *args, char * const start_of_available_scratch )
868
char real_physical_name[PFS_PATH_MAX];
869
char ldso_physical_name[PFS_PATH_MAX];
870
typedef unsigned int argv32;
871
argv32 argv[PFS_ARG_MAX];
873
char *ext_real_logical_name;
874
char *ext_ldso_physical_name;
875
char *ext_real_physical_name;
878
strcpy(real_physical_name, p->new_physical_name);
879
debug(D_PROCESS,"redirect_ldso: called on %s (%s)", p->new_logical_name, real_physical_name);
881
if(pfs_get_local_name(ldso,ldso_physical_name,0,0)!=0) {
882
debug(D_PROCESS,"redirect_ldso: cannot get physical name of %s",ldso);
887
/* Unwise to check ldso recursively */
888
if (strcmp(real_physical_name, ldso_physical_name) == 0) return;
890
/* Test whether loading with ldso would work by */
891
/* running ldso --verify on the executable (may be static) */
895
debug(D_PROCESS,"redirect_ldso: cannot fork");
898
if (child_pid == 0) {
899
int fd = open("/dev/null", O_WRONLY);
906
execlp(ldso_physical_name, ldso_physical_name, "--verify", real_physical_name, NULL);
909
/* NOTE(review): the return value of waitpid is not checked on this line */
waitpid(child_pid, &child_status, 0);
911
if (!WIFEXITED(child_status)) {
912
debug(D_PROCESS,"redirect_ldso: %s --verify %s didn't exit normally. status == %d", ldso_physical_name, real_physical_name, child_status);
915
if (WEXITSTATUS(child_status) != 0) {
916
debug(D_PROCESS,"redirect_ldso: %s --verify %s exited with status %d", ldso_physical_name, real_physical_name, WEXITSTATUS(child_status));
920
/* Start with the physical name of ldso */
921
ext_ldso_physical_name = start_of_available_scratch;
923
/* strcpy(p->new_logical_name,ldso); */
924
strcpy(p->new_physical_name,ldso_physical_name);
926
/* then the "real" physical name */
927
ext_real_physical_name = ext_ldso_physical_name + strlen(ldso_physical_name) + 1;
929
/* and the "real" logical name */
930
ext_real_logical_name = ext_real_physical_name + strlen(real_physical_name) + 1;
932
/* the new argv goes in the scratch area next */
933
ext_argv = ext_real_logical_name + strlen(p->new_logical_name) + 1;
935
/* load in the arguments given by the program and count them up */
936
tracer_copy_in(p->tracer,argv,POINTER(args[1]),sizeof(argv));
938
/* NOTE(review): argv[argc] is read before argc<PFS_ARG_MAX is tested, so a
   fully populated array is read one slot past its end */
for(argc=0;argv[argc] && argc<PFS_ARG_MAX;argc++) {}
940
/* The original scratch area should have already been saved */
942
/* write out the new exe, logical and physical names */
943
tracer_copy_out(p->tracer,p->new_logical_name,ext_real_logical_name,strlen(p->new_logical_name)+1);
944
tracer_copy_out(p->tracer,ldso_physical_name,ext_ldso_physical_name,strlen(ldso_physical_name)+1);
945
tracer_copy_out(p->tracer,real_physical_name,ext_real_physical_name,strlen(real_physical_name)+1);
946
/* rebuild the argv and copy it out */
947
/* shift every entry up by one slot to make room for the extra leading argument;
   NOTE(review): writes argv[argc], which is out of bounds when argc==PFS_ARG_MAX */
for(i=argc;i>0;i--) argv[i] = argv[i-1];
949
argv[0] = GET_PTR32(ext_real_logical_name);
950
argv[1] = GET_PTR32(ext_real_physical_name);
952
debug(D_PROCESS,"redirect_ldso: argc == %d", argc);
953
for(i=0;i<=argc;i++) {
954
tracer_copy_out(p->tracer,&argv[i],ext_argv+sizeof(argv32)*i,sizeof(argv32));
957
/* change the registers to reflect argv */
958
args[0] = (PTRINT_T) ext_ldso_physical_name;
959
args[1] = (PTRINT_T) ext_argv;
960
tracer_args_set(p->tracer,p->syscall,args,3);
962
debug(D_PROCESS,"redirect_ldso: will execute %s %s",ldso,real_physical_name);
966
Several things to note about exec.
968
An entry to execve looks like a normal syscall.
969
An exit from execve indicates a successful execve in progress.
970
Finally, a *third* event with args[0]==0 indicates an execve
971
that has completed with the new image active.
973
Now, we cannot execute the path named by the execve directly.
974
It must be resolved through PFS, because our idea of the
975
current dir (or even the meaning of the name) may be quite
976
different. We resolve the file name into a local path,
977
perhaps by pulling it into the cache.
979
In the simple (second) case, we copy the new local name
980
into the address space of the process and exec that instead.
981
If the exec fails, we must restore the changed bytes afterwards.
983
In the complex (first) case, the program contains a pound-bang
984
indicating an interpreter. We instead resolve the interpreter
985
as the executable and fiddle around with the job's argv to
986
indicate that.  Then, we do much the same as in the simple case.
989
/*
Handle execve for process p.
On entry the target path is copied in, checked with is_executable, and
resolved to a local physical name with pfs_get_local_name. A script whose
first line starts with "#!" has its interpreter resolved instead and a new
argv is built in the process's scratch area; an ordinary executable simply
has its physical name written to the scratch area. In both cases the
original scratch contents are saved in p->scratch_data first, and the
execve registers are pointed at the rewritten data.
On exit a zero result updates p's name and resets per-image state; a
failure restores the saved scratch area.
NOTE(review): the first condition of the if/else chain, the declarations
of ext_argv and argc, the assignments of scriptarg and shiftargs, and many
braces and returns are on lines not visible in this listing.
*/
void decode_execve( struct pfs_process *p, int entering, int syscall, INT64_T *args )
991
char *scratch_addr = (char*)pfs_process_scratch_address(p);
992
int scratch_size = PFS_SCRATCH_SIZE;
993
char *scratch_avail = scratch_addr;
996
debug(D_PROCESS,"execve: %s executing ",p->name);
997
p->state = PFS_PROCESS_STATE_USER;
998
} else if(entering) {
999
char path[PFS_PATH_MAX];
1000
char firstline[PFS_PATH_MAX];
1002
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
1004
/* empty names mark "no exec prepared"; the exit path tests new_logical_name[0] */
p->new_logical_name[0] = 0;
1005
p->new_physical_name[0] = 0;
1007
if(!is_executable(path)) {
1008
divert_to_dummy(p,-errno);
1013
strcpy(p->new_logical_name,path);
1014
if(pfs_get_local_name(path,p->new_physical_name,firstline,sizeof(firstline))<0) {
1015
divert_to_dummy(p,-errno);
1019
/* remove any newlines or spaces at the end */
1021
char *c = firstline;
1024
while( *c=='\n' || *c==' ' ) {
1029
if(!strncmp(firstline,"#!",2)) {
1030
typedef unsigned int argv32;
1031
argv32 argv[PFS_ARG_MAX];
1033
char *interp, *ext_interp;
1034
char *interparg, *ext_interparg;
1035
char *scriptarg, *ext_scriptarg;
1036
char *ext_physical_name;
1037
int i, argc, shiftargs;
1039
debug(D_PROCESS,"execve: %s is an interpreted executable",p->new_logical_name);
1041
/* interp points to the interpreter */
1042
/* store it in the scratch area */
1044
interp = &firstline[2];
1045
while(isspace(*interp)) interp++;
1046
ext_interp = scratch_addr;
1048
/* interparg points to the internal argument */
1049
/* scriptarg points to the script itself */
1050
interparg = strchr(interp,' ');
1054
/* NOTE(review): strchr may return NULL; the guard is presumably on a line
   not visible here - confirm */
while(isspace(*interparg)) interparg++;
1055
ext_interparg = ext_interp + strlen(interp) + 1;
1057
ext_scriptarg = ext_interparg + strlen(interparg) + 1;
1058
debug(D_PROCESS,"execve: instead do %s %s %s",interp,interparg,scriptarg);
1062
ext_interparg = ext_interp + strlen(interp) + 1; /* BUG ? Why shouldn't it skip interp ?*/
1064
ext_scriptarg = ext_interparg;
1066
debug(D_PROCESS,"execve: instead do %s %s",interp,scriptarg);
1070
/* make sure the new interp is loaded */
1071
strcpy(p->new_logical_name,interp);
1072
if(pfs_get_local_name(interp,p->new_physical_name,0,0)!=0) {
1073
divert_to_dummy(p,-errno);
1077
/* the physical name of the interp is next */
1078
ext_physical_name = ext_scriptarg + strlen(scriptarg) + 1;
1079
/* make sure redirect_ldso doesn't clobber arguments */
1080
scratch_avail = ext_physical_name;
1082
/* the new argv goes in the scratch area next */
1083
ext_argv = ext_physical_name + strlen(p->new_physical_name) + 1;
1085
/* load in the arguments given by the program and count them up */
1086
tracer_copy_in(p->tracer,argv,POINTER(args[1]),sizeof(argv));
1087
/* NOTE(review): argv[argc] is read before argc<PFS_ARG_MAX is tested */
for(argc=0;argv[argc] && argc<PFS_ARG_MAX;argc++) {}
1089
/* save the scratch area */
1090
tracer_copy_in(p->tracer,p->scratch_data,scratch_addr,scratch_size);
1092
/* write out the new interp, arg, and physical name */
1093
tracer_copy_out(p->tracer,interp,ext_interp,strlen(interp)+1);
1094
if(interparg) tracer_copy_out(p->tracer,interparg,ext_interparg,strlen(interparg)+1);
1095
tracer_copy_out(p->tracer,scriptarg,ext_scriptarg,strlen(scriptarg)+1);
1096
tracer_copy_out(p->tracer,p->new_physical_name,ext_physical_name,strlen(p->new_physical_name)+1);
1097
/* rebuild the argv and copy it out */
1098
/* shift the existing entries up by shiftargs slots to make room in front */
for(i=argc-1+shiftargs;i>0;i--) argv[i] = argv[i-shiftargs];
1100
argv[0] = GET_PTR32(ext_interp);
1102
argv[1] = GET_PTR32(ext_interparg);
1103
argv[2] = GET_PTR32(ext_scriptarg);
1105
argv[1] = GET_PTR32(ext_scriptarg);
1108
for(i=0;i<=argc;i++) {
1109
tracer_copy_out(p->tracer,&argv[i],ext_argv+sizeof(argv32)*i,sizeof(argv32));
1112
/* change the registers to reflect argv */
1113
args[0] = (PTRINT_T) ext_physical_name;
1114
args[1] = (PTRINT_T) ext_argv;
1115
tracer_args_set(p->tracer,p->syscall,args,3); /* BUG? Why 3 ? */
1117
debug(D_PROCESS,"execve: %s is an ordinary executable",p->new_logical_name);
1119
/* save all of the data we are going to clobber */
1120
tracer_copy_in(p->tracer,p->scratch_data,scratch_addr,scratch_size);
1122
/* store the new local path */
1123
tracer_copy_out(p->tracer,p->new_physical_name,scratch_addr,strlen(p->new_physical_name)+1);
1125
/* set the new program name to the logical name */
1126
args[0] = (PTRINT_T) scratch_addr;
1127
tracer_args_set(p->tracer,p->syscall,args,3); /* BUG? Why 3 ? */
1129
/* optionally route the exec through the configured dynamic loader */
if (pfs_ldso_path) {
1130
redirect_ldso(p, pfs_ldso_path, args, scratch_avail);
1132
debug(D_PROCESS,"execve: %s attempting",p->new_logical_name);
1134
/* FIXME: It would be nice if we could test whether the D_PROCESS */
1135
/* debug flag is set. */
1137
typedef unsigned int argv32;
1138
argv32 argv[PFS_ARG_MAX];
1139
char debug_argv[256];
1141
/* debug aid: log each argument of the final argv, truncated to the debug_argv buffer */
tracer_copy_in(p->tracer,argv,POINTER(args[1]),sizeof(argv));
1142
for(argc=0;argv[argc] && argc<PFS_ARG_MAX;argc++) {
1143
debug_argv[sizeof(debug_argv)-1] = 0;
1144
tracer_copy_in(p->tracer,debug_argv,POINTER(argv[argc]),sizeof(debug_argv)-1);
1145
debug(D_PROCESS,"execve: argv[%d] == \"%s\"", argc, debug_argv);
1149
INT64_T actual_result;
1150
tracer_result_get(p->tracer,&actual_result);
1151
if(actual_result==0) {
1152
debug(D_PROCESS,"execve: %s working",p->new_logical_name);
1153
strcpy(p->name,p->new_logical_name);
1154
/* after a successful exec, signal handlers are reset */
1155
memset(p->signal_interruptible,0,sizeof(p->signal_interruptible));
1156
/* and certain files in the file table are closed */
1157
p->table->close_on_exec();
1158
/* and our knowledge of the address space is gone. */
1159
p->heap_address = 0;
1160
p->break_address = 0;
1161
debug(D_PSTREE,"%d exec %s",p->pid,p->new_logical_name);
1162
/* a non-empty logical name means the scratch area was overwritten on entry and must be restored */
} else if(p->new_logical_name[0]) {
1163
debug(D_PROCESS,"execve: %s failed",p->new_logical_name);
1164
debug(D_PROCESS,"execve: restoring scratch area");
1165
tracer_copy_out(p->tracer,p->scratch_data,(void*)scratch_addr,scratch_size);
1172
Memory mapped files are loaded into the channel,
1173
the whole file regardless of what portion is actually
1174
mapped. The channel cache keeps a reference count.
1176
Note some unusual behavior in the implementation of mmap:
1178
The "old" mmap system call simply stores the arguments
1179
to mmap in memory, and passes a pointer to the arguments
1180
in args[0].  The offset is measured in bytes, as you would expect.
1183
The "new" mmap2 system call puts all of the arguments
1184
in registers, **AND** measures the offset in NUMBER OF PAGES, not bytes.
1187
To unify the two cases, we copy the arguments into the
1188
nargs array and adjust the offset as needed.
1191
/*
Handle mmap and mmap2 for process p.
The arguments are unified into nargs (copied from memory at args[0] for the
old mmap, otherwise taken from the registers), and the source offset is
converted to bytes. Anonymous mappings are left alone. For file mappings
the file is placed in the channel via pfs_channel_cache_alloc and the call
is rewritten to map the channel descriptor at channel_offset+source_offset.
Note the parameter order here is (p, syscall, entering, args), unlike the
other decode_* functions in this file, which take entering before syscall.
NOTE(review): the entering test, the bodies of the two copy loops and the
assignments of addr, prot, flags and fd are not visible in this listing.
*/
void decode_mmap( struct pfs_process *p, int syscall, int entering, INT64_T *args )
1194
UINT32_T addr, orig_length, prot, fd, flags;
1195
UINT32_T nargs[TRACER_ARGS_MAX];
1196
pfs_size_t length, channel_offset, source_offset;
1198
if(p->syscall==SYSCALL32_mmap) {
1199
tracer_copy_in(p->tracer,nargs,POINTER(args[0]),sizeof(nargs));
1201
for(int i=0;i<TRACER_ARGS_MAX;i++) {
1207
orig_length = nargs[1];
1212
/* old mmap gives the offset in bytes; otherwise it is scaled by the page size */
if(p->syscall==SYSCALL32_mmap) {
1213
source_offset = nargs[5];
1215
source_offset = nargs[5]*getpagesize();
1218
// Note that on many versions of Linux, nargs[5]
1219
// is corrupted in mmap2 on a 64-bit machine.
1220
// See comments in tracer.c and
1221
// http://lkml.org/lkml/2007/1/31/317
1223
#ifdef CCTOOLS_CPU_X86_64
1224
/* a set bit 31 is treated as the symptom of that bug: switch the tracer into its
   workaround mode, re-read the arguments and recompute the offset */
if(p->syscall==SYSCALL32_mmap2 && (source_offset & 0x80000000 )) {
1225
debug(D_SYSCALL,"detected kernel bug in ptrace: offset has suspicious value of 0x%llx",source_offset);
1226
tracer_has_args5_bug(p->tracer);
1227
tracer_args_get(p->tracer,&p->syscall,p->syscall_args);
1228
source_offset = args[5]*getpagesize();
1229
debug(D_SYSCALL,"detected kernel bug in ptrace: new offset is 0x%llx",source_offset);
1233
debug(D_SYSCALL,"mmap addr=0x%x len=0x%x prot=0x%x flags=0x%x fd=%d offset=0x%llx",addr,orig_length,prot,flags,fd,source_offset);
1235
if(flags&MAP_ANONYMOUS) {
1236
/* great, just do it. */
1237
debug(D_SYSCALL,"mmap skipped b/c anonymous");
1239
char file_name[PFS_PATH_MAX];
1241
if(pfs_get_full_name(fd,file_name)!=0) {
1242
debug(D_SYSCALL,"mmap failed name: %s",strerror(errno));
1243
divert_to_dummy(p,-errno);
1247
if(pfs_channel_cache_alloc(file_name,fd,&length,&channel_offset)) {
1248
/* retarget the mapping at the channel fd, dropping MAP_DENYWRITE from the flags */
nargs[3] = flags & ~MAP_DENYWRITE;
1249
nargs[4] = pfs_channel_fd();
1250
/* NOTE(review): nargs is 32-bit, so a combined offset above 4 GiB is truncated here */
nargs[5] = channel_offset+source_offset;
1252
debug(D_SYSCALL,"channel_offset=0x%llx source_offset=0x%llx total=0x%x",channel_offset,source_offset,nargs[5]);
1254
/* old mmap: write the argument block back to memory; otherwise convert the
   offset back to pages and update the registers */
if(p->syscall==SYSCALL32_mmap) {
1255
tracer_copy_out(p->tracer,nargs,POINTER(args[0]),sizeof(nargs));
1257
nargs[5] = nargs[5] / getpagesize();
1258
for(int i=0;i<TRACER_ARGS_MAX;i++) {
1261
tracer_args_set(p->tracer,p->syscall,args,6);
1262
p->syscall_args_changed = 1;
1264
debug(D_SYSCALL,"mmap changed: fd=%d addr=0x%x",nargs[4],nargs[5]);
1266
debug(D_SYSCALL,"mmap failed cache: %s",strerror(errno));
1267
divert_to_dummy(p,-errno);
1274
/*
Service a SIOCGIFCONF ioctl on behalf of the traced process.
(decode_syscall calls this when the ioctl command equals SIOCGIFCONF
and uses the return value as the system call result.)

p      - the traced process whose memory is read and written via p->tracer
fd     - file descriptor passed through to pfs_ioctl
cmd    - ioctl command passed through to pfs_ioctl
uaddr  - address, in the traced process, of its ifconf-style header

The header is copied in from the traced process, a local buffer of
the advertised length is filled from the traced process, pfs_ioctl is
run against a local ifc that points at that buffer, and then the
header and buffer are copied back out.

NOTE(review): the declarations of ifc, buffer, length and result, the
braces, and the cleanup/return path are not visible in this excerpt.
Confirm that buffer is released and that result is what is returned.
*/
int decode_ioctl_siocgifconf( struct pfs_process *p, int fd, int cmd, void *uaddr )
1276
struct pfs_kernel_ifconf uifc;
1282
/* Fetch the caller's header: it carries the buffer address and length. */
tracer_copy_in(p->tracer,&uifc,uaddr,sizeof(uifc));
1283
/*
NOTE(review): ifc_len comes straight from the traced process and the
malloc result is not checked in the visible lines -- confirm that a
failed or oversized allocation is handled.
*/
buffer = (char*) malloc(uifc.ifc_len);
1284
/* Mirror the caller's buffer locally; length is whatever tracer_copy_in returns. */
length = tracer_copy_in(p->tracer,buffer,(void*)(PTRINT_T)uifc.ifc_buffer,uifc.ifc_len);
1286
/* Point the local request at the local copy rather than at the caller's address. */
ifc.ifc_buf = buffer;
1287
ifc.ifc_len = length;
1289
result = pfs_ioctl(fd,cmd,&ifc);
1292
/*
Report the length left in ifc by pfs_ioctl back to the caller, then
write the header and that many bytes of the buffer into the traced process.
NOTE(review): no guard on result is visible here -- confirm that the
copy-out is skipped when pfs_ioctl fails.
*/
uifc.ifc_len = ifc.ifc_len;
1293
tracer_copy_out(p->tracer,&uifc,uaddr,sizeof(uifc));
1294
tracer_copy_out(p->tracer,buffer,(void*)(PTRINT_T)uifc.ifc_buffer,uifc.ifc_len);
1302
void decode_syscall( struct pfs_process *p, int entering )
1306
char path[PFS_PATH_MAX];
1307
char path2[PFS_PATH_MAX];
1310
p->state = PFS_PROCESS_STATE_KERNEL;
1311
p->syscall_dummy = 0;
1312
tracer_args_get(p->tracer,&p->syscall,p->syscall_args);
1313
debug(D_SYSCALL,"%s",tracer_syscall_name(p->tracer,p->syscall));
1314
p->syscall_original = p->syscall;
1315
pfs_syscall_count++;
1317
if(pfs_syscall_totals32) {
1319
if(s>=0 && s<SYSCALL32_MAX) {
1320
pfs_syscall_totals32[p->syscall]++;
1325
args = p->syscall_args;
1327
switch(p->syscall) {
1328
case SYSCALL32_oldolduname:
1329
p->syscall = SYSCALL32_execve;
1331
case SYSCALL32_execve:
1332
decode_execve(p,entering,p->syscall,args);
1336
Some variants of fork do not propagate ptrace, so we
1337
must convert them into clone with appropriate flags.
1338
Once a fork is started, we must trace only that pid
1339
so that we can determine the child pid before seeing
1340
any events from the child. On return, we must fill
1341
in the child process with its parent's ppid.
1344
case SYSCALL32_fork:
1345
case SYSCALL32_clone:
1349
if(p->syscall==SYSCALL32_fork) {
1350
newargs[0] = CLONE_PTRACE|CLONE_PARENT|SIGCHLD;
1353
p->syscall_args_changed = 1;
1354
debug(D_SYSCALL,"converting fork into clone(%x)",newargs[0]);
1356
newargs[0] = (args[0]&~0xff)|CLONE_PTRACE|CLONE_PARENT|SIGCHLD;
1358
debug(D_SYSCALL,"adjusting clone(%llx,%llx,%llx,%llx) -> clone(%llx)",args[0],args[1],args[2],args[3],newargs[0]);
1360
tracer_args_set(p->tracer,SYSCALL32_clone,newargs,newargs_count);
1361
trace_this_pid = p->pid;
1364
struct pfs_process *child;
1365
tracer_result_get(p->tracer,&childpid);
1367
int child_signal,clone_files;
1368
if(p->syscall_original==SYSCALL32_fork) {
1369
child_signal = SIGCHLD;
1372
child_signal = args[0]&0xff;
1373
clone_files = args[0]&CLONE_FILES;
1375
pid_t notify_parent;
1376
if(args[0]&(CLONE_PARENT|CLONE_THREAD)) {
1377
notify_parent = p->ppid;
1379
notify_parent = p->pid;
1381
child = pfs_process_create(childpid,p->pid,notify_parent,clone_files,child_signal);
1382
child->syscall_result = 0;
1383
if(args[0]&CLONE_THREAD) child->tgid = p->tgid;
1384
if(p->syscall_original==SYSCALL32_fork) {
1385
memcpy(child->syscall_args,p->syscall_args,sizeof(p->syscall_args));
1386
child->syscall_args_changed = 1;
1388
if(pfs_trap_after_fork) {
1389
child->state = PFS_PROCESS_STATE_KERNEL;
1391
child->state = PFS_PROCESS_STATE_USER;
1393
debug(D_PROCESS,"%d created pid %d",(int)p->pid,(int)childpid);
1394
/* now trace any process at all */
1395
trace_this_pid = -1;
1402
Note that we do not support vfork. The behavior of vfork
1403
varies greatly from kernel to kernel, and is in fact impossible
1404
to support through ptrace without a kernel patch in some cases.
1405
However, glibc is smart and converts vfork into fork if the
1406
kernel responds that it does not exist. So, failed vforks
1407
eventually end up in the previous case. Also note parrot_helper.so,
1408
which also aims to solve this problem.
1411
case SYSCALL32_vfork:
1413
debug(D_NOTICE,"sorry, I cannot run this program (%s) without parrot_helper.so.",p->name);
1414
divert_to_dummy(p,-ENOSYS);
1419
On ptraces that do not preserve the process structure,
1420
we must trap and manage variants of wait() to permit
1421
the propagation of child completion.
1423
case SYSCALL32_waitpid:
1425
pfs_process_waitpid(p,args[0],(int*)POINTER(args[1]),args[2],0);
1426
divert_to_dummy(p,p->syscall_result);
1430
case SYSCALL32_wait4:
1432
pfs_process_waitpid(p,args[0],(int*)POINTER(args[1]),args[2],(struct rusage*)POINTER(args[3]));
1433
divert_to_dummy(p,p->syscall_result);
1438
We don't do anything special with exit. Just let it
1439
run to completion, and then process the exit event
1443
case SYSCALL32_exit:
1444
case SYSCALL32_exit_group:
1448
Here begin all of the I/O operations, given in the
1449
same order as in pfs_table. Notice that most operations
1450
use the simple but slow tracer_copy_{in,out} routines.
1451
When performance is important (write,mmap), we resort
1452
to redirecting I/O to the side channel.
1455
/* File descriptor creation */
1457
case SYSCALL32_open:
1458
case SYSCALL32_creat:
1460
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
1461
if(p->syscall==SYSCALL32_creat) {
1462
p->syscall_result = pfs_open(path,O_CREAT|O_WRONLY|O_TRUNC,args[1]);
1464
p->syscall_result = pfs_open(path,args[1],args[2]);
1466
if(p->syscall_result<0) p->syscall_result = -errno;
1467
divert_to_dummy(p,p->syscall_result);
1471
case SYSCALL32_pipe:
1474
p->syscall_result = pfs_pipe(fds);
1475
if(p->syscall_result<0) {
1476
p->syscall_result = -errno;
1478
tracer_copy_out(p->tracer,(void*)fds,POINTER(args[0]),sizeof(fds));
1480
divert_to_dummy(p,p->syscall_result);
1484
/* operations on open files */
1486
case SYSCALL32_close:
1488
p->syscall_result = pfs_close(args[0]);
1489
if(p->syscall_result<0) p->syscall_result = -errno;
1490
divert_to_dummy(p,p->syscall_result);
1494
case SYSCALL32_read:
1495
case SYSCALL32_pread:
1496
decode_read(p,entering,p->syscall,args);
1499
case SYSCALL32_write:
1500
case SYSCALL32_pwrite:
1501
decode_write(p,entering,p->syscall,args);
1504
case SYSCALL32_readv:
1505
decode_readv(p,entering,p->syscall,args);
1508
case SYSCALL32_writev:
1509
decode_writev(p,entering,p->syscall,args);
1512
case SYSCALL32_socketcall: {
1514
INT64_T subargs64[6];
1516
tracer_copy_in(p->tracer,subargs,POINTER(args[1]),sizeof(subargs));
1517
for(i=0;i<6;i++) subargs64[i] = subargs[i];
1518
decode_socketcall(p,entering,args[0],subargs64);
1523
Now we have a series of standard file operations that
1524
only use the integer arguments, and are (mostly) easily
1525
passed back and forth.
1528
case SYSCALL32_lseek:
1530
p->syscall_result = pfs_lseek(args[0],args[1],args[2]);
1531
if(p->syscall_result<0) p->syscall_result = -errno;
1532
divert_to_dummy(p,p->syscall_result);
1536
case SYSCALL32__llseek:
1538
UINT64_T high = args[1];
1539
UINT64_T low = args[2];
1540
UINT64_T offset = (high<<32) | low;
1541
INT64_T result = pfs_lseek(args[0],offset,args[4]);
1543
p->syscall_result = -errno;
1545
tracer_copy_out(p->tracer,&result,POINTER(args[3]),sizeof(result));
1546
p->syscall_result = 0;
1548
divert_to_dummy(p,p->syscall_result);
1552
case SYSCALL32_ftruncate:
1554
p->syscall_result = pfs_ftruncate(args[0],args[1]);
1555
if(p->syscall_result<0) p->syscall_result = -errno;
1556
divert_to_dummy(p,p->syscall_result);
1560
case SYSCALL32_ftruncate64:
1562
INT64_T offset = args[1]+(((INT64_T)args[2])<<32);
1563
p->syscall_result = pfs_ftruncate(args[0],offset);
1564
if(p->syscall_result<0) p->syscall_result = -errno;
1565
divert_to_dummy(p,p->syscall_result);
1569
case SYSCALL32_fstat:
1570
decode_stat(p,entering,SYSCALL32_fstat,args,0);
1572
case SYSCALL32_fstat64:
1573
decode_stat(p,entering,SYSCALL32_fstat,args,1);
1575
case SYSCALL32_fstatfs:
1576
decode_statfs(p,entering,SYSCALL32_fstatfs,args,0);
1578
case SYSCALL32_fstatfs64:
1579
decode_statfs(p,entering,SYSCALL32_fstatfs,args,1);
1582
case SYSCALL32_flock:
1584
p->syscall_result = pfs_flock(args[0],args[1]);
1585
if(p->syscall_result<0) p->syscall_result = -errno;
1586
divert_to_dummy(p,p->syscall_result);
1590
case SYSCALL32_fsync:
1591
case SYSCALL32_fdatasync:
1593
p->syscall_result = pfs_fsync(args[0]);
1594
if(p->syscall_result<0) p->syscall_result = -errno;
1595
divert_to_dummy(p,p->syscall_result);
1599
case SYSCALL32_fchdir:
1601
p->syscall_result = pfs_fchdir(args[0]);
1602
if(p->syscall_result<0) p->syscall_result = -errno;
1603
divert_to_dummy(p,p->syscall_result);
1607
case SYSCALL32_fchmod:
1609
p->syscall_result = pfs_fchmod(args[0],args[1]);
1610
if(p->syscall_result<0) p->syscall_result = -errno;
1611
divert_to_dummy(p,p->syscall_result);
1615
case SYSCALL32_fchown:
1616
case SYSCALL32_fchown32:
1618
p->syscall_result = pfs_fchown(args[0],args[1],args[2]);
1619
if(p->syscall_result<0) p->syscall_result = -errno;
1620
divert_to_dummy(p,p->syscall_result);
1625
ioctl presents both bad news and good news.
1626
The bad news is that all ioctl operations are driver
1627
specific. I have no intention of coding up all the
1628
possible ioctls here, nor could I. However, it is a fair
1629
assumption that the third argument, when a valid pointer,
1630
is a continuous and small chunk of memory. So, we copy
1631
in one page that it points to (if any), and use that as
1632
a temporary buffer. We copy back to the application only
1633
those words in the buffer that change. If none change,
1634
then the argument was likely interpreted as an integer
1635
rather than a pointer. A nice coincidence is that
1636
copy in is generally efficient, while copy out, though
1640
case SYSCALL32_ioctl:
1644
void *uaddr = POINTER(args[2]);
1646
char tbuffer[65536];
1650
if(cmd==SIOCGIFCONF) {
1651
p->syscall_result = decode_ioctl_siocgifconf(p,fd,cmd,uaddr);
1652
divert_to_dummy(p,p->syscall_result);
1657
length = tracer_copy_in(p->tracer,buffer,uaddr,sizeof(buffer));
1659
memcpy(tbuffer,buffer,length);
1660
p->syscall_result = pfs_ioctl(fd,cmd,buffer);
1663
p->syscall_result = pfs_ioctl(fd,cmd,uaddr);
1666
p->syscall_result = pfs_ioctl(fd,cmd,uaddr);
1669
if(p->syscall_result<0) {
1670
p->syscall_result = -errno;
1674
for(i=0;i<length;i++) {
1675
if(tbuffer[i]!=buffer[i]) {
1679
changed = _ROUND_UP(changed,sizeof(int));
1680
tracer_copy_out(p->tracer,buffer,uaddr,changed);
1683
divert_to_dummy(p,p->syscall_result);
1688
Unlike ioctl, fcntl operations are rather generic
1689
and operate on the file table itself. These things
1690
we can parse, understand, and pass along to the
1691
file table in most cases.
1693
We permit the user to set the O_ASYNC flag and thus
1694
receive activity notification via SIGIO. However,
1695
we don't yet support extended signal information.
1698
case SYSCALL32_fcntl:
1699
case SYSCALL32_fcntl64:
1703
void *uaddr = POINTER(args[2]);
1704
struct pfs_kernel_flock kfl;
1705
struct pfs_kernel_flock64 kfl64;
1707
struct flock64 fl64;
1716
p->syscall_result = pfs_fcntl(fd,cmd,uaddr);
1717
if(p->syscall_result<0) p->syscall_result=-errno;
1718
divert_to_dummy(p,p->syscall_result);
1721
int flags = (int)args[2];
1723
debug(D_PROCESS,"pid %d requests O_ASYNC on fd %d",(int)pfs_current->pid,(int)fd);
1724
p->flags |= PFS_PROCESS_FLAGS_ASYNC;
1732
tracer_copy_in(p->tracer,&kfl,uaddr,sizeof(kfl));
1734
p->syscall_result = pfs_fcntl(fd,cmd,&kfl);
1735
if(p->syscall_result<0) {
1736
p->syscall_result=-errno;
1739
tracer_copy_out(p->tracer,&kfl,uaddr,sizeof(kfl));
1741
divert_to_dummy(p,p->syscall_result);
1747
tracer_copy_in(p->tracer,&kfl64,uaddr,sizeof(kfl64));
1748
COPY_FLOCK(kfl64,fl64);
1749
p->syscall_result = pfs_fcntl(fd,cmd,&fl64);
1750
if(p->syscall_result<0) {
1751
p->syscall_result=-errno;
1753
COPY_FLOCK(fl64,kfl64);
1754
tracer_copy_out(p->tracer,&kfl64,uaddr,sizeof(kfl64));
1756
divert_to_dummy(p,p->syscall_result);
1759
/* Pretend that the caller is the signal recipient */
1761
divert_to_dummy(p,p->pid);
1764
/* But we always get the signal. */
1766
debug(D_PROCESS,"pid %d requests F_SETOWN on fd %d",(int)pfs_current->pid,(int)fd);
1767
p->flags |= PFS_PROCESS_FLAGS_ASYNC;
1769
pfs_fcntl(fd,F_SETOWN,(void*)pid);
1770
divert_to_dummy(p,0);
1774
divert_to_dummy(p,-ENOSYS);
1781
case SYSCALL32_mmap:
1782
case SYSCALL32_mmap2:
1783
decode_mmap(p,p->syscall,entering,args);
1787
XXX Incomplete but acceptable for now.
1788
We permit the application to munmap whatever it likes.
1789
This might be an anonymous map that we don't track,
1790
or it could be a file. In the latter case, we would like
1791
to decrement the reference count. At the moment, we
1792
can't tell the difference, nor can we reverse map the
1793
application's space into our own address space.
1794
Once loaded, mmaped objects just stay in until the
1795
parent pfs process dies.
1798
case SYSCALL32_munmap:
1802
For select, we must copy in all the data structures
1803
that are pointed to, select, and then copy out.
1804
Notice that on Linux, newselect has the ordinary
1805
interface, while SYSCALL32_select keeps all of the arguments
1806
in a single structure.
1809
case SYSCALL32__newselect:
1810
case SYSCALL32_select:
1812
int maxfd = args[0];
1813
fd_set rset, wset, eset;
1814
struct pfs_kernel_timeval ktv;
1816
fd_set *prset, *pwset, *peset;
1817
struct timeval *ptv;
1820
nlongs = (maxfd+31)/32;
1828
tracer_copy_in(p->tracer,&rset,POINTER(args[1]),nbytes);
1835
tracer_copy_in(p->tracer,&wset,POINTER(args[2]),nbytes);
1842
tracer_copy_in(p->tracer,&eset,POINTER(args[3]),nbytes);
1849
tracer_copy_in(p->tracer,&ktv,POINTER(args[4]),sizeof(tv));
1850
COPY_TIMEVAL(ktv,tv);
1856
p->syscall_result = pfs_select(maxfd,prset,pwset,peset,ptv);
1858
if(p->syscall_result>=0) {
1859
divert_to_dummy(p,p->syscall_result);
1860
if(prset) tracer_copy_out(p->tracer,prset,POINTER(args[1]),nbytes);
1861
if(pwset) tracer_copy_out(p->tracer,pwset,POINTER(args[2]),nbytes);
1862
if(peset) tracer_copy_out(p->tracer,peset,POINTER(args[3]),nbytes);
1864
COPY_TIMEVAL(tv,ktv);
1865
tracer_copy_out(p->tracer,&ktv,POINTER(args[4]),sizeof(ktv));
1867
} else if(errno==EAGAIN) {
1868
if(p->interrupted) {
1870
divert_to_dummy(p,-EINTR);
1872
p->state = PFS_PROCESS_STATE_WAITREAD;
1875
divert_to_dummy(p,-errno);
1880
case SYSCALL32_poll:
1882
struct pollfd *ufds;
1884
divert_to_dummy(p,-EINVAL);
1886
int length = sizeof(*ufds)*args[1];
1887
ufds = (struct pollfd *) malloc(length);
1889
tracer_copy_in(p->tracer,ufds,POINTER(args[0]),length);
1890
p->syscall_result = pfs_poll(ufds,args[1],args[2]);
1891
if(p->syscall_result>=0) {
1892
divert_to_dummy(p,p->syscall_result);
1893
tracer_copy_out(p->tracer,ufds,POINTER(args[0]),length);
1894
} else if(errno==EAGAIN) {
1895
if(p->interrupted) {
1897
divert_to_dummy(p,-EINTR);
1899
p->state = PFS_PROCESS_STATE_WAITREAD;
1902
divert_to_dummy(p,-errno);
1906
divert_to_dummy(p,-ENOMEM);
1913
Next, we have operations that do not modify any files
1914
in particular, but change the state of the file table
1915
within the process in question.
1918
case SYSCALL32_chdir:
1920
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
1921
p->syscall_result = pfs_chdir(path);
1922
if(p->syscall_result<0) p->syscall_result = -errno;
1923
divert_to_dummy(p,p->syscall_result);
1927
case SYSCALL32_getcwd:
1929
if(pfs_getcwd(path,sizeof(path))) {
1930
p->syscall_result = strlen(path)+1;
1931
if(p->syscall_result>args[1]) {
1932
p->syscall_result = -ERANGE;
1934
tracer_copy_out(p->tracer,path,POINTER(args[0]),p->syscall_result);
1937
p->syscall_result = -errno;
1939
divert_to_dummy(p,p->syscall_result);
1945
p->syscall_result = pfs_dup(args[0]);
1946
if(p->syscall_result<0) p->syscall_result = -errno;
1947
divert_to_dummy(p,p->syscall_result);
1951
case SYSCALL32_dup2:
1953
p->syscall_result = pfs_dup2(args[0],args[1]);
1954
if(p->syscall_result<0) p->syscall_result = -errno;
1955
divert_to_dummy(p,p->syscall_result);
1960
Next we have all of the system calls that work on
1961
a file name, rather than an open file. In most cases,
1962
we use the (fast) tracer_copy_in to fetch the file
1963
name, and then invoke the matching pfs_ routine. XXX We should have
1964
some sort of bounds checking on the path name.
1967
case SYSCALL32_stat:
1968
decode_stat(p,entering,SYSCALL32_stat,args,0);
1970
case SYSCALL32_stat64:
1971
decode_stat(p,entering,SYSCALL32_stat,args,1);
1973
case SYSCALL32_lstat:
1974
decode_stat(p,entering,SYSCALL32_lstat,args,0);
1976
case SYSCALL32_lstat64:
1977
decode_stat(p,entering,SYSCALL32_lstat,args,1);
1979
case SYSCALL32_statfs:
1980
decode_statfs(p,entering,SYSCALL32_statfs,args,0);
1982
case SYSCALL32_statfs64:
1983
decode_statfs(p,entering,SYSCALL32_statfs,args,1);
1986
case SYSCALL32_access:
1988
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
1989
p->syscall_result = pfs_access(path,args[1]);
1990
if(p->syscall_result<0) p->syscall_result = -errno;
1991
divert_to_dummy(p,p->syscall_result);
1995
case SYSCALL32_chmod:
1997
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
1998
p->syscall_result = pfs_chmod(path,args[1]);
1999
if(p->syscall_result<0) p->syscall_result = -errno;
2000
divert_to_dummy(p,p->syscall_result);
2004
case SYSCALL32_chown:
2005
case SYSCALL32_chown32:
2007
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2008
p->syscall_result = pfs_chown(path,args[1],args[2]);
2009
if(p->syscall_result<0) p->syscall_result = -errno;
2010
divert_to_dummy(p,p->syscall_result);
2014
case SYSCALL32_lchown:
2015
case SYSCALL32_lchown32:
2017
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2018
p->syscall_result = pfs_lchown(path,args[1],args[2]);
2019
if(p->syscall_result<0) p->syscall_result = -errno;
2020
divert_to_dummy(p,p->syscall_result);
2024
case SYSCALL32_truncate:
2026
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2027
p->syscall_result = pfs_truncate(path,args[1]);
2028
if(p->syscall_result<0) p->syscall_result = -errno;
2029
divert_to_dummy(p,p->syscall_result);
2033
case SYSCALL32_truncate64:
2035
INT64_T offset = args[1]+(((INT64_T)args[2])<<32);
2036
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2037
p->syscall_result = pfs_truncate(path,offset);
2038
if(p->syscall_result<0) p->syscall_result = -errno;
2039
divert_to_dummy(p,p->syscall_result);
2043
case SYSCALL32_unlink:
2045
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2046
p->syscall_result = pfs_unlink(path);
2047
if(p->syscall_result<0) p->syscall_result = -errno;
2048
divert_to_dummy(p,p->syscall_result);
2052
case SYSCALL32_rename:
2054
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2055
tracer_copy_in_string(p->tracer,path2,POINTER(args[1]),sizeof(path2));
2056
p->syscall_result = pfs_rename(path,path2);
2057
if(p->syscall_result<0) p->syscall_result = -errno;
2058
divert_to_dummy(p,p->syscall_result);
2062
case SYSCALL32_link:
2064
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2065
tracer_copy_in_string(p->tracer,path2,POINTER(args[1]),sizeof(path2));
2066
p->syscall_result = pfs_link(path,path2);
2067
if(p->syscall_result<0) p->syscall_result = -errno;
2068
divert_to_dummy(p,p->syscall_result);
2072
case SYSCALL32_symlink:
2074
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2075
tracer_copy_in_string(p->tracer,path2,POINTER(args[1]),sizeof(path2));
2076
p->syscall_result = pfs_symlink(path,path2);
2077
if(p->syscall_result<0) p->syscall_result = -errno;
2078
divert_to_dummy(p,p->syscall_result);
2082
case SYSCALL32_readlink:
2084
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2085
p->syscall_result = pfs_readlink(path,path2,sizeof(path2));
2086
if(p->syscall_result<0) {
2087
p->syscall_result = -errno;
2089
tracer_copy_out(p->tracer,path2,POINTER(args[1]),p->syscall_result);
2091
divert_to_dummy(p,p->syscall_result);
2095
case SYSCALL32_mknod:
2097
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2098
p->syscall_result = pfs_mknod(path,args[1],args[2]);
2099
if(p->syscall_result<0) p->syscall_result = -errno;
2100
divert_to_dummy(p,p->syscall_result);
2104
case SYSCALL32_mkdir:
2106
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2107
p->syscall_result = pfs_mkdir(path,args[1]);
2108
if(p->syscall_result<0) p->syscall_result = -errno;
2109
divert_to_dummy(p,p->syscall_result);
2113
case SYSCALL32_rmdir:
2115
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2116
p->syscall_result = pfs_rmdir(path);
2117
if(p->syscall_result<0) p->syscall_result = -errno;
2118
divert_to_dummy(p,p->syscall_result);
2123
Although pfs_table supports the high-level operations
2124
opendir/readdir/closedir, all we can get a hold of at
2125
this level is getdents, which works on an open file
2126
descriptor. We copy dirents out one by one using fdreaddir,
2127
and transform them into the type expected by the kernel.
2128
If we overrun the available buffer, immediately seek
2129
the fd back to where it was before.
2132
case SYSCALL32_getdents:
2135
char *uaddr = (char*) POINTER(args[1]);
2136
int length = args[2];
2140
struct pfs_kernel_dirent buf;
2143
while((d=pfs_fdreaddir(fd))) {
2144
COPY_DIRENT(*d,buf);
2145
if(DIRENT_SIZE(buf)>(unsigned)length) {
2146
pfs_lseek(fd,d->d_off,SEEK_SET);
2150
tracer_copy_out(p->tracer,&buf,(void*)uaddr,buf.d_reclen);
2151
uaddr += buf.d_reclen;
2152
length -= buf.d_reclen;
2153
result += buf.d_reclen;
2156
if(result==0 && errno!=0) {
2157
p->syscall_result = -errno;
2159
p->syscall_result = result;
2161
divert_to_dummy(p,p->syscall_result);
2166
case SYSCALL32_getdents64:
2169
char *uaddr = (char*) POINTER(args[1]);
2170
int length = args[2];
2174
struct pfs_kernel_dirent64 buf;
2177
while((d=pfs_fdreaddir(fd))) {
2178
COPY_DIRENT(*d,buf);
2179
if(DIRENT_SIZE(buf)>(unsigned)length) {
2180
pfs_lseek(fd,d->d_off,SEEK_SET);
2184
tracer_copy_out(p->tracer,&buf,(void*)uaddr,buf.d_reclen);
2185
uaddr += buf.d_reclen;
2186
length -= buf.d_reclen;
2187
result += buf.d_reclen;
2190
if(result==0 && errno!=0) {
2191
p->syscall_result = -errno;
2193
p->syscall_result = result;
2195
divert_to_dummy(p,p->syscall_result);
2199
case SYSCALL32_utime:
2201
struct pfs_kernel_utimbuf kut;
2203
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2205
tracer_copy_in(p->tracer,&kut,POINTER(args[1]),sizeof(kut));
2206
COPY_UTIMBUF(kut,ut);
2208
ut.actime = ut.modtime = time(0);
2210
p->syscall_result = pfs_utime(path,&ut);
2211
if(p->syscall_result<0) p->syscall_result = -errno;
2212
divert_to_dummy(p,p->syscall_result);
2216
case SYSCALL32_utimes:
2218
struct pfs_kernel_timeval times[2];
2220
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2222
tracer_copy_in(p->tracer,times,POINTER(args[1]),sizeof(times));
2223
ut.actime = times[0].tv_sec;
2224
ut.modtime = times[1].tv_sec;
2226
ut.actime = ut.modtime = time(0);
2228
p->syscall_result = pfs_utime(path,&ut);
2229
if(p->syscall_result<0) p->syscall_result = -errno;
2230
divert_to_dummy(p,p->syscall_result);
2236
Note that we call pfs_process_raise here so that the process data
2237
structures are made aware of the signal propagation, possibly kicking
2238
someone out of sleep. However, we do *not* convert this call to
2239
a dummy, so that the sender can deliver itself, thus getting the
2240
correct data into the sa_info structure.
2243
case SYSCALL32_kill:
2244
case SYSCALL32_tkill:
2246
pfs_process_raise(args[0],args[1],0);
2251
case SYSCALL32_tgkill:
2253
pfs_process_raise(args[0],args[2],0);
2258
We need to track the umask ourselves and use it in open.
2261
case SYSCALL32_umask:
2263
int old_umask = pfs_current->umask;
2264
pfs_current->umask = args[0] & 0777;
2265
divert_to_dummy(p,old_umask);
2270
The tracing mechanism re-parents traced children,
2271
so we must fake the parent pid if the child wants
2272
to send its parent a signal.
2275
case SYSCALL32_getppid:
2276
divert_to_dummy(p,p->ppid);
2280
Always return the dummy uids.
2283
case SYSCALL32_getuid32:
2284
case SYSCALL32_geteuid32:
2285
case SYSCALL32_geteuid:
2286
case SYSCALL32_getuid:
2287
divert_to_dummy(p,pfs_uid);
2290
case SYSCALL32_getgid32:
2291
case SYSCALL32_getegid32:
2292
case SYSCALL32_getegid:
2293
case SYSCALL32_getgid:
2294
divert_to_dummy(p,pfs_gid);
2297
case SYSCALL32_getresuid32:
2298
case SYSCALL32_getresuid:
2299
tracer_copy_out(p->tracer,&pfs_uid,POINTER(args[0]),sizeof(pfs_uid));
2300
tracer_copy_out(p->tracer,&pfs_uid,POINTER(args[1]),sizeof(pfs_uid));
2301
tracer_copy_out(p->tracer,&pfs_uid,POINTER(args[2]),sizeof(pfs_uid));
2302
divert_to_dummy(p,0);
2306
case SYSCALL32_getresgid32:
2307
case SYSCALL32_getresgid:
2308
tracer_copy_out(p->tracer,&pfs_gid,POINTER(args[0]),sizeof(pfs_uid));
2309
tracer_copy_out(p->tracer,&pfs_gid,POINTER(args[1]),sizeof(pfs_uid));
2310
tracer_copy_out(p->tracer,&pfs_gid,POINTER(args[2]),sizeof(pfs_uid));
2311
divert_to_dummy(p,0);
2314
case SYSCALL32_setsid:
2316
pfs_current->tty[0] = 0;
2321
Generally speaking, the kernel implements signal handling,
2322
so we just pass through operations such as sigaction and signal.
2323
However, we must keep track of which signals are allowed to
2324
interrupt I/O operations in progress. Each process has an
2325
array, signal_interruptible, that records this. The SA_RESTART
2326
flag to sigaction can turn this on or off. The traditional
2327
BSD signal() always turns it on.
2330
case SYSCALL32_sigaction:
2331
case SYSCALL32_rt_sigaction:
2335
struct pfs_kernel_sigaction act;
2336
tracer_copy_in(p->tracer,&act,POINTER(args[1]),sizeof(act));
2337
if(act.pfs_sa_flags&SA_RESTART) {
2338
pfs_current->signal_interruptible[sig] = 0;
2340
pfs_current->signal_interruptible[sig] = 1;
2346
case SYSCALL32_signal:
2349
pfs_current->signal_interruptible[sig] = 0;
2353
case SYSCALL32_parrot_lsalloc:
2355
char alloc_path[PFS_PATH_MAX];
2356
pfs_ssize_t avail, inuse;
2357
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2358
p->syscall_result = pfs_lsalloc(path,alloc_path,&avail,&inuse);
2359
if(p->syscall_result>=0) {
2360
tracer_copy_out(p->tracer,alloc_path,POINTER(args[1]),strlen(alloc_path));
2361
tracer_copy_out(p->tracer,&avail,POINTER(args[2]),sizeof(avail));
2362
tracer_copy_out(p->tracer,&inuse,POINTER(args[3]),sizeof(inuse));
2364
p->syscall_result = -errno;
2366
divert_to_dummy(p,p->syscall_result);
2370
case SYSCALL32_parrot_mkalloc:
2373
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2374
tracer_copy_in(p->tracer,&size,POINTER(args[1]),sizeof(size));
2375
p->syscall_result = pfs_mkalloc(path,size,args[2]);
2376
if(p->syscall_result<0) p->syscall_result = -errno;
2377
divert_to_dummy(p,p->syscall_result);
2381
case SYSCALL32_parrot_setacl:
2383
char path[PFS_PATH_MAX];
2384
char subject[PFS_PATH_MAX];
2385
char rights[PFS_PATH_MAX];
2386
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2387
tracer_copy_in_string(p->tracer,subject,POINTER(args[1]),sizeof(subject));
2388
tracer_copy_in_string(p->tracer,rights,POINTER(args[2]),sizeof(rights));
2389
p->syscall_result = pfs_setacl(path,subject,rights);
2390
if(p->syscall_result<0) p->syscall_result = -errno;
2391
divert_to_dummy(p,p->syscall_result);
2395
case SYSCALL32_parrot_getacl:
2396
case SYSCALL32_parrot_whoami:
2398
char path[PFS_PATH_MAX];
2400
unsigned size=args[2];
2402
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2403
if(size>sizeof(buffer)) size = sizeof(buffer);
2405
if(p->syscall==SYSCALL32_parrot_getacl) {
2406
p->syscall_result = pfs_getacl(path,buffer,sizeof(buffer));
2408
p->syscall_result = pfs_whoami(path,buffer,sizeof(buffer));
2411
if(p->syscall_result>=0) {
2412
tracer_copy_out(p->tracer,buffer,POINTER(args[1]),p->syscall_result);
2414
p->syscall_result = -errno;
2416
divert_to_dummy(p,p->syscall_result);
2420
case SYSCALL32_parrot_locate:
2422
char path[PFS_PATH_MAX];
2424
unsigned size=args[2];
2427
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2428
if(size>sizeof(buffer)) size = sizeof(buffer);
2433
p->syscall_result = pfs_locate(path,buffer,sizeof(buffer));
2435
if(p->syscall_result>=0) {
2436
tracer_copy_out(p->tracer,buffer,POINTER(args[1]),p->syscall_result);
2438
p->syscall_result = -errno;
2440
divert_to_dummy(p,p->syscall_result);
2444
case SYSCALL32_parrot_timeout:
2448
tracer_copy_in_string(p->tracer,buffer,POINTER(args[0]),sizeof(buffer));
2449
p->syscall_result = pfs_timeout(buffer);
2451
p->syscall_result = pfs_timeout(NULL);
2454
if(p->syscall_result<0) {
2455
p->syscall_result = -errno;
2457
divert_to_dummy(p,p->syscall_result);
2461
case SYSCALL32_parrot_copyfile:
2463
char source[PFS_PATH_MAX];
2464
char target[PFS_PATH_MAX];
2466
tracer_copy_in_string(p->tracer,source,POINTER(args[0]),sizeof(source));
2467
tracer_copy_in_string(p->tracer,target,POINTER(args[1]),sizeof(target));
2469
p->syscall_result = pfs_copyfile(source,target);
2471
if(p->syscall_result<0) p->syscall_result = -errno;
2472
divert_to_dummy(p,p->syscall_result);
2476
case SYSCALL32_parrot_md5:
2479
tracer_copy_in_string(p->tracer,path,POINTER(args[0]),sizeof(path));
2480
p->syscall_result = pfs_md5(path,(unsigned char*)digest);
2481
if(p->syscall_result>=0) {
2482
tracer_copy_out(p->tracer,digest,POINTER(args[1]),sizeof(digest));
2484
if(p->syscall_result<0) p->syscall_result = -errno;
2485
divert_to_dummy(p,p->syscall_result);
2490
Changing the userid is not allowed, but for completeness,
2491
you can always change to your own uid.
2493
case SYSCALL32_setgid:
2494
case SYSCALL32_setregid:
2495
case SYSCALL32_setuid:
2496
case SYSCALL32_setresuid:
2497
case SYSCALL32_setresgid:
2498
case SYSCALL32_setreuid:
2499
case SYSCALL32_setgid32:
2500
case SYSCALL32_setregid32:
2501
case SYSCALL32_setuid32:
2502
case SYSCALL32_setresuid32:
2503
case SYSCALL32_setresgid32:
2504
case SYSCALL32_setreuid32:
2505
case SYSCALL32_setfsuid32:
2506
case SYSCALL32_setfsgid32:
2507
divert_to_dummy(p,0);
2510
/* Whenever the break address is updated validly, save it. */
2511
/* This is used as one way of computing a scratch space. */
2516
if(p->syscall_result==0) {
2517
if(p->syscall_args[0]!=0) {
2518
p->break_address = p->syscall_args[0];
2525
These things are not currently permitted.
2527
case SYSCALL32_chroot:
2528
case SYSCALL32_mount:
2529
case SYSCALL32_stime:
2530
case SYSCALL32_sysfs:
2531
case SYSCALL32_umount2:
2532
case SYSCALL32_umount:
2533
case SYSCALL32_uselib:
2534
case SYSCALL32_lookup_dcookie:
2535
case SYSCALL32_sys_epoll_create:
2536
case SYSCALL32_sys_epoll_ctl:
2537
case SYSCALL32_sys_epoll_wait:
2538
case SYSCALL32_remap_file_pages:
2540
divert_to_dummy(p,-EPERM);
2543
These system calls are historical artifacts or
2544
otherwise not necessary to support.
2546
case SYSCALL32_acct:
2547
case SYSCALL32_break:
2548
case SYSCALL32_ftime:
2549
case SYSCALL32_gtty:
2550
case SYSCALL32_lock:
2552
case SYSCALL32_profil:
2553
case SYSCALL32_stty:
2554
case SYSCALL32_ulimit:
2555
case SYSCALL32_fadvise64:
2556
divert_to_dummy(p,-ENOSYS);
2560
A wide variety of calls have no relation to file
2561
access, so we simply send them along to the
2565
case SYSCALL32_uname:
2566
case SYSCALL32_olduname:
2567
case SYSCALL32__sysctl:
2568
case SYSCALL32_adjtimex:
2569
case SYSCALL32_afs_syscall:
2570
case SYSCALL32_alarm:
2571
case SYSCALL32_bdflush:
2572
case SYSCALL32_capget:
2573
case SYSCALL32_capset:
2574
case SYSCALL32_clock_settime:
2575
case SYSCALL32_clock_gettime:
2576
case SYSCALL32_clock_getres:
2577
case SYSCALL32_create_module:
2578
case SYSCALL32_delete_module:
2579
case SYSCALL32_get_kernel_syms:
2580
case SYSCALL32_getgroups32:
2581
case SYSCALL32_getgroups:
2582
case SYSCALL32_getitimer:
2583
case SYSCALL32_getpgid:
2584
case SYSCALL32_getpgrp:
2585
case SYSCALL32_getpid:
2586
case SYSCALL32_getpriority:
2587
case SYSCALL32_getrlimit:
2588
case SYSCALL32_getrusage:
2589
case SYSCALL32_getsid:
2590
case SYSCALL32_gettid:
2591
case SYSCALL32_gettimeofday:
2592
case SYSCALL32_idle:
2593
case SYSCALL32_init_module:
2594
case SYSCALL32_ioperm:
2595
case SYSCALL32_iopl:
2597
case SYSCALL32_madvise:
2598
case SYSCALL32_mincore:
2599
case SYSCALL32_mlock:
2600
case SYSCALL32_mlockall:
2601
case SYSCALL32_modify_ldt:
2602
case SYSCALL32_mprotect:
2603
case SYSCALL32_mremap:
2604
case SYSCALL32_msync:
2605
case SYSCALL32_munlock:
2606
case SYSCALL32_munlockall:
2607
case SYSCALL32_nanosleep:
2608
case SYSCALL32_nice:
2609
case SYSCALL32_pause:
2610
case SYSCALL32_prctl:
2611
case SYSCALL32_query_module:
2612
case SYSCALL32_quotactl:
2613
case SYSCALL32_reboot:
2614
case SYSCALL32_rt_sigpending:
2615
case SYSCALL32_rt_sigprocmask:
2616
case SYSCALL32_rt_sigqueueinfo:
2617
case SYSCALL32_rt_sigreturn:
2618
case SYSCALL32_rt_sigsuspend:
2619
case SYSCALL32_rt_sigtimedwait:
2620
case SYSCALL32_sched_get_priority_max:
2621
case SYSCALL32_sched_get_priority_min:
2622
case SYSCALL32_sched_getparam:
2623
case SYSCALL32_sched_getscheduler:
2624
case SYSCALL32_sched_rr_get_interval:
2625
case SYSCALL32_sched_setparam:
2626
case SYSCALL32_sched_setscheduler:
2627
case SYSCALL32_sched_yield:
2628
case SYSCALL32_setdomainname:
2629
case SYSCALL32_setgroups32:
2630
case SYSCALL32_setgroups:
2631
case SYSCALL32_sethostname:
2632
case SYSCALL32_setitimer:
2633
case SYSCALL32_setpgid:
2634
case SYSCALL32_setpriority:
2635
case SYSCALL32_setrlimit:
2636
case SYSCALL32_settimeofday:
2637
case SYSCALL32_set_tid_address:
2638
case SYSCALL32_sgetmask:
2639
case SYSCALL32_sigaltstack:
2640
case SYSCALL32_sigpending:
2641
case SYSCALL32_sigprocmask:
2642
case SYSCALL32_sigreturn:
2643
case SYSCALL32_sigsuspend:
2644
case SYSCALL32_ssetmask:
2645
case SYSCALL32_swapoff:
2646
case SYSCALL32_swapon:
2647
case SYSCALL32_sync:
2648
case SYSCALL32_sysinfo:
2649
case SYSCALL32_syslog:
2650
case SYSCALL32_time:
2651
case SYSCALL32_times:
2652
case SYSCALL32_timer_create:
2653
case SYSCALL32_timer_settime:
2654
case SYSCALL32_timer_gettime:
2655
case SYSCALL32_timer_getoverrun:
2656
case SYSCALL32_timer_delete:
2657
case SYSCALL32_ugetrlimit:
2658
case SYSCALL32_ustat:
2659
case SYSCALL32_vhangup:
2660
case SYSCALL32_vm86:
2661
case SYSCALL32_vm86old:
2662
case SYSCALL32_sched_setaffinity:
2663
case SYSCALL32_sched_getaffinity:
2664
case SYSCALL32_set_thread_area:
2665
case SYSCALL32_get_thread_area:
2666
case SYSCALL32_alloc_hugepages:
2667
case SYSCALL32_free_hugepages:
2668
case SYSCALL32_futex:
2669
case SYSCALL32_set_robust_list:
2670
case SYSCALL32_get_robust_list:
2674
These system calls get and set extended
2675
attributes on file systems. Not only are
2676
they non-standard, they are only implemented
2677
by the jfs filesystem on Linux. Libraries
2678
expect these to return EOPNOTSUPP.
2681
case SYSCALL32_fgetxattr:
2682
case SYSCALL32_flistxattr:
2683
case SYSCALL32_fremovexattr:
2684
case SYSCALL32_fsetxattr:
2685
case SYSCALL32_getxattr:
2686
case SYSCALL32_lgetxattr:
2687
case SYSCALL32_listxattr:
2688
case SYSCALL32_llistxattr:
2689
case SYSCALL32_lremovexattr:
2690
case SYSCALL32_lsetxattr:
2691
case SYSCALL32_removexattr:
2692
case SYSCALL32_setxattr:
2693
divert_to_dummy(p,-EOPNOTSUPP);
2697
These system calls could conceivably be supported,
2698
but we haven't had the need or the time to attack
2702
case SYSCALL32_getpmsg:
2703
case SYSCALL32_io_cancel:
2704
case SYSCALL32_io_destroy:
2705
case SYSCALL32_io_getevents:
2706
case SYSCALL32_io_setup:
2707
case SYSCALL32_io_submit:
2708
case SYSCALL32_ptrace:
2709
case SYSCALL32_putpmsg:
2710
case SYSCALL32_readahead:
2711
case SYSCALL32_security:
2712
case SYSCALL32_sendfile64:
2713
case SYSCALL32_sendfile:
2717
If anything else escaped our attention, we must know
2718
about it in an obvious way.
2723
debug(D_NOTICE,"warning: system call %d (%s) not supported for program %s",(int)p->syscall,tracer_syscall_name(p->tracer,p->syscall),p->name);
2724
divert_to_dummy(p,-ENOSYS);
2729
if(!entering && p->state==PFS_PROCESS_STATE_KERNEL) {
2731
p->state = PFS_PROCESS_STATE_USER;
2733
if(p->syscall_args_changed) {
2734
tracer_args_set(p->tracer,p->syscall,p->syscall_args,TRACER_ARGS_MAX);
2735
p->syscall_args_changed = 0;
2738
if(p->syscall_dummy) {
2739
tracer_result_set(p->tracer,p->syscall_result);
2740
p->syscall_dummy = 0;
2741
debug(D_SYSCALL,"= %d %s",(int)p->syscall_result,p->syscall_result<0 ? strerror(-p->syscall_result) : "" );
2743
debug(D_SYSCALL,"= ");
2750
Note that we clear the interrupted flag whenever
2751
we start a new system call or leave an old one.
2752
We don't want one system call to be interrupted
2753
by a signal from a previous system call.
2756
/*
Drive one step of a traced process through the 32-bit system call decoder.
The value of pfs_current found on entry is saved and put back before returning.
First group of case labels: decode_syscall is called with a second argument
of 0 for the KERNEL and WAITWRITE states, and with 1 for the USER and WAITREAD
states.  That argument appears to be the "entering" flag tested inside
decode_syscall -- TODO confirm against its signature.
Second group of case labels: the tracer is resumed with signum when the
state is KERNEL or USER.
A debug(D_PROCESS,...) message reports a process found in an unexpected state.
NOTE(review): the switch headers, the break statements and any statements
between the visible lines are not shown in this excerpt; verify the exact
fall-through behaviour against the complete file.
*/
void pfs_dispatch32( struct pfs_process *p, INT64_T signum )
2758
/* Remember the process that was current when we were called. */
struct pfs_process *oldcurrent = pfs_current;
2762
case PFS_PROCESS_STATE_KERNEL:
2763
case PFS_PROCESS_STATE_WAITWRITE:
2764
decode_syscall(p,0);
2766
case PFS_PROCESS_STATE_USER:
2768
case PFS_PROCESS_STATE_WAITREAD:
2769
decode_syscall(p,1);
2771
case PFS_PROCESS_STATE_WAITPID:
2772
case PFS_PROCESS_STATE_DONE:
2776
debug(D_PROCESS,"process %d in unexpected state %d",(int)p->pid,(int)p->state);
2781
case PFS_PROCESS_STATE_KERNEL:
2782
case PFS_PROCESS_STATE_USER:
2783
/* Let the traced process run again, passing signum to the tracer. */
tracer_continue(p->tracer,signum);
2785
case PFS_PROCESS_STATE_WAITPID:
2786
case PFS_PROCESS_STATE_WAITREAD:
2787
case PFS_PROCESS_STATE_WAITWRITE:
2788
case PFS_PROCESS_STATE_DONE:
2791
debug(D_PROCESS,"process %d in unexpected state %d",(int)p->pid,(int)p->state);
2795
/* Restore the process that was current on entry. */
pfs_current = oldcurrent;
2798
/*
Entry point for dispatching a traced process.
Returns immediately when p->state is PFS_PROCESS_STATE_DONE.
Otherwise tracer_is_64bit(p->tracer) is consulted: pfs_dispatch64 is
called when it is true, and pfs_dispatch32 is presumably the alternative
branch (the else line is not visible in this excerpt -- TODO confirm).
signum is handed unchanged to whichever dispatcher is called.
*/
void pfs_dispatch( struct pfs_process *p, INT64_T signum )
2800
if(p->state==PFS_PROCESS_STATE_DONE) return;
2802
if(tracer_is_64bit(p->tracer)) {
2803
pfs_dispatch64(p,signum);
2805
pfs_dispatch32(p,signum);