1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
42
#include <sys/ioctl.h> /* 44BSD requires this too */
45
#include "cl_data_types.h"
46
#include "cl_commlib.h"
47
#include "cl_endpoint_list.h"
49
#include "sge_utility.h"
50
#include "sge_security.h"
51
#include "sge_ijs_comm.h"
53
extern char *g_hostname;
54
extern sig_atomic_t received_signal;
56
/*
 * Error callback for the commlib. comm_open_connection() installs it via
 * cl_com_set_error_func(). Only the signature is visible in this view.
 */
static void ijs_general_communication_error(
57
const cl_application_error_list_elem_t *commlib_error)
62
/* redirects the commlib logging to a file */
63
/* this is a modified copy of the cl_log_list_flush_list() */
64
/*
 * Log flush routine: locks list_p, opens cl_log.txt (relative path, append
 * mode), prints every element returned by cl_log_list_get_first_elem() to
 * that file, calls cl_log_list_del_log() after each one and unlocks the list
 * again at the end.
 *
 * Returns a CL_RETVAL_* code.
 */
int my_log_list_flush_list(cl_raw_list_t* list_p) { /* CR check */
66
cl_log_list_elem_t* elem = NULL;
73
/* NOTE(review): the guard for this early return is not visible in this view. */
return CL_RETVAL_LOG_NO_LOGLIST;
76
if ((ret_val = cl_raw_list_lock(list_p)) != CL_RETVAL_OK) {
80
/*
 * NOTE(review): when fopen() fails the function returns right away although
 * cl_raw_list_lock() succeeded above; no unlock happens on this path, so the
 * log list stays locked.
 */
if ((fp = fopen("cl_log.txt", "a")) == NULL) {
81
return CL_RETVAL_NOT_OPEN;
84
while ((elem = cl_log_list_get_first_elem(list_p)) != NULL) {
85
/* TODO: rework logging output (log to file? call foreign log function, got by function pointer ?) */
87
gettimeofday(&now,NULL);
89
fprintf(fp, "%-76s|", elem->log_module_name);
90
/* Elements without a log_parameter are printed with one trailing string, the others with two. */
if (elem->log_parameter == NULL) {
91
/*
 * Defined as 0 here, so every section guarded by
 * #if CL_COM_PRINT_THREAD_ID below is compiled out.
 */
#define CL_COM_PRINT_THREAD_ID 0
93
#if CL_COM_PRINT_THREAD_ID
94
fprintf(fp, "%ld.%ld|%20s|%4d|%10s|%8s| %s\n",
96
fprintf(fp, "%ld.%ld|%20s|%10s|%8s| %s\n",
101
elem->log_thread_name,
102
#if CL_COM_PRINT_THREAD_ID
105
cl_thread_convert_state_id(elem->log_thread_state),
106
cl_log_list_convert_type_id(elem->log_type),
109
#if CL_COM_PRINT_THREAD_ID
110
fprintf(fp, "%ld.%ld|%20s|%4d|%10s|%8s| %s %s\n",
112
fprintf(fp, "%ld.%ld|%20s|%10s|%8s| %s %s\n",
117
elem->log_thread_name,
118
#if CL_COM_PRINT_THREAD_ID
121
cl_thread_convert_state_id(elem->log_thread_state),
122
cl_log_list_convert_type_id(elem->log_type),
124
elem->log_parameter);
126
/* Presumably removes the element just printed so the loop advances -- TODO confirm. */
cl_log_list_del_log(list_p);
132
if ((ret_val = cl_raw_list_unlock(list_p)) != CL_RETVAL_OK) {
138
/****** sge_ijs_comm/comm_init_lib() *******************************************
140
* comm_init_lib() -- Initializes the communication library
143
* int comm_init_lib(dstring *err_msg)
146
* Initializes the communication library, call it before using any other
147
* communication function.
150
* dstring *err_msg - Gets the error reason in case of error.
153
* int - COMM_RETVAL_OK:
154
* Communication library was successfully initialized.
156
* COMM_CANT_SETUP_COMMLIB:
157
* Error initializing the communication library, err_msg contains
161
* MT-NOTE: comm_init_lib() is not MT safe
164
* communication/comm_cleanup_lib()
165
*******************************************************************************/
166
/*
 * Calls cl_com_setup_commlib() with CL_NO_THREAD and CL_LOG_OFF and no log
 * flush function. On failure the commlib error text is stored in err_msg and
 * ret_val is set to COMM_CANT_SETUP_COMMLIB.
 */
int comm_init_lib(dstring *err_msg)
168
int ret, ret_val = 0;
170
DENTER(TOP_LAYER, "comm_init_lib");
172
ret = cl_com_setup_commlib(CL_NO_THREAD, CL_LOG_OFF, NULL /*DEBUG, my_log_list_flush_list*/);
173
if (ret != CL_RETVAL_OK) {
174
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted. comm_write_message() in this
 * file passes an explicit %s format instead.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
175
DPRINTF(("cl_com_setup_commlib() failed: %s (%d)\n",
176
sge_dstring_get_string(err_msg), ret));
177
ret_val = COMM_CANT_SETUP_COMMLIB;
184
/****** sge_ijs_comm/comm_cleanup_lib() ***************************************
186
* comm_cleanup_lib() -- Clean up the communication library
189
* int comm_cleanup_lib(dstring *err_msg)
192
* Cleans up the communication library. Call it when done using the library.
195
* dstring *err_msg - Pointer to a dstring that receives a static error
196
* string. If no error happens it gets set to
197
* "no error happened".
200
* int - COMM_RETVAL_OK:
201
* Communication library was successfully cleaned up.
203
* COMM_CANT_CLEANUP_COMMLIB:
204
* Error cleaning up the communication library, err_msg contains
208
* MT-NOTE: comm_cleanup_lib() is not MT safe
211
* communication/comm_init_lib()
212
*******************************************************************************/
213
/*
 * Calls cl_com_cleanup_commlib(). On failure the commlib error text is stored
 * in err_msg and ret_val is set to COMM_CANT_CLEANUP_COMMLIB.
 */
int comm_cleanup_lib(dstring *err_msg)
215
int ret, ret_val = 0;
217
DENTER(TOP_LAYER, "comm_cleanup_lib");
219
ret = cl_com_cleanup_commlib();
220
if (ret != CL_RETVAL_OK) {
221
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted. comm_write_message() in this
 * file passes an explicit %s format instead.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
222
DPRINTF(("cl_com_cleanup_commlib() failed: %s (%d)\n",
223
sge_dstring_get_string(err_msg), ret));
224
ret_val = COMM_CANT_CLEANUP_COMMLIB;
231
/****** sge_ijs_comm/comm_open_connection() ***********************************
233
* comm_open_connection() -- Connects to or starts a comm server
236
* int comm_open_connection(bool b_server, int port,
237
* const char *component_name, bool b_secure, const char *user_name,
238
* COMMUNICATION_HANDLE **handle, dstring *err_msg)
241
* Either start a comm server or connect to a running comm server.
244
* bool b_server - If true, a comm server is started, if false
245
* a connection to a server is established.
246
* int port - In case of server: Port on which the server
247
* should listen. If this is 0, a free port is
249
* In case of client: Port on which the server
251
* const char *component_name - A unique name for this end of the connection.
252
* bool b_secure - If true: Use secured connections
253
* const char *user_name - For secured connections: Name of the user
254
* whose certificates are to be used.
255
* Ignored for unsecured connections.
256
* COMMUNICATION_HANDLE **handle - The address of a COMMUNICATION_HANDLE
257
* pointer which must be initialized to NULL.
258
* dstring *err_msg - Pointer to an empty dstring to receive
262
* COMMUNICATION_HANDLE **handle - The COMMUNICATION_HANDLE of the connection.
263
* dstring *err_msg - In case of error: The error reason.
267
* int - COMM_RETVAL_OK:
268
* Connection was successfully opened.
270
* COMM_INVALID_PARAMETER:
271
* The *handle is not NULL.
273
* COMM_CANT_SETUP_SSL:
274
* err_msg contains the reason.
276
* COMM_CANT_CREATE_HANDLE:
277
* err_msg contains the reason.
280
* MT-NOTE: comm_open_connection() is not MT safe
283
* communication/comm_shutdown_connection()
284
*******************************************************************************/
285
/*
 * Creates the commlib handle for this end of the connection and stores it in
 * *handle. cl_com_create_handle() is called with CL_FALSE when b_server is
 * false and with CL_TRUE otherwise. When b_secure is set, the framework is
 * switched from CL_CT_TCP to CL_CT_SSL and sge_ssl_setup_security_path() is
 * run first. On success the connection timeout of the new handle is set to
 * 0x0fffffff.
 */
int comm_open_connection(bool b_server,
287
const char *component_name,
289
const char *user_name,
290
COMMUNICATION_HANDLE **handle,
294
/* Initialised to the superuser uid so the switch-back test below is a no-op unless the euid was saved. */
int old_euid = SGE_SUPERUSER_UID;
295
int ret_val = COMM_RETVAL_OK;
296
int commlib_error = CL_RETVAL_OK;
297
cl_framework_t communication_framework = CL_CT_TCP;
298
cl_tcp_connect_t connect_type = CL_TCP_DEFAULT;
299
cl_xml_connection_type_t connection_type = CL_CM_CT_MESSAGE;
301
/* NOTE(review): the trace name lacks the comm_ prefix of the function name. */
DENTER(TOP_LAYER, "open_connection");
303
/* Check validity of parameters */
304
/* NOTE(review): handle itself is dereferenced here without a NULL check. */
if (*handle != NULL) {
305
sge_dstring_sprintf(err_msg, "Invalid parameter: *handle is not NULL");
306
/* NOTE(review): the message is handed to DPRINTF as its format string; it is a fixed text in this case. */
DPRINTF((sge_dstring_get_string(err_msg)));
308
return COMM_INVALID_PARAMETER;
311
if (b_secure == true) {
313
const char *progname = "pty";
314
communication_framework = CL_CT_SSL;
316
if (strcmp(component_name, "pty_shepherd") == 0) {
317
/* pretend we are the execd to access it's credentials */
322
* Got to do this with euid = root
324
/* Only a process whose real uid is the superuser switches its effective uid. */
if (getuid() == SGE_SUPERUSER_UID) {
325
old_euid = geteuid();
326
/* NOTE(review): the result of seteuid() is not checked. */
seteuid(SGE_SUPERUSER_UID);
328
ret = sge_ssl_setup_security_path(progname, user_name);
330
* Switch back to old euid before error handling to do tracing as
331
* the SGE admin user.
333
if (old_euid != SGE_SUPERUSER_UID) {
338
DPRINTF(("sge_ssl_setup_security_path() failed!\n"));
339
sge_dstring_sprintf(err_msg, "Setting up SSL failed!");
340
ret_val = COMM_CANT_SETUP_SSL;
344
* If secure communication was requested but we cannot provide it
345
* because seclib support was not compiled in, we must not fall back to
346
* insecure mode, instead we must return with a fatal error.
348
sge_dstring_sprintf(err_msg, "No security support compiled into this binary!");
349
DPRINTF(("%s\n", sge_dstring_get_string(err_msg)));
350
return COMM_NO_SECURITY_COMPILED_IN;
354
if (ret_val == COMM_RETVAL_OK) {
356
* Define a error handling function for the commlib here -
357
* the default error handling function of the commlib prints
358
* error messages to stderr!
360
ret = cl_com_set_error_func(ijs_general_communication_error);
361
if (ret != CL_RETVAL_OK) {
362
sge_dstring_sprintf(err_msg, "can't set commlib error function: %s",
363
cl_get_error_text(ret));
364
DPRINTF(("cl_com_set_error_func() failed: %s (%d)\n",
365
sge_dstring_get_string(err_msg), ret));
366
ret_val = COMM_CANT_SETUP_COMMLIB;
368
DPRINTF(("trying to create commlib handle\n"));
369
/* The two calls below differ only in the fourth argument: CL_FALSE for the client, CL_TRUE for the server. */
if (b_server == false) {
370
*handle = cl_com_create_handle(&commlib_error,
371
communication_framework,
372
connection_type, CL_FALSE, port,
373
connect_type, (char*)component_name,
376
*handle = cl_com_create_handle(&commlib_error,
377
communication_framework,
378
connection_type, CL_TRUE, port,
379
connect_type, (char*)component_name,
383
if (*handle == NULL) {
384
/*
 * NOTE(review): the error text is used as the format string itself; the
 * cl_com_set_error_func() branch above shows the safer explicit %s form.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(commlib_error));
385
DPRINTF(("cl_com_create_handle() failed: %s (%d)\n",
386
sge_dstring_get_string(err_msg), commlib_error));
387
ret_val = COMM_CANT_CREATE_HANDLE;
389
/* Set connection timeout to 'infinite' */
390
(*handle)->connection_timeout = 0x0fffffff;
391
DPRINTF(("(*handle)->connect_port = %d\n", (*handle)->connect_port));
392
DPRINTF(("(*handle)->service_port = %d\n", (*handle)->service_port));
401
/****** sge_ijs_comm/comm_shutdown_connection() *******************************
403
* comm_shutdown_connection() -- gracefully shuts down a connection
406
* int comm_shutdown_connection(COMMUNICATION_HANDLE *handle,
407
* const char *component_name, dstring *err_msg)
410
* All connections get closed and then the communication handle gets freed.
413
* COMMUNICATION_HANDLE *handle - Handle of the connection to be shut down.
414
* const char *component_name - Name of the remote component of the
415
* connection to be shut down.
416
* dstring *err_msg - Gets the error reason in case of error.
419
* int - COMM_RETVAL_OK:
420
* Connection was successfully shut down.
422
* COMM_CANT_CLOSE_CONNECTION:
423
* err_msg contains the reason.
425
* COMM_CANT_SHUTDOWN_HANDLE:
426
* err_msg contains the reason.
429
* MT-NOTE: comm_shutdown_connection() is not MT safe
432
* communication/comm_open_connection()
433
*******************************************************************************/
434
/*
 * Closes the connection to component_name on the host stored in the global
 * g_hostname via cl_commlib_close_connection(), then shuts the handle down
 * with cl_commlib_shutdown_handle(). Each failing step stores the commlib
 * error text in err_msg and sets ret_val to its own COMM_CANT_* code.
 */
int comm_shutdown_connection(COMMUNICATION_HANDLE *handle,
435
const char *component_name, dstring *err_msg)
438
int ret_val = COMM_RETVAL_OK;
440
DENTER(TOP_LAYER, "comm_shutdown_connection");
442
ret = cl_commlib_close_connection(handle, g_hostname,
443
(char*)component_name, 1, CL_FALSE);
444
if (ret != CL_RETVAL_OK) {
445
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
446
DPRINTF(("cl_commlib_close_connection() failed: %s (%d)\n",
447
sge_dstring_get_string(err_msg), ret));
448
ret_val = COMM_CANT_CLOSE_CONNECTION;
450
ret = cl_commlib_shutdown_handle(handle, CL_FALSE);
451
if (ret != CL_RETVAL_OK) {
452
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
453
/*
 * NOTE(review): this trace text names cl_commlib_close_connection(), but the
 * call that failed here is cl_commlib_shutdown_handle().
 */
DPRINTF(("cl_commlib_close_connection() failed: %s (%d)\n",
454
sge_dstring_get_string(err_msg), ret));
455
ret_val = COMM_CANT_SHUTDOWN_HANDLE;
462
/****** sge_ijs_comm/comm_set_connection_param() ******************************
464
* comm_set_connection_param() -- Set several connection parameters.
467
* int comm_set_connection_param(COMMUNICATION_HANDLE *handle, int param,
468
* int value, dstring *err_msg)
471
* Sets one of several connection parameters. Valid parameters are:
472
* HEARD_FROM_TIMEOUT: The time until the communication library will
473
* treat a connection as lost.
476
* COMMUNICATION_HANDLE *handle - Handle of the connection.
477
* int param - ID of the param to set. Currently
478
* HEARD_FROM_TIMEOUT (in seconds) is
480
* int value - Value to set the param to.
481
* dstring *err_msg - Gets the error reason in case of error.
484
* int - COMM_RETVAL_OK:
485
* The connection parameter was successfully set.
487
* COMM_CANT_SET_CONNECTION_PARAM:
488
* err_msg contains the reason.
491
* MT-NOTE: comm_set_connection_param() is not MT safe
492
*******************************************************************************/
493
/*
 * Forwards param and value unchanged to cl_commlib_set_connection_param().
 * On failure the commlib error text is stored in err_msg and ret_val is set
 * to COMM_CANT_SET_CONNECTION_PARAM.
 */
int comm_set_connection_param(COMMUNICATION_HANDLE *handle, int param, int value,
497
int ret_val = COMM_RETVAL_OK;
499
DENTER(TOP_LAYER, "comm_set_connection_param");
500
ret = cl_commlib_set_connection_param(handle, param, value);
501
if (ret != CL_RETVAL_OK) {
502
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
503
DPRINTF(("cl_commlib_set_connection_param() failed: %s (%d)\n",
504
sge_dstring_get_string(err_msg), ret));
505
ret_val = COMM_CANT_SET_CONNECTION_PARAM;
511
/****** sge_ijs_comm/comm_ignore_timeouts() ***********************************
513
* comm_ignore_timeouts() -- Use timeouts or wait infinitely.
516
* int comm_ignore_timeouts(bool b_ignore, dstring *err_msg)
519
* Tells the communication library to either use timeouts or just wait
520
* until all work is done.
523
* bool b_ignore - If true, the comm. library ignores timeouts,
524
* if false, timeouts are enabled.
525
* dstring *err_msg - Gets the error reason in case of error.
529
* int - COMM_RETVAL_OK:
530
* The timeout behaviour was successfully set.
532
* COMM_CANT_SET_IGNORE_TIMEOUTS:
533
* err_msg contains the reason.
536
* MT-NOTE: comm_ignore_timeouts() is not MT safe
537
*******************************************************************************/
538
/*
 * Maps b_ignore to CL_TRUE or CL_FALSE and passes it to
 * cl_com_ignore_timeouts().
 */
int comm_ignore_timeouts(bool b_ignore, dstring *err_msg)
540
int ret = CL_RETVAL_OK;
541
int ret_val = COMM_RETVAL_OK;
543
DENTER(TOP_LAYER, "comm_ignore_timeouts");
545
/*
 * NOTE(review): the result of this call is not stored in ret. ret keeps its
 * initial value CL_RETVAL_OK, so the error branch below can never run and
 * COMM_CANT_SET_IGNORE_TIMEOUTS is never produced.
 */
cl_com_ignore_timeouts(b_ignore==true ? CL_TRUE : CL_FALSE);
546
if (ret != CL_RETVAL_OK) {
547
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
548
DPRINTF(("cl_com_ignore_timeouts() failed: %s (%d)\n",
549
sge_dstring_get_string(err_msg), ret));
550
ret_val = COMM_CANT_SET_IGNORE_TIMEOUTS;
556
/****** sge_ijs_comm/comm_wait_for_connection() *******************************
558
* comm_wait_for_connection() -- Waits until at least one client has connected
561
* int comm_wait_for_connection(COMMUNICATION_HANDLE *handle, const char
562
* *component, int wait_secs, const char **host, dstring *err_msg)
565
* On a server, waits until at least one client has connected.
568
* COMMUNICATION_HANDLE *handle - Handle of the connection.
569
* const char *component - Wait for a client with this component
571
* int wait_secs - Wait at most wait_secs seconds.
572
* const char **host - Name of the host from where the client
574
* dstring *err_msg - Gets the error reason in case of error.
577
* int - COMM_RETVAL_OK:
578
* A client is connected to us.
581
* 'wait_seconds' have elapsed.
584
* err_msg contains the reason.
586
* COMM_CANT_SEARCH_ENDPOINT:
587
* err_msg contains the reason.
590
* MT-NOTE: comm_wait_for_connection() is not MT safe
593
* communication/comm_wait_for_no_connection()
594
*******************************************************************************/
595
/*
 * Polls until an endpoint with the given component name shows up: each loop
 * pass triggers the commlib, asks cl_commlib_search_endpoint() for matching
 * endpoints and continues while the returned list is empty and the time
 * budget is not used up. When an endpoint is found, its comp_host is
 * duplicated into the global g_hostname.
 *
 * Review notes on the visible code:
 * - Elapsed time is not read from a clock; waited_usec grows by 10000 per
 *   loop pass, presumably paired with a 10 ms sleep that is not visible here.
 * - The timeout test after the loop comes first in the if / else-if chain,
 *   so COMM_GOT_TIMEOUT is reported whenever the budget is exhausted, even
 *   if the last search did return an endpoint.
 * - Several sge_dstring_sprintf() and DPRINTF calls receive a message text as
 *   their format string, so a percent sign inside it would be interpreted.
 * - g_hostname is overwritten with the strdup() result; no free of a former
 *   value and no NULL check of the result is visible in this view.
 * - The trace name given to DENTER lacks the comm_ prefix of the function.
 */
int comm_wait_for_connection(COMMUNICATION_HANDLE *handle,
596
const char *component,
604
int ret_val = COMM_RETVAL_OK;
605
cl_raw_list_t *endpoint_list = NULL;
606
cl_endpoint_list_elem_t *endpoint;
608
DENTER(TOP_LAYER, "wait_for_connection");
611
* In the while loop, do this:
612
* Call cl_commlib_trigger(), ignore the return value (it won't return 99)
613
* Get the list of endpoints of expected kind
614
* If endpointlist is returned and contains 0 elements, sleep for
615
* 10 milliseconds and loop again.
617
while ((ret2=cl_commlib_trigger(handle, 0)) != 99
618
&& (ret = cl_commlib_search_endpoint(handle, NULL,
619
(char*)component, 0, CL_TRUE, &endpoint_list)) == CL_RETVAL_OK
620
&& endpoint_list != NULL
621
&& endpoint_list->elem_count == 0
622
&& waited_usec/1000000 < wait_secs) {
624
cl_endpoint_list_cleanup(&endpoint_list);
626
waited_usec += 10000;
627
if (received_signal == SIGINT) {
631
if (waited_usec/1000000 >= wait_secs) {
632
sge_dstring_sprintf(err_msg, "Timeout occured while waiting for connection");
633
DPRINTF((sge_dstring_get_string(err_msg)));
634
ret_val = COMM_GOT_TIMEOUT;
635
} else if (ret2 != CL_RETVAL_OK) {
636
sge_dstring_sprintf(err_msg, cl_get_error_text(ret2));
637
DPRINTF(("cl_commlib_trigger() failed: %s (%d)\n",
638
sge_dstring_get_string(err_msg), ret2));
639
ret_val = COMM_CANT_TRIGGER;
640
} else if (ret != CL_RETVAL_OK) {
641
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
642
DPRINTF(("cl_commlib_search_endpoint() failed: %s (%d)\n",
643
sge_dstring_get_string(err_msg), ret));
644
ret_val = COMM_CANT_SEARCH_ENDPOINT;
646
if (endpoint_list != NULL) {
647
/* A client connected to us, get it's hostname */
648
if (endpoint_list->elem_count > 0) {
649
endpoint = cl_endpoint_list_get_first_elem(endpoint_list);
650
g_hostname = strdup(endpoint->endpoint->comp_host);
652
DPRINTF(("A client from host %s has connected\n", g_hostname));
654
cl_endpoint_list_cleanup(&endpoint_list);
660
/****** sge_ijs_comm/comm_wait_for_no_connection() ****************************
662
* comm_wait_for_no_connection() -- Wait until no client is connected any
666
* int comm_wait_for_no_connection(COMMUNICATION_HANDLE *handle, const char
667
* *component, int wait_secs, dstring *err_msg)
670
* Waits until no client is connected to us any more.
673
* COMMUNICATION_HANDLE *handle - Handle of the connection.
674
* const char *component - Filter for clients with this component name
675
* int wait_secs - Wait at most wait_secs seconds.
676
* dstring *err_msg - Gets the error reason in case of error.
679
* int - COMM_RETVAL_OK:
680
* No client is connected to us.
683
* 'wait_seconds' have elapsed.
686
* err_msg contains the reason.
688
* COMM_CANT_SEARCH_ENDPOINT:
689
* err_msg contains the reason.
692
* MT-NOTE: comm_wait_for_no_connection() is not MT safe
695
* communication/comm_wait_for_connection()
696
*******************************************************************************/
697
/*
 * Polls until no endpoint with the given component name is left: each loop
 * pass triggers the commlib and asks cl_commlib_search_endpoint() for
 * matching endpoints. While the list is non-empty and the time budget is not
 * used up, the list is cleaned up and the loop goes on.
 *
 * Review notes on the visible code:
 * - Elapsed time is not read from a clock; waited_usec grows by 10000 per
 *   loop pass, presumably paired with a 10 ms sleep that is not visible here.
 * - The three checks after the loop are independent if statements, unlike
 *   the if / else-if chain in comm_wait_for_connection(). When more than one
 *   condition holds, the later one overwrites err_msg and ret_val set by the
 *   earlier one.
 * - Several sge_dstring_sprintf() and DPRINTF calls receive a message text as
 *   their format string, so a percent sign inside it would be interpreted.
 */
int comm_wait_for_no_connection(COMMUNICATION_HANDLE *handle, const char *component,
698
int wait_secs, dstring *err_msg)
703
int ret_val = COMM_RETVAL_OK;
704
cl_raw_list_t *endpoint_list = NULL;
705
bool do_exit = false;
707
DENTER(TOP_LAYER, "comm_wait_for_no_connection");
710
* In the while loop, do this:
711
* Call cl_commlib_trigger(), ignore the return value (it won't return 99)
712
* Get the list of endpoints of expected kind
713
* If endpointlist is returned and contains >0 elements, sleep for
714
* 10 milliseconds and loop again.
717
while (do_exit == false) {
718
/* Let commlib update it's lists */
719
ret2 = cl_commlib_trigger(handle, 0);
720
/* Get list of all endpoints */
721
ret = cl_commlib_search_endpoint(handle, NULL, (char*)component, 0, CL_TRUE,
724
if (ret == CL_RETVAL_OK
725
&& endpoint_list != NULL
726
&& endpoint_list->elem_count > 0
727
&& waited_usec/1000000 < wait_secs) {
728
cl_endpoint_list_cleanup(&endpoint_list);
729
endpoint_list = NULL;
731
waited_usec += 10000;
732
if (received_signal == SIGINT) {
737
DPRINTF(("No known endpoint left or timeout -> exit loop\n"));
743
DPRINTF(("wait_for_no_connection: after while\n"));
744
if (waited_usec/1000000 >= wait_secs) {
745
sge_dstring_sprintf(err_msg,
746
"Timeout occured while waiting for no connection");
747
DPRINTF((sge_dstring_get_string(err_msg)));
748
ret_val = COMM_GOT_TIMEOUT;
750
if (ret2 != CL_RETVAL_OK) {
751
sge_dstring_sprintf(err_msg, cl_get_error_text(ret2));
752
DPRINTF(("cl_commlib_trigger() failed: %s (%d)\n",
753
sge_dstring_get_string(err_msg), ret2));
754
ret_val = COMM_CANT_TRIGGER;
756
if (ret != CL_RETVAL_OK) {
757
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
758
DPRINTF(("cl_commlib_search_endpoint() failed: %s (%d)\n",
759
sge_dstring_get_string(err_msg), ret));
760
ret_val = COMM_CANT_SEARCH_ENDPOINT;
762
if (endpoint_list != NULL) {
763
DPRINTF(("wait_for_no_connection: cleaning up endpoint list\n"));
764
cl_endpoint_list_cleanup(&endpoint_list);
770
/****** sge_ijs_comm/comm_get_connection_count() ******************************
772
* comm_get_connection_count() -- Retrieves the current number of connections
775
* int comm_get_connection_count(COMMUNICATION_HANDLE *handle, dstring
779
* Retrieves the current number of connections.
782
* COMMUNICATION_HANDLE *handle - Handle of the connection.
783
* dstring *err_msg - Gets the error reason in case of error.
786
* int - Number of connections.
787
* <0 in case of error:
788
* -COMM_CANT_LOCK_CONNECTION_LIST:
789
* err_msg contains the reason.
791
* -COMM_CANT_UNLOCK_CONNECTION_LIST:
792
* err_msg contains the reason.
796
* MT-NOTE: comm_get_connection_count() is not MT safe
797
*******************************************************************************/
798
/*
 * Locks handle->connection_list, fetches its first element and unlocks the
 * list again. The lines that walk the list and count the elements are not
 * visible in this view. Lock and unlock failures store the commlib error text
 * in err_msg and set ret_val to the negated COMM_CANT_* code.
 */
int comm_get_connection_count(COMMUNICATION_HANDLE *handle, dstring *err_msg)
802
cl_connection_list_elem_t* elem = NULL;
804
DENTER(TOP_LAYER, "comm_get_connection_count");
806
/* NOTE(review): handle is dereferenced without a NULL check. */
ret = cl_raw_list_lock(handle->connection_list);
807
if (ret != CL_RETVAL_OK) {
808
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
809
DPRINTF(("cl_raw_list_lock() failed: %s (%d)\n",
810
sge_dstring_get_string(err_msg), ret));
811
/* Error codes are negated so they cannot be mistaken for a connection count. */
ret_val = -COMM_CANT_LOCK_CONNECTION_LIST;
813
elem = cl_connection_list_get_first_elem(handle->connection_list);
817
ret = cl_raw_list_unlock(handle->connection_list);
818
if (ret != CL_RETVAL_OK) {
819
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
820
DPRINTF(("cl_raw_list_unlock() failed: %s (%d)\n",
821
sge_dstring_get_string(err_msg), ret));
822
ret_val = -COMM_CANT_UNLOCK_CONNECTION_LIST;
830
/****** sge_ijs_comm/comm_trigger() *******************************************
832
* comm_trigger() -- Trigger communication library
835
* int comm_trigger(COMMUNICATION_HANDLE *handle, int synchron, dstring
839
* Triggers the communication library to do pending tasks.
842
* COMMUNICATION_HANDLE *handle - Handle of the connection.
843
* int synchron - Set to != 0 to wait until all pending
844
* messages are sent, == 0 to just do one
845
* piece of work and return then.
846
* dstring *err_msg - Gets the error reason in case of error.
849
* int - COMM_RETVAL_OK:
850
* Trigger was successful.
853
* 'wait_seconds' have elapsed.
856
* err_msg contains the reason.
858
* COMM_CANT_SEARCH_ENDPOINT:
859
* err_msg contains the reason.
862
* MT-NOTE: comm_trigger() is not MT safe
863
*******************************************************************************/
864
/*
 * Thin wrapper around cl_commlib_trigger(): synchron is passed through
 * unchanged. Any result other than CL_RETVAL_OK stores the commlib error text
 * in err_msg and sets ret_val to COMM_CANT_TRIGGER.
 */
int comm_trigger(COMMUNICATION_HANDLE *handle, int synchron, dstring *err_msg)
867
int ret_val = COMM_RETVAL_OK;
869
DENTER(TOP_LAYER, "comm_trigger");
871
ret = cl_commlib_trigger(handle, synchron);
872
/*
 * NOTE(review): comm_recv_message() and comm_flush_write_messages() in this
 * file treat CL_RETVAL_SELECT_TIMEOUT and CL_RETVAL_SELECT_INTERRUPT as
 * non-fatal; here they are reported as COMM_CANT_TRIGGER -- confirm intended.
 */
if (ret != CL_RETVAL_OK) {
873
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
874
DPRINTF(("cl_commlib_trigger() failed: %s (%d)\n",
875
sge_dstring_get_string(err_msg), ret));
876
ret_val = COMM_CANT_TRIGGER;
882
/****** sge_ijs_comm/comm_write_message() *************************************
884
* comm_write_message() -- Write a message to the connection
887
* unsigned long comm_write_message(COMMUNICATION_HANDLE *handle, const char
888
* *unresolved_hostname, const char *component_name, unsigned long component_id,
889
* unsigned char *buffer, unsigned long size, unsigned char type, dstring
893
* Writes a message to the connection.
896
* COMMUNICATION_HANDLE *handle - Handle of the connection.
897
* const char *unresolved_hostname - Hostname of the destination host.
898
* const char *component_name - Component name of the destination.
899
* unsigned long component_id - Component ID of the destination.
900
* unsigned char *buffer - The message data.
901
* unsigned long size - Message data length.
902
* unsigned char type - Message type.
903
* dstring *err_msg - Gets the error reason in case of error.
906
* unsigned long - the number of bytes written.
907
* 0 in case of error.
910
* MT-NOTE: comm_write_message() is not MT safe
913
* communication/comm_recv_message
914
*******************************************************************************/
915
/*
 * Builds a send buffer of size+1 bytes, copies the payload from buffer to
 * offset 1 of it and hands the buffer to cl_commlib_send_message() without
 * copying and without waiting for an acknowledge. The line that fills byte 0
 * is not visible in this view.
 *
 * Review notes on the visible code:
 * - No NULL check of the malloc() result is visible before the memcpy().
 * - size+1 wraps to 0 if size is the largest unsigned long value.
 * - err_msg is written with the commlib text right after the send call, on
 *   success as well as on failure, using an explicit %s format. The later
 *   sge_dstring_sprintf() call passes the text as the format string itself.
 */
unsigned long comm_write_message(COMMUNICATION_HANDLE *handle,
916
const char *unresolved_hostname,
917
const char *component_name,
918
unsigned long component_id,
919
unsigned char *buffer,
926
unsigned long nwritten = 0;
928
DENTER(TOP_LAYER, "comm_write_message");
931
* Copy only 'size' bytes from 'buffer' to a new sendbuf and add
932
* one byte for the message type at the beginning of the sendbuf.
933
* The commlib will free this buffer when it's content was sent.
935
sendbuf = malloc(size+1);
937
memcpy(&sendbuf[1], buffer, size);
939
ret = cl_commlib_send_message(handle,
940
(char*)unresolved_hostname,
941
(char*)component_name,
949
CL_FALSE, /* don't copy the sendbuf */
950
CL_FALSE); /* don't wait for ack */
952
/* sendbuf was freed by the commlib */
953
sge_dstring_sprintf(err_msg, "%s", cl_get_error_text(ret));
955
if (ret == CL_RETVAL_OK) {
958
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
959
DPRINTF(("cl_commlib_send_message() failed: %s (%d)\n",
960
sge_dstring_get_string(err_msg), ret));
967
/****** sge_ijs_comm/comm_flush_write_messages() ******************************
969
* comm_flush_write_messages() -- Flush all messages still in the write list
970
* of the communication library
973
* int comm_flush_write_messages(COMMUNICATION_HANDLE *handle, dstring *err_msg)
976
* Flushes all messages still in the write list of the communication library.
977
* comm_write_message() adds a message to the write list and tries to send
978
* it immediately. This isn't always possible, so comm_flush_write_messages()
979
* makes sure all messages are really written.
982
* COMMUNICATION_HANDLE *handle - Handle of the connection.
983
* dstring *err_msg - Contains error message in case of error.
986
* int - 0: Ok, all messages were flushed.
987
* <0: Retries needed to flush all messages * -1
988
* >0: An error occurred, error number is a commlib error.
991
* MT-NOTE: comm_flush_write_messages() is not MT safe
994
* communication/comm_write_message
995
*******************************************************************************/
996
/*
 * Reads the number of queued messages with cl_com_messages_in_send_queue(),
 * calls cl_commlib_trigger(handle, 0) and reads the queue length again. The
 * loop control lines and the use of retries are not visible in this view.
 * A trigger result other than CL_RETVAL_OK, CL_RETVAL_SELECT_TIMEOUT or
 * CL_RETVAL_SELECT_INTERRUPT stores the commlib error text in err_msg.
 *
 * NOTE(review): that error text is used as the format string itself, so a
 * percent sign inside it would be interpreted.
 */
int comm_flush_write_messages(COMMUNICATION_HANDLE *handle, dstring *err_msg)
998
unsigned long elems = 0;
999
int ret = 0, retries = 0;
1001
elems = cl_com_messages_in_send_queue(handle);
1004
* Don't set the cl_commlib_trigger()-call to be blocking and
1005
* get rid of the usleep() - it's much slower!
1006
* The last cl_commlib_trigger()-call will take 1 s.
1008
ret = cl_commlib_trigger(handle, 0);
1010
* Bail out if trigger fails with an error that indicates that we
1011
* won't be able to send the messages in the near future.
1013
if (ret != CL_RETVAL_OK &&
1014
ret != CL_RETVAL_SELECT_TIMEOUT &&
1015
ret != CL_RETVAL_SELECT_INTERRUPT) {
1016
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1020
elems = cl_com_messages_in_send_queue(handle);
1022
* We just tried to send the messages and it wasn't possible to send
1023
* all messages - give the network some time to recover.
1033
/****** sge_ijs_comm/comm_recv_message() **************************************
1035
* comm_recv_message() -- Receives a message from the connection
1038
* int comm_recv_message(COMMUNICATION_HANDLE *handle, cl_bool_t b_synchron,
1039
* recv_message_t *recv_mess, dstring *err_msg)
1042
* Receives a message from the connection.
1045
* COMMUNICATION_HANDLE *handle - Handle of the connection.
1046
* cl_bool_t b_synchron - true: Wait until a complete message was read
1047
* false: Get what's available and return.
1048
* recv_message_t *recv_mess - The message gets filled into this struct.
1049
* The caller has to free buffers.
1050
* dstring *err_msg - Gets the error reason in case of error.
1053
* int - COMM_RETVAL_OK:
1054
* A message was received.
1057
* 'wait_seconds' have elapsed.
1059
* COMM_CANT_TRIGGER:
1060
* err_msg contains the reason.
1062
* COMM_CANT_SEARCH_ENDPOINT:
1063
* err_msg contains the reason.
1066
* MT-NOTE: comm_recv_message() is not MT safe
1069
* communication/comm_write_message, communication/comm_free_message
1070
*******************************************************************************/
1071
/*
 * Receives one message from the host stored in the global g_hostname and
 * decodes it into recv_mess. With b_synchron equal to CL_TRUE the commlib is
 * triggered first. The first byte of the message selects the type. For the
 * data and plain control types, recv_mess->data points at the second byte
 * inside the commlib message, so no copy is made. For
 * WINDOW_SIZE_CTRL_MSG the text is parsed into recv_mess->ws.
 */
int comm_recv_message(COMMUNICATION_HANDLE *handle, cl_bool_t b_synchron,
1072
recv_message_t *recv_mess, dstring *err_msg)
1074
int ret_val = COMM_RETVAL_OK;
1077
cl_com_message_t *message = NULL;
1078
cl_com_endpoint_t *sender = NULL;
1080
/* NOTE(review): the trace name lacks the comm_ prefix of the function name. */
DENTER(TOP_LAYER, "recv_message");
1082
/* check validity of parameters */
1083
if(handle == NULL || recv_mess == NULL) {
1085
return COMM_INVALID_PARAMETER;
1088
if (b_synchron == CL_TRUE) {
1089
ret = cl_commlib_trigger(handle, 1);
1090
/* Map the trigger result: hard error, select timeout and select interrupt get separate codes. */
if (ret != CL_RETVAL_OK &&
1091
ret != CL_RETVAL_SELECT_TIMEOUT &&
1092
ret != CL_RETVAL_SELECT_INTERRUPT) {
1093
/*
 * NOTE(review): here and in the branches below the error text is used as the
 * format string itself, so a percent sign inside it would be interpreted.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1094
DPRINTF(("cl_commlib_trigger() failed: %s (%d)\n",
1095
sge_dstring_get_string(err_msg), ret));
1096
ret_val = COMM_CANT_TRIGGER;
1097
} else if (ret == CL_RETVAL_SELECT_TIMEOUT) {
1098
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1099
DPRINTF(("cl_commlib_trigger() failed: %s (%d)\n",
1100
sge_dstring_get_string(err_msg), ret));
1101
ret_val = COMM_GOT_TIMEOUT;
1102
} else if (ret == CL_RETVAL_SELECT_INTERRUPT) {
1103
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1104
ret_val = COMM_SELECT_INTERRUPT;
1108
if (ret_val == COMM_RETVAL_OK) {
1109
ret = cl_commlib_receive_message(handle,
1110
g_hostname, /* unresolved_hostname, */
1111
NULL, /* component_name, */
1112
0, /* component_id, */
1117
if (ret != CL_RETVAL_OK) {
1118
if (ret == CL_RETVAL_NO_SELECT_DESCRIPTORS) {
1119
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1120
DPRINTF(("cl_commlib_receive_message() failed: %s (%d)\n",
1121
sge_dstring_get_string(err_msg), ret));
1122
ret_val = COMM_NO_SELECT_DESCRIPTORS;
1123
} else if (ret == CL_RETVAL_CONNECTION_NOT_FOUND) {
1124
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1125
DPRINTF(("cl_commlib_receive_message() failed: %s (%d)\n",
1126
sge_dstring_get_string(err_msg), ret));
1127
ret_val = COMM_CONNECTION_NOT_FOUND;
1129
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1130
DPRINTF(("cl_commlib_receive_message() failed: %s (%d)\n",
1131
sge_dstring_get_string(err_msg), ret));
1132
ret_val = COMM_CANT_RECEIVE_MESSAGE;
1136
/* The sender endpoint is not handed to the caller; it is released here. */
if(sender != NULL) {
1137
cl_com_free_endpoint(&sender);
1141
if (ret_val == COMM_RETVAL_OK) {
1142
/* The commlib message is kept in recv_mess so comm_free_message() can release it later. */
recv_mess->cl_message = message;
1143
if (message != NULL) {
1144
if (message->message_length>0) {
1146
switch (message->message[0]) {
1147
case STDIN_DATA_MSG:
1148
case STDOUT_DATA_MSG:
1149
case STDERR_DATA_MSG:
1150
case REGISTER_CTRL_MSG:
1151
case UNREGISTER_CTRL_MSG:
1152
case UNREGISTER_RESPONSE_CTRL_MSG:
1153
case SETTINGS_CTRL_MSG:
1154
DPRINTF(("length of message: %d\n", (int)message->message_length));
1156
recv_mess->type = message->message[0];
1157
recv_mess->data = (char*)&(message->message[1]);
1159
DPRINTF(("recv_mess->type = %d\n", recv_mess->type));
1160
/*
 * NOTE(review): the size argument limits only what is written to tmpbuf. The
 * %s conversion still reads recv_mess->data up to a NUL byte, and nothing
 * visible here guarantees the payload is NUL terminated -- confirm, or use a
 * precision to bound the read.
 */
snprintf(tmpbuf, MIN(100, message->message_length), "%s", recv_mess->data);
1161
DPRINTF(("recv_mess->data = %s\n", tmpbuf));
1164
case WINDOW_SIZE_CTRL_MSG:
1165
/* control message */
1166
recv_mess->type = message->message[0];
1168
/* NOTE(review): the result of this sscanf() call is not checked. */
sscanf((char*)&(message->message[1]),
1170
if (strcmp(sub_type, "WS") == 0) {
1171
int row, col, xpixel, ypixel;
1172
/*
 * NOTE(review): the four ints above are uninitialised and the result of this
 * sscanf() call is not checked, so a short or malformed message leaves
 * indeterminate values that are then copied into recv_mess->ws.
 */
sscanf((char*)&(message->message[4]),
1174
&row, &col, &xpixel, &ypixel);
1175
recv_mess->ws.ws_row = row;
1176
recv_mess->ws.ws_col = col;
1177
recv_mess->ws.ws_xpixel = xpixel;
1178
recv_mess->ws.ws_ypixel = ypixel;
1189
/****** sge_ijs_comm/comm_free_message() **************************************
1191
* comm_free_message() -- free contents of a received message struct
1194
* int comm_free_message(recv_message_t *recv_mess, dstring *err_msg)
1197
* Frees the content of a received message struct.
1200
* recv_message_t *recv_mess - The message struct that is to be freed.
1201
* dstring *err_msg - Gets the error reason in case of error.
1204
* int - COMM_RETVAL_OK:
1205
* The message is freed.
1207
* COMM_CANT_FREE_MESSAGE:
1208
* err_msg contains the error reason.
1211
* MT-NOTE: comm_free_message() is not MT safe
1214
* communication/comm_recv_message()
1215
*******************************************************************************/
1216
/*
 * Releases the commlib message referenced by recv_mess->cl_message through
 * cl_com_free_message(). Nothing is freed when recv_mess or its cl_message
 * member is NULL. On failure the commlib error text is stored in err_msg and
 * ret_val is set to COMM_CANT_FREE_MESSAGE.
 */
int comm_free_message(recv_message_t *recv_mess, dstring *err_msg)
1219
int ret_val = COMM_RETVAL_OK;
1221
DENTER(TOP_LAYER, "comm_free_message");
1223
if (recv_mess != NULL && recv_mess->cl_message != NULL) {
1224
/*
 * comm_recv_message() points recv_mess->data into this message, so that
 * pointer must not be used after the call below.
 */
ret = cl_com_free_message(&(recv_mess->cl_message));
1225
if (ret != CL_RETVAL_OK) {
1226
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1227
DPRINTF(("cl_com_free_message() failed: %s (%d)\n",
1228
sge_dstring_get_string(err_msg), ret));
1229
ret_val = COMM_CANT_FREE_MESSAGE;
1237
/****** sge_ijs_comm/check_client_alive() *************************************
1239
* check_client_alive() -- Checks if a known, connected client is still alive
1242
* int check_client_alive(COMMUNICATION_HANDLE *handle,
1243
* const char *component_name, dstring *err_msg)
1246
* Checks if a known, connected client is still alive.
1249
* COMMUNICATION_HANDLE *handle - Handle to the connection.
1250
* const char *component_name - Name of the component to check.
1251
* dstring *err_msg - Gets the error reason in case of error.
1254
* int - COMM_RETVAL_OK:
1255
* The client is alive.
1257
* COMM_CANT_GET_CLIENT_STATUS:
1258
* err_msg contains the error reason.
1261
* MT-NOTE: check_client_alive() is not MT safe
1263
*******************************************************************************/
1264
/*
 * Asks the commlib for the status of the endpoint component_name on the host
 * stored in the global g_hostname via cl_commlib_get_endpoint_status() and
 * releases the returned status message again. On failure the commlib error
 * text is stored in err_msg and ret_val is set to COMM_CANT_GET_CLIENT_STATUS.
 * The end of the function is not visible in this view.
 */
int check_client_alive(COMMUNICATION_HANDLE *handle, const char *component_name,
1268
int ret_val = COMM_RETVAL_OK;
1269
cl_com_SIRM_t *status = NULL;
1271
DENTER(TOP_LAYER, "check_client_alive");
1273
/* NOTE(review): handle is dereferenced in these trace calls without a NULL check. */
DPRINTF(("handle->connect_port = %d\n", handle->connect_port));
1274
DPRINTF(("handle->service_port = %d\n", handle->service_port));
1275
DPRINTF(("client component name = %s\n", component_name));
1276
DPRINTF(("g_hostname = %s\n", g_hostname));
1278
ret = cl_commlib_get_endpoint_status(handle, g_hostname,
1279
(char*)component_name, 1, &status);
1280
if (ret != CL_RETVAL_OK) {
1281
/*
 * NOTE(review): the error text is used as the format string itself, so a
 * percent sign inside it would be interpreted.
 */
sge_dstring_sprintf(err_msg, cl_get_error_text(ret));
1282
/*
 * NOTE(review): this trace text names cl_commlib_get_endpoint(), but the call
 * that failed is cl_commlib_get_endpoint_status().
 */
DPRINTF(("cl_commlib_get_endpoint() failed: %s (%d)\n",
1283
sge_dstring_get_string(err_msg), ret));
1284
ret_val = COMM_CANT_GET_CLIENT_STATUS;
1287
cl_com_free_sirm_message(&status);