3
3
// sdlwork.c - SDL OSD core work item functions
5
// Copyright (c) 1996-2007, Nicola Salmoria and the MAME Team.
5
// Copyright (c) 1996-2009, Nicola Salmoria and the MAME Team.
6
6
// Visit http://mamedev.org for licensing and usage restrictions.
8
8
// SDLMAME by Olivier Galibert and R. Belmont
10
10
//============================================================
12
// MinGW does not have pthreads, defer to Aaron's implementation on that platform
13
#if defined(SDLMAME_WIN32)
14
#include "../windows/winwork.c"
12
#if defined(SDLMAME_NOASM)
13
#include "../osdmini/miniwork.c"
17
16
#include "osdcore.h"
18
17
#include "osinline.h"
21
#include <mach/mach.h>
24
19
#include "sdlsync.h"
26
22
#include "eminline.h"
54
44
//============================================================
56
46
#if KEEP_STATISTICS
57
#define add_to_stat(v,x) do { interlocked_add((v), (x)); } while (0)
58
#define begin_timing(v) do { (v) -= osd_profiling_ticks(); } while (0)
59
#define end_timing(v) do { (v) += osd_profiling_ticks(); } while (0)
47
#define add_to_stat(v,x) do { atomic_add32((v), (x)); } while (0)
48
#define begin_timing(v) do { (v) -= get_profile_ticks(); } while (0)
49
#define end_timing(v) do { (v) += get_profile_ticks(); } while (0)
61
51
#define add_to_stat(v,x) do { } while (0)
62
52
#define begin_timing(v) do { } while (0)
146
120
//============================================================
148
122
static int effective_num_processors(void);
149
#ifndef NO_THREAD_COOPERATIVE
150
123
static UINT32 effective_cpu_mask(int index);
152
124
static void * worker_thread_entry(void *param);
153
125
static void worker_thread_process(osd_work_queue *queue, work_thread_info *thread);
155
//============================================================
157
//============================================================
159
INLINE INT32 interlocked_exchange32(INT32 volatile *ptr, INT32 value)
161
return atomic_exchange32(ptr, value);
164
INLINE INT32 interlocked_increment(INT32 volatile *ptr)
166
return atomic_increment32(ptr);
170
INLINE INT32 interlocked_decrement(INT32 volatile *ptr)
172
return atomic_decrement32(ptr);
176
INLINE INT32 interlocked_add(INT32 volatile *ptr, INT32 add)
178
return atomic_add32(ptr, add);
183
//============================================================
185
//============================================================
187
INLINE void scalable_lock_init(scalable_lock *lock)
189
#ifndef NO_THREAD_COOPERATIVE
190
lock->lock = osd_lock_alloc();
192
memset(lock, 0, sizeof(*lock));
193
lock->slot[0].haslock = TRUE;
198
INLINE INT32 scalable_lock_acquire(scalable_lock *lock)
200
#ifndef NO_THREAD_COOPERATIVE
201
osd_lock_acquire(lock->lock);
204
INT32 myslot = (atomic_increment32(&lock->nextindex) - 1) & (WORK_MAX_THREADS - 1);
206
#if defined(__i386__) || defined(__x86_64__)
208
__asm__ __volatile__ (
210
" xchg %[haslock], %[tmp] ;"
211
" test %[tmp], %[tmp] ;"
213
"2: mov %[haslock], %[tmp] ;"
214
" test %[tmp], %[tmp] ;"
219
: [haslock] "+m" (lock->slot[myslot].haslock)
224
#elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__)
226
__asm__ __volatile__ (
227
"1: lwarx %[tmp], 0, %[haslock] \n"
228
" cmpwi %[tmp], 0 \n"
230
"2: lwzx %[tmp], 0, %[haslock] \n"
231
" cmpwi %[tmp], 0 \n"
238
" stwcx. %[tmp], 0, %[haslock] \n"
242
: [haslock] "r" (&lock->slot[myslot].haslock)
247
while (!osd_compare_exchange32(&lock->slot[myslot].haslock, TRUE, FALSE))
250
for (backcount = 0; backcount < backoff; backcount++)
251
osd_yield_processor();
260
INLINE void scalable_lock_release(scalable_lock *lock, INT32 myslot)
262
#ifndef NO_THREAD_COOPERATIVE
263
osd_lock_release(lock->lock);
266
#if defined(__i386__) || defined(__x86_64__)
267
register INT32 tmp = TRUE;
268
__asm__ __volatile__ (
269
" xchg %[haslock], %[tmp] ;"
270
: [haslock] "+m" (lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock)
274
#elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__)
275
lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock = TRUE;
276
__asm__ __volatile__ ( " eieio " : : );
278
osd_exchange32(&lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock, TRUE);
283
INLINE void scalable_lock_free(scalable_lock *lock)
285
#ifndef NO_THREAD_COOPERATIVE
286
osd_lock_free(lock->lock);
292
128
//============================================================
293
129
// osd_work_queue_alloc
356
194
// set its priority: I/O threads get high priority because they are assumed to be
357
195
// blocked most of the time; other threads just match the creator's priority
358
196
if (flags & WORK_QUEUE_FLAG_IO)
359
osd_thread_adjust_priority(thread->handle, 0); // TODO: specify appropriate priority
197
osd_thread_adjust_priority(thread->handle, 1);
361
osd_thread_adjust_priority(thread->handle, 0); // TODO: specify appropriate priority
199
osd_thread_adjust_priority(thread->handle, 0);
363
#ifndef NO_THREAD_COOPERATIVE
364
201
// Bind main thread to cpu 0
365
202
osd_thread_cpu_affinity(NULL, effective_cpu_mask(0));
418
253
worker_thread_process(queue, thread);
420
255
// if we're a high frequency queue, spin until done
421
if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ)
256
if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ && queue->items != 0)
258
osd_ticks_t stopspin = osd_ticks() + timeout;
423
260
// spin until we're done
424
261
begin_timing(thread->spintime);
425
while (queue->items != 0 && osd_ticks() < stopspin)
427
264
int spin = 10000;
428
265
while (--spin && queue->items != 0)
429
266
osd_yield_processor();
267
} while (queue->items != 0 && osd_ticks() < stopspin);
431
268
end_timing(thread->spintime);
433
270
begin_timing(thread->waittime);
439
276
// reset our done event and double-check the items before waiting
440
277
osd_event_reset(queue->doneevent);
441
interlocked_exchange32(&queue->waiting, TRUE);
278
atomic_exchange32(&queue->waiting, TRUE);
442
279
if (queue->items != 0)
443
280
osd_event_wait(queue->doneevent, timeout);
444
interlocked_exchange32(&queue->waiting, FALSE);
281
atomic_exchange32(&queue->waiting, FALSE);
446
283
// return TRUE if we actually hit 0
447
284
return (queue->items == 0);
591
428
*item_tailptr = item;
592
429
item_tailptr = &item->next;
430
parambase = (UINT8 *)parambase + paramstep;
595
433
// enqueue the whole thing within the critical section
596
lockslot = scalable_lock_acquire(&queue->lock);
434
lockslot = osd_scalable_lock_acquire(queue->lock);
597
435
*queue->tailptr = itemlist;
598
436
queue->tailptr = item_tailptr;
599
scalable_lock_release(&queue->lock, lockslot);
437
osd_scalable_lock_release(queue->lock, lockslot);
601
439
// increment the number of items in the queue
602
interlocked_add(&queue->items, numitems);
440
atomic_add32(&queue->items, numitems);
603
441
add_to_stat(&queue->itemsqueued, numitems);
605
443
// look for free threads to do the work
746
584
if (strlen(s) % 4 != 0 || strlen(s) < (index+1)*4)
748
fprintf(stderr,"Invalid cpu mask: %s\n", s);
586
fprintf(stderr,"Invalid cpu mask @index %d: %s\n", index, s);
752
590
memcpy(buf,s+4*index,4);
754
if (sscanf(buf, "%x", &mask) != 1)
592
if (sscanf(buf, "%04x", &mask) != 1)
755
593
fprintf(stderr,"Invalid cpu mask element %d: %s\n", index, buf);
763
600
//============================================================
764
601
// worker_thread_entry
796
633
worker_thread_process(queue, thread);
798
635
// if we're a high frequency queue, spin for a while before giving up
799
if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ)
636
if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ && queue->list == NULL)
801
638
// spin for a while looking for more work
802
639
begin_timing(thread->spintime);
803
640
stopspin = osd_ticks() + SPIN_LOOP_TIME;
804
while (queue->list == NULL && osd_ticks() < stopspin)
806
643
int spin = 10000;
807
644
while (--spin && queue->list == NULL)
808
645
osd_yield_processor();
646
} while (queue->list == NULL && osd_ticks() < stopspin);
810
647
end_timing(thread->spintime);
863
700
end_timing(thread->actruntime);
865
702
// decrement the item count after we are done
866
interlocked_decrement(&queue->items);
867
interlocked_exchange32(&item->done, TRUE);
703
atomic_decrement32(&queue->items);
704
atomic_exchange32(&item->done, TRUE);
868
705
add_to_stat(&thread->itemsdone, 1);
870
707
// if it's an auto-release item, release it