~ubuntu-branches/ubuntu/lucid/sdlmame/lucid

« back to all changes in this revision

Viewing changes to src/osd/sdl/sdlwork.c

  • Committer: Bazaar Package Importer
  • Author(s): Cesare Falco
  • Date: 2009-11-03 17:10:15 UTC
  • mfrom: (1.1.5 upstream)
  • Revision ID: james.westby@ubuntu.com-20091103171015-6hop4ory5lxnumpn
Tags: 0.135-0ubuntu1
* New upstream release - Closes (LP: #403212)
* debian/watch: unstable releases are no longer detected
* mame.ini: added the cheat subdirectories to cheatpath so zipped
  cheatfiles will be searched too
* renamed crsshair subdirectory to crosshair to reflect upstream change
* mame.ini: renamed references to crosshair subdirectory (see above)

Show diffs side-by-side

added

removed

Lines of Context:
2
2
//
3
3
//  sdlwork.c - SDL OSD core work item functions
4
4
//
5
 
//  Copyright (c) 1996-2007, Nicola Salmoria and the MAME Team.
 
5
//  Copyright (c) 1996-2009, Nicola Salmoria and the MAME Team.
6
6
//  Visit http://mamedev.org for licensing and usage restrictions.
7
7
//
8
8
//  SDLMAME by Olivier Galibert and R. Belmont
9
9
//
10
10
//============================================================
11
11
 
12
 
// MinGW does not have pthreads, defer to Aaron's implementation on that platform
13
 
#if defined(SDLMAME_WIN32)
14
 
#include "../windows/winwork.c"
 
12
#if defined(SDLMAME_NOASM)
 
13
#include "../osdmini/miniwork.c"
15
14
#else
16
15
 
17
16
#include "osdcore.h"
18
17
#include "osinline.h"
19
18
 
20
 
#ifdef SDLMAME_DARWIN
21
 
#include <mach/mach.h>
22
 
#include "osxutils.h"
23
 
#endif
24
19
#include "sdlsync.h"
 
20
#include "sdlos.h"
25
21
 
26
22
#include "eminline.h"
27
23
 
40
36
#define SDLENV_CPUMASKS                                 "OSDCPUMASKS"
41
37
 
42
38
#define INFINITE                                (osd_ticks_per_second() *  (osd_ticks_t) 10000)
43
 
#ifndef NO_THREAD_COOPERATIVE
44
 
//FIXME: depends on <THREADS>/<PHYS PROCESSORS>
45
39
#define SPIN_LOOP_TIME                  (osd_ticks_per_second() / 10000)
46
 
//#define SPIN_LOOP_TIME                        ((osd_ticks_t) 0)
47
 
#else
48
 
#define SPIN_LOOP_TIME                  (osd_ticks_per_second() / 1000)
49
 
#endif
50
40
 
51
41
 
52
42
//============================================================
54
44
//============================================================
55
45
 
56
46
#if KEEP_STATISTICS
57
 
#define add_to_stat(v,x)                do { interlocked_add((v), (x)); } while (0)
58
 
#define begin_timing(v)                 do { (v) -= osd_profiling_ticks(); } while (0)
59
 
#define end_timing(v)                   do { (v) += osd_profiling_ticks(); } while (0)
 
47
#define add_to_stat(v,x)                do { atomic_add32((v), (x)); } while (0)
 
48
#define begin_timing(v)                 do { (v) -= get_profile_ticks(); } while (0)
 
49
#define end_timing(v)                   do { (v) += get_profile_ticks(); } while (0)
60
50
#else
61
51
#define add_to_stat(v,x)                do { } while (0)
62
52
#define begin_timing(v)                 do { } while (0)
69
59
//  TYPE DEFINITIONS
70
60
//============================================================
71
61
 
72
 
typedef struct _scalable_lock scalable_lock;
73
 
struct _scalable_lock
74
 
{
75
 
#ifndef NO_THREAD_COOPERATIVE
76
 
        osd_lock                        *lock;
77
 
#else
78
 
        struct
79
 
        {
80
 
                volatile INT32  haslock;                // do we have the lock?
81
 
                INT32                   filler[64/4-1]; // assumes a 64-byte cache line
82
 
        } slot[WORK_MAX_THREADS];                       // one slot per thread
83
 
        volatile INT32          nextindex;              // index of next slot to use
84
 
#endif
85
 
};
86
 
 
87
 
 
88
62
typedef struct _work_thread_info work_thread_info;
89
63
struct _work_thread_info
90
64
{
105
79
 
106
80
struct _osd_work_queue
107
81
{
108
 
        scalable_lock           lock;                   // lock for protecting the queue
 
82
        osd_scalable_lock *     lock;                   // lock for protecting the queue
109
83
        osd_work_item * volatile list;          // list of items in the queue
110
84
        osd_work_item ** volatile tailptr;      // pointer to the tail pointer of work items in the queue
111
85
        osd_work_item * volatile free;          // free list of work items
146
120
//============================================================
147
121
 
148
122
static int effective_num_processors(void);
149
 
#ifndef NO_THREAD_COOPERATIVE
150
123
static UINT32 effective_cpu_mask(int index);
151
 
#endif
152
124
static void * worker_thread_entry(void *param);
153
125
static void worker_thread_process(osd_work_queue *queue, work_thread_info *thread);
154
126
 
155
 
//============================================================
156
 
//  INLINE FUNCTIONS
157
 
//============================================================
158
 
 
159
 
INLINE INT32 interlocked_exchange32(INT32 volatile *ptr, INT32 value)
160
 
{
161
 
        return atomic_exchange32(ptr, value);
162
 
}
163
 
 
164
 
INLINE INT32 interlocked_increment(INT32 volatile *ptr)
165
 
{
166
 
        return atomic_increment32(ptr);
167
 
}
168
 
 
169
 
 
170
 
INLINE INT32 interlocked_decrement(INT32 volatile *ptr)
171
 
{
172
 
        return atomic_decrement32(ptr);
173
 
}
174
 
 
175
 
 
176
 
INLINE INT32 interlocked_add(INT32 volatile *ptr, INT32 add)
177
 
{
178
 
        return atomic_add32(ptr, add);
179
 
}
180
 
 
181
 
 
182
 
 
183
 
//============================================================
184
 
//  Scalable Locks
185
 
//============================================================
186
 
 
187
 
INLINE void scalable_lock_init(scalable_lock *lock)
188
 
{
189
 
#ifndef NO_THREAD_COOPERATIVE
190
 
        lock->lock = osd_lock_alloc();
191
 
#else
192
 
        memset(lock, 0, sizeof(*lock));
193
 
        lock->slot[0].haslock = TRUE;
194
 
#endif
195
 
}
196
 
 
197
 
 
198
 
INLINE INT32 scalable_lock_acquire(scalable_lock *lock)
199
 
{
200
 
#ifndef NO_THREAD_COOPERATIVE
201
 
        osd_lock_acquire(lock->lock);
202
 
        return 0;
203
 
#else
204
 
        INT32 myslot = (atomic_increment32(&lock->nextindex) - 1) & (WORK_MAX_THREADS - 1);
205
 
 
206
 
#if defined(__i386__) || defined(__x86_64__)
207
 
        register INT32 tmp;
208
 
        __asm__ __volatile__ (
209
 
                "1: clr    %[tmp]             ;"
210
 
                "   xchg   %[haslock], %[tmp] ;"
211
 
                "   test   %[tmp], %[tmp]     ;"
212
 
                "   jne    3f                 ;"
213
 
                "2: mov    %[haslock], %[tmp] ;"
214
 
                "   test   %[tmp], %[tmp]     ;"
215
 
                "   jne    1b                 ;"
216
 
                "   pause                     ;"
217
 
                "   jmp    2b                 ;"
218
 
                "3:                            "
219
 
                : [haslock] "+m"  (lock->slot[myslot].haslock)
220
 
                , [tmp]     "=&r" (tmp)
221
 
                :
222
 
                : "%cc"
223
 
        );
224
 
#elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__)
225
 
        register INT32 tmp;
226
 
        __asm__ __volatile__ (
227
 
                "1: lwarx   %[tmp], 0, %[haslock] \n"
228
 
                "   cmpwi   %[tmp], 0             \n"
229
 
                "   bne     3f                    \n"
230
 
                "2: lwzx    %[tmp], 0, %[haslock] \n"
231
 
                "   cmpwi   %[tmp], 0             \n"
232
 
                "   bne     1b                    \n"
233
 
                "   nop                           \n"
234
 
                "   nop                           \n"
235
 
                "   b       2b                    \n"
236
 
                "3: li      %[tmp], 0             \n"
237
 
                "   sync                          \n"
238
 
                "   stwcx.  %[tmp], 0, %[haslock] \n"
239
 
                "   bne-    1b                    \n"
240
 
                "   eieio                         \n"
241
 
                : [tmp]     "=&r" (tmp)
242
 
                : [haslock] "r"   (&lock->slot[myslot].haslock)
243
 
                : "cr0"
244
 
        );
245
 
#else
246
 
        INT32 backoff = 1;
247
 
        while (!osd_compare_exchange32(&lock->slot[myslot].haslock, TRUE, FALSE))
248
 
        {
249
 
                INT32 backcount;
250
 
                for (backcount = 0; backcount < backoff; backcount++)
251
 
                        osd_yield_processor();
252
 
                backoff <<= 1;
253
 
        }
254
 
#endif
255
 
        return myslot;
256
 
#endif
257
 
}
258
 
 
259
 
 
260
 
INLINE void scalable_lock_release(scalable_lock *lock, INT32 myslot)
261
 
{
262
 
#ifndef NO_THREAD_COOPERATIVE
263
 
        osd_lock_release(lock->lock);
264
 
        return;
265
 
#else
266
 
#if defined(__i386__) || defined(__x86_64__)
267
 
        register INT32 tmp = TRUE;
268
 
        __asm__ __volatile__ (
269
 
                " xchg   %[haslock], %[tmp] ;"
270
 
                : [haslock] "+m" (lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock)
271
 
                , [tmp]     "+r" (tmp)
272
 
                :
273
 
        );
274
 
#elif defined(__ppc__) || defined (__PPC__) || defined(__ppc64__) || defined(__PPC64__)
275
 
        lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock = TRUE;
276
 
        __asm__ __volatile__ ( " eieio " : : );
277
 
#else
278
 
        osd_exchange32(&lock->slot[(myslot + 1) & (WORK_MAX_THREADS - 1)].haslock, TRUE);
279
 
#endif
280
 
#endif
281
 
}
282
 
 
283
 
INLINE void scalable_lock_free(scalable_lock *lock)
284
 
{
285
 
#ifndef NO_THREAD_COOPERATIVE
286
 
        osd_lock_free(lock->lock);
287
 
#endif
288
 
}
289
 
 
290
 
 
291
127
 
292
128
//============================================================
293
129
//  osd_work_queue_alloc
300
136
        int threadnum;
301
137
 
302
138
        // allocate a new queue
303
 
        queue = malloc(sizeof(*queue));
 
139
        queue = (osd_work_queue *)malloc(sizeof(*queue));
304
140
        if (queue == NULL)
305
141
                goto error;
306
142
        memset(queue, 0, sizeof(*queue));
315
151
                goto error;
316
152
 
317
153
        // initialize the critical section
318
 
        scalable_lock_init(&queue->lock);
 
154
        queue->lock = osd_scalable_lock_alloc();
 
155
        if (queue->lock == NULL)
 
156
                goto error;
319
157
 
320
158
        // determine how many threads to create...
321
159
        // on a single-CPU system, create 1 thread for I/O queues, and 0 threads for everything else
330
168
        queue->threads = MIN(queue->threads, WORK_MAX_THREADS);
331
169
 
332
170
        // allocate memory for thread array (+1 to count the calling thread)
333
 
        queue->thread = malloc((queue->threads + 1) * sizeof(queue->thread[0]));
 
171
        queue->thread = (work_thread_info *)malloc((queue->threads + 1) * sizeof(queue->thread[0]));
334
172
        if (queue->thread == NULL)
335
173
                goto error;
336
174
        memset(queue->thread, 0, (queue->threads + 1) * sizeof(queue->thread[0]));
356
194
                // set its priority: I/O threads get high priority because they are assumed to be
357
195
                // blocked most of the time; other threads just match the creator's priority
358
196
                if (flags & WORK_QUEUE_FLAG_IO)
359
 
                        osd_thread_adjust_priority(thread->handle, 0);  // TODO: specify appropriate priority
 
197
                        osd_thread_adjust_priority(thread->handle, 1);  
360
198
                else
361
 
                        osd_thread_adjust_priority(thread->handle, 0);  // TODO: specify appropriate priority
 
199
                        osd_thread_adjust_priority(thread->handle, 0);  
362
200
 
363
 
#ifndef NO_THREAD_COOPERATIVE
364
201
                // Bind main thread to cpu 0
365
202
                osd_thread_cpu_affinity(NULL, effective_cpu_mask(0));
366
203
                
368
205
                        osd_thread_cpu_affinity(thread->handle, effective_cpu_mask(1));
369
206
                else
370
207
                        osd_thread_cpu_affinity(thread->handle, effective_cpu_mask(2+threadnum) );
371
 
#endif
372
208
        }
373
209
 
374
210
        // start a timer going for "waittime" on the main thread
410
246
        if (queue->flags & WORK_QUEUE_FLAG_MULTI)
411
247
        {
412
248
                work_thread_info *thread = &queue->thread[queue->threads];
413
 
                osd_ticks_t stopspin = osd_ticks() + timeout;
414
249
 
415
250
                end_timing(thread->waittime);
416
251
 
418
253
                worker_thread_process(queue, thread);
419
254
 
420
255
                // if we're a high frequency queue, spin until done
421
 
                if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ)
 
256
                if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ && queue->items != 0)
422
257
                {
 
258
                        osd_ticks_t stopspin = osd_ticks() + timeout;
 
259
 
423
260
                        // spin until we're done
424
261
                        begin_timing(thread->spintime);
425
 
                        while (queue->items != 0 && osd_ticks() < stopspin)
426
 
                        {
 
262
                        
 
263
                        do {
427
264
                                int spin = 10000;
428
265
                                while (--spin && queue->items != 0)
429
266
                                        osd_yield_processor();
430
 
                        }
 
267
                        } while (queue->items != 0 && osd_ticks() < stopspin);
431
268
                        end_timing(thread->spintime);
432
269
 
433
270
                        begin_timing(thread->waittime);
438
275
 
439
276
        // reset our done event and double-check the items before waiting
440
277
        osd_event_reset(queue->doneevent);
441
 
        interlocked_exchange32(&queue->waiting, TRUE);
 
278
        atomic_exchange32(&queue->waiting, TRUE);
442
279
        if (queue->items != 0)
443
280
                osd_event_wait(queue->doneevent, timeout);
444
 
        interlocked_exchange32(&queue->waiting, FALSE);
 
281
        atomic_exchange32(&queue->waiting, FALSE);
445
282
 
446
283
        // return TRUE if we actually hit 0
447
284
        return (queue->items == 0);
539
376
        printf("Spin loops     = %9d\n", queue->spinloops);
540
377
#endif
541
378
 
542
 
        scalable_lock_free(&queue->lock);
 
379
        osd_scalable_lock_free(queue->lock);
543
380
        // free the queue itself
544
381
        free(queue);
545
382
}
571
408
                if (item == NULL)
572
409
                {
573
410
                        // allocate the item
574
 
                        item = malloc(sizeof(*item));
 
411
                        item = (osd_work_item *)malloc(sizeof(*item));
575
412
                        if (item == NULL)
576
413
                                return NULL;
577
414
                        item->event = NULL;
581
418
                // fill in the basics
582
419
                item->next = NULL;
583
420
                item->callback = callback;
584
 
                item->param = (UINT8 *)parambase + itemnum * paramstep;
 
421
                item->param = parambase;
585
422
                item->result = NULL;
586
423
                item->flags = flags;
587
424
                item->done = FALSE;
590
427
                lastitem = item;
591
428
                *item_tailptr = item;
592
429
                item_tailptr = &item->next;
 
430
                parambase = (UINT8 *)parambase + paramstep;
593
431
        }
594
432
 
595
433
        // enqueue the whole thing within the critical section
596
 
        lockslot = scalable_lock_acquire(&queue->lock);
 
434
        lockslot = osd_scalable_lock_acquire(queue->lock);
597
435
        *queue->tailptr = itemlist;
598
436
        queue->tailptr = item_tailptr;
599
 
        scalable_lock_release(&queue->lock, lockslot);
 
437
        osd_scalable_lock_release(queue->lock, lockslot);
600
438
 
601
439
        // increment the number of items in the queue
602
 
        interlocked_add(&queue->items, numitems);
 
440
        atomic_add32(&queue->items, numitems);
603
441
        add_to_stat(&queue->itemsqueued, numitems);
604
442
 
605
443
        // look for free threads to do the work
634
472
        }
635
473
        // only return the item if it won't get released automatically
636
474
        return (flags & WORK_ITEM_FLAG_AUTO_RELEASE) ? NULL : lastitem;
637
 
        //return (flags & WORK_ITEM_FLAG_AUTO_RELEASE) ? NULL : itemlist;
638
475
}
639
476
 
640
477
 
655
492
                 osd_event_reset(item->event);
656
493
 
657
494
        // if we don't have an event, we need to spin (shouldn't ever really happen)
658
 
        // FIXME: there is still a bug in winwork.c. The following is OK.
659
495
        if (item->event == NULL)
660
496
        {
661
497
                osd_ticks_t stopspin = osd_ticks() + timeout;
662
 
                while (!item->done && osd_ticks() < stopspin)
663
 
                        osd_yield_processor();
 
498
                do {
 
499
                        int spin = 10000;
 
500
                        while (--spin && !item->done)
 
501
                                osd_yield_processor();
 
502
                } while (!item->done && osd_ticks() < stopspin);
664
503
        }
665
504
 
666
505
        // otherwise, block on the event until done
724
563
//  effective_cpu_mask
725
564
//============================================================
726
565
 
727
 
#ifndef NO_THREAD_COOPERATIVE
728
566
static UINT32 effective_cpu_mask(int index)
729
567
{
730
568
        char    *s;
745
583
                {
746
584
                        if (strlen(s) % 4 != 0 || strlen(s) < (index+1)*4)
747
585
                        {
748
 
                                fprintf(stderr,"Invalid cpu mask: %s\n", s);
 
586
                                fprintf(stderr,"Invalid cpu mask @index %d: %s\n", index, s);
749
587
                        }
750
588
                        else
751
589
                        {
752
590
                                memcpy(buf,s+4*index,4);
753
591
                                buf[4] = 0;
754
 
                                if (sscanf(buf, "%x", &mask) != 1)
 
592
                                if (sscanf(buf, "%04x", &mask) != 1)
755
593
                                        fprintf(stderr,"Invalid cpu mask element %d: %s\n", index, buf);
756
594
                        }
757
595
                }
758
596
        }
759
597
        return mask;
760
598
}
761
 
#endif
762
599
 
763
600
//============================================================
764
601
//  worker_thread_entry
766
603
 
767
604
static void *worker_thread_entry(void *param)
768
605
{
769
 
        work_thread_info *thread = param;
 
606
        work_thread_info *thread = (work_thread_info *)param;
770
607
        osd_work_queue *queue = thread->queue;
771
608
 
772
609
        // loop until we exit
784
621
                        break;
785
622
 
786
623
                // indicate that we are live
787
 
                interlocked_exchange32(&thread->active, TRUE);
788
 
                interlocked_increment(&queue->livethreads);
 
624
                atomic_exchange32(&thread->active, TRUE);
 
625
                atomic_increment32(&queue->livethreads);
789
626
 
790
627
                // process work items
791
628
                for ( ;; )
796
633
                        worker_thread_process(queue, thread);
797
634
 
798
635
                        // if we're a high frequency queue, spin for a while before giving up
799
 
                        if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ)
 
636
                        if (queue->flags & WORK_QUEUE_FLAG_HIGH_FREQ && queue->list == NULL)
800
637
                        {
801
638
                                // spin for a while looking for more work
802
639
                                begin_timing(thread->spintime);
803
640
                                stopspin = osd_ticks() + SPIN_LOOP_TIME;
804
 
                                while (queue->list == NULL && osd_ticks() < stopspin)
805
 
                                {
 
641
                                
 
642
                                do {
806
643
                                        int spin = 10000;
807
644
                                        while (--spin && queue->list == NULL)
808
645
                                                osd_yield_processor();
809
 
                                }
 
646
                                } while (queue->list == NULL && osd_ticks() < stopspin);
810
647
                                end_timing(thread->spintime);
811
648
                        }
812
649
 
817
654
                }
818
655
 
819
656
                // decrement the live thread count
820
 
                interlocked_exchange32(&thread->active, FALSE);
821
 
                interlocked_decrement(&queue->livethreads);
 
657
                atomic_exchange32(&thread->active, FALSE);
 
658
                atomic_decrement32(&queue->livethreads);
822
659
        }
823
660
        return NULL;
824
661
}
841
678
                INT32 lockslot;
842
679
 
843
680
                // use a critical section to synchronize the removal of items
844
 
                lockslot = scalable_lock_acquire(&queue->lock);
 
681
                lockslot = osd_scalable_lock_acquire(queue->lock);
845
682
                {
846
683
                        // pull the item from the queue
847
684
                        item = (osd_work_item *)queue->list;
852
689
                                        queue->tailptr = (osd_work_item **)&queue->list;
853
690
                        }
854
691
                }
855
 
                scalable_lock_release(&queue->lock, lockslot);
 
692
                osd_scalable_lock_release(queue->lock, lockslot);
856
693
 
857
694
                // process non-NULL items
858
695
                if (item != NULL)
863
700
                        end_timing(thread->actruntime);
864
701
 
865
702
                        // decrement the item count after we are done
866
 
                        interlocked_decrement(&queue->items);
867
 
                        interlocked_exchange32(&item->done, TRUE);
 
703
                        atomic_decrement32(&queue->items);
 
704
                        atomic_exchange32(&item->done, TRUE);
868
705
                        add_to_stat(&thread->itemsdone, 1);
869
706
 
870
707
                        // if it's an auto-release item, release it
894
731
        end_timing(thread->runtime);
895
732
}
896
733
 
897
 
#endif  // SDLMAME_WIN32
 
734
#endif // SDLMAME_NOASM