2
* Copyright © 2008 Intel Corporation
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24
* Eric Anholt <eric@anholt.net>
32
#include "i915_trace.h"
33
#include "intel_drv.h"
34
#include <linux/shmem_fs.h>
35
#include <linux/slab.h>
36
#include <linux/swap.h>
37
#include <linux/pci.h>
38
#include <linux/dma-buf.h>
40
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
41
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
42
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
44
bool map_and_fenceable,
46
static int i915_gem_phys_pwrite(struct drm_device *dev,
47
struct drm_i915_gem_object *obj,
48
struct drm_i915_gem_pwrite *args,
49
struct drm_file *file);
51
static void i915_gem_write_fence(struct drm_device *dev, int reg,
52
struct drm_i915_gem_object *obj);
53
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
54
struct drm_i915_fence_reg *fence,
57
static int i915_gem_inactive_shrink(struct shrinker *shrinker,
58
struct shrink_control *sc);
59
static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
60
static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
61
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
63
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
66
i915_gem_release_mmap(obj);
68
/* As we do not have an associated fence register, we will force
69
* a tiling change if we ever need to acquire one.
71
obj->fence_dirty = false;
72
obj->fence_reg = I915_FENCE_REG_NONE;
75
/* some bookkeeping */
76
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
79
dev_priv->mm.object_count++;
80
dev_priv->mm.object_memory += size;
83
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
86
dev_priv->mm.object_count--;
87
dev_priv->mm.object_memory -= size;
91
i915_gem_wait_for_error(struct drm_device *dev)
93
struct drm_i915_private *dev_priv = dev->dev_private;
94
struct completion *x = &dev_priv->error_completion;
98
if (!atomic_read(&dev_priv->mm.wedged))
102
* Only wait 10 seconds for the gpu reset to complete to avoid hanging
103
* userspace. If it takes that long something really bad is going on and
104
* we should simply try to bail out and fail as gracefully as possible.
106
ret = wait_for_completion_interruptible_timeout(x, 10*HZ);
108
DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
110
} else if (ret < 0) {
114
if (atomic_read(&dev_priv->mm.wedged)) {
115
/* GPU is hung, bump the completion count to account for
116
* the token we just consumed so that we never hit zero and
117
* end up waiting upon a subsequent completion event that
120
spin_lock_irqsave(&x->wait.lock, flags);
122
spin_unlock_irqrestore(&x->wait.lock, flags);
127
int i915_mutex_lock_interruptible(struct drm_device *dev)
131
ret = i915_gem_wait_for_error(dev);
135
ret = mutex_lock_interruptible(&dev->struct_mutex);
139
WARN_ON(i915_verify_lists(dev));
144
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
146
return obj->gtt_space && !obj->active;
150
i915_gem_init_ioctl(struct drm_device *dev, void *data,
151
struct drm_file *file)
153
struct drm_i915_gem_init *args = data;
155
if (drm_core_check_feature(dev, DRIVER_MODESET))
158
if (args->gtt_start >= args->gtt_end ||
159
(args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
162
/* GEM with user mode setting was never supported on ilk and later. */
163
if (INTEL_INFO(dev)->gen >= 5)
166
mutex_lock(&dev->struct_mutex);
167
i915_gem_init_global_gtt(dev, args->gtt_start,
168
args->gtt_end, args->gtt_end);
169
mutex_unlock(&dev->struct_mutex);
175
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
176
struct drm_file *file)
178
struct drm_i915_private *dev_priv = dev->dev_private;
179
struct drm_i915_gem_get_aperture *args = data;
180
struct drm_i915_gem_object *obj;
184
mutex_lock(&dev->struct_mutex);
185
list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
187
pinned += obj->gtt_space->size;
188
mutex_unlock(&dev->struct_mutex);
190
args->aper_size = dev_priv->mm.gtt_total;
191
args->aper_available_size = args->aper_size - pinned;
197
i915_gem_create(struct drm_file *file,
198
struct drm_device *dev,
202
struct drm_i915_gem_object *obj;
206
size = roundup(size, PAGE_SIZE);
210
/* Allocate the new object */
211
obj = i915_gem_alloc_object(dev, size);
215
ret = drm_gem_handle_create(file, &obj->base, &handle);
217
drm_gem_object_release(&obj->base);
218
i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
223
/* drop reference from allocate - handle holds it now */
224
drm_gem_object_unreference(&obj->base);
225
trace_i915_gem_object_create(obj);
232
i915_gem_dumb_create(struct drm_file *file,
233
struct drm_device *dev,
234
struct drm_mode_create_dumb *args)
236
/* have to work out size/pitch and return them */
237
args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
238
args->size = args->pitch * args->height;
239
return i915_gem_create(file, dev,
240
args->size, &args->handle);
243
int i915_gem_dumb_destroy(struct drm_file *file,
244
struct drm_device *dev,
247
return drm_gem_handle_delete(file, handle);
251
* Creates a new mm object and returns a handle to it.
254
i915_gem_create_ioctl(struct drm_device *dev, void *data,
255
struct drm_file *file)
257
struct drm_i915_gem_create *args = data;
259
return i915_gem_create(file, dev,
260
args->size, &args->handle);
263
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
265
drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
267
return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
268
obj->tiling_mode != I915_TILING_NONE;
272
__copy_to_user_swizzled(char __user *cpu_vaddr,
273
const char *gpu_vaddr, int gpu_offset,
276
int ret, cpu_offset = 0;
279
int cacheline_end = ALIGN(gpu_offset + 1, 64);
280
int this_length = min(cacheline_end - gpu_offset, length);
281
int swizzled_gpu_offset = gpu_offset ^ 64;
283
ret = __copy_to_user(cpu_vaddr + cpu_offset,
284
gpu_vaddr + swizzled_gpu_offset,
289
cpu_offset += this_length;
290
gpu_offset += this_length;
291
length -= this_length;
298
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
299
const char __user *cpu_vaddr,
302
int ret, cpu_offset = 0;
305
int cacheline_end = ALIGN(gpu_offset + 1, 64);
306
int this_length = min(cacheline_end - gpu_offset, length);
307
int swizzled_gpu_offset = gpu_offset ^ 64;
309
ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
310
cpu_vaddr + cpu_offset,
315
cpu_offset += this_length;
316
gpu_offset += this_length;
317
length -= this_length;
323
/* Per-page copy function for the shmem pread fastpath.
324
* Flushes invalid cachelines before reading the target if
325
* needs_clflush is set. */
327
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
328
char __user *user_data,
329
bool page_do_bit17_swizzling, bool needs_clflush)
334
if (unlikely(page_do_bit17_swizzling))
337
vaddr = kmap_atomic(page);
339
drm_clflush_virt_range(vaddr + shmem_page_offset,
341
ret = __copy_to_user_inatomic(user_data,
342
vaddr + shmem_page_offset,
344
kunmap_atomic(vaddr);
346
return ret ? -EFAULT : 0;
350
shmem_clflush_swizzled_range(char *addr, unsigned long length,
353
if (unlikely(swizzled)) {
354
unsigned long start = (unsigned long) addr;
355
unsigned long end = (unsigned long) addr + length;
357
/* For swizzling simply ensure that we always flush both
358
* channels. Lame, but simple and it works. Swizzled
359
* pwrite/pread is far from a hotpath - current userspace
360
* doesn't use it at all. */
361
start = round_down(start, 128);
362
end = round_up(end, 128);
364
drm_clflush_virt_range((void *)start, end - start);
366
drm_clflush_virt_range(addr, length);
371
/* Only difference to the fast-path function is that this can handle bit17
372
* and uses non-atomic copy and kmap functions. */
374
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
375
char __user *user_data,
376
bool page_do_bit17_swizzling, bool needs_clflush)
383
shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
385
page_do_bit17_swizzling);
387
if (page_do_bit17_swizzling)
388
ret = __copy_to_user_swizzled(user_data,
389
vaddr, shmem_page_offset,
392
ret = __copy_to_user(user_data,
393
vaddr + shmem_page_offset,
397
return ret ? - EFAULT : 0;
401
i915_gem_shmem_pread(struct drm_device *dev,
402
struct drm_i915_gem_object *obj,
403
struct drm_i915_gem_pread *args,
404
struct drm_file *file)
406
char __user *user_data;
409
int shmem_page_offset, page_length, ret = 0;
410
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
411
int hit_slowpath = 0;
413
int needs_clflush = 0;
414
struct scatterlist *sg;
417
user_data = (char __user *) (uintptr_t) args->data_ptr;
420
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
422
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
423
/* If we're not in the cpu read domain, set ourself into the gtt
424
* read domain and manually flush cachelines (if required). This
425
* optimizes for the case when the gpu will dirty the data
426
* anyway again before the next pread happens. */
427
if (obj->cache_level == I915_CACHE_NONE)
429
if (obj->gtt_space) {
430
ret = i915_gem_object_set_to_gtt_domain(obj, false);
436
ret = i915_gem_object_get_pages(obj);
440
i915_gem_object_pin_pages(obj);
442
offset = args->offset;
444
for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
447
if (i < offset >> PAGE_SHIFT)
453
/* Operation in this page
455
* shmem_page_offset = offset within page in shmem file
456
* page_length = bytes to copy for this page
458
shmem_page_offset = offset_in_page(offset);
459
page_length = remain;
460
if ((shmem_page_offset + page_length) > PAGE_SIZE)
461
page_length = PAGE_SIZE - shmem_page_offset;
464
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
465
(page_to_phys(page) & (1 << 17)) != 0;
467
ret = shmem_pread_fast(page, shmem_page_offset, page_length,
468
user_data, page_do_bit17_swizzling,
474
mutex_unlock(&dev->struct_mutex);
477
ret = fault_in_multipages_writeable(user_data, remain);
478
/* Userspace is tricking us, but we've already clobbered
479
* its pages with the prefault and promised to write the
480
* data up to the first fault. Hence ignore any errors
481
* and just continue. */
486
ret = shmem_pread_slow(page, shmem_page_offset, page_length,
487
user_data, page_do_bit17_swizzling,
490
mutex_lock(&dev->struct_mutex);
493
mark_page_accessed(page);
498
remain -= page_length;
499
user_data += page_length;
500
offset += page_length;
504
i915_gem_object_unpin_pages(obj);
507
/* Fixup: Kill any reinstated backing storage pages */
508
if (obj->madv == __I915_MADV_PURGED)
509
i915_gem_object_truncate(obj);
516
* Reads data from the object referenced by handle.
518
* On error, the contents of *data are undefined.
521
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
522
struct drm_file *file)
524
struct drm_i915_gem_pread *args = data;
525
struct drm_i915_gem_object *obj;
531
if (!access_ok(VERIFY_WRITE,
532
(char __user *)(uintptr_t)args->data_ptr,
536
ret = i915_mutex_lock_interruptible(dev);
540
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
541
if (&obj->base == NULL) {
546
/* Bounds check source. */
547
if (args->offset > obj->base.size ||
548
args->size > obj->base.size - args->offset) {
553
/* prime objects have no backing filp to GEM pread/pwrite
556
if (!obj->base.filp) {
561
trace_i915_gem_object_pread(obj, args->offset, args->size);
563
ret = i915_gem_shmem_pread(dev, obj, args, file);
566
drm_gem_object_unreference(&obj->base);
568
mutex_unlock(&dev->struct_mutex);
572
/* This is the fast write path which cannot handle
573
* page faults in the source data
577
fast_user_write(struct io_mapping *mapping,
578
loff_t page_base, int page_offset,
579
char __user *user_data,
582
void __iomem *vaddr_atomic;
584
unsigned long unwritten;
586
vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
587
/* We can use the cpu mem copy function because this is X86. */
588
vaddr = (void __force*)vaddr_atomic + page_offset;
589
unwritten = __copy_from_user_inatomic_nocache(vaddr,
591
io_mapping_unmap_atomic(vaddr_atomic);
596
* This is the fast pwrite path, where we copy the data directly from the
597
* user into the GTT, uncached.
600
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
601
struct drm_i915_gem_object *obj,
602
struct drm_i915_gem_pwrite *args,
603
struct drm_file *file)
605
drm_i915_private_t *dev_priv = dev->dev_private;
607
loff_t offset, page_base;
608
char __user *user_data;
609
int page_offset, page_length, ret;
611
ret = i915_gem_object_pin(obj, 0, true, true);
615
ret = i915_gem_object_set_to_gtt_domain(obj, true);
619
ret = i915_gem_object_put_fence(obj);
623
user_data = (char __user *) (uintptr_t) args->data_ptr;
626
offset = obj->gtt_offset + args->offset;
629
/* Operation in this page
631
* page_base = page offset within aperture
632
* page_offset = offset within page
633
* page_length = bytes to copy for this page
635
page_base = offset & PAGE_MASK;
636
page_offset = offset_in_page(offset);
637
page_length = remain;
638
if ((page_offset + remain) > PAGE_SIZE)
639
page_length = PAGE_SIZE - page_offset;
641
/* If we get a fault while copying data, then (presumably) our
642
* source page isn't available. Return the error and we'll
643
* retry in the slow path.
645
if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
646
page_offset, user_data, page_length)) {
651
remain -= page_length;
652
user_data += page_length;
653
offset += page_length;
657
i915_gem_object_unpin(obj);
662
/* Per-page copy function for the shmem pwrite fastpath.
663
* Flushes invalid cachelines before writing to the target if
664
* needs_clflush_before is set and flushes out any written cachelines after
665
* writing if needs_clflush is set. */
667
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
668
char __user *user_data,
669
bool page_do_bit17_swizzling,
670
bool needs_clflush_before,
671
bool needs_clflush_after)
676
if (unlikely(page_do_bit17_swizzling))
679
vaddr = kmap_atomic(page);
680
if (needs_clflush_before)
681
drm_clflush_virt_range(vaddr + shmem_page_offset,
683
ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
686
if (needs_clflush_after)
687
drm_clflush_virt_range(vaddr + shmem_page_offset,
689
kunmap_atomic(vaddr);
691
return ret ? -EFAULT : 0;
694
/* Only difference to the fast-path function is that this can handle bit17
695
* and uses non-atomic copy and kmap functions. */
697
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
698
char __user *user_data,
699
bool page_do_bit17_swizzling,
700
bool needs_clflush_before,
701
bool needs_clflush_after)
707
if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
708
shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
710
page_do_bit17_swizzling);
711
if (page_do_bit17_swizzling)
712
ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
716
ret = __copy_from_user(vaddr + shmem_page_offset,
719
if (needs_clflush_after)
720
shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
722
page_do_bit17_swizzling);
725
return ret ? -EFAULT : 0;
729
i915_gem_shmem_pwrite(struct drm_device *dev,
730
struct drm_i915_gem_object *obj,
731
struct drm_i915_gem_pwrite *args,
732
struct drm_file *file)
736
char __user *user_data;
737
int shmem_page_offset, page_length, ret = 0;
738
int obj_do_bit17_swizzling, page_do_bit17_swizzling;
739
int hit_slowpath = 0;
740
int needs_clflush_after = 0;
741
int needs_clflush_before = 0;
743
struct scatterlist *sg;
745
user_data = (char __user *) (uintptr_t) args->data_ptr;
748
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
750
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
751
/* If we're not in the cpu write domain, set ourself into the gtt
752
* write domain and manually flush cachelines (if required). This
753
* optimizes for the case when the gpu will use the data
754
* right away and we therefore have to clflush anyway. */
755
if (obj->cache_level == I915_CACHE_NONE)
756
needs_clflush_after = 1;
757
if (obj->gtt_space) {
758
ret = i915_gem_object_set_to_gtt_domain(obj, true);
763
/* Same trick applies for invalidate partially written cachelines before
765
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
766
&& obj->cache_level == I915_CACHE_NONE)
767
needs_clflush_before = 1;
769
ret = i915_gem_object_get_pages(obj);
773
i915_gem_object_pin_pages(obj);
775
offset = args->offset;
778
for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
780
int partial_cacheline_write;
782
if (i < offset >> PAGE_SHIFT)
788
/* Operation in this page
790
* shmem_page_offset = offset within page in shmem file
791
* page_length = bytes to copy for this page
793
shmem_page_offset = offset_in_page(offset);
795
page_length = remain;
796
if ((shmem_page_offset + page_length) > PAGE_SIZE)
797
page_length = PAGE_SIZE - shmem_page_offset;
799
/* If we don't overwrite a cacheline completely we need to be
800
* careful to have up-to-date data by first clflushing. Don't
801
* overcomplicate things and flush the entire patch. */
802
partial_cacheline_write = needs_clflush_before &&
803
((shmem_page_offset | page_length)
804
& (boot_cpu_data.x86_clflush_size - 1));
807
page_do_bit17_swizzling = obj_do_bit17_swizzling &&
808
(page_to_phys(page) & (1 << 17)) != 0;
810
ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
811
user_data, page_do_bit17_swizzling,
812
partial_cacheline_write,
813
needs_clflush_after);
818
mutex_unlock(&dev->struct_mutex);
819
ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
820
user_data, page_do_bit17_swizzling,
821
partial_cacheline_write,
822
needs_clflush_after);
824
mutex_lock(&dev->struct_mutex);
827
set_page_dirty(page);
828
mark_page_accessed(page);
833
remain -= page_length;
834
user_data += page_length;
835
offset += page_length;
839
i915_gem_object_unpin_pages(obj);
842
/* Fixup: Kill any reinstated backing storage pages */
843
if (obj->madv == __I915_MADV_PURGED)
844
i915_gem_object_truncate(obj);
845
/* and flush dirty cachelines in case the object isn't in the cpu write
847
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
848
i915_gem_clflush_object(obj);
849
i915_gem_chipset_flush(dev);
853
if (needs_clflush_after)
854
i915_gem_chipset_flush(dev);
860
* Writes data to the object referenced by handle.
862
* On error, the contents of the buffer that were to be modified are undefined.
865
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
866
struct drm_file *file)
868
struct drm_i915_gem_pwrite *args = data;
869
struct drm_i915_gem_object *obj;
875
if (!access_ok(VERIFY_READ,
876
(char __user *)(uintptr_t)args->data_ptr,
880
ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
885
ret = i915_mutex_lock_interruptible(dev);
889
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
890
if (&obj->base == NULL) {
895
/* Bounds check destination. */
896
if (args->offset > obj->base.size ||
897
args->size > obj->base.size - args->offset) {
902
/* prime objects have no backing filp to GEM pread/pwrite
905
if (!obj->base.filp) {
910
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
913
/* We can only do the GTT pwrite on untiled buffers, as otherwise
914
* it would end up going through the fenced access, and we'll get
915
* different detiling behavior between reading and writing.
916
* pread/pwrite currently are reading and writing from the CPU
917
* perspective, requiring manual detiling by the client.
920
ret = i915_gem_phys_pwrite(dev, obj, args, file);
924
if (obj->cache_level == I915_CACHE_NONE &&
925
obj->tiling_mode == I915_TILING_NONE &&
926
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
927
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
928
/* Note that the gtt paths might fail with non-page-backed user
929
* pointers (e.g. gtt mappings when moving data between
930
* textures). Fallback to the shmem path in that case. */
933
if (ret == -EFAULT || ret == -ENOSPC)
934
ret = i915_gem_shmem_pwrite(dev, obj, args, file);
937
drm_gem_object_unreference(&obj->base);
939
mutex_unlock(&dev->struct_mutex);
944
i915_gem_check_wedge(struct drm_i915_private *dev_priv,
947
if (atomic_read(&dev_priv->mm.wedged)) {
948
struct completion *x = &dev_priv->error_completion;
949
bool recovery_complete;
952
/* Give the error handler a chance to run. */
953
spin_lock_irqsave(&x->wait.lock, flags);
954
recovery_complete = x->done > 0;
955
spin_unlock_irqrestore(&x->wait.lock, flags);
957
/* Non-interruptible callers can't handle -EAGAIN, hence return
958
* -EIO unconditionally for these. */
962
/* Recovery complete, but still wedged means reset failure. */
963
if (recovery_complete)
973
* Compare seqno against outstanding lazy request. Emit a request if they are
977
i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
981
BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
984
if (seqno == ring->outstanding_lazy_request)
985
ret = i915_add_request(ring, NULL, NULL);
991
* __wait_seqno - wait until execution of seqno has finished
992
* @ring: the ring expected to report seqno
994
* @interruptible: do an interruptible wait (normally yes)
995
* @timeout: in - how long to wait (NULL forever); out - how much time remaining
997
* Returns 0 if the seqno was found within the alloted time. Else returns the
998
* errno with remaining time filled in timeout argument.
1000
static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1001
bool interruptible, struct timespec *timeout)
1003
drm_i915_private_t *dev_priv = ring->dev->dev_private;
1004
struct timespec before, now, wait_time={1,0};
1005
unsigned long timeout_jiffies;
1007
bool wait_forever = true;
1010
if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
1013
trace_i915_gem_request_wait_begin(ring, seqno);
1015
if (timeout != NULL) {
1016
wait_time = *timeout;
1017
wait_forever = false;
1020
timeout_jiffies = timespec_to_jiffies(&wait_time);
1022
if (WARN_ON(!ring->irq_get(ring)))
1025
/* Record current time in case interrupted by signal, or wedged * */
1026
getrawmonotonic(&before);
1029
(i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \
1030
atomic_read(&dev_priv->mm.wedged))
1033
end = wait_event_interruptible_timeout(ring->irq_queue,
1037
end = wait_event_timeout(ring->irq_queue, EXIT_COND,
1040
ret = i915_gem_check_wedge(dev_priv, interruptible);
1043
} while (end == 0 && wait_forever);
1045
getrawmonotonic(&now);
1047
ring->irq_put(ring);
1048
trace_i915_gem_request_wait_end(ring, seqno);
1052
struct timespec sleep_time = timespec_sub(now, before);
1053
*timeout = timespec_sub(*timeout, sleep_time);
1058
case -EAGAIN: /* Wedged */
1059
case -ERESTARTSYS: /* Signal */
1061
case 0: /* Timeout */
1063
set_normalized_timespec(timeout, 0, 0);
1065
default: /* Completed */
1066
WARN_ON(end < 0); /* We're not aware of other errors */
1072
* Waits for a sequence number to be signaled, and cleans up the
1073
* request and object lists appropriately for that event.
1076
i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno)
1078
struct drm_device *dev = ring->dev;
1079
struct drm_i915_private *dev_priv = dev->dev_private;
1080
bool interruptible = dev_priv->mm.interruptible;
1083
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1086
ret = i915_gem_check_wedge(dev_priv, interruptible);
1090
ret = i915_gem_check_olr(ring, seqno);
1094
return __wait_seqno(ring, seqno, interruptible, NULL);
1098
* Ensures that all rendering to the object has completed and the object is
1099
* safe to unbind from the GTT or access from the CPU.
1101
static __must_check int
1102
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1105
struct intel_ring_buffer *ring = obj->ring;
1109
seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1113
ret = i915_wait_seqno(ring, seqno);
1117
i915_gem_retire_requests_ring(ring);
1119
/* Manually manage the write flush as we may have not yet
1120
* retired the buffer.
1122
if (obj->last_write_seqno &&
1123
i915_seqno_passed(seqno, obj->last_write_seqno)) {
1124
obj->last_write_seqno = 0;
1125
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1131
/* A nonblocking variant of the above wait. This is a highly dangerous routine
1132
* as the object state may change during this call.
1134
static __must_check int
1135
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
1138
struct drm_device *dev = obj->base.dev;
1139
struct drm_i915_private *dev_priv = dev->dev_private;
1140
struct intel_ring_buffer *ring = obj->ring;
1144
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1145
BUG_ON(!dev_priv->mm.interruptible);
1147
seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
1151
ret = i915_gem_check_wedge(dev_priv, true);
1155
ret = i915_gem_check_olr(ring, seqno);
1159
mutex_unlock(&dev->struct_mutex);
1160
ret = __wait_seqno(ring, seqno, true, NULL);
1161
mutex_lock(&dev->struct_mutex);
1163
i915_gem_retire_requests_ring(ring);
1165
/* Manually manage the write flush as we may have not yet
1166
* retired the buffer.
1168
if (obj->last_write_seqno &&
1169
i915_seqno_passed(seqno, obj->last_write_seqno)) {
1170
obj->last_write_seqno = 0;
1171
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1178
* Called when user space prepares to use an object with the CPU, either
1179
* through the mmap ioctl's mapping or a GTT mapping.
1182
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1183
struct drm_file *file)
1185
struct drm_i915_gem_set_domain *args = data;
1186
struct drm_i915_gem_object *obj;
1187
uint32_t read_domains = args->read_domains;
1188
uint32_t write_domain = args->write_domain;
1191
/* Only handle setting domains to types used by the CPU. */
1192
if (write_domain & I915_GEM_GPU_DOMAINS)
1195
if (read_domains & I915_GEM_GPU_DOMAINS)
1198
/* Having something in the write domain implies it's in the read
1199
* domain, and only that read domain. Enforce that in the request.
1201
if (write_domain != 0 && read_domains != write_domain)
1204
ret = i915_mutex_lock_interruptible(dev);
1208
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1209
if (&obj->base == NULL) {
1214
/* Try to flush the object off the GPU without holding the lock.
1215
* We will repeat the flush holding the lock in the normal manner
1216
* to catch cases where we are gazumped.
1218
ret = i915_gem_object_wait_rendering__nonblocking(obj, !write_domain);
1222
if (read_domains & I915_GEM_DOMAIN_GTT) {
1223
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1225
/* Silently promote "you're not bound, there was nothing to do"
1226
* to success, since the client was just asking us to
1227
* make sure everything was done.
1232
ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1236
drm_gem_object_unreference(&obj->base);
1238
mutex_unlock(&dev->struct_mutex);
1243
* Called when user space has done writes to this buffer
1246
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1247
struct drm_file *file)
1249
struct drm_i915_gem_sw_finish *args = data;
1250
struct drm_i915_gem_object *obj;
1253
ret = i915_mutex_lock_interruptible(dev);
1257
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1258
if (&obj->base == NULL) {
1263
/* Pinned buffers may be scanout, so flush the cache */
1265
i915_gem_object_flush_cpu_write_domain(obj);
1267
drm_gem_object_unreference(&obj->base);
1269
mutex_unlock(&dev->struct_mutex);
1274
* Maps the contents of an object, returning the address it is mapped
1277
* While the mapping holds a reference on the contents of the object, it doesn't
1278
* imply a ref on the object itself.
1281
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1282
struct drm_file *file)
1284
struct drm_i915_gem_mmap *args = data;
1285
struct drm_gem_object *obj;
1288
obj = drm_gem_object_lookup(dev, file, args->handle);
1292
/* prime objects have no backing filp to GEM mmap
1296
drm_gem_object_unreference_unlocked(obj);
1300
addr = vm_mmap(obj->filp, 0, args->size,
1301
PROT_READ | PROT_WRITE, MAP_SHARED,
1303
drm_gem_object_unreference_unlocked(obj);
1304
if (IS_ERR((void *)addr))
1307
args->addr_ptr = (uint64_t) addr;
1313
* i915_gem_fault - fault a page into the GTT
1314
* vma: VMA in question
1317
* The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1318
* from userspace. The fault handler takes care of binding the object to
1319
* the GTT (if needed), allocating and programming a fence register (again,
1320
* only if needed based on whether the old reg is still valid or the object
1321
* is tiled) and inserting a new PTE into the faulting process.
1323
* Note that the faulting process may involve evicting existing objects
1324
* from the GTT and/or fence registers to make room. So performance may
1325
* suffer if the GTT working set is large or there are few fence registers
1328
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1330
struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1331
struct drm_device *dev = obj->base.dev;
1332
drm_i915_private_t *dev_priv = dev->dev_private;
1333
pgoff_t page_offset;
1336
bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1338
/* We don't use vmf->pgoff since that has the fake offset */
1339
page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1342
ret = i915_mutex_lock_interruptible(dev);
1346
trace_i915_gem_object_fault(obj, page_offset, true, write);
1348
/* Now bind it into the GTT if needed */
1349
ret = i915_gem_object_pin(obj, 0, true, false);
1353
ret = i915_gem_object_set_to_gtt_domain(obj, write);
1357
ret = i915_gem_object_get_fence(obj);
1361
obj->fault_mappable = true;
1363
pfn = ((dev_priv->mm.gtt_base_addr + obj->gtt_offset) >> PAGE_SHIFT) +
1366
/* Finally, remap it using the new GTT offset */
1367
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1369
i915_gem_object_unpin(obj);
1371
mutex_unlock(&dev->struct_mutex);
1375
/* If this -EIO is due to a gpu hang, give the reset code a
1376
* chance to clean up the mess. Otherwise return the proper
1378
if (!atomic_read(&dev_priv->mm.wedged))
1379
return VM_FAULT_SIGBUS;
1381
/* Give the error handler a chance to run and move the
1382
* objects off the GPU active list. Next time we service the
1383
* fault, we should be able to transition the page into the
1384
* GTT without touching the GPU (and so avoid further
1385
* EIO/EGAIN). If the GPU is wedged, then there is no issue
1386
* with coherency, just lost writes.
1394
* EBUSY is ok: this just means that another thread
1395
* already did the job.
1397
return VM_FAULT_NOPAGE;
1399
return VM_FAULT_OOM;
1401
return VM_FAULT_SIGBUS;
1403
WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
1404
return VM_FAULT_SIGBUS;
1409
* i915_gem_release_mmap - remove physical page mappings
1410
* @obj: obj in question
1412
* Preserve the reservation of the mmapping with the DRM core code, but
1413
* relinquish ownership of the pages back to the system.
1415
* It is vital that we remove the page mapping if we have mapped a tiled
1416
* object through the GTT and then lose the fence register due to
1417
* resource pressure. Similarly if the object has been moved out of the
1418
* aperture, than pages mapped into userspace must be revoked. Removing the
1419
* mapping will then trigger a page fault on the next user access, allowing
1420
* fixup by i915_gem_fault().
1423
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1425
if (!obj->fault_mappable)
1428
if (obj->base.dev->dev_mapping)
1429
unmap_mapping_range(obj->base.dev->dev_mapping,
1430
(loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1433
obj->fault_mappable = false;
1437
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1441
if (INTEL_INFO(dev)->gen >= 4 ||
1442
tiling_mode == I915_TILING_NONE)
1445
/* Previous chips need a power-of-two fence region when tiling */
1446
if (INTEL_INFO(dev)->gen == 3)
1447
gtt_size = 1024*1024;
1449
gtt_size = 512*1024;
1451
while (gtt_size < size)
1458
* i915_gem_get_gtt_alignment - return required GTT alignment for an object
1459
* @obj: object to check
1461
* Return the required GTT alignment for an object, taking into account
1462
* potential fence register mapping.
1465
i915_gem_get_gtt_alignment(struct drm_device *dev,
1470
* Minimum alignment is 4k (GTT page size), but might be greater
1471
* if a fence register is needed for the object.
1473
if (INTEL_INFO(dev)->gen >= 4 ||
1474
tiling_mode == I915_TILING_NONE)
1478
* Previous chips need to be aligned to the size of the smallest
1479
* fence register that can contain the object.
1481
return i915_gem_get_gtt_size(dev, size, tiling_mode);
1485
* i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1488
* @size: size of the object
1489
* @tiling_mode: tiling mode of the object
1491
* Return the required GTT alignment for an object, only taking into account
1492
* unfenced tiled surface requirements.
1495
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1500
* Minimum alignment is 4k (GTT page size) for sane hw.
1502
if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1503
tiling_mode == I915_TILING_NONE)
1506
/* Previous hardware however needs to be aligned to a power-of-two
1507
* tile height. The simplest method for determining this is to reuse
1508
* the power-of-tile object size.
1510
return i915_gem_get_gtt_size(dev, size, tiling_mode);
1513
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1515
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1518
if (obj->base.map_list.map)
1521
ret = drm_gem_create_mmap_offset(&obj->base);
1525
/* Badly fragmented mmap space? The only way we can recover
1526
* space is by destroying unwanted objects. We can't randomly release
1527
* mmap_offsets as userspace expects them to be persistent for the
1528
* lifetime of the objects. The closest we can is to release the
1529
* offsets on purgeable objects by truncating it and marking it purged,
1530
* which prevents userspace from ever using that object again.
1532
i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
1533
ret = drm_gem_create_mmap_offset(&obj->base);
1537
i915_gem_shrink_all(dev_priv);
1538
return drm_gem_create_mmap_offset(&obj->base);
1541
static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1543
if (!obj->base.map_list.map)
1546
drm_gem_free_mmap_offset(&obj->base);
1550
i915_gem_mmap_gtt(struct drm_file *file,
1551
struct drm_device *dev,
1555
struct drm_i915_private *dev_priv = dev->dev_private;
1556
struct drm_i915_gem_object *obj;
1559
ret = i915_mutex_lock_interruptible(dev);
1563
obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1564
if (&obj->base == NULL) {
1569
if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1574
if (obj->madv != I915_MADV_WILLNEED) {
1575
DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1580
ret = i915_gem_object_create_mmap_offset(obj);
1584
*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1587
drm_gem_object_unreference(&obj->base);
1589
mutex_unlock(&dev->struct_mutex);
1594
* i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1596
* @data: GTT mapping ioctl data
1597
* @file: GEM object info
1599
* Simply returns the fake offset to userspace so it can mmap it.
1600
* The mmap call will end up in drm_gem_mmap(), which will set things
1601
* up so we can get faults in the handler above.
1603
* The fault handler will take care of binding the object into the GTT
1604
* (since it may have been evicted to make room for something), allocating
1605
* a fence register, and mapping the appropriate aperture address into
1609
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1610
struct drm_file *file)
1612
struct drm_i915_gem_mmap_gtt *args = data;
1614
return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1617
/* Immediately discard the backing storage */
1619
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1621
struct inode *inode;
1623
i915_gem_object_free_mmap_offset(obj);
1625
if (obj->base.filp == NULL)
1628
/* Our goal here is to return as much of the memory as
1629
* is possible back to the system as we are called from OOM.
1630
* To do this we must instruct the shmfs to drop all of its
1631
* backing pages, *now*.
1633
inode = obj->base.filp->f_path.dentry->d_inode;
1634
shmem_truncate_range(inode, 0, (loff_t)-1);
1636
obj->madv = __I915_MADV_PURGED;
1640
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1642
return obj->madv == I915_MADV_DONTNEED;
1646
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1648
int page_count = obj->base.size / PAGE_SIZE;
1649
struct scatterlist *sg;
1652
BUG_ON(obj->madv == __I915_MADV_PURGED);
1654
ret = i915_gem_object_set_to_cpu_domain(obj, true);
1656
/* In the event of a disaster, abandon all caches and
1657
* hope for the best.
1659
WARN_ON(ret != -EIO);
1660
i915_gem_clflush_object(obj);
1661
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1664
if (i915_gem_object_needs_bit17_swizzle(obj))
1665
i915_gem_object_save_bit_17_swizzle(obj);
1667
if (obj->madv == I915_MADV_DONTNEED)
1670
for_each_sg(obj->pages->sgl, sg, page_count, i) {
1671
struct page *page = sg_page(sg);
1674
set_page_dirty(page);
1676
if (obj->madv == I915_MADV_WILLNEED)
1677
mark_page_accessed(page);
1679
page_cache_release(page);
1683
sg_free_table(obj->pages);
1688
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1690
const struct drm_i915_gem_object_ops *ops = obj->ops;
1692
if (obj->pages == NULL)
1695
BUG_ON(obj->gtt_space);
1697
if (obj->pages_pin_count)
1700
ops->put_pages(obj);
1703
list_del(&obj->gtt_list);
1704
if (i915_gem_object_is_purgeable(obj))
1705
i915_gem_object_truncate(obj);
1711
i915_gem_purge(struct drm_i915_private *dev_priv, long target)
1713
struct drm_i915_gem_object *obj, *next;
1716
list_for_each_entry_safe(obj, next,
1717
&dev_priv->mm.unbound_list,
1719
if (i915_gem_object_is_purgeable(obj) &&
1720
i915_gem_object_put_pages(obj) == 0) {
1721
count += obj->base.size >> PAGE_SHIFT;
1722
if (count >= target)
1727
list_for_each_entry_safe(obj, next,
1728
&dev_priv->mm.inactive_list,
1730
if (i915_gem_object_is_purgeable(obj) &&
1731
i915_gem_object_unbind(obj) == 0 &&
1732
i915_gem_object_put_pages(obj) == 0) {
1733
count += obj->base.size >> PAGE_SHIFT;
1734
if (count >= target)
1743
i915_gem_shrink_all(struct drm_i915_private *dev_priv)
1745
struct drm_i915_gem_object *obj, *next;
1747
i915_gem_evict_everything(dev_priv->dev);
1749
list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
1750
i915_gem_object_put_pages(obj);
1754
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
1756
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1758
struct address_space *mapping;
1759
struct sg_table *st;
1760
struct scatterlist *sg;
1764
/* Assert that the object is not currently in any GPU domain. As it
1765
* wasn't in the GTT, there shouldn't be any way it could have been in
1768
BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1769
BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1771
st = kmalloc(sizeof(*st), GFP_KERNEL);
1775
page_count = obj->base.size / PAGE_SIZE;
1776
if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1782
/* Get the list of pages out of our struct file. They'll be pinned
1783
* at this point until we release them.
1785
* Fail silently without starting the shrinker
1787
mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
1788
gfp = mapping_gfp_mask(mapping);
1789
gfp |= __GFP_NORETRY | __GFP_NOWARN;
1790
gfp &= ~(__GFP_IO | __GFP_WAIT);
1791
for_each_sg(st->sgl, sg, page_count, i) {
1792
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1794
i915_gem_purge(dev_priv, page_count);
1795
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1798
/* We've tried hard to allocate the memory by reaping
1799
* our own buffer, now let the real VM do its job and
1800
* go down in flames if truly OOM.
1802
gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
1803
gfp |= __GFP_IO | __GFP_WAIT;
1805
i915_gem_shrink_all(dev_priv);
1806
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
1810
gfp |= __GFP_NORETRY | __GFP_NOWARN;
1811
gfp &= ~(__GFP_IO | __GFP_WAIT);
1814
sg_set_page(sg, page, PAGE_SIZE, 0);
1819
if (i915_gem_object_needs_bit17_swizzle(obj))
1820
i915_gem_object_do_bit_17_swizzle(obj);
1825
for_each_sg(st->sgl, sg, i, page_count)
1826
page_cache_release(sg_page(sg));
1829
return PTR_ERR(page);
1832
/* Ensure that the associated pages are gathered from the backing storage
1833
* and pinned into our object. i915_gem_object_get_pages() may be called
1834
* multiple times before they are released by a single call to
1835
* i915_gem_object_put_pages() - once the pages are no longer referenced
1836
* either as a result of memory pressure (reaping pages under the shrinker)
1837
* or as the object is itself released.
1840
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
1842
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1843
const struct drm_i915_gem_object_ops *ops = obj->ops;
1849
BUG_ON(obj->pages_pin_count);
1851
ret = ops->get_pages(obj);
1855
list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
1860
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1861
struct intel_ring_buffer *ring,
1864
struct drm_device *dev = obj->base.dev;
1865
struct drm_i915_private *dev_priv = dev->dev_private;
1867
BUG_ON(ring == NULL);
1870
/* Add a reference if we're newly entering the active list. */
1872
drm_gem_object_reference(&obj->base);
1876
/* Move from whatever list we were on to the tail of execution. */
1877
list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1878
list_move_tail(&obj->ring_list, &ring->active_list);
1880
obj->last_read_seqno = seqno;
1882
if (obj->fenced_gpu_access) {
1883
obj->last_fenced_seqno = seqno;
1885
/* Bump MRU to take account of the delayed flush */
1886
if (obj->fence_reg != I915_FENCE_REG_NONE) {
1887
struct drm_i915_fence_reg *reg;
1889
reg = &dev_priv->fence_regs[obj->fence_reg];
1890
list_move_tail(®->lru_list,
1891
&dev_priv->mm.fence_list);
1897
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1899
struct drm_device *dev = obj->base.dev;
1900
struct drm_i915_private *dev_priv = dev->dev_private;
1902
BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
1903
BUG_ON(!obj->active);
1905
if (obj->pin_count) /* are we a framebuffer? */
1906
intel_mark_fb_idle(obj);
1908
list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1910
list_del_init(&obj->ring_list);
1913
obj->last_read_seqno = 0;
1914
obj->last_write_seqno = 0;
1915
obj->base.write_domain = 0;
1917
obj->last_fenced_seqno = 0;
1918
obj->fenced_gpu_access = false;
1921
drm_gem_object_unreference(&obj->base);
1923
WARN_ON(i915_verify_lists(dev));
1927
i915_gem_get_seqno(struct drm_device *dev)
1929
drm_i915_private_t *dev_priv = dev->dev_private;
1930
u32 seqno = dev_priv->next_seqno;
1932
/* reserve 0 for non-seqno */
1933
if (++dev_priv->next_seqno == 0)
1934
dev_priv->next_seqno = 1;
1940
i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
1942
if (ring->outstanding_lazy_request == 0)
1943
ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
1945
return ring->outstanding_lazy_request;
1949
i915_add_request(struct intel_ring_buffer *ring,
1950
struct drm_file *file,
1953
drm_i915_private_t *dev_priv = ring->dev->dev_private;
1954
struct drm_i915_gem_request *request;
1955
u32 request_ring_position;
1961
* Emit any outstanding flushes - execbuf can fail to emit the flush
1962
* after having emitted the batchbuffer command. Hence we need to fix
1963
* things up similar to emitting the lazy request. The difference here
1964
* is that the flush _must_ happen before the next request, no matter
1967
ret = intel_ring_flush_all_caches(ring);
1971
request = kmalloc(sizeof(*request), GFP_KERNEL);
1972
if (request == NULL)
1975
seqno = i915_gem_next_request_seqno(ring);
1977
/* Record the position of the start of the request so that
1978
* should we detect the updated seqno part-way through the
1979
* GPU processing the request, we never over-estimate the
1980
* position of the head.
1982
request_ring_position = intel_ring_get_tail(ring);
1984
ret = ring->add_request(ring, &seqno);
1990
trace_i915_gem_request_add(ring, seqno);
1992
request->seqno = seqno;
1993
request->ring = ring;
1994
request->tail = request_ring_position;
1995
request->emitted_jiffies = jiffies;
1996
was_empty = list_empty(&ring->request_list);
1997
list_add_tail(&request->list, &ring->request_list);
1998
request->file_priv = NULL;
2001
struct drm_i915_file_private *file_priv = file->driver_priv;
2003
spin_lock(&file_priv->mm.lock);
2004
request->file_priv = file_priv;
2005
list_add_tail(&request->client_list,
2006
&file_priv->mm.request_list);
2007
spin_unlock(&file_priv->mm.lock);
2010
ring->outstanding_lazy_request = 0;
2012
if (!dev_priv->mm.suspended) {
2013
if (i915_enable_hangcheck) {
2014
mod_timer(&dev_priv->hangcheck_timer,
2015
round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
2018
queue_delayed_work(dev_priv->wq,
2019
&dev_priv->mm.retire_work,
2020
round_jiffies_up_relative(HZ));
2021
intel_mark_busy(dev_priv->dev);
2031
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2033
struct drm_i915_file_private *file_priv = request->file_priv;
2038
spin_lock(&file_priv->mm.lock);
2039
if (request->file_priv) {
2040
list_del(&request->client_list);
2041
request->file_priv = NULL;
2043
spin_unlock(&file_priv->mm.lock);
2046
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2047
struct intel_ring_buffer *ring)
2049
while (!list_empty(&ring->request_list)) {
2050
struct drm_i915_gem_request *request;
2052
request = list_first_entry(&ring->request_list,
2053
struct drm_i915_gem_request,
2056
list_del(&request->list);
2057
i915_gem_request_remove_from_client(request);
2061
while (!list_empty(&ring->active_list)) {
2062
struct drm_i915_gem_object *obj;
2064
obj = list_first_entry(&ring->active_list,
2065
struct drm_i915_gem_object,
2068
i915_gem_object_move_to_inactive(obj);
2072
static void i915_gem_reset_fences(struct drm_device *dev)
2074
struct drm_i915_private *dev_priv = dev->dev_private;
2077
for (i = 0; i < dev_priv->num_fence_regs; i++) {
2078
struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2080
i915_gem_write_fence(dev, i, NULL);
2083
i915_gem_object_fence_lost(reg->obj);
2087
INIT_LIST_HEAD(®->lru_list);
2090
INIT_LIST_HEAD(&dev_priv->mm.fence_list);
2093
void i915_gem_reset(struct drm_device *dev)
2095
struct drm_i915_private *dev_priv = dev->dev_private;
2096
struct drm_i915_gem_object *obj;
2097
struct intel_ring_buffer *ring;
2100
for_each_ring(ring, dev_priv, i)
2101
i915_gem_reset_ring_lists(dev_priv, ring);
2103
/* Move everything out of the GPU domains to ensure we do any
2104
* necessary invalidation upon reuse.
2106
list_for_each_entry(obj,
2107
&dev_priv->mm.inactive_list,
2110
obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2113
/* The fence registers are invalidated so clear them out */
2114
i915_gem_reset_fences(dev);
2118
* This function clears the request list as sequence numbers are passed.
2121
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
2126
if (list_empty(&ring->request_list))
2129
WARN_ON(i915_verify_lists(ring->dev));
2131
seqno = ring->get_seqno(ring, true);
2133
for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
2134
if (seqno >= ring->sync_seqno[i])
2135
ring->sync_seqno[i] = 0;
2137
while (!list_empty(&ring->request_list)) {
2138
struct drm_i915_gem_request *request;
2140
request = list_first_entry(&ring->request_list,
2141
struct drm_i915_gem_request,
2144
if (!i915_seqno_passed(seqno, request->seqno))
2147
trace_i915_gem_request_retire(ring, request->seqno);
2148
/* We know the GPU must have read the request to have
2149
* sent us the seqno + interrupt, so use the position
2150
* of tail of the request to update the last known position
2153
ring->last_retired_head = request->tail;
2155
list_del(&request->list);
2156
i915_gem_request_remove_from_client(request);
2160
/* Move any buffers on the active list that are no longer referenced
2161
* by the ringbuffer to the flushing/inactive lists as appropriate.
2163
while (!list_empty(&ring->active_list)) {
2164
struct drm_i915_gem_object *obj;
2166
obj = list_first_entry(&ring->active_list,
2167
struct drm_i915_gem_object,
2170
if (!i915_seqno_passed(seqno, obj->last_read_seqno))
2173
i915_gem_object_move_to_inactive(obj);
2176
if (unlikely(ring->trace_irq_seqno &&
2177
i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
2178
ring->irq_put(ring);
2179
ring->trace_irq_seqno = 0;
2182
WARN_ON(i915_verify_lists(ring->dev));
2186
i915_gem_retire_requests(struct drm_device *dev)
2188
drm_i915_private_t *dev_priv = dev->dev_private;
2189
struct intel_ring_buffer *ring;
2192
for_each_ring(ring, dev_priv, i)
2193
i915_gem_retire_requests_ring(ring);
2197
i915_gem_retire_work_handler(struct work_struct *work)
2199
drm_i915_private_t *dev_priv;
2200
struct drm_device *dev;
2201
struct intel_ring_buffer *ring;
2205
dev_priv = container_of(work, drm_i915_private_t,
2206
mm.retire_work.work);
2207
dev = dev_priv->dev;
2209
/* Come back later if the device is busy... */
2210
if (!mutex_trylock(&dev->struct_mutex)) {
2211
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2212
round_jiffies_up_relative(HZ));
2216
i915_gem_retire_requests(dev);
2218
/* Send a periodic flush down the ring so we don't hold onto GEM
2219
* objects indefinitely.
2222
for_each_ring(ring, dev_priv, i) {
2223
if (ring->gpu_caches_dirty)
2224
i915_add_request(ring, NULL, NULL);
2226
idle &= list_empty(&ring->request_list);
2229
if (!dev_priv->mm.suspended && !idle)
2230
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2231
round_jiffies_up_relative(HZ));
2233
intel_mark_idle(dev);
2235
mutex_unlock(&dev->struct_mutex);
2239
* Ensures that an object will eventually get non-busy by flushing any required
2240
* write domains, emitting any outstanding lazy request and retiring and
2241
* completed requests.
2244
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2249
ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
2253
i915_gem_retire_requests_ring(obj->ring);
2260
* i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2261
* @DRM_IOCTL_ARGS: standard ioctl arguments
2263
* Returns 0 if successful, else an error is returned with the remaining time in
2264
* the timeout parameter.
2265
* -ETIME: object is still busy after timeout
2266
* -ERESTARTSYS: signal interrupted the wait
2267
* -ENONENT: object doesn't exist
2268
* Also possible, but rare:
2269
* -EAGAIN: GPU wedged
2271
* -ENODEV: Internal IRQ fail
2272
* -E?: The add request failed
2274
* The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2275
* non-zero timeout parameter the wait ioctl will wait for the given number of
2276
* nanoseconds on an object becoming unbusy. Since the wait itself does so
2277
* without holding struct_mutex the object may become re-busied before this
2278
* function completes. A similar but shorter * race condition exists in the busy
2282
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2284
struct drm_i915_gem_wait *args = data;
2285
struct drm_i915_gem_object *obj;
2286
struct intel_ring_buffer *ring = NULL;
2287
struct timespec timeout_stack, *timeout = NULL;
2291
if (args->timeout_ns >= 0) {
2292
timeout_stack = ns_to_timespec(args->timeout_ns);
2293
timeout = &timeout_stack;
2296
ret = i915_mutex_lock_interruptible(dev);
2300
obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2301
if (&obj->base == NULL) {
2302
mutex_unlock(&dev->struct_mutex);
2306
/* Need to make sure the object gets inactive eventually. */
2307
ret = i915_gem_object_flush_active(obj);
2312
seqno = obj->last_read_seqno;
2319
/* Do this after OLR check to make sure we make forward progress polling
2320
* on this IOCTL with a 0 timeout (like busy ioctl)
2322
if (!args->timeout_ns) {
2327
drm_gem_object_unreference(&obj->base);
2328
mutex_unlock(&dev->struct_mutex);
2330
ret = __wait_seqno(ring, seqno, true, timeout);
2332
WARN_ON(!timespec_valid(timeout));
2333
args->timeout_ns = timespec_to_ns(timeout);
2338
drm_gem_object_unreference(&obj->base);
2339
mutex_unlock(&dev->struct_mutex);
2344
* i915_gem_object_sync - sync an object to a ring.
2346
* @obj: object which may be in use on another ring.
2347
* @to: ring we wish to use the object on. May be NULL.
2349
* This code is meant to abstract object synchronization with the GPU.
2350
* Calling with NULL implies synchronizing the object with the CPU
2351
* rather than a particular GPU ring.
2353
* Returns 0 if successful, else propagates up the lower layer error.
2356
i915_gem_object_sync(struct drm_i915_gem_object *obj,
2357
struct intel_ring_buffer *to)
2359
struct intel_ring_buffer *from = obj->ring;
2363
if (from == NULL || to == from)
2366
if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
2367
return i915_gem_object_wait_rendering(obj, false);
2369
idx = intel_ring_sync_index(from, to);
2371
seqno = obj->last_read_seqno;
2372
if (seqno <= from->sync_seqno[idx])
2375
ret = i915_gem_check_olr(obj->ring, seqno);
2379
ret = to->sync_to(to, from, seqno);
2381
from->sync_seqno[idx] = seqno;
2386
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2388
u32 old_write_domain, old_read_domains;
2390
/* Act a barrier for all accesses through the GTT */
2393
/* Force a pagefault for domain tracking on next user access */
2394
i915_gem_release_mmap(obj);
2396
if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2399
old_read_domains = obj->base.read_domains;
2400
old_write_domain = obj->base.write_domain;
2402
obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2403
obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2405
trace_i915_gem_object_change_domain(obj,
2411
* Unbinds an object from the GTT aperture.
2414
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2416
drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2419
if (obj->gtt_space == NULL)
2425
BUG_ON(obj->pages == NULL);
2427
ret = i915_gem_object_finish_gpu(obj);
2430
/* Continue on if we fail due to EIO, the GPU is hung so we
2431
* should be safe and we need to cleanup or else we might
2432
* cause memory corruption through use-after-free.
2435
i915_gem_object_finish_gtt(obj);
2437
/* release the fence reg _after_ flushing */
2438
ret = i915_gem_object_put_fence(obj);
2442
trace_i915_gem_object_unbind(obj);
2444
if (obj->has_global_gtt_mapping)
2445
i915_gem_gtt_unbind_object(obj);
2446
if (obj->has_aliasing_ppgtt_mapping) {
2447
i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2448
obj->has_aliasing_ppgtt_mapping = 0;
2450
i915_gem_gtt_finish_object(obj);
2452
list_del(&obj->mm_list);
2453
list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
2454
/* Avoid an unnecessary call to unbind on rebind. */
2455
obj->map_and_fenceable = true;
2457
drm_mm_put_block_hsw(obj->gtt_space);
2458
obj->gtt_space = NULL;
2459
obj->gtt_offset = 0;
static int i915_ring_idle(struct intel_ring_buffer *ring)
	if (list_empty(&ring->active_list))
	return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring));

int i915_gpu_idle(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	/* Flush everything onto the inactive list. */
	for_each_ring(ring, dev_priv, i) {
		ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
		ret = i915_ring_idle(ring);
static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
					struct drm_i915_gem_object *obj)
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	val = (uint64_t)((obj->gtt_offset + size - 4096) &
	val |= obj->gtt_offset & 0xfffff000;
	val |= (uint64_t)((obj->stride / 128) - 1) <<
		SANDYBRIDGE_FENCE_PITCH_SHIFT;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;
	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
	POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
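	/* Worked example (values chosen for illustration, assuming the
	 * truncated expression above packs the upper fence bound into the
	 * high dword as on i965): a 1 MiB X-tiled object at GTT offset
	 * 0x00400000 with a 512-byte stride gives a pitch field of
	 * (512 / 128) - 1 = 3, a lower bound of 0x00400000, an upper bound
	 * derived from 0x00400000 + 0x100000 - 4096 = 0x004ff000, the
	 * Y-tiling bit clear and I965_FENCE_REG_VALID set.
	 */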
static void i965_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	val = (uint64_t)((obj->gtt_offset + size - 4096) &
	val |= obj->gtt_offset & 0xfffff000;
	val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;
	I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
	POSTING_READ(FENCE_REG_965_0 + reg * 8);
static void i915_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
	     (size & -size) != size ||
	     (obj->gtt_offset & (size - 1)),
	     "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
	     obj->gtt_offset, obj->map_and_fenceable, size);
	if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
	/* Note: pitch better be a power of two tile widths */
	pitch_val = obj->stride / tile_width;
	pitch_val = ffs(pitch_val) - 1;
	val = obj->gtt_offset;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I915_FENCE_SIZE_BITS(size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;
	reg = FENCE_REG_830_0 + reg * 4;
	reg = FENCE_REG_945_8 + (reg - 8) * 4;
	I915_WRITE(reg, val);
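	/* Example (illustrative): with 128-byte wide Y tiles (the
	 * HAS_128_BYTE_Y_TILING() case above) a 512-byte stride gives
	 * pitch_val = ffs(512 / 128) - 1 = 2, i.e. the pitch is encoded as
	 * log2 of the stride expressed in tile widths.
	 */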
static void i830_write_fence_reg(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
	     (size & -size) != size ||
	     (obj->gtt_offset & (size - 1)),
	     "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
	     obj->gtt_offset, size);
	pitch_val = obj->stride / 128;
	pitch_val = ffs(pitch_val) - 1;
	val = obj->gtt_offset;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I830_FENCE_SIZE_BITS(size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;
	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
	POSTING_READ(FENCE_REG_830_0 + reg * 4);
static void i915_gem_write_fence(struct drm_device *dev, int reg,
				 struct drm_i915_gem_object *obj)
	switch (INTEL_INFO(dev)->gen) {
	case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
	case 4: i965_write_fence_reg(dev, reg, obj); break;
	case 3: i915_write_fence_reg(dev, reg, obj); break;
	case 2: i830_write_fence_reg(dev, reg, obj); break;

static inline int fence_number(struct drm_i915_private *dev_priv,
			       struct drm_i915_fence_reg *fence)
	return fence - dev_priv->fence_regs;
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	int reg = fence_number(dev_priv, fence);
	i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
	obj->fence_reg = reg;
	list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
	obj->fence_reg = I915_FENCE_REG_NONE;
	list_del_init(&fence->lru_list);

i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
	if (obj->last_fenced_seqno) {
		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
		obj->last_fenced_seqno = 0;
	/* Ensure that all CPU reads are completed before installing a fence
	 * and all writes before removing the fence.
	 */
	if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
	obj->fenced_gpu_access = false;
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	ret = i915_gem_object_flush_fence(obj);
	if (obj->fence_reg == I915_FENCE_REG_NONE)
	i915_gem_object_update_fence(obj,
				     &dev_priv->fence_regs[obj->fence_reg],
	i915_gem_object_fence_lost(obj);
static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg, *avail;
	/* First try to find a free reg */
	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
		reg = &dev_priv->fence_regs[i];
		if (!reg->pin_count)
	/* None available, try to steal one or wait for a user to finish */
	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
/**
 * i915_gem_object_get_fence - set up fencing for an object
 * @obj: object to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * For an untiled surface, this removes any existing fence.
 */
i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool enable = obj->tiling_mode != I915_TILING_NONE;
	struct drm_i915_fence_reg *reg;
	/* Have we updated the tiling parameters upon the object and so
	 * will need to serialise the write to the associated fence register?
	 */
	if (obj->fence_dirty) {
		ret = i915_gem_object_flush_fence(obj);
	/* Just update our place in the LRU if our fence is getting reused. */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		if (!obj->fence_dirty) {
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
	} else if (enable) {
		reg = i915_find_fence_reg(dev);
			struct drm_i915_gem_object *old = reg->obj;
			ret = i915_gem_object_flush_fence(old);
			i915_gem_object_fence_lost(old);
	i915_gem_object_update_fence(obj, reg, enable);
	obj->fence_dirty = false;
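	/* Summary of the cases handled above: an object that already owns a
	 * fence only has its register's LRU position refreshed (or the
	 * register rewritten if the tiling parameters changed); a tiled
	 * object without a fence gets a free or stolen register via
	 * i915_find_fence_reg(); an untiled object ends up with any
	 * existing fence disabled by the final update_fence() call.
	 */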
static bool i915_gem_valid_gtt_space(struct drm_device *dev,
				     struct drm_mm_node_hsw *gtt_space,
				     unsigned long cache_level)
	struct drm_mm_node_hsw *other;
	/* On non-LLC machines we have to be careful when putting differing
	 * types of snoopable memory together to avoid the prefetcher
	 * crossing memory domains and dying.
	 */
	if (gtt_space == NULL)
	if (list_empty(&gtt_space->node_list))
	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node_hsw, node_list);
	if (other->allocated && !other->hole_follows && other->color != cache_level)
	other = list_entry(gtt_space->node_list.next, struct drm_mm_node_hsw, node_list);
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
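	/* Example (illustrative): on a non-LLC part, placing a snooped
	 * (I915_CACHE_LLC) node immediately adjacent to an uncached
	 * (I915_CACHE_NONE) neighbour with no hole between them fails the
	 * checks above, so the allocator must leave a guard hole or pick a
	 * different range.
	 */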
static void i915_gem_verify_gtt(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
		if (obj->gtt_space == NULL) {
			printk(KERN_ERR "object found on GTT list with no space reserved\n");
		if (obj->cache_level != obj->gtt_space->color) {
			printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
			       obj->gtt_space->start,
			       obj->gtt_space->start + obj->gtt_space->size,
			       obj->gtt_space->color);
		if (!i915_gem_valid_gtt_space(dev,
					      obj->cache_level)) {
			printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
			       obj->gtt_space->start,
			       obj->gtt_space->start + obj->gtt_space->size,
/**
 * Finds free space in the GTT aperture and binds the object there.
 */
i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
			    bool map_and_fenceable,
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_mm_node_hsw *free_space;
	u32 size, fence_size, fence_alignment, unfenced_alignment;
	bool mappable, fenceable;
	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to bind a purgeable object\n");
	fence_size = i915_gem_get_gtt_size(dev,
	fence_alignment = i915_gem_get_gtt_alignment(dev,
	unfenced_alignment =
		i915_gem_get_unfenced_gtt_alignment(dev,
	alignment = map_and_fenceable ? fence_alignment :
	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
	size = map_and_fenceable ? fence_size : obj->base.size;
	/* If the object is bigger than the entire aperture, reject it early
	 * before evicting everything in a vain attempt to find space.
	 */
	if (obj->base.size >
	    (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
		DRM_ERROR("Attempting to bind an object larger than the aperture\n");
	ret = i915_gem_object_get_pages(obj);
	i915_gem_object_pin_pages(obj);
	if (map_and_fenceable)
		free_space = drm_mm_search_free_in_range_color_hsw(&dev_priv->mm.gtt_space,
								   size, alignment, obj->cache_level,
								   0, dev_priv->mm.gtt_mappable_end,
		free_space = drm_mm_search_free_color_hsw(&dev_priv->mm.gtt_space,
							  size, alignment, obj->cache_level,
	if (free_space != NULL) {
		if (map_and_fenceable)
			drm_mm_get_block_range_generic_hsw(free_space,
							   size, alignment, obj->cache_level,
							   0, dev_priv->mm.gtt_mappable_end,
			drm_mm_get_block_generic_hsw(free_space,
						     size, alignment, obj->cache_level,
	if (free_space == NULL) {
		ret = i915_gem_evict_something(dev, size, alignment,
		i915_gem_object_unpin_pages(obj);
	if (WARN_ON(!i915_gem_valid_gtt_space(dev,
					      obj->cache_level))) {
		i915_gem_object_unpin_pages(obj);
		drm_mm_put_block_hsw(free_space);
	ret = i915_gem_gtt_prepare_object(obj);
		i915_gem_object_unpin_pages(obj);
		drm_mm_put_block_hsw(free_space);
	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
	obj->gtt_space = free_space;
	obj->gtt_offset = free_space->start;
	fenceable =
		free_space->size == fence_size &&
		(free_space->start & (fence_alignment - 1)) == 0;
	mappable =
		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
	obj->map_and_fenceable = mappable && fenceable;
	i915_gem_object_unpin_pages(obj);
	trace_i915_gem_object_bind(obj, map_and_fenceable);
	i915_gem_verify_gtt(dev);
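	/* Note on the flags computed above: "fenceable" requires the node to
	 * be exactly fence_size and aligned to fence_alignment, while
	 * "mappable" requires the object to end below gtt_mappable_end;
	 * only when both hold is obj->map_and_fenceable set, which is what
	 * the rest of the driver uses to decide whether a fence register and
	 * a CPU mapping through the GTT aperture can be used.
	 */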
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
	if (obj->pages == NULL)
	/* If the GPU is snooping the contents of the CPU cache,
	 * we do not need to manually clear the CPU cache lines. However,
	 * the caches are only snooped when the render cache is
	 * flushed/invalidated. As we always have to emit invalidations
	 * and flushes when moving into and out of the RENDER domain, correct
	 * snooping behaviour occurs naturally as the result of our domain
	 */
	if (obj->cache_level != I915_CACHE_NONE)
	trace_i915_gem_object_clflush(obj);
	drm_clflush_sg(obj->pages);
/** Flushes the GTT write domain for the object if it's dirty. */
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
	uint32_t old_write_domain;
	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
	/* No actual flushing is required for the GTT write domain. Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush. It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 */
	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;
	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
/** Flushes the CPU write domain for the object if it's dirty. */
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
	uint32_t old_write_domain;
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
	i915_gem_clflush_object(obj);
	i915_gem_chipset_flush(obj->base.dev);
	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;
	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 */
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
	uint32_t old_write_domain, old_read_domains;
	/* Not valid to be called on unbound objects. */
	if (obj->gtt_space == NULL)
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
	ret = i915_gem_object_wait_rendering(obj, !write);
	i915_gem_object_flush_cpu_write_domain(obj);
	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;
	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
	trace_i915_gem_object_change_domain(obj,
	/* And bump the LRU for this access */
	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	if (obj->cache_level == cache_level)
	if (obj->pin_count) {
		DRM_DEBUG("can not change the cache level of pinned objects\n");
	if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
		ret = i915_gem_object_unbind(obj);
	if (obj->gtt_space) {
		ret = i915_gem_object_finish_gpu(obj);
		i915_gem_object_finish_gtt(obj);
		/* Before SandyBridge, you could not use tiling or fence
		 * registers with snooped memory, so relinquish any fences
		 * currently pointing to our region in the aperture.
		 */
		if (INTEL_INFO(dev)->gen < 6) {
			ret = i915_gem_object_put_fence(obj);
		if (obj->has_global_gtt_mapping)
			i915_gem_gtt_bind_object(obj, cache_level);
		if (obj->has_aliasing_ppgtt_mapping)
			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
		obj->gtt_space->color = cache_level;
	if (cache_level == I915_CACHE_NONE) {
		u32 old_read_domains, old_write_domain;
		/* If we're coming from LLC cached, then we haven't
		 * actually been tracking whether the data is in the
		 * CPU cache or not, since we only allow one bit set
		 * in obj->write_domain and have been skipping the clflushes.
		 * Just set it to the CPU cache for now.
		 */
		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
		WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
		old_read_domains = obj->base.read_domains;
		old_write_domain = obj->base.write_domain;
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
		trace_i915_gem_object_change_domain(obj,
	obj->cache_level = cache_level;
	i915_gem_verify_gtt(dev);
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	ret = i915_mutex_lock_interruptible(dev);
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
	args->caching = obj->cache_level != I915_CACHE_NONE;
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
	case I915_CACHING_CACHED:
		level = I915_CACHE_LLC;
	ret = i915_mutex_lock_interruptible(dev);
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
	ret = i915_gem_object_set_cache_level(obj, level);
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
/*
 * Prepare buffer for display plane (scanout, cursors, etc).
 * Can be called from an uninterruptible phase (modesetting) and allows
 * any flushes to be pipelined (for pageflips).
 */
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct intel_ring_buffer *pipelined)
	u32 old_read_domains, old_write_domain;
	if (pipelined != obj->ring) {
		ret = i915_gem_object_sync(obj, pipelined);
	/* The display engine is not coherent with the LLC cache on gen6. As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
	/* As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers.
	 */
	ret = i915_gem_object_pin(obj, alignment, true, false);
	i915_gem_object_flush_cpu_write_domain(obj);
	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;
	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	obj->base.write_domain = 0;
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	trace_i915_gem_object_change_domain(obj,
i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
	ret = i915_gem_object_wait_rendering(obj, false);
	/* Ensure that we invalidate the GPU's caches and TLBs. */
	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 */
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
	uint32_t old_write_domain, old_read_domains;
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
	ret = i915_gem_object_wait_rendering(obj, !write);
	i915_gem_object_flush_gtt_write_domain(obj);
	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;
	/* Flush the CPU cache if it's still invalid. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj);
		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	trace_i915_gem_object_change_domain(obj,
/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;
	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
	struct drm_i915_gem_request *request;
	struct intel_ring_buffer *ring = NULL;
	if (atomic_read(&dev_priv->mm.wedged))
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
		if (time_after_eq(request->emitted_jiffies, recent_enough))
		ring = request->ring;
		seqno = request->seqno;
	spin_unlock(&file_priv->mm.lock);
	ret = __wait_seqno(ring, seqno, true, NULL);
	queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
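	/* Net effect (illustrative): requests emitted within the last 20 ms
	 * are left alone, so the loop above settles on the newest request
	 * that is at least 20 ms old and __wait_seqno() blocks on it. A
	 * client can therefore keep roughly 20 ms of rendering queued
	 * before it is throttled.
	 */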
i915_gem_object_pin(struct drm_i915_gem_object *obj,
		    bool map_and_fenceable,
	if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
	if (obj->gtt_space != NULL) {
		if ((alignment && obj->gtt_offset & (alignment - 1)) ||
		    (map_and_fenceable && !obj->map_and_fenceable)) {
			WARN(obj->pin_count,
			     "bo is already pinned with incorrect alignment:"
			     " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
			     " obj->map_and_fenceable=%d\n",
			     obj->gtt_offset, alignment,
			     obj->map_and_fenceable);
			ret = i915_gem_object_unbind(obj);
	if (obj->gtt_space == NULL) {
		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
		ret = i915_gem_object_bind_to_gtt(obj, alignment,
		if (!dev_priv->mm.aliasing_ppgtt)
			i915_gem_gtt_bind_object(obj, obj->cache_level);
	if (!obj->has_global_gtt_mapping && map_and_fenceable)
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	obj->pin_mappable |= map_and_fenceable;
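	/* Minimal usage sketch (illustrative), mirroring the callers in this
	 * file: a pinned object stays bound at a stable gtt_offset until the
	 * matching unpin.
	 *
	 *	ret = i915_gem_object_pin(obj, 4096, true, false);
	 *	if (ret == 0) {
	 *		... access obj through the mappable GTT ...
	 *		i915_gem_object_unpin(obj);
	 *	}
	 */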
i915_gem_object_unpin(struct drm_i915_gem_object *obj)
	BUG_ON(obj->pin_count == 0);
	BUG_ON(obj->gtt_space == NULL);
	if (--obj->pin_count == 0)
		obj->pin_mappable = false;
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
		   struct drm_file *file)
	struct drm_i915_gem_pin *args = data;
	struct drm_i915_gem_object *obj;
	ret = i915_mutex_lock_interruptible(dev);
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to pin a purgeable buffer\n");
	if (obj->pin_filp != NULL && obj->pin_filp != file) {
		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
	obj->user_pin_count++;
	obj->pin_filp = file;
	if (obj->user_pin_count == 1) {
		ret = i915_gem_object_pin(obj, args->alignment, true, false);
	/* XXX - flush the CPU caches for pinned objects
	 * as the X server doesn't manage domains yet
	 */
	i915_gem_object_flush_cpu_write_domain(obj);
	args->offset = obj->gtt_offset;
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file)
	struct drm_i915_gem_pin *args = data;
	struct drm_i915_gem_object *obj;
	ret = i915_mutex_lock_interruptible(dev);
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
	if (obj->pin_filp != file) {
		DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
	obj->user_pin_count--;
	if (obj->user_pin_count == 0) {
		obj->pin_filp = NULL;
		i915_gem_object_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	ret = i915_mutex_lock_interruptible(dev);
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
	if (&obj->base == NULL) {
	/* Count all active objects as busy, even if they are currently not used
	 * by the gpu. Users of this interface expect objects to eventually
	 * become non-busy without any further actions, therefore emit any
	 * necessary flushes here.
	 */
	ret = i915_gem_object_flush_active(obj);
	args->busy = obj->active;
	BUILD_BUG_ON(I915_NUM_RINGS > 16);
	args->busy |= intel_ring_flag(obj->ring) << 16;
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
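	/* The busy ioctl thus reports "active" in the low bits and, for an
	 * active object, the ring it is busy on as intel_ring_flag() << 16;
	 * the BUILD_BUG_ON above keeps the per-ring flag within 16 bits.
	 */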
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
	return i915_gem_ring_throttle(dev, file_priv);
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
	struct drm_i915_gem_madvise *args = data;
	struct drm_i915_gem_object *obj;
	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
	ret = i915_mutex_lock_interruptible(dev);
	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
	if (&obj->base == NULL) {
	if (obj->pin_count) {
	if (obj->madv != __I915_MADV_PURGED)
		obj->madv = args->madv;
	/* if the object is no longer attached, discard its backing storage */
	if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
		i915_gem_object_truncate(obj);
	args->retained = obj->madv != __I915_MADV_PURGED;
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);
void i915_gem_object_init(struct drm_i915_gem_object *obj,
			  const struct drm_i915_gem_object_ops *ops)
	INIT_LIST_HEAD(&obj->mm_list);
	INIT_LIST_HEAD(&obj->gtt_list);
	INIT_LIST_HEAD(&obj->ring_list);
	INIT_LIST_HEAD(&obj->exec_list);
	obj->fence_reg = I915_FENCE_REG_NONE;
	obj->madv = I915_MADV_WILLNEED;
	/* Avoid an unnecessary call to unbind on the first bind. */
	obj->map_and_fenceable = true;
	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
	.get_pages = i915_gem_object_get_pages_gtt,
	.put_pages = i915_gem_object_put_pages_gtt,
};
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
	struct drm_i915_gem_object *obj;
	struct address_space *mapping;
	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
	if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
		/* 965gm cannot relocate objects above 4GiB. */
		mask &= ~__GFP_HIGHMEM;
		mask |= __GFP_DMA32;
	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	mapping_set_gfp_mask(mapping, mask);
	i915_gem_object_init(obj, &i915_gem_object_ops);
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
	/* On some devices, we can have the GPU use the LLC (the CPU
	 * cache) for about a 10% performance improvement
	 * compared to uncached. Graphics requests other than
	 * display scanout are coherent with the CPU in
	 * accessing this cache. This means in this mode we
	 * don't need to clflush on the CPU side, and on the
	 * GPU side we only need to flush internal caches to
	 * get data visible to the CPU.
	 *
	 * However, we maintain the display planes as UC, and so
	 * need to rebind when first used as such.
	 */
	obj->cache_level = I915_CACHE_LLC;
	obj->cache_level = I915_CACHE_NONE;
int i915_gem_init_object(struct drm_gem_object *obj)
void i915_gem_free_object(struct drm_gem_object *gem_obj)
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	trace_i915_gem_object_destroy(obj);
	i915_gem_detach_phys_object(dev, obj);
	if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
		bool was_interruptible;
		was_interruptible = dev_priv->mm.interruptible;
		dev_priv->mm.interruptible = false;
		WARN_ON(i915_gem_object_unbind(obj));
		dev_priv->mm.interruptible = was_interruptible;
	obj->pages_pin_count = 0;
	i915_gem_object_put_pages(obj);
	i915_gem_object_free_mmap_offset(obj);
	if (obj->base.import_attach)
		drm_prime_gem_destroy(&obj->base, NULL);
	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);
i915_gem_idle(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;
	mutex_lock(&dev->struct_mutex);
	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
	ret = i915_gpu_idle(dev);
		mutex_unlock(&dev->struct_mutex);
	i915_gem_retire_requests(dev);
	/* Under UMS, be paranoid and evict. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		i915_gem_evict_everything(dev);
	i915_gem_reset_fences(dev);
	/* Hack! Don't let anybody do execbuf while we don't control the chip.
	 * We need to replace this with a semaphore, or something.
	 * And not confound mm.suspended!
	 */
	dev_priv->mm.suspended = 1;
	del_timer_sync(&dev_priv->hangcheck_timer);
	i915_kernel_lost_context(dev);
	i915_gem_cleanup_ringbuffer(dev);
	mutex_unlock(&dev->struct_mutex);
	/* Cancel the retire work handler, which should be idle now. */
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
void i915_gem_l3_remap(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;
	if (!IS_IVYBRIDGE(dev))
	if (!dev_priv->l3_parity.remap_info)
	misccpctl = I915_READ(GEN7_MISCCPCTL);
	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
	POSTING_READ(GEN7_MISCCPCTL);
	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
		u32 remap = I915_READ(GEN7_L3LOG_BASE + i);
		if (remap && remap != dev_priv->l3_parity.remap_info[i/4])
			DRM_DEBUG("0x%x was already programmed to %x\n",
				  GEN7_L3LOG_BASE + i, remap);
		if (remap && !dev_priv->l3_parity.remap_info[i/4])
			DRM_DEBUG_DRIVER("Clearing remapped register\n");
		I915_WRITE(GEN7_L3LOG_BASE + i, dev_priv->l3_parity.remap_info[i/4]);
	/* Make sure all the writes land before disabling dop clock gating */
	POSTING_READ(GEN7_L3LOG_BASE);
	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
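	/* Sequence used above (summary): DOP clock gating is disabled via
	 * GEN7_MISCCPCTL, the saved l3_parity.remap_info[] words are written
	 * into the GEN7_L3LOG registers (with debug output if the hardware
	 * already held a different value), the writes are posted, and the
	 * original MISCCPCTL value is restored.
	 */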
void i915_gem_init_swizzling(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;
	if (INTEL_INFO(dev)->gen < 5 ||
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
	I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
intel_enable_blt(struct drm_device *dev)
	/* The blitter was dysfunctional on early prototypes */
	if (IS_GEN6(dev) && dev->pdev->revision < 8) {
		DRM_INFO("BLT not supported on this pre-production hardware;"
			 " graphics performance will be degraded.\n");
i915_gem_init_hw(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
		I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
	i915_gem_l3_remap(dev);
	i915_gem_init_swizzling(dev);
	ret = intel_init_render_ring_buffer(dev);
	ret = intel_init_bsd_ring_buffer(dev);
		goto cleanup_render_ring;
	if (intel_enable_blt(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
			goto cleanup_bsd_ring;
	dev_priv->next_seqno = 1;
	/*
	 * XXX: There was some w/a described somewhere suggesting loading
	 * contexts before PPGTT.
	 */
	i915_gem_context_init(dev);
	i915_gem_init_ppgtt(dev);

	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
cleanup_render_ring:
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
intel_enable_ppgtt(struct drm_device *dev)
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;
#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
int i915_gem_init(struct drm_device *dev)
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;
	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
	mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
	mutex_lock(&dev->struct_mutex);
	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		/* PPGTT pdes are stolen from global gtt ptes, so shrink the
		 * aperture accordingly when using aliasing ppgtt. */
		gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
		i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
		ret = i915_gem_init_aliasing_ppgtt(dev);
			mutex_unlock(&dev->struct_mutex);
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch
	 * page. There are a number of places where the hardware
	 * apparently prefetches past the end of the object, and we've
	 * seen multiple hangs with the GPU head pointer stuck in a
	 * batchbuffer bound at the last page of the aperture. One page
	 * should be enough to keep any prefetching inside of the
	 */
	i915_gem_init_global_gtt(dev, 0, mappable_size,
	ret = i915_gem_init_hw(dev);
	mutex_unlock(&dev->struct_mutex);
		i915_gem_cleanup_aliasing_ppgtt(dev);
	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->dri1.allow_batchbuffer = 1;
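	/* Illustrative sizing note: with aliasing PPGTT enabled the global
	 * GTT is shrunk by I915_PPGTT_PD_ENTRIES * PAGE_SIZE because the
	 * page-directory entries live in GTT PTE space; assuming 512 PD
	 * entries and 4 KiB pages, that is 2 MiB taken off the size handed
	 * to i915_gem_init_global_gtt().
	 */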
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	for_each_ring(ring, dev_priv, i)
		intel_cleanup_ring_buffer(ring);
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
	drm_i915_private_t *dev_priv = dev->dev_private;
	if (drm_core_check_feature(dev, DRIVER_MODESET))
	if (atomic_read(&dev_priv->mm.wedged)) {
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
		atomic_set(&dev_priv->mm.wedged, 0);
	mutex_lock(&dev->struct_mutex);
	dev_priv->mm.suspended = 0;
	ret = i915_gem_init_hw(dev);
		mutex_unlock(&dev->struct_mutex);
	BUG_ON(!list_empty(&dev_priv->mm.active_list));
	mutex_unlock(&dev->struct_mutex);
	ret = drm_irq_install(dev);
		goto cleanup_ringbuffer;

cleanup_ringbuffer:
	mutex_lock(&dev->struct_mutex);
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->mm.suspended = 1;
	mutex_unlock(&dev->struct_mutex);
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
	if (drm_core_check_feature(dev, DRIVER_MODESET))
	drm_irq_uninstall(dev);
	return i915_gem_idle(dev);
i915_gem_lastclose(struct drm_device *dev)
	if (drm_core_check_feature(dev, DRIVER_MODESET))
	ret = i915_gem_idle(dev);
		DRM_ERROR("failed to idle hardware: %d\n", ret);
init_ring_lists(struct intel_ring_buffer *ring)
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
i915_gem_load(struct drm_device *dev)
	drm_i915_private_t *dev_priv = dev->dev_private;
	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	init_completion(&dev_priv->error_completion);
	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	I915_WRITE(MI_ARB_STATE,
		   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;
	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
		dev_priv->num_fence_regs = 8;
	/* Initialize fence registers to zero */
	i915_gem_reset_fences(dev);
	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);
	dev_priv->mm.interruptible = true;
	dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
static int i915_gem_init_phys_object(struct drm_device *dev,
				     int id, int size, int align)
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	if (dev_priv->mm.phys_objs[id - 1] || !size)
	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	phys_obj->handle = drm_pci_alloc(dev, size, align);
	if (!phys_obj->handle) {
	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
	dev_priv->mm.phys_objs[id - 1] = phys_obj;
static void i915_gem_free_phys_object(struct drm_device *dev, int id)
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	if (!dev_priv->mm.phys_objs[id - 1])
	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
	drm_pci_free(dev, phys_obj->handle);
	dev_priv->mm.phys_objs[id - 1] = NULL;
void i915_gem_free_all_phys_object(struct drm_device *dev)
	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj)
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	vaddr = obj->phys_obj->handle->vaddr;
	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
		struct page *page = shmem_read_mapping_page(mapping, i);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			drm_clflush_pages(&page, 1);
			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
	i915_gem_chipset_flush(dev);
	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	drm_i915_private_t *dev_priv = dev->dev_private;
	if (id > I915_MAX_PHYS_OBJECT)
	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
		i915_gem_detach_phys_object(dev, obj);
	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;
	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
		page = shmem_read_mapping_page(mapping, i);
			return PTR_ERR(page);
		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		mark_page_accessed(page);
		page_cache_release(page);
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;
		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
	i915_gem_chipset_flush(dev);
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
	struct drm_i915_file_private *file_priv = file->driver_priv;
	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;
		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
		list_del(&request->client_list);
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
	if (!mutex_is_locked(mutex))
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
	return mutex->owner == task;
	/* Since UP may be pre-empted, we cannot assume that we own the lock */
i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj;
	int nr_to_scan = sc->nr_to_scan;
	if (!mutex_trylock(&dev->struct_mutex)) {
		if (!mutex_is_locked_by(&dev->struct_mutex, current))
		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
		i915_gem_shrink_all(dev_priv);
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
		if (obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;
	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
			cnt += obj->base.size >> PAGE_SHIFT;
	mutex_unlock(&dev->struct_mutex);
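	/* Shrinker behaviour sketched above: purgeable objects are freed
	 * first via i915_gem_purge(); if that does not satisfy nr_to_scan,
	 * everything else is dropped with i915_gem_shrink_all(). The count
	 * reported back to the VM is the number of pages on the unbound list
	 * whose pages are unpinned, plus bound objects that are neither
	 * pinned into the GTT nor have their pages pinned.
	 */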