1
/* ldc.c: Logical Domain Channel link-layer protocol driver.
3
* Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
6
#include <linux/kernel.h>
7
#include <linux/export.h>
8
#include <linux/slab.h>
9
#include <linux/spinlock.h>
10
#include <linux/delay.h>
11
#include <linux/errno.h>
12
#include <linux/string.h>
13
#include <linux/scatterlist.h>
14
#include <linux/interrupt.h>
15
#include <linux/list.h>
16
#include <linux/init.h>
17
#include <linux/bitmap.h>
19
#include <asm/hypervisor.h>
20
#include <asm/iommu.h>
23
#include <asm/mdesc.h>
25
#define DRV_MODULE_NAME "ldc"
26
#define PFX DRV_MODULE_NAME ": "
27
#define DRV_MODULE_VERSION "1.1"
28
#define DRV_MODULE_RELDATE "July 22, 2008"
30
static char version[] __devinitdata =
31
DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
32
#define LDC_PACKET_SIZE 64
34
/* Packet header layout for unreliable and reliable mode frames.
35
* When in RAW mode, packets are simply straight 64-byte payloads
50
#define LDC_VERS 0x01 /* Link Version */
51
#define LDC_RTS 0x02 /* Request To Send */
52
#define LDC_RTR 0x03 /* Ready To Receive */
53
#define LDC_RDX 0x04 /* Ready for Data eXchange */
54
#define LDC_CTRL_MSK 0x0f
58
#define LDC_FRAG_MASK 0xc0
59
#define LDC_START 0x40
65
u8 u_data[LDC_PACKET_SIZE - 8];
69
u8 r_data[LDC_PACKET_SIZE - 8 - 8];
79
/* Ordered from largest major to lowest. */
80
static struct ldc_version ver_arr[] = {
81
{ .major = 1, .minor = 0 },
84
#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
85
#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
90
int (*write)(struct ldc_channel *, const void *, unsigned int);
91
int (*read)(struct ldc_channel *, void *, unsigned int);
94
static const struct ldc_mode_ops raw_ops;
95
static const struct ldc_mode_ops nonraw_ops;
96
static const struct ldc_mode_ops stream_ops;
98
int ldom_domaining_enabled;
101
/* Protects arena alloc/free. */
103
struct iommu_arena arena;
104
struct ldc_mtable_entry *page_table;
108
/* Protects all operations that depend upon channel state. */
117
struct ldc_packet *tx_base;
118
unsigned long tx_head;
119
unsigned long tx_tail;
120
unsigned long tx_num_entries;
123
unsigned long tx_acked;
125
struct ldc_packet *rx_base;
126
unsigned long rx_head;
127
unsigned long rx_tail;
128
unsigned long rx_num_entries;
134
unsigned long chan_state;
136
struct ldc_channel_config cfg;
139
const struct ldc_mode_ops *mops;
141
struct ldc_iommu iommu;
143
struct ldc_version ver;
146
#define LDC_HS_CLOSED 0x00
147
#define LDC_HS_OPEN 0x01
148
#define LDC_HS_GOTVERS 0x02
149
#define LDC_HS_SENTRTR 0x03
150
#define LDC_HS_GOTRTR 0x04
151
#define LDC_HS_COMPLETE 0x10
154
#define LDC_FLAG_ALLOCED_QUEUES 0x01
155
#define LDC_FLAG_REGISTERED_QUEUES 0x02
156
#define LDC_FLAG_REGISTERED_IRQS 0x04
157
#define LDC_FLAG_RESET 0x10
162
#define LDC_IRQ_NAME_MAX 32
163
char rx_irq_name[LDC_IRQ_NAME_MAX];
164
char tx_irq_name[LDC_IRQ_NAME_MAX];
166
struct hlist_head mh_list;
168
struct hlist_node list;
171
#define ldcdbg(TYPE, f, a...) \
172
do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
173
printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
176
static const char *state_to_str(u8 state)
179
case LDC_STATE_INVALID:
183
case LDC_STATE_BOUND:
185
case LDC_STATE_READY:
187
case LDC_STATE_CONNECTED:
194
static void ldc_set_state(struct ldc_channel *lp, u8 state)
196
ldcdbg(STATE, "STATE (%s) --> (%s)\n",
197
state_to_str(lp->state),
198
state_to_str(state));
203
static unsigned long __advance(unsigned long off, unsigned long num_entries)
205
off += LDC_PACKET_SIZE;
206
if (off == (num_entries * LDC_PACKET_SIZE))
212
static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
214
return __advance(off, lp->rx_num_entries);
217
static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
219
return __advance(off, lp->tx_num_entries);
222
static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
223
unsigned long *new_tail)
225
struct ldc_packet *p;
228
t = tx_advance(lp, lp->tx_tail);
229
if (t == lp->tx_head)
235
return p + (lp->tx_tail / LDC_PACKET_SIZE);
238
/* When we are in reliable or stream mode, have to track the next packet
239
* we haven't gotten an ACK for in the TX queue using tx_acked. We have
240
* to be careful not to stomp over the queue past that point. During
241
* the handshake, we don't have TX data packets pending in the queue
242
* and that's why handshake_get_tx_packet() need not be mindful of
245
static unsigned long head_for_data(struct ldc_channel *lp)
247
if (lp->cfg.mode == LDC_MODE_STREAM)
252
static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
254
unsigned long limit, tail, new_tail, diff;
257
limit = head_for_data(lp);
259
new_tail = tx_advance(lp, tail);
260
if (new_tail == limit)
263
if (limit > new_tail)
264
diff = limit - new_tail;
267
((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
268
diff /= LDC_PACKET_SIZE;
271
if (diff * mss < size)
277
static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
278
unsigned long *new_tail)
280
struct ldc_packet *p;
283
h = head_for_data(lp);
284
t = tx_advance(lp, lp->tx_tail);
291
return p + (lp->tx_tail / LDC_PACKET_SIZE);
294
static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
296
unsigned long orig_tail = lp->tx_tail;
300
while (limit-- > 0) {
303
err = sun4v_ldc_tx_set_qtail(lp->id, tail);
307
if (err != HV_EWOULDBLOCK) {
308
lp->tx_tail = orig_tail;
314
lp->tx_tail = orig_tail;
318
/* This just updates the head value in the hypervisor using
319
* a polling loop with a timeout. The caller takes care of
320
* updating software state representing the head change, if any.
322
static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
326
while (limit-- > 0) {
329
err = sun4v_ldc_rx_set_qhead(lp->id, head);
333
if (err != HV_EWOULDBLOCK)
342
static int send_tx_packet(struct ldc_channel *lp,
343
struct ldc_packet *p,
344
unsigned long new_tail)
346
BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
348
return set_tx_tail(lp, new_tail);
351
static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
353
void *data, int dlen,
354
unsigned long *new_tail)
356
struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
359
memset(p, 0, sizeof(*p));
364
memcpy(p->u.u_data, data, dlen);
369
static int start_handshake(struct ldc_channel *lp)
371
struct ldc_packet *p;
372
struct ldc_version *ver;
373
unsigned long new_tail;
377
ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
378
ver->major, ver->minor);
380
p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
381
ver, sizeof(*ver), &new_tail);
383
int err = send_tx_packet(lp, p, new_tail);
385
lp->flags &= ~LDC_FLAG_RESET;
391
static int send_version_nack(struct ldc_channel *lp,
392
u16 major, u16 minor)
394
struct ldc_packet *p;
395
struct ldc_version ver;
396
unsigned long new_tail;
401
p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
402
&ver, sizeof(ver), &new_tail);
404
ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
405
ver.major, ver.minor);
407
return send_tx_packet(lp, p, new_tail);
412
static int send_version_ack(struct ldc_channel *lp,
413
struct ldc_version *vp)
415
struct ldc_packet *p;
416
unsigned long new_tail;
418
p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
419
vp, sizeof(*vp), &new_tail);
421
ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
422
vp->major, vp->minor);
424
return send_tx_packet(lp, p, new_tail);
429
static int send_rts(struct ldc_channel *lp)
431
struct ldc_packet *p;
432
unsigned long new_tail;
434
p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
437
p->env = lp->cfg.mode;
441
ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
444
return send_tx_packet(lp, p, new_tail);
449
static int send_rtr(struct ldc_channel *lp)
451
struct ldc_packet *p;
452
unsigned long new_tail;
454
p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
457
p->env = lp->cfg.mode;
460
ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
463
return send_tx_packet(lp, p, new_tail);
468
static int send_rdx(struct ldc_channel *lp)
470
struct ldc_packet *p;
471
unsigned long new_tail;
473
p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
477
p->seqid = ++lp->snd_nxt;
478
p->u.r.ackid = lp->rcv_nxt;
480
ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
481
p->env, p->seqid, p->u.r.ackid);
483
return send_tx_packet(lp, p, new_tail);
488
static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
490
struct ldc_packet *p;
491
unsigned long new_tail;
494
p = data_get_tx_packet(lp, &new_tail);
497
memset(p, 0, sizeof(*p));
498
p->type = data_pkt->type;
500
p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
501
p->seqid = lp->snd_nxt + 1;
502
p->u.r.ackid = lp->rcv_nxt;
504
ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
505
p->type, p->ctrl, p->seqid, p->u.r.ackid);
507
err = send_tx_packet(lp, p, new_tail);
514
static int ldc_abort(struct ldc_channel *lp)
516
unsigned long hv_err;
518
ldcdbg(STATE, "ABORT\n");
520
/* We report but do not act upon the hypervisor errors because
521
* there really isn't much we can do if they fail at this point.
523
hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
525
printk(KERN_ERR PFX "ldc_abort: "
526
"sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
527
lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
529
hv_err = sun4v_ldc_tx_get_state(lp->id,
534
printk(KERN_ERR PFX "ldc_abort: "
535
"sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
538
hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
540
printk(KERN_ERR PFX "ldc_abort: "
541
"sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
542
lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
544
/* Refetch the RX queue state as well, because we could be invoked
545
* here in the queue processing context.
547
hv_err = sun4v_ldc_rx_get_state(lp->id,
552
printk(KERN_ERR PFX "ldc_abort: "
553
"sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
559
static struct ldc_version *find_by_major(u16 major)
561
struct ldc_version *ret = NULL;
564
for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
565
struct ldc_version *v = &ver_arr[i];
566
if (v->major <= major) {
574
static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
576
struct ldc_version *vap;
579
ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
580
vp->major, vp->minor);
582
if (lp->hs_state == LDC_HS_GOTVERS) {
583
lp->hs_state = LDC_HS_OPEN;
584
memset(&lp->ver, 0, sizeof(lp->ver));
587
vap = find_by_major(vp->major);
589
err = send_version_nack(lp, 0, 0);
590
} else if (vap->major != vp->major) {
591
err = send_version_nack(lp, vap->major, vap->minor);
593
struct ldc_version ver = *vp;
594
if (ver.minor > vap->minor)
595
ver.minor = vap->minor;
596
err = send_version_ack(lp, &ver);
599
lp->hs_state = LDC_HS_GOTVERS;
603
return ldc_abort(lp);
608
static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
610
ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
611
vp->major, vp->minor);
613
if (lp->hs_state == LDC_HS_GOTVERS) {
614
if (lp->ver.major != vp->major ||
615
lp->ver.minor != vp->minor)
616
return ldc_abort(lp);
619
lp->hs_state = LDC_HS_GOTVERS;
622
return ldc_abort(lp);
626
static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
628
struct ldc_version *vap;
629
struct ldc_packet *p;
630
unsigned long new_tail;
632
if (vp->major == 0 && vp->minor == 0)
633
return ldc_abort(lp);
635
vap = find_by_major(vp->major);
637
return ldc_abort(lp);
639
p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
643
return ldc_abort(lp);
645
return send_tx_packet(lp, p, new_tail);
648
static int process_version(struct ldc_channel *lp,
649
struct ldc_packet *p)
651
struct ldc_version *vp;
653
vp = (struct ldc_version *) p->u.u_data;
657
return process_ver_info(lp, vp);
660
return process_ver_ack(lp, vp);
663
return process_ver_nack(lp, vp);
666
return ldc_abort(lp);
670
static int process_rts(struct ldc_channel *lp,
671
struct ldc_packet *p)
673
ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
674
p->stype, p->seqid, p->env);
676
if (p->stype != LDC_INFO ||
677
lp->hs_state != LDC_HS_GOTVERS ||
678
p->env != lp->cfg.mode)
679
return ldc_abort(lp);
681
lp->snd_nxt = p->seqid;
682
lp->rcv_nxt = p->seqid;
683
lp->hs_state = LDC_HS_SENTRTR;
685
return ldc_abort(lp);
690
static int process_rtr(struct ldc_channel *lp,
691
struct ldc_packet *p)
693
ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
694
p->stype, p->seqid, p->env);
696
if (p->stype != LDC_INFO ||
697
p->env != lp->cfg.mode)
698
return ldc_abort(lp);
700
lp->snd_nxt = p->seqid;
701
lp->hs_state = LDC_HS_COMPLETE;
702
ldc_set_state(lp, LDC_STATE_CONNECTED);
708
static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
710
return lp->rcv_nxt + 1 == seqid;
713
static int process_rdx(struct ldc_channel *lp,
714
struct ldc_packet *p)
716
ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
717
p->stype, p->seqid, p->env, p->u.r.ackid);
719
if (p->stype != LDC_INFO ||
720
!(rx_seq_ok(lp, p->seqid)))
721
return ldc_abort(lp);
723
lp->rcv_nxt = p->seqid;
725
lp->hs_state = LDC_HS_COMPLETE;
726
ldc_set_state(lp, LDC_STATE_CONNECTED);
731
static int process_control_frame(struct ldc_channel *lp,
732
struct ldc_packet *p)
736
return process_version(lp, p);
739
return process_rts(lp, p);
742
return process_rtr(lp, p);
745
return process_rdx(lp, p);
748
return ldc_abort(lp);
752
static int process_error_frame(struct ldc_channel *lp,
753
struct ldc_packet *p)
755
return ldc_abort(lp);
758
static int process_data_ack(struct ldc_channel *lp,
759
struct ldc_packet *ack)
761
unsigned long head = lp->tx_acked;
762
u32 ackid = ack->u.r.ackid;
765
struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
767
head = tx_advance(lp, head);
769
if (p->seqid == ackid) {
773
if (head == lp->tx_tail)
774
return ldc_abort(lp);
780
static void send_events(struct ldc_channel *lp, unsigned int event_mask)
782
if (event_mask & LDC_EVENT_RESET)
783
lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
784
if (event_mask & LDC_EVENT_UP)
785
lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
786
if (event_mask & LDC_EVENT_DATA_READY)
787
lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
790
static irqreturn_t ldc_rx(int irq, void *dev_id)
792
struct ldc_channel *lp = dev_id;
793
unsigned long orig_state, flags;
794
unsigned int event_mask;
796
spin_lock_irqsave(&lp->lock, flags);
798
orig_state = lp->chan_state;
800
/* We should probably check for hypervisor errors here and
801
* reset the LDC channel if we get one.
803
sun4v_ldc_rx_get_state(lp->id,
808
ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
809
orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
813
if (lp->cfg.mode == LDC_MODE_RAW &&
814
lp->chan_state == LDC_CHANNEL_UP) {
815
lp->hs_state = LDC_HS_COMPLETE;
816
ldc_set_state(lp, LDC_STATE_CONNECTED);
818
event_mask |= LDC_EVENT_UP;
820
orig_state = lp->chan_state;
823
/* If we are in reset state, flush the RX queue and ignore
826
if (lp->flags & LDC_FLAG_RESET) {
827
(void) __set_rx_head(lp, lp->rx_tail);
831
/* Once we finish the handshake, we let the ldc_read()
832
* paths do all of the control frame and state management.
833
* Just trigger the callback.
835
if (lp->hs_state == LDC_HS_COMPLETE) {
837
if (lp->chan_state != orig_state) {
838
unsigned int event = LDC_EVENT_RESET;
840
if (lp->chan_state == LDC_CHANNEL_UP)
841
event = LDC_EVENT_UP;
845
if (lp->rx_head != lp->rx_tail)
846
event_mask |= LDC_EVENT_DATA_READY;
851
if (lp->chan_state != orig_state)
854
while (lp->rx_head != lp->rx_tail) {
855
struct ldc_packet *p;
859
p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
863
err = process_control_frame(lp, p);
869
event_mask |= LDC_EVENT_DATA_READY;
874
err = process_error_frame(lp, p);
886
new += LDC_PACKET_SIZE;
887
if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
891
err = __set_rx_head(lp, new);
893
(void) ldc_abort(lp);
896
if (lp->hs_state == LDC_HS_COMPLETE)
897
goto handshake_complete;
901
spin_unlock_irqrestore(&lp->lock, flags);
903
send_events(lp, event_mask);
908
static irqreturn_t ldc_tx(int irq, void *dev_id)
910
struct ldc_channel *lp = dev_id;
911
unsigned long flags, orig_state;
912
unsigned int event_mask = 0;
914
spin_lock_irqsave(&lp->lock, flags);
916
orig_state = lp->chan_state;
918
/* We should probably check for hypervisor errors here and
919
* reset the LDC channel if we get one.
921
sun4v_ldc_tx_get_state(lp->id,
926
ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
927
orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
929
if (lp->cfg.mode == LDC_MODE_RAW &&
930
lp->chan_state == LDC_CHANNEL_UP) {
931
lp->hs_state = LDC_HS_COMPLETE;
932
ldc_set_state(lp, LDC_STATE_CONNECTED);
934
event_mask |= LDC_EVENT_UP;
937
spin_unlock_irqrestore(&lp->lock, flags);
939
send_events(lp, event_mask);
944
/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
945
* XXX that addition and removal from the ldc_channel_list has
946
* XXX atomicity, otherwise the __ldc_channel_exists() check is
947
* XXX totally pointless as another thread can slip into ldc_alloc()
948
* XXX and add a channel with the same ID. There also needs to be
949
* XXX a spinlock for ldc_channel_list.
951
static HLIST_HEAD(ldc_channel_list);
953
static int __ldc_channel_exists(unsigned long id)
955
struct ldc_channel *lp;
956
struct hlist_node *n;
958
hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
965
static int alloc_queue(const char *name, unsigned long num_entries,
966
struct ldc_packet **base, unsigned long *ra)
968
unsigned long size, order;
971
size = num_entries * LDC_PACKET_SIZE;
972
order = get_order(size);
974
q = (void *) __get_free_pages(GFP_KERNEL, order);
976
printk(KERN_ERR PFX "Alloc of %s queue failed with "
977
"size=%lu order=%lu\n", name, size, order);
981
memset(q, 0, PAGE_SIZE << order);
989
static void free_queue(unsigned long num_entries, struct ldc_packet *q)
991
unsigned long size, order;
996
size = num_entries * LDC_PACKET_SIZE;
997
order = get_order(size);
999
free_pages((unsigned long)q, order);
1002
/* XXX Make this configurable... XXX */
1003
#define LDC_IOTABLE_SIZE (8 * 1024)
1005
static int ldc_iommu_init(struct ldc_channel *lp)
1007
unsigned long sz, num_tsb_entries, tsbsize, order;
1008
struct ldc_iommu *iommu = &lp->iommu;
1009
struct ldc_mtable_entry *table;
1010
unsigned long hv_err;
1013
num_tsb_entries = LDC_IOTABLE_SIZE;
1014
tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1016
spin_lock_init(&iommu->lock);
1018
sz = num_tsb_entries / 8;
1019
sz = (sz + 7UL) & ~7UL;
1020
iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1021
if (!iommu->arena.map) {
1022
printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1026
iommu->arena.limit = num_tsb_entries;
1028
order = get_order(tsbsize);
1030
table = (struct ldc_mtable_entry *)
1031
__get_free_pages(GFP_KERNEL, order);
1034
printk(KERN_ERR PFX "Alloc of MTE table failed, "
1035
"size=%lu order=%lu\n", tsbsize, order);
1039
memset(table, 0, PAGE_SIZE << order);
1041
iommu->page_table = table;
1043
hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1047
goto out_free_table;
1052
free_pages((unsigned long) table, order);
1053
iommu->page_table = NULL;
1056
kfree(iommu->arena.map);
1057
iommu->arena.map = NULL;
1062
static void ldc_iommu_release(struct ldc_channel *lp)
1064
struct ldc_iommu *iommu = &lp->iommu;
1065
unsigned long num_tsb_entries, tsbsize, order;
1067
(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1069
num_tsb_entries = iommu->arena.limit;
1070
tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1071
order = get_order(tsbsize);
1073
free_pages((unsigned long) iommu->page_table, order);
1074
iommu->page_table = NULL;
1076
kfree(iommu->arena.map);
1077
iommu->arena.map = NULL;
1080
struct ldc_channel *ldc_alloc(unsigned long id,
1081
const struct ldc_channel_config *cfgp,
1084
struct ldc_channel *lp;
1085
const struct ldc_mode_ops *mops;
1086
unsigned long dummy1, dummy2, hv_err;
1091
if (!ldom_domaining_enabled)
1098
switch (cfgp->mode) {
1101
mss = LDC_PACKET_SIZE;
1104
case LDC_MODE_UNRELIABLE:
1106
mss = LDC_PACKET_SIZE - 8;
1109
case LDC_MODE_STREAM:
1111
mss = LDC_PACKET_SIZE - 8 - 8;
1118
if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1121
hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1123
if (hv_err == HV_ECHANNEL)
1127
if (__ldc_channel_exists(id))
1132
lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1137
spin_lock_init(&lp->lock);
1141
err = ldc_iommu_init(lp);
1150
lp->cfg.mtu = LDC_DEFAULT_MTU;
1152
if (lp->cfg.mode == LDC_MODE_STREAM) {
1153
mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1156
goto out_free_iommu;
1158
lp->mssbuf = mssbuf;
1161
lp->event_arg = event_arg;
1163
/* XXX allow setting via ldc_channel_config to override defaults
1164
* XXX or use some formula based upon mtu
1166
lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1167
lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1169
err = alloc_queue("TX", lp->tx_num_entries,
1170
&lp->tx_base, &lp->tx_ra);
1172
goto out_free_mssbuf;
1174
err = alloc_queue("RX", lp->rx_num_entries,
1175
&lp->rx_base, &lp->rx_ra);
1179
lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1181
lp->hs_state = LDC_HS_CLOSED;
1182
ldc_set_state(lp, LDC_STATE_INIT);
1184
INIT_HLIST_NODE(&lp->list);
1185
hlist_add_head(&lp->list, &ldc_channel_list);
1187
INIT_HLIST_HEAD(&lp->mh_list);
1192
free_queue(lp->tx_num_entries, lp->tx_base);
1198
ldc_iommu_release(lp);
1204
return ERR_PTR(err);
1206
EXPORT_SYMBOL(ldc_alloc);
1208
void ldc_free(struct ldc_channel *lp)
1210
if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1211
free_irq(lp->cfg.rx_irq, lp);
1212
free_irq(lp->cfg.tx_irq, lp);
1215
if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1216
sun4v_ldc_tx_qconf(lp->id, 0, 0);
1217
sun4v_ldc_rx_qconf(lp->id, 0, 0);
1218
lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1220
if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1221
free_queue(lp->tx_num_entries, lp->tx_base);
1222
free_queue(lp->rx_num_entries, lp->rx_base);
1223
lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1226
hlist_del(&lp->list);
1230
ldc_iommu_release(lp);
1234
EXPORT_SYMBOL(ldc_free);
1236
/* Bind the channel. This registers the LDC queues with
1237
* the hypervisor and puts the channel into a pseudo-listening
1238
* state. This does not initiate a handshake, ldc_connect() does
1241
int ldc_bind(struct ldc_channel *lp, const char *name)
1243
unsigned long hv_err, flags;
1247
(lp->state != LDC_STATE_INIT))
1250
snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1251
snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1253
err = request_irq(lp->cfg.rx_irq, ldc_rx,
1254
IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1255
lp->rx_irq_name, lp);
1259
err = request_irq(lp->cfg.tx_irq, ldc_tx,
1260
IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1261
lp->tx_irq_name, lp);
1263
free_irq(lp->cfg.rx_irq, lp);
1268
spin_lock_irqsave(&lp->lock, flags);
1270
enable_irq(lp->cfg.rx_irq);
1271
enable_irq(lp->cfg.tx_irq);
1273
lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1276
hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1280
hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1284
hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1288
hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1292
lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1294
hv_err = sun4v_ldc_tx_get_state(lp->id,
1302
lp->tx_acked = lp->tx_head;
1304
lp->hs_state = LDC_HS_OPEN;
1305
ldc_set_state(lp, LDC_STATE_BOUND);
1307
spin_unlock_irqrestore(&lp->lock, flags);
1312
lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1313
sun4v_ldc_rx_qconf(lp->id, 0, 0);
1316
sun4v_ldc_tx_qconf(lp->id, 0, 0);
1319
lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1320
free_irq(lp->cfg.tx_irq, lp);
1321
free_irq(lp->cfg.rx_irq, lp);
1323
spin_unlock_irqrestore(&lp->lock, flags);
1327
EXPORT_SYMBOL(ldc_bind);
1329
int ldc_connect(struct ldc_channel *lp)
1331
unsigned long flags;
1334
if (lp->cfg.mode == LDC_MODE_RAW)
1337
spin_lock_irqsave(&lp->lock, flags);
1339
if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1340
!(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1341
lp->hs_state != LDC_HS_OPEN)
1344
err = start_handshake(lp);
1346
spin_unlock_irqrestore(&lp->lock, flags);
1350
EXPORT_SYMBOL(ldc_connect);
1352
int ldc_disconnect(struct ldc_channel *lp)
1354
unsigned long hv_err, flags;
1357
if (lp->cfg.mode == LDC_MODE_RAW)
1360
if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1361
!(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1364
spin_lock_irqsave(&lp->lock, flags);
1367
hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1371
hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1375
hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1379
hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1383
ldc_set_state(lp, LDC_STATE_BOUND);
1384
lp->hs_state = LDC_HS_OPEN;
1385
lp->flags |= LDC_FLAG_RESET;
1387
spin_unlock_irqrestore(&lp->lock, flags);
1392
sun4v_ldc_tx_qconf(lp->id, 0, 0);
1393
sun4v_ldc_rx_qconf(lp->id, 0, 0);
1394
free_irq(lp->cfg.tx_irq, lp);
1395
free_irq(lp->cfg.rx_irq, lp);
1396
lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1397
LDC_FLAG_REGISTERED_QUEUES);
1398
ldc_set_state(lp, LDC_STATE_INIT);
1400
spin_unlock_irqrestore(&lp->lock, flags);
1404
EXPORT_SYMBOL(ldc_disconnect);
1406
int ldc_state(struct ldc_channel *lp)
1410
EXPORT_SYMBOL(ldc_state);
1412
static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1414
struct ldc_packet *p;
1415
unsigned long new_tail;
1418
if (size > LDC_PACKET_SIZE)
1421
p = data_get_tx_packet(lp, &new_tail);
1425
memcpy(p, buf, size);
1427
err = send_tx_packet(lp, p, new_tail);
1434
static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1436
struct ldc_packet *p;
1437
unsigned long hv_err, new;
1440
if (size < LDC_PACKET_SIZE)
1443
hv_err = sun4v_ldc_rx_get_state(lp->id,
1448
return ldc_abort(lp);
1450
if (lp->chan_state == LDC_CHANNEL_DOWN ||
1451
lp->chan_state == LDC_CHANNEL_RESETTING)
1454
if (lp->rx_head == lp->rx_tail)
1457
p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1458
memcpy(buf, p, LDC_PACKET_SIZE);
1460
new = rx_advance(lp, lp->rx_head);
1463
err = __set_rx_head(lp, new);
1467
err = LDC_PACKET_SIZE;
1472
static const struct ldc_mode_ops raw_ops = {
1477
static int write_nonraw(struct ldc_channel *lp, const void *buf,
1480
unsigned long hv_err, tail;
1481
unsigned int copied;
1485
hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1487
if (unlikely(hv_err))
1490
if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1491
return ldc_abort(lp);
1493
if (!tx_has_space_for(lp, size))
1499
while (copied < size) {
1500
struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1501
u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1507
p->stype = LDC_INFO;
1510
data_len = size - copied;
1511
if (data_len > lp->mss)
1514
BUG_ON(data_len > LDC_LEN);
1516
p->env = (data_len |
1517
(copied == 0 ? LDC_START : 0) |
1518
(data_len == size - copied ? LDC_STOP : 0));
1522
ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1529
memcpy(data, buf, data_len);
1533
tail = tx_advance(lp, tail);
1536
err = set_tx_tail(lp, tail);
1545
static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1546
struct ldc_packet *first_frag)
1551
lp->rcv_nxt = first_frag->seqid - 1;
1553
err = send_data_nack(lp, p);
1557
err = __set_rx_head(lp, lp->rx_tail);
1559
return ldc_abort(lp);
1564
static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1566
if (p->stype & LDC_ACK) {
1567
int err = process_data_ack(lp, p);
1571
if (p->stype & LDC_NACK)
1572
return ldc_abort(lp);
1577
static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1579
unsigned long dummy;
1582
ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1583
cur_head, lp->rx_head, lp->rx_tail);
1584
while (limit-- > 0) {
1585
unsigned long hv_err;
1587
hv_err = sun4v_ldc_rx_get_state(lp->id,
1592
return ldc_abort(lp);
1594
if (lp->chan_state == LDC_CHANNEL_DOWN ||
1595
lp->chan_state == LDC_CHANNEL_RESETTING)
1598
if (cur_head != lp->rx_tail) {
1599
ldcdbg(DATA, "DATA WAIT DONE "
1600
"head[%lx] tail[%lx] chan_state[%lx]\n",
1601
dummy, lp->rx_tail, lp->chan_state);
1610
static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1612
int err = __set_rx_head(lp, head);
1615
return ldc_abort(lp);
1621
static void send_data_ack(struct ldc_channel *lp)
1623
unsigned long new_tail;
1624
struct ldc_packet *p;
1626
p = data_get_tx_packet(lp, &new_tail);
1630
memset(p, 0, sizeof(*p));
1634
p->seqid = lp->snd_nxt + 1;
1635
p->u.r.ackid = lp->rcv_nxt;
1637
err = send_tx_packet(lp, p, new_tail);
1643
static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1645
struct ldc_packet *first_frag;
1646
unsigned long hv_err, new;
1649
hv_err = sun4v_ldc_rx_get_state(lp->id,
1654
return ldc_abort(lp);
1656
if (lp->chan_state == LDC_CHANNEL_DOWN ||
1657
lp->chan_state == LDC_CHANNEL_RESETTING)
1660
if (lp->rx_head == lp->rx_tail)
1667
struct ldc_packet *p;
1670
BUG_ON(new == lp->rx_tail);
1671
p = lp->rx_base + (new / LDC_PACKET_SIZE);
1673
ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1683
if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1684
err = rx_bad_seq(lp, p, first_frag);
1689
if (p->type & LDC_CTRL) {
1690
err = process_control_frame(lp, p);
1696
lp->rcv_nxt = p->seqid;
1698
if (!(p->type & LDC_DATA)) {
1699
new = rx_advance(lp, new);
1702
if (p->stype & (LDC_ACK | LDC_NACK)) {
1703
err = data_ack_nack(lp, p);
1707
if (!(p->stype & LDC_INFO)) {
1708
new = rx_advance(lp, new);
1709
err = rx_set_head(lp, new);
1715
pkt_len = p->env & LDC_LEN;
1717
/* Every initial packet starts with the START bit set.
1719
* Singleton packets will have both START+STOP set.
1721
* Fragments will have START set in the first frame, STOP
1722
* set in the last frame, and neither bit set in middle
1723
* frames of the packet.
1725
* Therefore if we are at the beginning of a packet and
1726
* we don't see START, or we are in the middle of a fragmented
1727
* packet and do see START, we are unsynchronized and should
1728
* flush the RX queue.
1730
if ((first_frag == NULL && !(p->env & LDC_START)) ||
1731
(first_frag != NULL && (p->env & LDC_START))) {
1733
new = rx_advance(lp, new);
1735
err = rx_set_head(lp, new);
1745
if (pkt_len > size - copied) {
1746
/* User didn't give us a big enough buffer,
1747
* what to do? This is a pretty serious error.
1749
* Since we haven't updated the RX ring head to
1750
* consume any of the packets, signal the error
1751
* to the user and just leave the RX ring alone.
1753
* This seems the best behavior because this allows
1754
* a user of the LDC layer to start with a small
1755
* RX buffer for ldc_read() calls and use -EMSGSIZE
1756
* as a cue to enlarge its read buffer.
1762
/* Ok, we are gonna eat this one. */
1763
new = rx_advance(lp, new);
1766
(lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1767
p->u.u_data : p->u.r.r_data), pkt_len);
1771
if (p->env & LDC_STOP)
1775
if (new == lp->rx_tail) {
1776
err = rx_data_wait(lp, new);
1783
err = rx_set_head(lp, new);
1785
if (err && first_frag)
1786
lp->rcv_nxt = first_frag->seqid - 1;
1790
if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1797
static const struct ldc_mode_ops nonraw_ops = {
1798
.write = write_nonraw,
1799
.read = read_nonraw,
1802
static int write_stream(struct ldc_channel *lp, const void *buf,
1805
if (size > lp->cfg.mtu)
1807
return write_nonraw(lp, buf, size);
1810
static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1812
if (!lp->mssbuf_len) {
1813
int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1817
lp->mssbuf_len = err;
1821
if (size > lp->mssbuf_len)
1822
size = lp->mssbuf_len;
1823
memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1825
lp->mssbuf_off += size;
1826
lp->mssbuf_len -= size;
1831
static const struct ldc_mode_ops stream_ops = {
1832
.write = write_stream,
1833
.read = read_stream,
1836
/* Exported entry point for writing @size bytes from @buf to channel @lp.
 * With lp->lock held (taken via spin_lock_irqsave), the handshake state
 * is compared against LDC_HS_COMPLETE and the request is dispatched to
 * the mode-specific hook lp->mops->write(), whose result is stored in
 * err.
 */
int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1838
unsigned long flags;
1847
spin_lock_irqsave(&lp->lock, flags);
1849
/* The channel must have finished its handshake. */
if (lp->hs_state != LDC_HS_COMPLETE)
1852
err = lp->mops->write(lp, buf, size);
1854
spin_unlock_irqrestore(&lp->lock, flags);
1858
EXPORT_SYMBOL(ldc_write);
1860
/* Exported entry point for reading up to @size bytes from channel @lp
 * into @buf.  With lp->lock held (taken via spin_lock_irqsave), the
 * handshake state is compared against LDC_HS_COMPLETE and the request
 * is dispatched to the mode-specific hook lp->mops->read(), whose
 * result is stored in err.
 */
int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1862
unsigned long flags;
1871
spin_lock_irqsave(&lp->lock, flags);
1873
/* The channel must have finished its handshake. */
if (lp->hs_state != LDC_HS_COMPLETE)
1876
err = lp->mops->read(lp, buf, size);
1878
spin_unlock_irqrestore(&lp->lock, flags);
1882
EXPORT_SYMBOL(ldc_read);
1884
/* Reserve @npages contiguous entries in the arena bitmap of @iommu.
 * The search is bounded by arena->limit and begins at arena->hint;
 * bitmap_find_next_zero_area() locates a free run starting at n, which
 * is then marked busy in arena->map with bitmap_set().
 * NOTE(review): the `pass` test suggests the scan is retried once
 * before giving up -- confirm against the retry path.
 */
static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1886
struct iommu_arena *arena = &iommu->arena;
1887
unsigned long n, start, end, limit;
1890
limit = arena->limit;
1891
start = arena->hint;
1895
/* Find npages consecutive clear bits at or after start (no alignment mask). */
n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
1897
/* The candidate run would extend to or past the end of the arena. */
if (unlikely(end >= limit)) {
1898
if (likely(pass < 1)) {
1904
/* Scanned the whole thing, give up. */
1908
/* Claim the run that was found. */
bitmap_set(arena->map, n, npages);
1915
/* The page-size code occupies the top four bits (63:60) of a cookie:
 * COOKIE_PGSZ_CODE is the mask for that field and
 * COOKIE_PGSZ_CODE_SHIFT is its bit position.
 */
#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
1916
#define COOKIE_PGSZ_CODE_SHIFT 60ULL
1918
/* Map the compile-time PAGE_SIZE onto a page-size code.  The switch
 * distinguishes 8K, 64K, 512K, 4M, 32M and 256M pages.
 */
static u64 pagesize_code(void)
1920
switch (PAGE_SIZE) {
1922
/* 8 KiB */
case (8ULL * 1024ULL):
1924
/* 64 KiB */
case (64ULL * 1024ULL):
1926
/* 512 KiB */
case (512ULL * 1024ULL):
1928
/* 4 MiB */
case (4ULL * 1024ULL * 1024ULL):
1930
/* 32 MiB */
case (32ULL * 1024ULL * 1024ULL):
1932
/* 256 MiB */
case (256ULL * 1024ULL * 1024ULL):
1937
/* Build a cookie value by OR-ing together the page-size code, placed
 * at COOKIE_PGSZ_CODE_SHIFT, and the table index, shifted left by
 * PAGE_SHIFT.
 * NOTE(review): @page_offset is presumably OR-ed into the low bits as
 * the final operand -- confirm.
 */
static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1939
return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1940
(index << PAGE_SHIFT) |
1944
/* Recover the table index encoded in @cookie.  The page-size code is
 * read from the top bits and then masked off; *shift is set to
 * szcode * 3 and the return value is the remaining cookie shifted
 * right by 13 + szcode * 3 bits.
 */
static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1946
u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1948
/* Strip the page-size code so only index and offset bits remain. */
cookie &= ~COOKIE_PGSZ_CODE;
1950
*shift = szcode * 3;
1952
return (cookie >> (13ULL + (szcode * 3ULL)));
1955
/* Allocate @npages arena slots through arena_alloc() and return a
 * pointer to the first matching entry of iommu->page_table.  A negative
 * value from arena_alloc() is treated as the failure case.
 */
static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1956
unsigned long npages)
1960
entry = arena_alloc(iommu, npages);
1961
if (unlikely(entry < 0))
1964
return iommu->page_table + entry;
1967
/* Translate LDC_MAP_* permission flags in @map_perm into map table
 * entry bits.  The result starts from pagesize_code() and then gains:
 *   LDC_MAP_SHADOW: LDC_MTE_COPY_R / LDC_MTE_COPY_W
 *   LDC_MAP_DIRECT: LDC_MTE_READ / LDC_MTE_WRITE / LDC_MTE_EXEC
 *   LDC_MAP_IO:     LDC_MTE_IOMMU_R / LDC_MTE_IOMMU_W
 * according to which of LDC_MAP_R, LDC_MAP_W and LDC_MAP_X are set.
 */
static u64 perm_to_mte(unsigned int map_perm)
1971
mte_base = pagesize_code();
1973
/* Shadow-copy access. */
if (map_perm & LDC_MAP_SHADOW) {
1974
if (map_perm & LDC_MAP_R)
1975
mte_base |= LDC_MTE_COPY_R;
1976
if (map_perm & LDC_MAP_W)
1977
mte_base |= LDC_MTE_COPY_W;
1979
/* Direct-mapped access; the only mode that honours LDC_MAP_X. */
if (map_perm & LDC_MAP_DIRECT) {
1980
if (map_perm & LDC_MAP_R)
1981
mte_base |= LDC_MTE_READ;
1982
if (map_perm & LDC_MAP_W)
1983
mte_base |= LDC_MTE_WRITE;
1984
if (map_perm & LDC_MAP_X)
1985
mte_base |= LDC_MTE_EXEC;
1987
/* IOMMU access. */
if (map_perm & LDC_MAP_IO) {
1988
if (map_perm & LDC_MAP_R)
1989
mte_base |= LDC_MTE_IOMMU_R;
1990
if (map_perm & LDC_MAP_W)
1991
mte_base |= LDC_MTE_IOMMU_W;
1997
/* Page accounting for the byte range of @len bytes starting at @base.
 * In the visible step, `new` is the start of the page that follows
 * @base and @len is reduced by the bytes consumed up to that boundary.
 * NOTE(review): presumably this is repeated and the number of steps is
 * returned as the page count -- confirm.
 */
static int pages_in_region(unsigned long base, long len)
2002
unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2004
len -= (new - base);
2012
/* Working state handed to fill_cookies() while a mapping is built. */
struct cookie_state {
2013
/* Map table whose entries are filled in, indexed by pte_idx. */
struct ldc_mtable_entry *page_table;
2014
/* Caller-supplied output array of translation cookies. */
struct ldc_trans_cookie *cookies;
2021
/* Populate map table entries and translation cookies for the region
 * at physical address @pa, offset @off, length @len.
 *
 * For a page, the map table entry at sp->pte_idx is written as
 * sp->mte_base | pa, and a cookie is built from the entry index, the
 * page-size code and the offset.  If that cookie equals
 * sp->prev_cookie (the address just past the previous cookie), the
 * previous cookie is lengthened; the other visible stores fill in a
 * new cookie at sp->cookies[sp->nc].
 */
static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2022
unsigned long off, unsigned long len)
2025
unsigned long tlen, new = pa + PAGE_SIZE;
2028
sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2032
/* Bytes from off to the end of this page. */
tlen = PAGE_SIZE - off;
2036
this_cookie = make_cookie(sp->pte_idx,
2037
pagesize_code(), off);
2041
/* Contiguous with the previous cookie: extend it in place. */
if (this_cookie == sp->prev_cookie) {
2042
sp->cookies[sp->nc - 1].cookie_size += tlen;
2044
sp->cookies[sp->nc].cookie_addr = this_cookie;
2045
sp->cookies[sp->nc].cookie_size = tlen;
2048
/* Remember where a contiguous successor cookie would start. */
sp->prev_cookie = this_cookie + tlen;
2057
/* Page count for a single scatterlist entry.  The entry's offset and
 * length are tested for 8-byte alignment; the count itself comes from
 * pages_in_region() over the entry's physical byte range.
 */
static int sg_count_one(struct scatterlist *sg)
2059
unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2060
long len = sg->length;
2062
/* Either offset or length has one of its low three bits set. */
if ((sg->offset | len) & (8UL - 1))
2065
return pages_in_region(base + sg->offset, len);
2068
/* Walk the @num_sg scatterlist entries starting at @sg and run
 * sg_count_one() on each.
 * NOTE(review): presumably the per-entry counts are summed and a
 * negative per-entry result is returned as an error -- confirm.
 */
static int sg_count_pages(struct scatterlist *sg, int num_sg)
2074
for (i = 0; i < num_sg; i++) {
2075
int err = sg_count_one(sg + i);
2084
/* Exported: map a scatterlist for export over channel @lp, writing the
 * resulting translation cookies into @cookies.
 *
 * Visible steps: @map_perm is checked against LDC_MAP_ALL, the page
 * count is obtained from sg_count_pages(), map table entries are
 * allocated under iommu->lock, and a cookie_state is initialised and
 * passed to fill_cookies() once per scatterlist entry.
 */
int ldc_map_sg(struct ldc_channel *lp,
2085
struct scatterlist *sg, int num_sg,
2086
struct ldc_trans_cookie *cookies, int ncookies,
2087
unsigned int map_perm)
2089
unsigned long i, npages, flags;
2090
struct ldc_mtable_entry *base;
2091
struct cookie_state state;
2092
struct ldc_iommu *iommu;
2095
/* Permission bits outside LDC_MAP_ALL are present. */
if (map_perm & ~LDC_MAP_ALL)
2098
err = sg_count_pages(sg, num_sg);
2108
/* The arena allocation is done with the iommu lock held. */
spin_lock_irqsave(&iommu->lock, flags);
2109
base = alloc_npages(iommu, npages);
2110
spin_unlock_irqrestore(&iommu->lock, flags);
2115
state.page_table = iommu->page_table;
2116
state.cookies = cookies;
2117
state.mte_base = perm_to_mte(map_perm);
2118
/* All-ones start value for the contiguity check in fill_cookies(). */
state.prev_cookie = ~(u64)0;
2119
/* Index of the first allocated entry within the map table. */
state.pte_idx = (base - iommu->page_table);
2122
for (i = 0; i < num_sg; i++)
2123
fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2124
sg[i].offset, sg[i].length);
2128
EXPORT_SYMBOL(ldc_map_sg);
2130
/* Exported: map one buffer (@buf, @len) for export over channel @lp,
 * writing the translation cookie into @cookies.
 *
 * Visible steps: @map_perm is checked against LDC_MAP_ALL and
 * @ncookies must be at least 1; the physical address and length are
 * tested for 8-byte alignment; pages_in_region() gives the page count;
 * map table entries are allocated under iommu->lock; fill_cookies() is
 * called once for the whole buffer.  BUG_ON() asserts that exactly one
 * cookie was produced.
 */
int ldc_map_single(struct ldc_channel *lp,
2131
void *buf, unsigned int len,
2132
struct ldc_trans_cookie *cookies, int ncookies,
2133
unsigned int map_perm)
2135
unsigned long npages, pa, flags;
2136
struct ldc_mtable_entry *base;
2137
struct cookie_state state;
2138
struct ldc_iommu *iommu;
2140
if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2144
/* Either pa or len has one of its low three bits set. */
if ((pa | len) & (8UL - 1))
2147
npages = pages_in_region(pa, len);
2151
/* The arena allocation is done with the iommu lock held. */
spin_lock_irqsave(&iommu->lock, flags);
2152
base = alloc_npages(iommu, npages);
2153
spin_unlock_irqrestore(&iommu->lock, flags);
2158
state.page_table = iommu->page_table;
2159
state.cookies = cookies;
2160
state.mte_base = perm_to_mte(map_perm);
2161
/* All-ones start value for the contiguity check in fill_cookies(). */
state.prev_cookie = ~(u64)0;
2162
/* Index of the first allocated entry within the map table. */
state.pte_idx = (base - iommu->page_table);
2164
/* Split pa into its page base and the offset within that page. */
fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2165
BUG_ON(state.nc != 1);
2169
EXPORT_SYMBOL(ldc_map_single);
2171
/* Release the map table entries behind one cookie.
 *
 * The page count is derived from the cookie's in-page offset plus
 * @size, rounded up to whole pages.  cookie_to_index() supplies the
 * first table index and the per-page shift.  For each page,
 * sun4v_ldc_revoke() is invoked for channel @id and the matching bit
 * in arena->map is cleared with the non-atomic __clear_bit().
 * BUG_ON() fires if the index range exceeds arena->limit.
 */
static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2172
u64 cookie, u64 size)
2174
struct iommu_arena *arena = &iommu->arena;
2175
unsigned long i, shift, index, npages;
2176
struct ldc_mtable_entry *base;
2178
/* Pages spanned by [offset-in-page, offset-in-page + size). */
npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2179
index = cookie_to_index(cookie, &shift);
2180
base = iommu->page_table + index;
2182
BUG_ON(index > arena->limit ||
2183
(index + npages) > arena->limit);
2185
for (i = 0; i < npages; i++) {
2187
sun4v_ldc_revoke(id, cookie + (i << shift),
2190
__clear_bit(index + i, arena->map);
2194
/* Exported: tear down the mappings described by the first ncookies
 * entries of @cookies.  With iommu->lock held, each cookie's address
 * and size are passed to free_npages() together with the channel id.
 */
void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2197
struct ldc_iommu *iommu = &lp->iommu;
2198
unsigned long flags;
2201
spin_lock_irqsave(&iommu->lock, flags);
2202
for (i = 0; i < ncookies; i++) {
2203
u64 addr = cookies[i].cookie_addr;
2204
u64 size = cookies[i].cookie_size;
2206
free_npages(lp->id, iommu, addr, size);
2208
spin_unlock_irqrestore(&iommu->lock, flags);
2210
EXPORT_SYMBOL(ldc_unmap);
2212
/* Exported: copy @len bytes between @buf and the memory described by
 * @cookies, beginning @offset bytes into the cookie list.
 *
 * @copy_dir must be LDC_COPY_IN or LDC_COPY_OUT.  The buffer address
 * ra, @len and @offset are tested for 8-byte alignment, and the
 * channel is tested for a completed handshake with LDC_FLAG_RESET
 * clear; each failed check logs with printk(KERN_ERR).  The copy walks
 * the cookies, first consuming @offset, and issues sun4v_ldc_copy()
 * calls that may transfer fewer bytes than requested (actual_len).
 * The return value is orig_len - len.
 */
int ldc_copy(struct ldc_channel *lp, int copy_dir,
2213
void *buf, unsigned int len, unsigned long offset,
2214
struct ldc_trans_cookie *cookies, int ncookies)
2216
unsigned int orig_len;
2220
if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2221
printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2227
/* One of ra, len or offset has one of its low three bits set. */
if ((ra | len | offset) & (8UL - 1)) {
2228
printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2229
"ra[%lx] len[%x] offset[%lx]\n",
2230
lp->id, ra, len, offset);
2234
/* Handshake not complete, or the reset flag is set. */
if (lp->hs_state != LDC_HS_COMPLETE ||
2235
(lp->flags & LDC_FLAG_RESET)) {
2236
printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2237
"flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2242
for (i = 0; i < ncookies; i++) {
2243
unsigned long cookie_raddr = cookies[i].cookie_addr;
2244
unsigned long this_len = cookies[i].cookie_size;
2245
unsigned long actual_len;
2247
/* Consume the caller's starting offset from this cookie. */
if (unlikely(offset)) {
2248
unsigned long this_off = offset;
2250
/* Never skip more than this cookie holds. */
if (this_off > this_len)
2251
this_off = this_len;
2254
this_len -= this_off;
2257
cookie_raddr += this_off;
2264
unsigned long hv_err;
2266
hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2268
this_len, &actual_len);
2269
if (unlikely(hv_err)) {
2270
printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2273
/* Re-test the link state after a failed copy call. */
if (lp->hs_state != LDC_HS_COMPLETE ||
2274
(lp->flags & LDC_FLAG_RESET))
2280
cookie_raddr += actual_len;
2283
/* The whole remainder of this cookie was transferred. */
if (actual_len == this_len)
2286
this_len -= actual_len;
2293
/* It is caller policy what to do about short copies.
2294
* For example, a networking driver can declare the
2295
* packet a runt and drop it.
2298
return orig_len - len;
2300
EXPORT_SYMBOL(ldc_copy);
2302
/* Exported: allocate a zeroed buffer of @len bytes with
 * kzalloc(GFP_KERNEL) and map it through ldc_map_single() using the
 * caller's @cookies and *@ncookies.
 *
 * Visible error returns are ERR_PTR values: -EINVAL when @len is not a
 * multiple of 8, -ENOMEM, and the error code from ldc_map_single().
 */
void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2303
struct ldc_trans_cookie *cookies, int *ncookies,
2304
unsigned int map_perm)
2309
/* len must be a multiple of 8 bytes. */
if (len & (8UL - 1))
2310
return ERR_PTR(-EINVAL);
2312
buf = kzalloc(len, GFP_KERNEL);
2314
return ERR_PTR(-ENOMEM);
2316
err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2319
return ERR_PTR(err);
2325
EXPORT_SYMBOL(ldc_alloc_exp_dring);
2327
/* Exported: counterpart of ldc_alloc_exp_dring().  The visible code
 * unmaps the ring's cookies with ldc_unmap().
 * NOTE(review): presumably @buf is freed afterwards -- confirm.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2328
struct ldc_trans_cookie *cookies, int ncookies)
2330
ldc_unmap(lp, cookies, ncookies);
2333
EXPORT_SYMBOL(ldc_free_exp_dring);
2335
/* Driver initialisation, registered with core_initcall().
 *
 * Visible steps: the "platform" node is looked up in the machine
 * description and its "domaining-enabled" property is read; the LDOM
 * hypervisor API group is registered via sun4v_hvapi_register(), with
 * a log message on failure; the driver version banner is printed; and
 * either "Domaining disabled." is logged or ldom_domaining_enabled is
 * set to 1.
 */
static int __init ldc_init(void)
2337
unsigned long major, minor;
2338
struct mdesc_handle *hp;
2347
mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2349
/* No "platform" node was found. */
if (mp == MDESC_NODE_NULL)
2352
v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2358
/* A non-zero return from the registration call is the failure case. */
if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2359
printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2363
printk(KERN_INFO "%s", version);
2366
printk(KERN_INFO PFX "Domaining disabled.\n");
2369
ldom_domaining_enabled = 1;
2377
core_initcall(ldc_init);