path: root/VNFs/DPPD-PROX/lconf.c
blob: 935bac5d3ad9cfcaca19c8404dc06f7cb3e3c893
/*
// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#include "prox_malloc.h"
#include "lconf.h"
#include "rx_pkt.h"
#include "tx_pkt.h"
#include "log.h"
#include "quit.h"
#include "prox_cfg.h"

struct lcore_cfg *lcore_cfg;
/* only used at initialization time */
struct lcore_cfg  lcore_cfg_init[RTE_MAX_LCORE];

static int core_targ_next_from(struct lcore_cfg **lconf, struct task_args **targ, struct lcore_cfg *lcore_cfg, const int with_master)
{
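	/* Iterate over (core, task) pairs in the given lcore_cfg array.
	 * On the first call both *lconf and *targ must be NULL; the iterator
	 * then yields the first task of the first enabled core (optionally
	 * including the master core, depending on with_master). Subsequent
	 * calls advance task by task within a core and then move on to the
	 * next core via prox_core_next(). Returns 0 while a next task exists,
	 * -1 once all cores have been visited.
	 */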
	uint32_t lcore_id, task_id;

	if (*lconf && *targ) {
		lcore_id = *lconf - lcore_cfg;
		task_id = *targ - lcore_cfg[lcore_id].targs;

		if (task_id + 1 < lcore_cfg[lcore_id].n_tasks_all) {
			*targ = &lcore_cfg[lcore_id].targs[task_id + 1];
			return 0;
		} else {
			if (prox_core_next(&lcore_id, with_master))
				return -1;
			*lconf = &lcore_cfg[lcore_id];
			*targ = &lcore_cfg[lcore_id].targs[0];
			return 0;
		}
	} else {
		lcore_id = -1;

		if (prox_core_next(&lcore_id, with_master))
			return -1;
		*lconf = &lcore_cfg[lcore_id];
		*targ = &lcore_cfg[lcore_id].targs[0];
		return 0;
	}
}

int core_targ_next(struct lcore_cfg **lconf, struct task_args **targ, const int with_master)
{
	return core_targ_next_from(lconf, targ, lcore_cfg, with_master);
}

int core_targ_next_early(struct lcore_cfg **lconf, struct task_args **targ, const int with_master)
{
	return core_targ_next_from(lconf, targ, lcore_cfg_init, with_master);
}
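
/* Typical usage of the iterators above (a sketch, not code from this file):
 *
 *	struct lcore_cfg *lconf = NULL;
 *	struct task_args *targ = NULL;
 *
 *	while (core_targ_next(&lconf, &targ, 0) == 0) {
 *		// initialize or inspect targ / lconf here
 *	}
 *
 * core_targ_next_early() follows the same pattern but walks lcore_cfg_init,
 * the static array that is valid before lcore_cfg_alloc_hp() has run.
 */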

struct task_args *core_targ_get(uint32_t lcore_id, uint32_t task_id)
{
	return &lcore_cfg[lcore_id].targs[task_id];
}

void lcore_cfg_alloc_hp(void)
{
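	/* Move the core control structures from the static lcore_cfg_init[]
	 * array into memory allocated through prox_zmalloc() on the calling
	 * core's socket (presumably hugepage-backed, as the _hp suffix
	 * suggests). From here on lcore_cfg is the authoritative array.
	 */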
	size_t mem_size = RTE_MAX_LCORE * sizeof(struct lcore_cfg);

	lcore_cfg = prox_zmalloc(mem_size, rte_socket_id());
	PROX_PANIC(lcore_cfg == NULL, "Could not allocate memory for core control structures\n");
	rte_memcpy(lcore_cfg, lcore_cfg_init, mem_size);

	/* get thread ID for master core */
	lcore_cfg[rte_lcore_id()].thread_id = pthread_self();
}

int lconf_run(__attribute__((unused)) void *dummy)
{
	uint32_t lcore_id = rte_lcore_id();
	struct lcore_cfg *lconf = &lcore_cfg[lcore_id];

	/* get thread ID, and set cancellation type to asynchronous */
	lconf->thread_id = pthread_self();
	int ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
	if (ret != 0)
		plog_warn("pthread_setcanceltype() failed on core %u: %i\n", lcore_id, ret);

	plog_info("Entering main loop on core %u\n", lcore_id);
	return lconf->thread_x(lconf);
}
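
/* lconf_run() has the int (*)(void *) signature expected by the DPDK launch
 * API. A minimal launch sketch (the real call site lives elsewhere in PROX
 * and may differ):
 *
 *	rte_eal_mp_remote_launch(lconf_run, NULL, SKIP_MASTER);
 *
 * Each worker core then stays inside lconf->thread_x(), which is expected to
 * poll its tasks and to call lconf_do_flags() whenever a request is pending.
 */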

static void msg_stop(struct lcore_cfg *lconf)
{
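	/* Stop request: task_id == -1 stops every running task on this core;
	 * otherwise only the requested task is stopped and tasks_run[] is
	 * compacted to stay contiguous. The per-task stop() hook runs for each
	 * stopped task and stop_last() runs once no task is left running.
	 */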
	int idx = -1;
	struct task_base *t = NULL;

	if (lconf->msg.task_id == -1) {
		for (int i = 0; i < lconf->n_tasks_all; ++i) {
			if (lconf->task_is_running[i]) {
				lconf->task_is_running[i] = 0;
				t = lconf->tasks_all[i];
				if (t->aux->stop)
					t->aux->stop(t);
			}
		}
		lconf->n_tasks_run = 0;

		if (t && t->aux->stop_last)
			t->aux->stop_last(t);
	}
	else {
		for (int i = 0; i < lconf->n_tasks_run; ++i) {
			if (lconf_get_task_id(lconf, lconf->tasks_run[i]) == lconf->msg.task_id) {
				idx = i;
			}
			else if (idx != -1) {
				lconf->tasks_run[idx] = lconf->tasks_run[i];

				idx++;
			}
		}
		lconf->task_is_running[lconf->msg.task_id] = 0;

		t = lconf->tasks_all[lconf->msg.task_id];
		if (t->aux->stop)
			t->aux->stop(t);
		lconf->n_tasks_run--;
		if (lconf->n_tasks_run == 0 && t->aux->stop_last)
			t->aux->stop_last(t);
	}
}

static void msg_start(struct lcore_cfg *lconf)
{
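	/* Start request: task_id == -1 (re)starts every task on this core;
	 * otherwise the single task is inserted back into tasks_run[] at its
	 * position in task-id order. start_first() runs when the core goes
	 * from zero running tasks to one, start() runs for every started task.
	 */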
	int idx = 1;
	struct task_base *t = NULL;

	if (lconf->msg.task_id == -1) {
		for (int i = 0; i < lconf->n_tasks_all; ++i) {
			t = lconf->tasks_run[i] = lconf->tasks_all[i];
			lconf->task_is_running[i] = 1;
			if (lconf->n_tasks_run == 0 && t->aux->start_first) {
				t->aux->start_first(t);
				lconf->n_tasks_run = 1;
			}
			if (t->aux->start)
				t->aux->start(t);
		}
		lconf->n_tasks_run = lconf->n_tasks_all;
	}
	else if (lconf->n_tasks_run == 0) {
		t = lconf->tasks_run[0] = lconf->tasks_all[lconf->msg.task_id];
		lconf->n_tasks_run = 1;
		lconf->task_is_running[lconf->msg.task_id] = 1;

		if (t->aux->start_first)
			t->aux->start_first(t);
		if (t->aux->start)
			t->aux->start(t);
	}
	else {
		for (int i = lconf->n_tasks_run - 1; i >= 0; --i) {
			idx = lconf_get_task_id(lconf, lconf->tasks_run[i]);
			if (idx == lconf->msg.task_id) {
				break;
			}
			else if (idx > lconf->msg.task_id) {
				lconf->tasks_run[i + 1] = lconf->tasks_run[i];
				if (i == 0) {
					lconf->tasks_run[i] = lconf->tasks_all[lconf->msg.task_id];
					lconf->n_tasks_run++;
					break;
				}
			}
			else {
				lconf->tasks_run[i + 1] = lconf->tasks_all[lconf->msg.task_id];
				lconf->n_tasks_run++;
				break;
			}
		}
		lconf->task_is_running[lconf->msg.task_id] = 1;

		if (lconf->tasks_all[lconf->msg.task_id]->aux->start)
			lconf->tasks_all[lconf->msg.task_id]->aux->start(lconf->tasks_all[lconf->msg.task_id]);
	}
}

int lconf_do_flags(struct lcore_cfg *lconf)
{
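	/* Apply the request stored in lconf->msg to this core's tasks. This is
	 * meant to run on the worker thread itself, which avoids the need for
	 * extra locking when the rx/tx handlers are swapped. The original tx
	 * handler is saved in aux->tx_pkt_orig (for tx_pkt_l3 tasks the l2
	 * handler is saved instead) so that the *_STOP messages can restore
	 * it. Returns -1 for START/STOP, signalling the caller that the set of
	 * running tasks changed, and 0 otherwise.
	 */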
	struct task_base *t;
	int ret = 0;

	if ((lconf->msg.type == LCONF_MSG_TRACE) && (lconf->tasks_all[lconf->msg.task_id]->tx_pkt == tx_pkt_drop_all)) {
		/* We have been asked to trace this task's packets, which
		 * normally means dumping both RX and TX packets. This task
		 * never transmits anything (tx_pkt_drop_all), so fall back to
		 * DUMP_RX; otherwise each received packet would also be
		 * printed as TX[255] (i.e. dropped).
		 */
		lconf->msg.type = LCONF_MSG_DUMP_RX;
	}

	switch (lconf->msg.type) {
	case LCONF_MSG_STOP:
		msg_stop(lconf);
		ret = -1;
		break;
	case LCONF_MSG_START:
		msg_start(lconf);
		ret = -1;
		break;
	case LCONF_MSG_DUMP_RX:
	case LCONF_MSG_DUMP_TX:
	case LCONF_MSG_DUMP:
		t = lconf->tasks_all[lconf->msg.task_id];

		if (lconf->msg.val) {
			if (lconf->msg.type == LCONF_MSG_DUMP ||
			    lconf->msg.type == LCONF_MSG_DUMP_RX) {
				t->aux->task_rt_dump.n_print_rx = lconf->msg.val;

				task_base_add_rx_pkt_function(t, rx_pkt_dump);
			}

			if (lconf->msg.type == LCONF_MSG_DUMP ||
			    lconf->msg.type == LCONF_MSG_DUMP_TX) {
				t->aux->task_rt_dump.n_print_tx = lconf->msg.val;
				if (t->tx_pkt == tx_pkt_l3) {
					if (t->aux->tx_pkt_orig)
						t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = t->aux->tx_pkt_l2;
					t->aux->tx_pkt_l2 = tx_pkt_dump;
				} else {
					if (t->aux->tx_pkt_orig)
						t->tx_pkt = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = t->tx_pkt;
					t->tx_pkt = tx_pkt_dump;
				}
			}
		}
		break;
	case LCONF_MSG_TRACE:
		t = lconf->tasks_all[lconf->msg.task_id];

		if (lconf->msg.val) {
			t->aux->task_rt_dump.n_trace = lconf->msg.val;

			if (task_base_get_original_rx_pkt_function(t) != rx_pkt_dummy) {
				task_base_add_rx_pkt_function(t, rx_pkt_trace);
				if (t->tx_pkt == tx_pkt_l3) {
					if (t->aux->tx_pkt_orig)
						t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = t->aux->tx_pkt_l2;
					t->aux->tx_pkt_l2 = tx_pkt_trace;
				} else {
					if (t->aux->tx_pkt_orig)
						t->tx_pkt = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = t->tx_pkt;
					t->tx_pkt = tx_pkt_trace;
				}
			} else {
				t->aux->task_rt_dump.n_print_tx = lconf->msg.val;
				if (t->tx_pkt == tx_pkt_l3) {
					if (t->aux->tx_pkt_orig)
						t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = t->aux->tx_pkt_l2;
					t->aux->tx_pkt_l2 = tx_pkt_dump;
				} else {
					if (t->aux->tx_pkt_orig)
						t->tx_pkt = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = t->tx_pkt;
					t->tx_pkt = tx_pkt_dump;
				}
			}
		}
		break;
	case LCONF_MSG_RX_DISTR_START:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];
			task_base_add_rx_pkt_function(t, rx_pkt_distr);
			memset(t->aux->rx_bucket, 0, sizeof(t->aux->rx_bucket));
			lconf->flags |= LCONF_FLAG_RX_DISTR_ACTIVE;
		}
		break;
	case LCONF_MSG_TX_DISTR_START:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];

			if (t->tx_pkt == tx_pkt_l3) {
				t->aux->tx_pkt_orig = t->aux->tx_pkt_l2;
				t->aux->tx_pkt_l2 = tx_pkt_distr;
			} else {
				t->aux->tx_pkt_orig = t->tx_pkt;
				t->tx_pkt = tx_pkt_distr;
			}
			memset(t->aux->tx_bucket, 0, sizeof(t->aux->tx_bucket));
			lconf->flags |= LCONF_FLAG_TX_DISTR_ACTIVE;
		}
		break;
	case LCONF_MSG_RX_DISTR_STOP:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];
			task_base_del_rx_pkt_function(t, rx_pkt_distr);
			lconf->flags &= ~LCONF_FLAG_RX_DISTR_ACTIVE;
		}
		break;
	case LCONF_MSG_TX_DISTR_STOP:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];
			if (t->aux->tx_pkt_orig) {
				if (t->tx_pkt == tx_pkt_l3) {
					t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = NULL;
				} else {
					t->tx_pkt = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = NULL;
				}
				lconf->flags &= ~LCONF_FLAG_TX_DISTR_ACTIVE;
			}
		}
		break;
	case LCONF_MSG_RX_DISTR_RESET:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];

			memset(t->aux->rx_bucket, 0, sizeof(t->aux->rx_bucket));
		}
		break;
	case LCONF_MSG_TX_DISTR_RESET:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];

			memset(t->aux->tx_bucket, 0, sizeof(t->aux->tx_bucket));
		}
		break;
	case LCONF_MSG_RX_BW_START:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];
			task_base_add_rx_pkt_function(t, rx_pkt_bw);
			lconf->flags |= LCONF_FLAG_RX_BW_ACTIVE;
		}
		break;
	case LCONF_MSG_RX_BW_STOP:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];
			task_base_del_rx_pkt_function(t, rx_pkt_bw);
			lconf->flags &= ~LCONF_FLAG_RX_BW_ACTIVE;
		}
		break;
	case LCONF_MSG_TX_BW_START:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];

			if (t->tx_pkt == tx_pkt_l3) {
				t->aux->tx_pkt_orig = t->aux->tx_pkt_l2;
				t->aux->tx_pkt_l2 = tx_pkt_bw;
			} else {
				t->aux->tx_pkt_orig = t->tx_pkt;
				t->tx_pkt = tx_pkt_bw;
			}
			lconf->flags |= LCONF_FLAG_TX_BW_ACTIVE;
		}
		break;
	case LCONF_MSG_TX_BW_STOP:
		for (uint8_t task_id = 0; task_id < lconf->n_tasks_all; ++task_id) {
			t = lconf->tasks_all[task_id];
			if (t->aux->tx_pkt_orig) {
				if (t->tx_pkt == tx_pkt_l3) {
					t->aux->tx_pkt_l2 = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = NULL;
				} else {
					t->tx_pkt = t->aux->tx_pkt_orig;
					t->aux->tx_pkt_orig = NULL;
				}
				lconf->flags &= ~LCONF_FLAG_TX_BW_ACTIVE;
			}
		}
		break;
	}

	lconf_unset_req(lconf);
	return ret;
}

int lconf_get_task_id(const struct lcore_cfg *lconf, const struct task_base *task)
{
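	/* Reverse lookup: return the index of this task on the given core,
	 * or -1 if the task does not belong to it.
	 */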
	for (int i = 0; i < lconf->n_tasks_all; ++i) {
		if (lconf->tasks_all[i] == task)
			return i;
	}

	return -1;
}

int lconf_task_is_running(const struct lcore_cfg *lconf, uint8_t task_id)
{
	return lconf->task_is_running[task_id];
}