summaryrefslogtreecommitdiffstats
path: root/kernel/arch/mips/net
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/arch/mips/net')
-rw-r--r--kernel/arch/mips/net/Makefile2
-rw-r--r--kernel/arch/mips/net/bpf_jit.c280
-rw-r--r--kernel/arch/mips/net/bpf_jit.h42
-rw-r--r--kernel/arch/mips/net/bpf_jit_asm.S283
4 files changed, 396 insertions, 211 deletions
diff --git a/kernel/arch/mips/net/Makefile b/kernel/arch/mips/net/Makefile
index ae74b3a91..8c2771401 100644
--- a/kernel/arch/mips/net/Makefile
+++ b/kernel/arch/mips/net/Makefile
@@ -1,3 +1,3 @@
# MIPS networking code
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_asm.o
diff --git a/kernel/arch/mips/net/bpf_jit.c b/kernel/arch/mips/net/bpf_jit.c
index e23fdf2a9..1a8c96035 100644
--- a/kernel/arch/mips/net/bpf_jit.c
+++ b/kernel/arch/mips/net/bpf_jit.c
@@ -20,6 +20,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <asm/asm.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
@@ -28,14 +29,14 @@
#include "bpf_jit.h"
/* ABI
- *
- * s0 1st scratch register
- * s1 2nd scratch register
- * s2 offset register
- * s3 BPF register A
- * s4 BPF register X
- * s5 *skb
- * s6 *scratch memory
+ * r_skb_hl SKB header length
+ * r_data SKB data pointer
+ * r_off Offset
+ * r_A BPF register A
+ * r_X BPF register X
+ * r_skb *skb
+ * r_M *scratch memory
+ * r_skb_len SKB length
*
* On entry (*bpf_func)(*skb, *filter)
* a0 = MIPS_R_A0 = skb;
@@ -63,44 +64,8 @@
* ----------------------------------------------------
*/
-#define RSIZE (sizeof(unsigned long))
#define ptr typeof(unsigned long)
-/* ABI specific return values */
-#ifdef CONFIG_32BIT /* O32 */
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-#define r_err MIPS_R_V1
-#define r_val MIPS_R_V0
-#else /* CONFIG_CPU_LITTLE_ENDIAN */
-#define r_err MIPS_R_V0
-#define r_val MIPS_R_V1
-#endif
-#else /* N64 */
-#define r_err MIPS_R_V0
-#define r_val MIPS_R_V0
-#endif
-
-#define r_ret MIPS_R_V0
-
-/*
- * Use 2 scratch registers to avoid pipeline interlocks.
- * There is no overhead during epilogue and prologue since
- * any of the $s0-$s6 registers will only be preserved if
- * they are going to actually be used.
- */
-#define r_s0 MIPS_R_S0 /* scratch reg 1 */
-#define r_s1 MIPS_R_S1 /* scratch reg 2 */
-#define r_off MIPS_R_S2
-#define r_A MIPS_R_S3
-#define r_X MIPS_R_S4
-#define r_skb MIPS_R_S5
-#define r_M MIPS_R_S6
-#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */
-#define r_tmp MIPS_R_T7 /* No need to preserve this */
-#define r_zero MIPS_R_ZERO
-#define r_sp MIPS_R_SP
-#define r_ra MIPS_R_RA
-
#define SCRATCH_OFF(k) (4 * (k))
/* JIT flags */
@@ -108,13 +73,13 @@
#define SEEN_SREG_SFT (BPF_MEMWORDS + 1)
#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT)
#define SEEN_SREG(x) (SEEN_SREG_BASE << (x))
-#define SEEN_S0 SEEN_SREG(0)
-#define SEEN_S1 SEEN_SREG(1)
#define SEEN_OFF SEEN_SREG(2)
#define SEEN_A SEEN_SREG(3)
#define SEEN_X SEEN_SREG(4)
#define SEEN_SKB SEEN_SREG(5)
#define SEEN_MEM SEEN_SREG(6)
+/* SEEN_SK_DATA also implies skb_hl an skb_len */
+#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0))
/* Arguments used by JIT */
#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */
@@ -556,19 +521,6 @@ static inline u16 align_sp(unsigned int num)
return num;
}
-static bool is_load_to_a(u16 inst)
-{
- switch (inst) {
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- return true;
- default:
- return false;
- }
-}
-
static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
{
int i = 0, real_off = 0;
@@ -577,27 +529,13 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
/* Adjust the stack pointer */
emit_stack_offset(-align_sp(offset), ctx);
- if (ctx->flags & SEEN_CALL) {
- /* Argument save area */
- if (config_enabled(CONFIG_64BIT))
- /* Bottom of current frame */
- real_off = align_sp(offset) - RSIZE;
- else
- /* Top of previous frame */
- real_off = align_sp(offset) + RSIZE;
- emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
- emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);
-
- real_off = 0;
- }
-
tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
/* sflags is essentially a bitmap */
while (tmp_flags) {
if ((sflags >> i) & 0x1) {
emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
ctx);
- real_off += RSIZE;
+ real_off += SZREG;
}
i++;
tmp_flags >>= 1;
@@ -606,13 +544,13 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
/* save return address */
if (ctx->flags & SEEN_CALL) {
emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
- real_off += RSIZE;
+ real_off += SZREG;
}
/* Setup r_M leaving the alignment gap if necessary */
if (ctx->flags & SEEN_MEM) {
- if (real_off % (RSIZE * 2))
- real_off += RSIZE;
+ if (real_off % (SZREG * 2))
+ real_off += SZREG;
emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
}
}
@@ -623,19 +561,6 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx,
int i, real_off = 0;
u32 sflags, tmp_flags;
- if (ctx->flags & SEEN_CALL) {
- if (config_enabled(CONFIG_64BIT))
- /* Bottom of current frame */
- real_off = align_sp(offset) - RSIZE;
- else
- /* Top of previous frame */
- real_off = align_sp(offset) + RSIZE;
- emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
- emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);
-
- real_off = 0;
- }
-
tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
/* sflags is a bitmap */
i = 0;
@@ -643,7 +568,7 @@ static void restore_bpf_jit_regs(struct jit_ctx *ctx,
if ((sflags >> i) & 0x1) {
emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
ctx);
- real_off += RSIZE;
+ real_off += SZREG;
}
i++;
tmp_flags >>= 1;
@@ -663,30 +588,19 @@ static unsigned int get_stack_depth(struct jit_ctx *ctx)
/* How may s* regs do we need to preserved? */
- sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE;
+ sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG;
if (ctx->flags & SEEN_MEM)
sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */
if (ctx->flags & SEEN_CALL)
- /*
- * The JIT code make calls to external functions using 2
- * arguments. Therefore, for o32 we don't need to allocate
- * space because we don't care if the argumetns are lost
- * across calls. We do need however to preserve incoming
- * arguments but the space is already allocated for us by
- * the caller. On the other hand, for n64, we need to allocate
- * this space ourselves. We need to preserve $ra as well.
- */
- sp_off += config_enabled(CONFIG_64BIT) ?
- (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;
+ sp_off += SZREG; /* Space for our ra register */
return sp_off;
}
static void build_prologue(struct jit_ctx *ctx)
{
- u16 first_inst = ctx->skf->insns[0].code;
int sp_off;
/* Calculate the total offset for the stack pointer */
@@ -696,11 +610,24 @@ static void build_prologue(struct jit_ctx *ctx)
if (ctx->flags & SEEN_SKB)
emit_reg_move(r_skb, MIPS_R_A0, ctx);
+ if (ctx->flags & SEEN_SKB_DATA) {
+ /* Load packet length */
+ emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len),
+ ctx);
+ emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len),
+ ctx);
+ /* Load the data pointer */
+ emit_load_ptr(r_skb_data, r_skb,
+ offsetof(struct sk_buff, data), ctx);
+ /* Load the header length */
+ emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx);
+ }
+
if (ctx->flags & SEEN_X)
emit_jit_reg_move(r_X, r_zero, ctx);
/* Do not leak kernel data to userspace */
- if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
+ if (bpf_needs_clear_a(&ctx->skf->insns[0]))
emit_jit_reg_move(r_A, r_zero, ctx);
}
@@ -718,43 +645,17 @@ static void build_epilogue(struct jit_ctx *ctx)
emit_nop(ctx);
}
-static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
-{
- u8 ret;
- int err;
-
- err = skb_copy_bits(skb, offset, &ret, 1);
-
- return (u64)err << 32 | ret;
-}
-
-static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
-{
- u16 ret;
- int err;
-
- err = skb_copy_bits(skb, offset, &ret, 2);
-
- return (u64)err << 32 | ntohs(ret);
-}
-
-static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
-{
- u32 ret;
- int err;
-
- err = skb_copy_bits(skb, offset, &ret, 4);
-
- return (u64)err << 32 | ntohl(ret);
-}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
+ func##_positive)
static int build_body(struct jit_ctx *ctx)
{
- void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
const struct bpf_prog *prog = ctx->skf;
const struct sock_filter *inst;
- unsigned int i, off, load_order, condt;
+ unsigned int i, off, condt;
u32 k, b_off __maybe_unused;
+ u8 (*sk_load_func)(unsigned long *skb, int offset);
for (i = 0; i < prog->len; i++) {
u16 code;
@@ -788,71 +689,46 @@ static int build_body(struct jit_ctx *ctx)
break;
case BPF_LD | BPF_W | BPF_ABS:
/* A <- P[k:4] */
- load_order = 2;
+ sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word);
goto load;
case BPF_LD | BPF_H | BPF_ABS:
/* A <- P[k:2] */
- load_order = 1;
+ sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half);
goto load;
case BPF_LD | BPF_B | BPF_ABS:
/* A <- P[k:1] */
- load_order = 0;
+ sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte);
load:
- /* the interpreter will deal with the negative K */
- if ((int)k < 0)
- return -ENOTSUPP;
-
emit_load_imm(r_off, k, ctx);
load_common:
- /*
- * We may got here from the indirect loads so
- * return if offset is negative.
- */
- emit_slt(r_s0, r_off, r_zero, ctx);
- emit_bcond(MIPS_COND_NE, r_s0, r_zero,
- b_imm(prog->len, ctx), ctx);
- emit_reg_move(r_ret, r_zero, ctx);
+ ctx->flags |= SEEN_CALL | SEEN_OFF |
+ SEEN_SKB | SEEN_A | SEEN_SKB_DATA;
- ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
- SEEN_SKB | SEEN_A;
-
- emit_load_func(r_s0, (ptr)load_func[load_order],
- ctx);
+ emit_load_func(r_s0, (ptr)sk_load_func, ctx);
emit_reg_move(MIPS_R_A0, r_skb, ctx);
emit_jalr(MIPS_R_RA, r_s0, ctx);
/* Load second argument to delay slot */
emit_reg_move(MIPS_R_A1, r_off, ctx);
/* Check the error value */
- if (config_enabled(CONFIG_64BIT)) {
- /* Get error code from the top 32-bits */
- emit_dsrl32(r_s0, r_val, 0, ctx);
- /* Branch to 3 instructions ahead */
- emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
- ctx);
- } else {
- /* Branch to 3 instructions ahead */
- emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
- ctx);
- }
- emit_nop(ctx);
- /* We are good */
- emit_b(b_imm(i + 1, ctx), ctx);
- emit_jit_reg_move(r_A, r_val, ctx);
+ emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx),
+ ctx);
+ /* Load return register on DS for failures */
+ emit_reg_move(r_ret, r_zero, ctx);
/* Return with error */
emit_b(b_imm(prog->len, ctx), ctx);
- emit_reg_move(r_ret, r_zero, ctx);
+ emit_nop(ctx);
break;
case BPF_LD | BPF_W | BPF_IND:
/* A <- P[X + k:4] */
- load_order = 2;
+ sk_load_func = sk_load_word;
goto load_ind;
case BPF_LD | BPF_H | BPF_IND:
/* A <- P[X + k:2] */
- load_order = 1;
+ sk_load_func = sk_load_half;
goto load_ind;
case BPF_LD | BPF_B | BPF_IND:
/* A <- P[X + k:1] */
- load_order = 0;
+ sk_load_func = sk_load_byte;
load_ind:
ctx->flags |= SEEN_OFF | SEEN_X;
emit_addiu(r_off, r_X, k, ctx);
@@ -874,14 +750,10 @@ load_ind:
emit_load(r_X, r_skb, off, ctx);
break;
case BPF_LDX | BPF_B | BPF_MSH:
- /* the interpreter will deal with the negative K */
- if ((int)k < 0)
- return -ENOTSUPP;
-
/* X <- 4 * (P[k:1] & 0xf) */
- ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
+ ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB;
/* Load offset to a1 */
- emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx);
+ emit_load_func(r_s0, (ptr)sk_load_byte, ctx);
/*
* This may emit two instructions so it may not fit
* in the delay slot. So use a0 in the delay slot.
@@ -890,25 +762,15 @@ load_ind:
emit_jalr(MIPS_R_RA, r_s0, ctx);
emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
/* Check the error value */
- if (config_enabled(CONFIG_64BIT)) {
- /* Top 32-bits of $v0 on 64-bit */
- emit_dsrl32(r_s0, r_val, 0, ctx);
- emit_bcond(MIPS_COND_NE, r_s0, r_zero,
- 3 << 2, ctx);
- } else {
- emit_bcond(MIPS_COND_NE, r_err, r_zero,
- 3 << 2, ctx);
- }
- /* No need for delay slot */
+ emit_bcond(MIPS_COND_NE, r_ret, 0,
+ b_imm(prog->len, ctx), ctx);
+ emit_reg_move(r_ret, r_zero, ctx);
/* We are good */
/* X <- P[1:K] & 0xf */
- emit_andi(r_X, r_val, 0xf, ctx);
+ emit_andi(r_X, r_A, 0xf, ctx);
/* X << 2 */
emit_b(b_imm(i + 1, ctx), ctx);
emit_sll(r_X, r_X, 2, ctx); /* delay slot */
- /* Return with error */
- emit_b(b_imm(prog->len, ctx), ctx);
- emit_load_imm(r_ret, 0, ctx); /* delay slot */
break;
case BPF_ST:
/* M[k] <- A */
@@ -943,7 +805,7 @@ load_ind:
case BPF_ALU | BPF_MUL | BPF_K:
/* A *= K */
/* Load K to scratch register before MUL */
- ctx->flags |= SEEN_A | SEEN_S0;
+ ctx->flags |= SEEN_A;
emit_load_imm(r_s0, k, ctx);
emit_mul(r_A, r_A, r_s0, ctx);
break;
@@ -961,7 +823,7 @@ load_ind:
emit_srl(r_A, r_A, k, ctx);
break;
}
- ctx->flags |= SEEN_A | SEEN_S0;
+ ctx->flags |= SEEN_A;
emit_load_imm(r_s0, k, ctx);
emit_div(r_A, r_s0, ctx);
break;
@@ -971,7 +833,7 @@ load_ind:
ctx->flags |= SEEN_A;
emit_jit_reg_move(r_A, r_zero, ctx);
} else {
- ctx->flags |= SEEN_A | SEEN_S0;
+ ctx->flags |= SEEN_A;
emit_load_imm(r_s0, k, ctx);
emit_mod(r_A, r_s0, ctx);
}
@@ -982,7 +844,7 @@ load_ind:
/* Check if r_X is zero */
emit_bcond(MIPS_COND_EQ, r_X, r_zero,
b_imm(prog->len, ctx), ctx);
- emit_load_imm(r_val, 0, ctx); /* delay slot */
+ emit_load_imm(r_ret, 0, ctx); /* delay slot */
emit_div(r_A, r_X, ctx);
break;
case BPF_ALU | BPF_MOD | BPF_X:
@@ -991,7 +853,7 @@ load_ind:
/* Check if r_X is zero */
emit_bcond(MIPS_COND_EQ, r_X, r_zero,
b_imm(prog->len, ctx), ctx);
- emit_load_imm(r_val, 0, ctx); /* delay slot */
+ emit_load_imm(r_ret, 0, ctx); /* delay slot */
emit_mod(r_A, r_X, ctx);
break;
case BPF_ALU | BPF_OR | BPF_K:
@@ -1085,10 +947,10 @@ jmp_cmp:
if ((condt & MIPS_COND_GE) ||
(condt & MIPS_COND_GT)) {
if (condt & MIPS_COND_K) { /* K */
- ctx->flags |= SEEN_S0 | SEEN_A;
+ ctx->flags |= SEEN_A;
emit_sltiu(r_s0, r_A, k, ctx);
} else { /* X */
- ctx->flags |= SEEN_S0 | SEEN_A |
+ ctx->flags |= SEEN_A |
SEEN_X;
emit_sltu(r_s0, r_A, r_X, ctx);
}
@@ -1100,7 +962,7 @@ jmp_cmp:
/* A > (K|X) ? scratch = 0 */
if (condt & MIPS_COND_GT) {
/* Checking for equality */
- ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X;
+ ctx->flags |= SEEN_A | SEEN_X;
if (condt & MIPS_COND_K)
emit_load_imm(r_s0, k, ctx);
else
@@ -1123,7 +985,7 @@ jmp_cmp:
} else {
/* A == K|X */
if (condt & MIPS_COND_K) { /* K */
- ctx->flags |= SEEN_S0 | SEEN_A;
+ ctx->flags |= SEEN_A;
emit_load_imm(r_s0, k, ctx);
/* jump true */
b_off = b_imm(i + inst->jt + 1, ctx);
@@ -1153,7 +1015,7 @@ jmp_cmp:
}
break;
case BPF_JMP | BPF_JSET | BPF_K:
- ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
+ ctx->flags |= SEEN_A;
/* pc += (A & K) ? pc -> jt : pc -> jf */
emit_load_imm(r_s1, k, ctx);
emit_and(r_s0, r_A, r_s1, ctx);
@@ -1167,7 +1029,7 @@ jmp_cmp:
emit_nop(ctx);
break;
case BPF_JMP | BPF_JSET | BPF_X:
- ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
+ ctx->flags |= SEEN_X | SEEN_A;
/* pc += (A & X) ? pc -> jt : pc -> jf */
emit_and(r_s0, r_A, r_X, ctx);
/* jump true */
@@ -1251,7 +1113,7 @@ jmp_cmp:
break;
case BPF_ANC | SKF_AD_IFINDEX:
/* A = skb->dev->ifindex */
- ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
+ ctx->flags |= SEEN_SKB | SEEN_A;
off = offsetof(struct sk_buff, dev);
/* Load *dev pointer */
emit_load_ptr(r_s0, r_skb, off, ctx);
@@ -1278,7 +1140,7 @@ jmp_cmp:
break;
case BPF_ANC | SKF_AD_VLAN_TAG:
case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
+ ctx->flags |= SEEN_SKB | SEEN_A;
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
vlan_tci) != 2);
off = offsetof(struct sk_buff, vlan_tci);
@@ -1375,7 +1237,7 @@ void bpf_jit_compile(struct bpf_prog *fp)
bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
fp->bpf_func = (void *)ctx.target;
- fp->jited = true;
+ fp->jited = 1;
out:
kfree(ctx.offsets);
diff --git a/kernel/arch/mips/net/bpf_jit.h b/kernel/arch/mips/net/bpf_jit.h
index 3a5751b43..8f9f54841 100644
--- a/kernel/arch/mips/net/bpf_jit.h
+++ b/kernel/arch/mips/net/bpf_jit.h
@@ -15,9 +15,10 @@
/* Registers used by JIT */
#define MIPS_R_ZERO 0
#define MIPS_R_V0 2
-#define MIPS_R_V1 3
#define MIPS_R_A0 4
#define MIPS_R_A1 5
+#define MIPS_R_T4 12
+#define MIPS_R_T5 13
#define MIPS_R_T6 14
#define MIPS_R_T7 15
#define MIPS_R_S0 16
@@ -41,4 +42,43 @@
#define MIPS_COND_X (0x1 << 5)
#define MIPS_COND_K (0x1 << 6)
+#define r_ret MIPS_R_V0
+
+/*
+ * Use 2 scratch registers to avoid pipeline interlocks.
+ * There is no overhead during epilogue and prologue since
+ * any of the $s0-$s6 registers will only be preserved if
+ * they are going to actually be used.
+ */
+#define r_skb_hl MIPS_R_S0 /* skb header length */
+#define r_skb_data MIPS_R_S1 /* skb actual data */
+#define r_off MIPS_R_S2
+#define r_A MIPS_R_S3
+#define r_X MIPS_R_S4
+#define r_skb MIPS_R_S5
+#define r_M MIPS_R_S6
+#define r_skb_len MIPS_R_S7
+#define r_s0 MIPS_R_T4 /* scratch reg 1 */
+#define r_s1 MIPS_R_T5 /* scratch reg 2 */
+#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */
+#define r_tmp MIPS_R_T7 /* No need to preserve this */
+#define r_zero MIPS_R_ZERO
+#define r_sp MIPS_R_SP
+#define r_ra MIPS_R_RA
+
+#ifndef __ASSEMBLY__
+
+/* Declare ASM helpers */
+
+#define DECLARE_LOAD_FUNC(func) \
+ extern u8 func(unsigned long *skb, int offset); \
+ extern u8 func##_negative(unsigned long *skb, int offset); \
+ extern u8 func##_positive(unsigned long *skb, int offset)
+
+DECLARE_LOAD_FUNC(sk_load_word);
+DECLARE_LOAD_FUNC(sk_load_half);
+DECLARE_LOAD_FUNC(sk_load_byte);
+
+#endif
+
#endif /* BPF_JIT_MIPS_OP_H */
diff --git a/kernel/arch/mips/net/bpf_jit_asm.S b/kernel/arch/mips/net/bpf_jit_asm.S
new file mode 100644
index 000000000..5d2e0c8d2
--- /dev/null
+++ b/kernel/arch/mips/net/bpf_jit_asm.S
@@ -0,0 +1,283 @@
+/*
+ * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
+ * compiler.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include "bpf_jit.h"
+
+/* ABI
+ *
+ * r_skb_hl skb header length
+ * r_skb_data skb data
+ * r_off(a1) offset register
+ * r_A BPF register A
+ * r_X PF register X
+ * r_skb(a0) *skb
+ * r_M *scratch memory
+ * r_skb_le skb length
+ * r_s0 Scratch register 0
+ * r_s1 Scratch register 1
+ *
+ * On entry:
+ * a0: *skb
+ * a1: offset (imm or imm + X)
+ *
+ * All non-BPF-ABI registers are free for use. On return, we only
+ * care about r_ret. The BPF-ABI registers are assumed to remain
+ * unmodified during the entire filter operation.
+ */
+
+#define skb a0
+#define offset a1
+#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
+
+ /* We know better :) so prevent assembler reordering etc */
+ .set noreorder
+
+#define is_offset_negative(TYPE) \
+ /* If offset is negative we have more work to do */ \
+ slti t0, offset, 0; \
+ bgtz t0, bpf_slow_path_##TYPE##_neg; \
+ /* Be careful what follows in DS. */
+
+#define is_offset_in_header(SIZE, TYPE) \
+ /* Reading from header? */ \
+ addiu $r_s0, $r_skb_hl, -SIZE; \
+ slt t0, $r_s0, offset; \
+ bgtz t0, bpf_slow_path_##TYPE; \
+
+LEAF(sk_load_word)
+ is_offset_negative(word)
+FEXPORT(sk_load_word_positive)
+ is_offset_in_header(4, word)
+ /* Offset within header boundaries */
+ PTR_ADDU t1, $r_skb_data, offset
+ .set reorder
+ lw $r_A, 0(t1)
+ .set noreorder
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ wsbh t0, $r_A
+ rotr $r_A, t0, 16
+# else
+ sll t0, $r_A, 24
+ srl t1, $r_A, 24
+ srl t2, $r_A, 8
+ or t0, t0, t1
+ andi t2, t2, 0xff00
+ andi t1, $r_A, 0xff00
+ or t0, t0, t2
+ sll t1, t1, 8
+ or $r_A, t0, t1
+# endif
+#endif
+ jr $r_ra
+ move $r_ret, zero
+ END(sk_load_word)
+
+LEAF(sk_load_half)
+ is_offset_negative(half)
+FEXPORT(sk_load_half_positive)
+ is_offset_in_header(2, half)
+ /* Offset within header boundaries */
+ PTR_ADDU t1, $r_skb_data, offset
+ .set reorder
+ lh $r_A, 0(t1)
+ .set noreorder
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ wsbh t0, $r_A
+ seh $r_A, t0
+# else
+ sll t0, $r_A, 24
+ andi t1, $r_A, 0xff00
+ sra t0, t0, 16
+ srl t1, t1, 8
+ or $r_A, t0, t1
+# endif
+#endif
+ jr $r_ra
+ move $r_ret, zero
+ END(sk_load_half)
+
+LEAF(sk_load_byte)
+ is_offset_negative(byte)
+FEXPORT(sk_load_byte_positive)
+ is_offset_in_header(1, byte)
+ /* Offset within header boundaries */
+ PTR_ADDU t1, $r_skb_data, offset
+ lb $r_A, 0(t1)
+ jr $r_ra
+ move $r_ret, zero
+ END(sk_load_byte)
+
+/*
+ * call skb_copy_bits:
+ * (prototype in linux/skbuff.h)
+ *
+ * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
+ *
+ * o32 mandates we leave 4 spaces for argument registers in case
+ * the callee needs to use them. Even though we don't care about
+ * the argument registers ourselves, we need to allocate that space
+ * to remain ABI compliant since the callee may want to use that space.
+ * We also allocate 2 more spaces for $r_ra and our return register (*to).
+ *
+ * n64 is a bit different. The *caller* will allocate the space to preserve
+ * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
+ * good reason but it does not matter that much really.
+ *
+ * (void *to) is returned in r_s0
+ *
+ */
+#define bpf_slow_path_common(SIZE) \
+ /* Quick check. Are we within reasonable boundaries? */ \
+ LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
+ sltu $r_s0, offset, $r_s1; \
+ beqz $r_s0, fault; \
+ /* Load 4th argument in DS */ \
+ LONG_ADDIU a3, zero, SIZE; \
+ PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
+ PTR_LA t0, skb_copy_bits; \
+ PTR_S $r_ra, (5 * SZREG)($r_sp); \
+ /* Assign low slot to a2 */ \
+ move a2, $r_sp; \
+ jalr t0; \
+ /* Reset our destination slot (DS but it's ok) */ \
+ INT_S zero, (4 * SZREG)($r_sp); \
+ /* \
+ * skb_copy_bits returns 0 on success and -EFAULT \
+ * on error. Our data live in a2. Do not bother with \
+ * our data if an error has been returned. \
+ */ \
+ /* Restore our frame */ \
+ PTR_L $r_ra, (5 * SZREG)($r_sp); \
+ INT_L $r_s0, (4 * SZREG)($r_sp); \
+ bltz v0, fault; \
+ PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
+ move $r_ret, zero; \
+
+NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
+ bpf_slow_path_common(4)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ wsbh t0, $r_s0
+ jr $r_ra
+ rotr $r_A, t0, 16
+# else
+ sll t0, $r_s0, 24
+ srl t1, $r_s0, 24
+ srl t2, $r_s0, 8
+ or t0, t0, t1
+ andi t2, t2, 0xff00
+ andi t1, $r_s0, 0xff00
+ or t0, t0, t2
+ sll t1, t1, 8
+ jr $r_ra
+ or $r_A, t0, t1
+# endif
+#else
+ jr $r_ra
+ move $r_A, $r_s0
+#endif
+
+ END(bpf_slow_path_word)
+
+NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
+ bpf_slow_path_common(2)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ jr $r_ra
+ wsbh $r_A, $r_s0
+# else
+ sll t0, $r_s0, 8
+ andi t1, $r_s0, 0xff00
+ andi t0, t0, 0xff00
+ srl t1, t1, 8
+ jr $r_ra
+ or $r_A, t0, t1
+# endif
+#else
+ jr $r_ra
+ move $r_A, $r_s0
+#endif
+
+ END(bpf_slow_path_half)
+
+NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
+ bpf_slow_path_common(1)
+ jr $r_ra
+ move $r_A, $r_s0
+
+ END(bpf_slow_path_byte)
+
+/*
+ * Negative entry points
+ */
+ .macro bpf_is_end_of_data
+ li t0, SKF_LL_OFF
+ /* Reading link layer data? */
+ slt t1, offset, t0
+ bgtz t1, fault
+ /* Be careful what follows in DS. */
+ .endm
+/*
+ * call skb_copy_bits:
+ * (prototype in linux/filter.h)
+ *
+ * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
+ * int k, unsigned int size)
+ *
+ * see above (bpf_slow_path_common) for ABI restrictions
+ */
+#define bpf_negative_common(SIZE) \
+ PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
+ PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
+ PTR_S $r_ra, (5 * SZREG)($r_sp); \
+ jalr t0; \
+ li a2, SIZE; \
+ PTR_L $r_ra, (5 * SZREG)($r_sp); \
+ /* Check return pointer */ \
+ beqz v0, fault; \
+ PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
+ /* Preserve our pointer */ \
+ move $r_s0, v0; \
+ /* Set return value */ \
+ move $r_ret, zero; \
+
+bpf_slow_path_word_neg:
+ bpf_is_end_of_data
+NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
+ bpf_negative_common(4)
+ jr $r_ra
+ lw $r_A, 0($r_s0)
+ END(sk_load_word_negative)
+
+bpf_slow_path_half_neg:
+ bpf_is_end_of_data
+NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
+ bpf_negative_common(2)
+ jr $r_ra
+ lhu $r_A, 0($r_s0)
+ END(sk_load_half_negative)
+
+bpf_slow_path_byte_neg:
+ bpf_is_end_of_data
+NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
+ bpf_negative_common(1)
+ jr $r_ra
+ lbu $r_A, 0($r_s0)
+ END(sk_load_byte_negative)
+
+fault:
+ jr $r_ra
+ addiu $r_ret, zero, 1