diff options
Diffstat (limited to 'kernel/drivers/media/platform/rcar_jpu.c')
-rw-r--r-- | kernel/drivers/media/platform/rcar_jpu.c | 1802 |
1 files changed, 1802 insertions, 0 deletions
diff --git a/kernel/drivers/media/platform/rcar_jpu.c b/kernel/drivers/media/platform/rcar_jpu.c new file mode 100644 index 000000000..f8e3e83c5 --- /dev/null +++ b/kernel/drivers/media/platform/rcar_jpu.c @@ -0,0 +1,1802 @@ +/* + * Author: Mikhail Ulyanov + * Copyright (C) 2014-2015 Cogent Embedded, Inc. <source@cogentembedded.com> + * Copyright (C) 2014-2015 Renesas Electronics Corporation + * + * This is based on the drivers/media/platform/s5p-jpeg driver by + * Andrzej Pietrasiewicz and Jacek Anaszewski. + * Some portions of code inspired by VSP1 driver by Laurent Pinchart. + * + * TODO in order of priority: + * 1) Rotation + * 2) Cropping + * 3) V4L2_CID_JPEG_ACTIVE_MARKER + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/unaligned.h> +#include <linux/clk.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/videodev2.h> +#include <media/v4l2-ctrls.h> +#include <media/v4l2-device.h> +#include <media/v4l2-event.h> +#include <media/v4l2-fh.h> +#include <media/v4l2-mem2mem.h> +#include <media/v4l2-ioctl.h> +#include <media/videobuf2-v4l2.h> +#include <media/videobuf2-dma-contig.h> + + +#define DRV_NAME "rcar_jpu" + +/* + * Align JPEG header end to cache line to make sure we will not have any issues + * with cache; additionally to requerment (33.3.27 R01UH0501EJ0100 Rev.1.00) + */ +#define JPU_JPEG_HDR_SIZE (ALIGN(0x258, L1_CACHE_BYTES)) +#define JPU_JPEG_MAX_BYTES_PER_PIXEL 2 /* 16 bit precision format */ +#define JPU_JPEG_MIN_SIZE 25 /* SOI + SOF + EOI */ +#define JPU_JPEG_QTBL_SIZE 0x40 +#define JPU_JPEG_HDCTBL_SIZE 0x1c +#define JPU_JPEG_HACTBL_SIZE 0xb2 +#define JPU_JPEG_HEIGHT_OFFSET 0x91 +#define JPU_JPEG_WIDTH_OFFSET 0x93 +#define JPU_JPEG_SUBS_OFFSET 0x97 +#define JPU_JPEG_QTBL_LUM_OFFSET 0x07 +#define JPU_JPEG_QTBL_CHR_OFFSET 0x4c +#define JPU_JPEG_HDCTBL_LUM_OFFSET 0xa4 +#define JPU_JPEG_HACTBL_LUM_OFFSET 0xc5 +#define JPU_JPEG_HDCTBL_CHR_OFFSET 0x17c +#define JPU_JPEG_HACTBL_CHR_OFFSET 0x19d +#define JPU_JPEG_PADDING_OFFSET 0x24f +#define JPU_JPEG_LUM 0x00 +#define JPU_JPEG_CHR 0x01 +#define JPU_JPEG_DC 0x00 +#define JPU_JPEG_AC 0x10 + +#define JPU_JPEG_422 0x21 +#define JPU_JPEG_420 0x22 + +#define JPU_JPEG_DEFAULT_422_PIX_FMT V4L2_PIX_FMT_NV16M +#define JPU_JPEG_DEFAULT_420_PIX_FMT V4L2_PIX_FMT_NV12M + +/* JPEG markers */ +#define TEM 0x01 +#define SOF0 0xc0 +#define RST 0xd0 +#define SOI 0xd8 +#define EOI 0xd9 +#define DHP 0xde +#define DHT 0xc4 +#define COM 0xfe +#define DQT 0xdb +#define DRI 0xdd +#define APP0 0xe0 + +#define JPU_RESET_TIMEOUT 100 /* ms */ +#define JPU_JOB_TIMEOUT 300 /* ms */ +#define JPU_MAX_QUALITY 4 +#define JPU_WIDTH_MIN 16 +#define JPU_HEIGHT_MIN 16 +#define JPU_WIDTH_MAX 4096 +#define JPU_HEIGHT_MAX 4096 +#define JPU_MEMALIGN 8 + +/* Flags that indicate a format can be used for capture/output */ +#define JPU_FMT_TYPE_OUTPUT 0 +#define JPU_FMT_TYPE_CAPTURE 1 +#define JPU_ENC_CAPTURE (1 << 0) +#define JPU_ENC_OUTPUT (1 << 1) +#define JPU_DEC_CAPTURE (1 << 2) +#define JPU_DEC_OUTPUT (1 << 3) + +/* + * JPEG registers and bits + */ + +/* JPEG code mode register */ +#define JCMOD 0x00 +#define JCMOD_PCTR (1 << 7) +#define JCMOD_MSKIP_ENABLE (1 << 5) +#define JCMOD_DSP_ENC (0 << 3) +#define JCMOD_DSP_DEC (1 << 3) +#define JCMOD_REDU (7 << 0) +#define JCMOD_REDU_422 (1 << 0) +#define JCMOD_REDU_420 (2 << 0) + +/* JPEG code command register */ +#define JCCMD 0x04 +#define JCCMD_SRST (1 << 12) +#define JCCMD_JEND (1 << 2) +#define JCCMD_JSRT (1 << 0) + +/* JPEG code quantanization table number register */ +#define JCQTN 0x0c +#define JCQTN_SHIFT(t) (((t) - 1) << 1) + +/* JPEG code Huffman table number register */ +#define JCHTN 0x10 +#define JCHTN_AC_SHIFT(t) (((t) << 1) - 1) +#define JCHTN_DC_SHIFT(t) (((t) - 1) << 1) + +#define JCVSZU 0x1c /* JPEG code vertical size upper register */ +#define JCVSZD 0x20 /* JPEG code vertical size lower register */ +#define JCHSZU 0x24 /* JPEG code horizontal size upper register */ +#define JCHSZD 0x28 /* JPEG code horizontal size lower register */ +#define JCSZ_MASK 0xff /* JPEG code h/v size register contains only 1 byte*/ + +#define JCDTCU 0x2c /* JPEG code data count upper register */ +#define JCDTCM 0x30 /* JPEG code data count middle register */ +#define JCDTCD 0x34 /* JPEG code data count lower register */ + +/* JPEG interrupt enable register */ +#define JINTE 0x38 +#define JINTE_ERR (7 << 5) /* INT5 + INT6 + INT7 */ +#define JINTE_TRANSF_COMPL (1 << 10) + +/* JPEG interrupt status register */ +#define JINTS 0x3c +#define JINTS_MASK 0x7c68 +#define JINTS_ERR (1 << 5) +#define JINTS_PROCESS_COMPL (1 << 6) +#define JINTS_TRANSF_COMPL (1 << 10) + +#define JCDERR 0x40 /* JPEG code decode error register */ +#define JCDERR_MASK 0xf /* JPEG code decode error register mask*/ + +/* JPEG interface encoding */ +#define JIFECNT 0x70 +#define JIFECNT_INFT_422 0 +#define JIFECNT_INFT_420 1 +#define JIFECNT_SWAP_WB (3 << 4) /* to JPU */ + +#define JIFESYA1 0x74 /* encode source Y address register 1 */ +#define JIFESCA1 0x78 /* encode source C address register 1 */ +#define JIFESYA2 0x7c /* encode source Y address register 2 */ +#define JIFESCA2 0x80 /* encode source C address register 2 */ +#define JIFESMW 0x84 /* encode source memory width register */ +#define JIFESVSZ 0x88 /* encode source vertical size register */ +#define JIFESHSZ 0x8c /* encode source horizontal size register */ +#define JIFEDA1 0x90 /* encode destination address register 1 */ +#define JIFEDA2 0x94 /* encode destination address register 2 */ + +/* JPEG decoding control register */ +#define JIFDCNT 0xa0 +#define JIFDCNT_SWAP_WB (3 << 1) /* from JPU */ + +#define JIFDSA1 0xa4 /* decode source address register 1 */ +#define JIFDDMW 0xb0 /* decode destination memory width register */ +#define JIFDDVSZ 0xb4 /* decode destination vert. size register */ +#define JIFDDHSZ 0xb8 /* decode destination horiz. size register */ +#define JIFDDYA1 0xbc /* decode destination Y address register 1 */ +#define JIFDDCA1 0xc0 /* decode destination C address register 1 */ + +#define JCQTBL(n) (0x10000 + (n) * 0x40) /* quantization tables regs */ +#define JCHTBD(n) (0x10100 + (n) * 0x100) /* Huffman table DC regs */ +#define JCHTBA(n) (0x10120 + (n) * 0x100) /* Huffman table AC regs */ + +/** + * struct jpu - JPEG IP abstraction + * @mutex: the mutex protecting this structure + * @lock: spinlock protecting the device contexts + * @v4l2_dev: v4l2 device for mem2mem mode + * @vfd_encoder: video device node for encoder mem2mem mode + * @vfd_decoder: video device node for decoder mem2mem mode + * @m2m_dev: v4l2 mem2mem device data + * @curr: pointer to current context + * @irq_queue: interrupt handler waitqueue + * @regs: JPEG IP registers mapping + * @irq: JPEG IP irq + * @clk: JPEG IP clock + * @dev: JPEG IP struct device + * @alloc_ctx: videobuf2 memory allocator's context + * @ref_count: reference counter + */ +struct jpu { + struct mutex mutex; + spinlock_t lock; + struct v4l2_device v4l2_dev; + struct video_device vfd_encoder; + struct video_device vfd_decoder; + struct v4l2_m2m_dev *m2m_dev; + struct jpu_ctx *curr; + wait_queue_head_t irq_queue; + + void __iomem *regs; + unsigned int irq; + struct clk *clk; + struct device *dev; + void *alloc_ctx; + int ref_count; +}; + +/** + * struct jpu_buffer - driver's specific video buffer + * @buf: m2m buffer + * @compr_quality: destination image quality in compression mode + * @subsampling: source image subsampling in decompression mode + */ +struct jpu_buffer { + struct v4l2_m2m_buffer buf; + unsigned short compr_quality; + unsigned char subsampling; +}; + +/** + * struct jpu_fmt - driver's internal format data + * @fourcc: the fourcc code, 0 if not applicable + * @colorspace: the colorspace specifier + * @bpp: number of bits per pixel per plane + * @h_align: horizontal alignment order (align to 2^h_align) + * @v_align: vertical alignment order (align to 2^v_align) + * @subsampling: (horizontal:4 | vertical:4) subsampling factor + * @num_planes: number of planes + * @types: types of queue this format is applicable to + */ +struct jpu_fmt { + u32 fourcc; + u32 colorspace; + u8 bpp[2]; + u8 h_align; + u8 v_align; + u8 subsampling; + u8 num_planes; + u16 types; +}; + +/** + * jpu_q_data - parameters of one queue + * @fmtinfo: driver-specific format of this queue + * @format: multiplanar format of this queue + * @sequence: sequence number + */ +struct jpu_q_data { + struct jpu_fmt *fmtinfo; + struct v4l2_pix_format_mplane format; + unsigned int sequence; +}; + +/** + * jpu_ctx - the device context data + * @jpu: JPEG IP device for this context + * @encoder: compression (encode) operation or decompression (decode) + * @compr_quality: destination image quality in compression (encode) mode + * @out_q: source (output) queue information + * @cap_q: destination (capture) queue information + * @fh: file handler + * @ctrl_handler: controls handler + */ +struct jpu_ctx { + struct jpu *jpu; + bool encoder; + unsigned short compr_quality; + struct jpu_q_data out_q; + struct jpu_q_data cap_q; + struct v4l2_fh fh; + struct v4l2_ctrl_handler ctrl_handler; +}; + + /** + * jpeg_buffer - description of memory containing input JPEG data + * @end: end position in the buffer + * @curr: current position in the buffer + */ +struct jpeg_buffer { + void *end; + void *curr; +}; + +static struct jpu_fmt jpu_formats[] = { + { V4L2_PIX_FMT_JPEG, V4L2_COLORSPACE_JPEG, + {0, 0}, 0, 0, 0, 1, JPU_ENC_CAPTURE | JPU_DEC_OUTPUT }, + { V4L2_PIX_FMT_NV16M, V4L2_COLORSPACE_SRGB, + {8, 8}, 2, 2, JPU_JPEG_422, 2, JPU_ENC_OUTPUT | JPU_DEC_CAPTURE }, + { V4L2_PIX_FMT_NV12M, V4L2_COLORSPACE_SRGB, + {8, 4}, 2, 2, JPU_JPEG_420, 2, JPU_ENC_OUTPUT | JPU_DEC_CAPTURE }, + { V4L2_PIX_FMT_NV16, V4L2_COLORSPACE_SRGB, + {16, 0}, 2, 2, JPU_JPEG_422, 1, JPU_ENC_OUTPUT | JPU_DEC_CAPTURE }, + { V4L2_PIX_FMT_NV12, V4L2_COLORSPACE_SRGB, + {12, 0}, 2, 2, JPU_JPEG_420, 1, JPU_ENC_OUTPUT | JPU_DEC_CAPTURE }, +}; + +static const u8 zigzag[] = { + 0x03, 0x02, 0x0b, 0x13, 0x0a, 0x01, 0x00, 0x09, + 0x12, 0x1b, 0x23, 0x1a, 0x11, 0x08, 0x07, 0x06, + 0x0f, 0x10, 0x19, 0x22, 0x2b, 0x33, 0x2a, 0x21, + 0x18, 0x17, 0x0e, 0x05, 0x04, 0x0d, 0x16, 0x1f, + 0x20, 0x29, 0x32, 0x3b, 0x3a, 0x31, 0x28, 0x27, + 0x1e, 0x15, 0x0e, 0x14, 0x10, 0x26, 0x2f, 0x30, + 0x39, 0x38, 0x37, 0x2e, 0x25, 0x1c, 0x24, 0x2b, + 0x36, 0x3f, 0x3e, 0x35, 0x2c, 0x34, 0x3d, 0x3c +}; + +#define QTBL_SIZE (ALIGN(JPU_JPEG_QTBL_SIZE, \ + sizeof(unsigned int)) / sizeof(unsigned int)) +#define HDCTBL_SIZE (ALIGN(JPU_JPEG_HDCTBL_SIZE, \ + sizeof(unsigned int)) / sizeof(unsigned int)) +#define HACTBL_SIZE (ALIGN(JPU_JPEG_HACTBL_SIZE, \ + sizeof(unsigned int)) / sizeof(unsigned int)) +/* + * Start of image; Quantization tables + * SOF0 (17 bytes payload) is Baseline DCT - Sample precision, height, width, + * Number of image components, (Ci:8 - Hi:4 - Vi:4 - Tq:8) * 3 - Y,Cb,Cr; + * Huffman tables; Padding with 0xff (33.3.27 R01UH0501EJ0100 Rev.1.00) + */ +#define JPU_JPEG_HDR_BLOB { \ + 0xff, SOI, 0xff, DQT, 0x00, JPU_JPEG_QTBL_SIZE + 0x3, JPU_JPEG_LUM, \ + [JPU_JPEG_QTBL_LUM_OFFSET ... \ + JPU_JPEG_QTBL_LUM_OFFSET + JPU_JPEG_QTBL_SIZE - 1] = 0x00, \ + 0xff, DQT, 0x00, JPU_JPEG_QTBL_SIZE + 0x3, JPU_JPEG_CHR, \ + [JPU_JPEG_QTBL_CHR_OFFSET ... JPU_JPEG_QTBL_CHR_OFFSET + \ + JPU_JPEG_QTBL_SIZE - 1] = 0x00, 0xff, SOF0, 0x00, 0x11, 0x08, \ + [JPU_JPEG_HEIGHT_OFFSET ... JPU_JPEG_HEIGHT_OFFSET + 1] = 0x00, \ + [JPU_JPEG_WIDTH_OFFSET ... JPU_JPEG_WIDTH_OFFSET + 1] = 0x00, \ + 0x03, 0x01, [JPU_JPEG_SUBS_OFFSET] = 0x00, JPU_JPEG_LUM, \ + 0x02, 0x11, JPU_JPEG_CHR, 0x03, 0x11, JPU_JPEG_CHR, \ + 0xff, DHT, 0x00, JPU_JPEG_HDCTBL_SIZE + 0x3, JPU_JPEG_LUM|JPU_JPEG_DC, \ + [JPU_JPEG_HDCTBL_LUM_OFFSET ... \ + JPU_JPEG_HDCTBL_LUM_OFFSET + JPU_JPEG_HDCTBL_SIZE - 1] = 0x00, \ + 0xff, DHT, 0x00, JPU_JPEG_HACTBL_SIZE + 0x3, JPU_JPEG_LUM|JPU_JPEG_AC, \ + [JPU_JPEG_HACTBL_LUM_OFFSET ... \ + JPU_JPEG_HACTBL_LUM_OFFSET + JPU_JPEG_HACTBL_SIZE - 1] = 0x00, \ + 0xff, DHT, 0x00, JPU_JPEG_HDCTBL_SIZE + 0x3, JPU_JPEG_CHR|JPU_JPEG_DC, \ + [JPU_JPEG_HDCTBL_CHR_OFFSET ... \ + JPU_JPEG_HDCTBL_CHR_OFFSET + JPU_JPEG_HDCTBL_SIZE - 1] = 0x00, \ + 0xff, DHT, 0x00, JPU_JPEG_HACTBL_SIZE + 0x3, JPU_JPEG_CHR|JPU_JPEG_AC, \ + [JPU_JPEG_HACTBL_CHR_OFFSET ... \ + JPU_JPEG_HACTBL_CHR_OFFSET + JPU_JPEG_HACTBL_SIZE - 1] = 0x00, \ + [JPU_JPEG_PADDING_OFFSET ... JPU_JPEG_HDR_SIZE - 1] = 0xff \ +} + +static unsigned char jpeg_hdrs[JPU_MAX_QUALITY][JPU_JPEG_HDR_SIZE] = { + [0 ... JPU_MAX_QUALITY - 1] = JPU_JPEG_HDR_BLOB +}; + +static const unsigned int qtbl_lum[JPU_MAX_QUALITY][QTBL_SIZE] = { + { + 0x14101927, 0x322e3e44, 0x10121726, 0x26354144, + 0x19171f26, 0x35414444, 0x27262635, 0x41444444, + 0x32263541, 0x44444444, 0x2e354144, 0x44444444, + 0x3e414444, 0x44444444, 0x44444444, 0x44444444 + }, + { + 0x100b0b10, 0x171b1f1e, 0x0b0c0c0f, 0x1417171e, + 0x0b0c0d10, 0x171a232f, 0x100f1017, 0x1a252f40, + 0x1714171a, 0x27334040, 0x1b171a25, 0x33404040, + 0x1f17232f, 0x40404040, 0x1e1e2f40, 0x40404040 + }, + { + 0x0c08080c, 0x11151817, 0x0809090b, 0x0f131217, + 0x08090a0c, 0x13141b24, 0x0c0b0c15, 0x141c2435, + 0x110f1314, 0x1e27333b, 0x1513141c, 0x27333b3b, + 0x18121b24, 0x333b3b3b, 0x17172435, 0x3b3b3b3b + }, + { + 0x08060608, 0x0c0e1011, 0x06060608, 0x0a0d0c0f, + 0x06060708, 0x0d0e1218, 0x0808080e, 0x0d131823, + 0x0c0a0d0d, 0x141a2227, 0x0e0d0e13, 0x1a222727, + 0x100c1318, 0x22272727, 0x110f1823, 0x27272727 + } +}; + +static const unsigned int qtbl_chr[JPU_MAX_QUALITY][QTBL_SIZE] = { + { + 0x15192026, 0x36444444, 0x191c1826, 0x36444444, + 0x2018202b, 0x42444444, 0x26262b35, 0x44444444, + 0x36424444, 0x44444444, 0x44444444, 0x44444444, + 0x44444444, 0x44444444, 0x44444444, 0x44444444 + }, + { + 0x110f1115, 0x141a2630, 0x0f131211, 0x141a232b, + 0x11121416, 0x1a1e2e35, 0x1511161c, 0x1e273540, + 0x14141a1e, 0x27304040, 0x1a1a1e27, 0x303f4040, + 0x26232e35, 0x40404040, 0x302b3540, 0x40404040 + }, + { + 0x0d0b0d10, 0x14141d25, 0x0b0e0e0e, 0x10141a20, + 0x0d0e0f11, 0x14172328, 0x100e1115, 0x171e2832, + 0x14101417, 0x1e25323b, 0x1414171e, 0x25303b3b, + 0x1d1a2328, 0x323b3b3b, 0x25202832, 0x3b3b3b3b + }, + { + 0x0908090b, 0x0e111318, 0x080a090b, 0x0e0d1116, + 0x09090d0e, 0x0d0f171a, 0x0b0b0e0e, 0x0f141a21, + 0x0e0e0d0f, 0x14182127, 0x110d0f14, 0x18202727, + 0x1311171a, 0x21272727, 0x18161a21, 0x27272727 + } +}; + +static const unsigned int hdctbl_lum[HDCTBL_SIZE] = { + 0x00010501, 0x01010101, 0x01000000, 0x00000000, + 0x00010203, 0x04050607, 0x08090a0b +}; + +static const unsigned int hdctbl_chr[HDCTBL_SIZE] = { + 0x00010501, 0x01010101, 0x01000000, 0x00000000, + 0x00010203, 0x04050607, 0x08090a0b +}; + +static const unsigned int hactbl_lum[HACTBL_SIZE] = { + 0x00020103, 0x03020403, 0x05050404, 0x0000017d, 0x01020300, 0x04110512, + 0x21314106, 0x13516107, 0x22711432, 0x8191a108, 0x2342b1c1, 0x1552d1f0, + 0x24336272, 0x82090a16, 0x1718191a, 0x25262728, 0x292a3435, 0x36373839, + 0x3a434445, 0x46474849, 0x4a535455, 0x56575859, 0x5a636465, 0x66676869, + 0x6a737475, 0x76777879, 0x7a838485, 0x86878889, 0x8a929394, 0x95969798, + 0x999aa2a3, 0xa4a5a6a7, 0xa8a9aab2, 0xb3b4b5b6, 0xb7b8b9ba, 0xc2c3c4c5, + 0xc6c7c8c9, 0xcad2d3d4, 0xd5d6d7d8, 0xd9dae1e2, 0xe3e4e5e6, 0xe7e8e9ea, + 0xf1f2f3f4, 0xf5f6f7f8, 0xf9fa0000 +}; + +static const unsigned int hactbl_chr[HACTBL_SIZE] = { + 0x00020103, 0x03020403, 0x05050404, 0x0000017d, 0x01020300, 0x04110512, + 0x21314106, 0x13516107, 0x22711432, 0x8191a108, 0x2342b1c1, 0x1552d1f0, + 0x24336272, 0x82090a16, 0x1718191a, 0x25262728, 0x292a3435, 0x36373839, + 0x3a434445, 0x46474849, 0x4a535455, 0x56575859, 0x5a636465, 0x66676869, + 0x6a737475, 0x76777879, 0x7a838485, 0x86878889, 0x8a929394, 0x95969798, + 0x999aa2a3, 0xa4a5a6a7, 0xa8a9aab2, 0xb3b4b5b6, 0xb7b8b9ba, 0xc2c3c4c5, + 0xc6c7c8c9, 0xcad2d3d4, 0xd5d6d7d8, 0xd9dae1e2, 0xe3e4e5e6, 0xe7e8e9ea, + 0xf1f2f3f4, 0xf5f6f7f8, 0xf9fa0000 +}; + +static const char *error_to_text[16] = { + "Normal", + "SOI not detected", + "SOF1 to SOFF detected", + "Subsampling not detected", + "SOF accuracy error", + "DQT accuracy error", + "Component error 1", + "Component error 2", + "SOF0, DQT, and DHT not detected when SOS detected", + "SOS not detected", + "EOI not detected", + "Restart interval data number error detected", + "Image size error", + "Last MCU data number error", + "Block data number error", + "Unknown" +}; + +static struct jpu_buffer *vb2_to_jpu_buffer(struct vb2_v4l2_buffer *vb) +{ + struct v4l2_m2m_buffer *b = + container_of(vb, struct v4l2_m2m_buffer, vb); + + return container_of(b, struct jpu_buffer, buf); +} + +static u32 jpu_read(struct jpu *jpu, unsigned int reg) +{ + return ioread32(jpu->regs + reg); +} + +static void jpu_write(struct jpu *jpu, u32 val, unsigned int reg) +{ + iowrite32(val, jpu->regs + reg); +} + +static struct jpu_ctx *ctrl_to_ctx(struct v4l2_ctrl *c) +{ + return container_of(c->handler, struct jpu_ctx, ctrl_handler); +} + +static struct jpu_ctx *fh_to_ctx(struct v4l2_fh *fh) +{ + return container_of(fh, struct jpu_ctx, fh); +} + +static void jpu_set_tbl(struct jpu *jpu, u32 reg, const unsigned int *tbl, + unsigned int len) { + unsigned int i; + + for (i = 0; i < len; i++) + jpu_write(jpu, tbl[i], reg + (i << 2)); +} + +static void jpu_set_qtbl(struct jpu *jpu, unsigned short quality) +{ + jpu_set_tbl(jpu, JCQTBL(0), qtbl_lum[quality], QTBL_SIZE); + jpu_set_tbl(jpu, JCQTBL(1), qtbl_chr[quality], QTBL_SIZE); +} + +static void jpu_set_htbl(struct jpu *jpu) +{ + jpu_set_tbl(jpu, JCHTBD(0), hdctbl_lum, HDCTBL_SIZE); + jpu_set_tbl(jpu, JCHTBA(0), hactbl_lum, HACTBL_SIZE); + jpu_set_tbl(jpu, JCHTBD(1), hdctbl_chr, HDCTBL_SIZE); + jpu_set_tbl(jpu, JCHTBA(1), hactbl_chr, HACTBL_SIZE); +} + +static int jpu_wait_reset(struct jpu *jpu) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(JPU_RESET_TIMEOUT); + + while (jpu_read(jpu, JCCMD) & JCCMD_SRST) { + if (time_after(jiffies, timeout)) { + dev_err(jpu->dev, "timed out in reset\n"); + return -ETIMEDOUT; + } + schedule(); + } + + return 0; +} + +static int jpu_reset(struct jpu *jpu) +{ + jpu_write(jpu, JCCMD_SRST, JCCMD); + return jpu_wait_reset(jpu); +} + +/* + * ============================================================================ + * video ioctl operations + * ============================================================================ + */ +static void put_qtbl(u8 *p, const u8 *qtbl) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(zigzag); i++) + p[i] = *(qtbl + zigzag[i]); +} + +static void put_htbl(u8 *p, const u8 *htbl, unsigned int len) +{ + unsigned int i, j; + + for (i = 0; i < len; i += 4) + for (j = 0; j < 4 && (i + j) < len; ++j) + p[i + j] = htbl[i + 3 - j]; +} + +static void jpu_generate_hdr(unsigned short quality, unsigned char *p) +{ + put_qtbl(p + JPU_JPEG_QTBL_LUM_OFFSET, (const u8 *)qtbl_lum[quality]); + put_qtbl(p + JPU_JPEG_QTBL_CHR_OFFSET, (const u8 *)qtbl_chr[quality]); + + put_htbl(p + JPU_JPEG_HDCTBL_LUM_OFFSET, (const u8 *)hdctbl_lum, + JPU_JPEG_HDCTBL_SIZE); + put_htbl(p + JPU_JPEG_HACTBL_LUM_OFFSET, (const u8 *)hactbl_lum, + JPU_JPEG_HACTBL_SIZE); + + put_htbl(p + JPU_JPEG_HDCTBL_CHR_OFFSET, (const u8 *)hdctbl_chr, + JPU_JPEG_HDCTBL_SIZE); + put_htbl(p + JPU_JPEG_HACTBL_CHR_OFFSET, (const u8 *)hactbl_chr, + JPU_JPEG_HACTBL_SIZE); +} + +static int get_byte(struct jpeg_buffer *buf) +{ + if (buf->curr >= buf->end) + return -1; + + return *(u8 *)buf->curr++; +} + +static int get_word_be(struct jpeg_buffer *buf, unsigned int *word) +{ + if (buf->end - buf->curr < 2) + return -1; + + *word = get_unaligned_be16(buf->curr); + buf->curr += 2; + + return 0; +} + +static void skip(struct jpeg_buffer *buf, unsigned long len) +{ + buf->curr += min((unsigned long)(buf->end - buf->curr), len); +} + +static u8 jpu_parse_hdr(void *buffer, unsigned long size, unsigned int *width, + unsigned int *height) +{ + struct jpeg_buffer jpeg_buffer; + unsigned int word; + bool soi = false; + + jpeg_buffer.end = buffer + size; + jpeg_buffer.curr = buffer; + + /* + * basic size check and EOI - we don't want to let JPU cross + * buffer bounds in any case. Hope it's stopping by EOI. + */ + if (size < JPU_JPEG_MIN_SIZE || *(u8 *)(buffer + size - 1) != EOI) + return 0; + + for (;;) { + int c; + + /* skip preceding filler bytes */ + do + c = get_byte(&jpeg_buffer); + while (c == 0xff || c == 0); + + if (!soi && c == SOI) { + soi = true; + continue; + } else if (soi != (c != SOI)) + return 0; + + switch (c) { + case SOF0: /* SOF0: baseline JPEG */ + skip(&jpeg_buffer, 3); /* segment length and bpp */ + if (get_word_be(&jpeg_buffer, height) || + get_word_be(&jpeg_buffer, width) || + get_byte(&jpeg_buffer) != 3) /* YCbCr only */ + return 0; + + skip(&jpeg_buffer, 1); + return get_byte(&jpeg_buffer); + case DHT: + case DQT: + case COM: + case DRI: + case APP0 ... APP0 + 0x0f: + if (get_word_be(&jpeg_buffer, &word)) + return 0; + skip(&jpeg_buffer, (long)word - 2); + case 0: + break; + default: + return 0; + } + } + + return 0; +} + +static int jpu_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + struct jpu_ctx *ctx = fh_to_ctx(priv); + + if (ctx->encoder) + strlcpy(cap->card, DRV_NAME " encoder", sizeof(cap->card)); + else + strlcpy(cap->card, DRV_NAME " decoder", sizeof(cap->card)); + + strlcpy(cap->driver, DRV_NAME, sizeof(cap->driver)); + snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s", + dev_name(ctx->jpu->dev)); + cap->device_caps |= V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_M2M_MPLANE; + cap->capabilities = V4L2_CAP_DEVICE_CAPS | cap->device_caps; + memset(cap->reserved, 0, sizeof(cap->reserved)); + + return 0; +} + +static struct jpu_fmt *jpu_find_format(bool encoder, u32 pixelformat, + unsigned int fmt_type) +{ + unsigned int i, fmt_flag; + + if (encoder) + fmt_flag = fmt_type == JPU_FMT_TYPE_OUTPUT ? JPU_ENC_OUTPUT : + JPU_ENC_CAPTURE; + else + fmt_flag = fmt_type == JPU_FMT_TYPE_OUTPUT ? JPU_DEC_OUTPUT : + JPU_DEC_CAPTURE; + + for (i = 0; i < ARRAY_SIZE(jpu_formats); i++) { + struct jpu_fmt *fmt = &jpu_formats[i]; + + if (fmt->fourcc == pixelformat && fmt->types & fmt_flag) + return fmt; + } + + return NULL; +} + +static int jpu_enum_fmt(struct v4l2_fmtdesc *f, u32 type) +{ + unsigned int i, num = 0; + + for (i = 0; i < ARRAY_SIZE(jpu_formats); ++i) { + if (jpu_formats[i].types & type) { + if (num == f->index) + break; + ++num; + } + } + + if (i >= ARRAY_SIZE(jpu_formats)) + return -EINVAL; + + f->pixelformat = jpu_formats[i].fourcc; + + return 0; +} + +static int jpu_enum_fmt_cap(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + struct jpu_ctx *ctx = fh_to_ctx(priv); + + return jpu_enum_fmt(f, ctx->encoder ? JPU_ENC_CAPTURE : + JPU_DEC_CAPTURE); +} + +static int jpu_enum_fmt_out(struct file *file, void *priv, + struct v4l2_fmtdesc *f) +{ + struct jpu_ctx *ctx = fh_to_ctx(priv); + + return jpu_enum_fmt(f, ctx->encoder ? JPU_ENC_OUTPUT : JPU_DEC_OUTPUT); +} + +static struct jpu_q_data *jpu_get_q_data(struct jpu_ctx *ctx, + enum v4l2_buf_type type) +{ + if (V4L2_TYPE_IS_OUTPUT(type)) + return &ctx->out_q; + else + return &ctx->cap_q; +} + +static void jpu_bound_align_image(u32 *w, unsigned int w_min, + unsigned int w_max, unsigned int w_align, + u32 *h, unsigned int h_min, + unsigned int h_max, unsigned int h_align) +{ + unsigned int width, height, w_step, h_step; + + width = *w; + height = *h; + + w_step = 1U << w_align; + h_step = 1U << h_align; + v4l_bound_align_image(w, w_min, w_max, w_align, h, h_min, h_max, + h_align, 3); + + if (*w < width && *w + w_step < w_max) + *w += w_step; + if (*h < height && *h + h_step < h_max) + *h += h_step; +} + +static int __jpu_try_fmt(struct jpu_ctx *ctx, struct jpu_fmt **fmtinfo, + struct v4l2_pix_format_mplane *pix, + enum v4l2_buf_type type) +{ + struct jpu_fmt *fmt; + unsigned int f_type, w, h; + + f_type = V4L2_TYPE_IS_OUTPUT(type) ? JPU_FMT_TYPE_OUTPUT : + JPU_FMT_TYPE_CAPTURE; + + fmt = jpu_find_format(ctx->encoder, pix->pixelformat, f_type); + if (!fmt) { + unsigned int pixelformat; + + dev_dbg(ctx->jpu->dev, "unknown format; set default format\n"); + if (ctx->encoder) + pixelformat = f_type == JPU_FMT_TYPE_OUTPUT ? + V4L2_PIX_FMT_NV16M : V4L2_PIX_FMT_JPEG; + else + pixelformat = f_type == JPU_FMT_TYPE_CAPTURE ? + V4L2_PIX_FMT_NV16M : V4L2_PIX_FMT_JPEG; + fmt = jpu_find_format(ctx->encoder, pixelformat, f_type); + } + + pix->pixelformat = fmt->fourcc; + pix->colorspace = fmt->colorspace; + pix->field = V4L2_FIELD_NONE; + pix->num_planes = fmt->num_planes; + memset(pix->reserved, 0, sizeof(pix->reserved)); + + jpu_bound_align_image(&pix->width, JPU_WIDTH_MIN, JPU_WIDTH_MAX, + fmt->h_align, &pix->height, JPU_HEIGHT_MIN, + JPU_HEIGHT_MAX, fmt->v_align); + + w = pix->width; + h = pix->height; + + if (fmt->fourcc == V4L2_PIX_FMT_JPEG) { + /* ignore userspaces's sizeimage for encoding */ + if (pix->plane_fmt[0].sizeimage <= 0 || ctx->encoder) + pix->plane_fmt[0].sizeimage = JPU_JPEG_HDR_SIZE + + (JPU_JPEG_MAX_BYTES_PER_PIXEL * w * h); + pix->plane_fmt[0].bytesperline = 0; + memset(pix->plane_fmt[0].reserved, 0, + sizeof(pix->plane_fmt[0].reserved)); + } else { + unsigned int i, bpl = 0; + + for (i = 0; i < pix->num_planes; ++i) + bpl = max(bpl, pix->plane_fmt[i].bytesperline); + + bpl = clamp_t(unsigned int, bpl, w, JPU_WIDTH_MAX); + bpl = round_up(bpl, JPU_MEMALIGN); + + for (i = 0; i < pix->num_planes; ++i) { + pix->plane_fmt[i].bytesperline = bpl; + pix->plane_fmt[i].sizeimage = bpl * h * fmt->bpp[i] / 8; + memset(pix->plane_fmt[i].reserved, 0, + sizeof(pix->plane_fmt[i].reserved)); + } + } + + if (fmtinfo) + *fmtinfo = fmt; + + return 0; +} + +static int jpu_try_fmt(struct file *file, void *priv, struct v4l2_format *f) +{ + struct jpu_ctx *ctx = fh_to_ctx(priv); + + if (!v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type)) + return -EINVAL; + + return __jpu_try_fmt(ctx, NULL, &f->fmt.pix_mp, f->type); +} + +static int jpu_s_fmt(struct file *file, void *priv, struct v4l2_format *f) +{ + struct vb2_queue *vq; + struct jpu_ctx *ctx = fh_to_ctx(priv); + struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; + struct jpu_fmt *fmtinfo; + struct jpu_q_data *q_data; + int ret; + + vq = v4l2_m2m_get_vq(m2m_ctx, f->type); + if (!vq) + return -EINVAL; + + if (vb2_is_busy(vq)) { + v4l2_err(&ctx->jpu->v4l2_dev, "%s queue busy\n", __func__); + return -EBUSY; + } + + ret = __jpu_try_fmt(ctx, &fmtinfo, &f->fmt.pix_mp, f->type); + if (ret < 0) + return ret; + + q_data = jpu_get_q_data(ctx, f->type); + + q_data->format = f->fmt.pix_mp; + q_data->fmtinfo = fmtinfo; + + return 0; +} + +static int jpu_g_fmt(struct file *file, void *priv, struct v4l2_format *f) +{ + struct jpu_q_data *q_data; + struct jpu_ctx *ctx = fh_to_ctx(priv); + + if (!v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type)) + return -EINVAL; + + q_data = jpu_get_q_data(ctx, f->type); + f->fmt.pix_mp = q_data->format; + + return 0; +} + +/* + * V4L2 controls + */ +static int jpu_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct jpu_ctx *ctx = ctrl_to_ctx(ctrl); + unsigned long flags; + + spin_lock_irqsave(&ctx->jpu->lock, flags); + if (ctrl->id == V4L2_CID_JPEG_COMPRESSION_QUALITY) + ctx->compr_quality = ctrl->val; + spin_unlock_irqrestore(&ctx->jpu->lock, flags); + + return 0; +} + +static const struct v4l2_ctrl_ops jpu_ctrl_ops = { + .s_ctrl = jpu_s_ctrl, +}; + +static int jpu_streamon(struct file *file, void *priv, enum v4l2_buf_type type) +{ + struct jpu_ctx *ctx = fh_to_ctx(priv); + struct jpu_q_data *src_q_data, *dst_q_data, *orig, adj, *ref; + enum v4l2_buf_type adj_type; + + src_q_data = jpu_get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + dst_q_data = jpu_get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + + if (ctx->encoder) { + adj = *src_q_data; + orig = src_q_data; + ref = dst_q_data; + adj_type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + } else { + adj = *dst_q_data; + orig = dst_q_data; + ref = src_q_data; + adj_type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + } + + adj.format.width = ref->format.width; + adj.format.height = ref->format.height; + + __jpu_try_fmt(ctx, NULL, &adj.format, adj_type); + + if (adj.format.width != orig->format.width || + adj.format.height != orig->format.height) { + dev_err(ctx->jpu->dev, "src and dst formats do not match.\n"); + /* maybe we can return -EPIPE here? */ + return -EINVAL; + } + + return v4l2_m2m_streamon(file, ctx->fh.m2m_ctx, type); +} + +static const struct v4l2_ioctl_ops jpu_ioctl_ops = { + .vidioc_querycap = jpu_querycap, + + .vidioc_enum_fmt_vid_cap_mplane = jpu_enum_fmt_cap, + .vidioc_enum_fmt_vid_out_mplane = jpu_enum_fmt_out, + .vidioc_g_fmt_vid_cap_mplane = jpu_g_fmt, + .vidioc_g_fmt_vid_out_mplane = jpu_g_fmt, + .vidioc_try_fmt_vid_cap_mplane = jpu_try_fmt, + .vidioc_try_fmt_vid_out_mplane = jpu_try_fmt, + .vidioc_s_fmt_vid_cap_mplane = jpu_s_fmt, + .vidioc_s_fmt_vid_out_mplane = jpu_s_fmt, + + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, + .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, + .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, + .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, + + .vidioc_streamon = jpu_streamon, + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, + + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe +}; + +static int jpu_controls_create(struct jpu_ctx *ctx) +{ + struct v4l2_ctrl *ctrl; + int ret; + + v4l2_ctrl_handler_init(&ctx->ctrl_handler, 1); + + ctrl = v4l2_ctrl_new_std(&ctx->ctrl_handler, &jpu_ctrl_ops, + V4L2_CID_JPEG_COMPRESSION_QUALITY, + 0, JPU_MAX_QUALITY - 1, 1, 0); + + if (ctx->ctrl_handler.error) { + ret = ctx->ctrl_handler.error; + goto error_free; + } + + if (!ctx->encoder) + ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE | + V4L2_CTRL_FLAG_READ_ONLY; + + ret = v4l2_ctrl_handler_setup(&ctx->ctrl_handler); + if (ret < 0) + goto error_free; + + return 0; + +error_free: + v4l2_ctrl_handler_free(&ctx->ctrl_handler); + return ret; +} + +/* + * ============================================================================ + * Queue operations + * ============================================================================ + */ +static int jpu_queue_setup(struct vb2_queue *vq, + const void *parg, + unsigned int *nbuffers, unsigned int *nplanes, + unsigned int sizes[], void *alloc_ctxs[]) +{ + const struct v4l2_format *fmt = parg; + struct jpu_ctx *ctx = vb2_get_drv_priv(vq); + struct jpu_q_data *q_data; + unsigned int i; + + q_data = jpu_get_q_data(ctx, vq->type); + + *nplanes = q_data->format.num_planes; + + for (i = 0; i < *nplanes; i++) { + unsigned int q_size = q_data->format.plane_fmt[i].sizeimage; + unsigned int f_size = fmt ? + fmt->fmt.pix_mp.plane_fmt[i].sizeimage : 0; + + if (fmt && f_size < q_size) + return -EINVAL; + + sizes[i] = fmt ? f_size : q_size; + alloc_ctxs[i] = ctx->jpu->alloc_ctx; + } + + return 0; +} + +static int jpu_buf_prepare(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct jpu_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct jpu_q_data *q_data; + unsigned int i; + + q_data = jpu_get_q_data(ctx, vb->vb2_queue->type); + + if (V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) { + if (vbuf->field == V4L2_FIELD_ANY) + vbuf->field = V4L2_FIELD_NONE; + if (vbuf->field != V4L2_FIELD_NONE) { + dev_err(ctx->jpu->dev, "%s field isn't supported\n", + __func__); + return -EINVAL; + } + } + + for (i = 0; i < q_data->format.num_planes; i++) { + unsigned long size = q_data->format.plane_fmt[i].sizeimage; + + if (vb2_plane_size(vb, i) < size) { + dev_err(ctx->jpu->dev, + "%s: data will not fit into plane (%lu < %lu)\n", + __func__, vb2_plane_size(vb, i), size); + return -EINVAL; + } + + /* decoder capture queue */ + if (!ctx->encoder && !V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) + vb2_set_plane_payload(vb, i, size); + } + + return 0; +} + +static void jpu_buf_queue(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct jpu_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + + if (!ctx->encoder && V4L2_TYPE_IS_OUTPUT(vb->vb2_queue->type)) { + struct jpu_buffer *jpu_buf = vb2_to_jpu_buffer(vbuf); + struct jpu_q_data *q_data, adjust; + void *buffer = vb2_plane_vaddr(vb, 0); + unsigned long buf_size = vb2_get_plane_payload(vb, 0); + unsigned int width, height; + + u8 subsampling = jpu_parse_hdr(buffer, buf_size, &width, + &height); + + /* check if JPEG data basic parsing was successful */ + if (subsampling != JPU_JPEG_422 && subsampling != JPU_JPEG_420) + goto format_error; + + q_data = &ctx->out_q; + + adjust = *q_data; + adjust.format.width = width; + adjust.format.height = height; + + __jpu_try_fmt(ctx, &adjust.fmtinfo, &adjust.format, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + + if (adjust.format.width != q_data->format.width || + adjust.format.height != q_data->format.height) + goto format_error; + + /* + * keep subsampling in buffer to check it + * for compatibility in device_run + */ + jpu_buf->subsampling = subsampling; + } + + if (ctx->fh.m2m_ctx) + v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); + + return; + +format_error: + dev_err(ctx->jpu->dev, "incompatible or corrupted JPEG data\n"); + vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); +} + +static void jpu_buf_finish(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct jpu_buffer *jpu_buf = vb2_to_jpu_buffer(vbuf); + struct jpu_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct jpu_q_data *q_data = &ctx->out_q; + enum v4l2_buf_type type = vb->vb2_queue->type; + u8 *buffer; + + if (vb->state == VB2_BUF_STATE_DONE) + vbuf->sequence = jpu_get_q_data(ctx, type)->sequence++; + + if (!ctx->encoder || vb->state != VB2_BUF_STATE_DONE || + V4L2_TYPE_IS_OUTPUT(type)) + return; + + buffer = vb2_plane_vaddr(vb, 0); + + memcpy(buffer, jpeg_hdrs[jpu_buf->compr_quality], JPU_JPEG_HDR_SIZE); + *(__be16 *)(buffer + JPU_JPEG_HEIGHT_OFFSET) = + cpu_to_be16(q_data->format.height); + *(__be16 *)(buffer + JPU_JPEG_WIDTH_OFFSET) = + cpu_to_be16(q_data->format.width); + *(buffer + JPU_JPEG_SUBS_OFFSET) = q_data->fmtinfo->subsampling; +} + +static int jpu_start_streaming(struct vb2_queue *vq, unsigned count) +{ + struct jpu_ctx *ctx = vb2_get_drv_priv(vq); + struct jpu_q_data *q_data = jpu_get_q_data(ctx, vq->type); + + q_data->sequence = 0; + return 0; +} + +static void jpu_stop_streaming(struct vb2_queue *vq) +{ + struct jpu_ctx *ctx = vb2_get_drv_priv(vq); + struct vb2_v4l2_buffer *vb; + unsigned long flags; + + for (;;) { + if (V4L2_TYPE_IS_OUTPUT(vq->type)) + vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); + else + vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); + if (vb == NULL) + return; + spin_lock_irqsave(&ctx->jpu->lock, flags); + v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR); + spin_unlock_irqrestore(&ctx->jpu->lock, flags); + } +} + +static struct vb2_ops jpu_qops = { + .queue_setup = jpu_queue_setup, + .buf_prepare = jpu_buf_prepare, + .buf_queue = jpu_buf_queue, + .buf_finish = jpu_buf_finish, + .start_streaming = jpu_start_streaming, + .stop_streaming = jpu_stop_streaming, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, +}; + +static int jpu_queue_init(void *priv, struct vb2_queue *src_vq, + struct vb2_queue *dst_vq) +{ + struct jpu_ctx *ctx = priv; + int ret; + + memset(src_vq, 0, sizeof(*src_vq)); + src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; + src_vq->io_modes = VB2_MMAP | VB2_DMABUF; + src_vq->drv_priv = ctx; + src_vq->buf_struct_size = sizeof(struct jpu_buffer); + src_vq->ops = &jpu_qops; + src_vq->mem_ops = &vb2_dma_contig_memops; + src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + src_vq->lock = &ctx->jpu->mutex; + + ret = vb2_queue_init(src_vq); + if (ret) + return ret; + + memset(dst_vq, 0, sizeof(*dst_vq)); + dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + dst_vq->io_modes = VB2_MMAP | VB2_DMABUF; + dst_vq->drv_priv = ctx; + dst_vq->buf_struct_size = sizeof(struct jpu_buffer); + dst_vq->ops = &jpu_qops; + dst_vq->mem_ops = &vb2_dma_contig_memops; + dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY; + dst_vq->lock = &ctx->jpu->mutex; + + return vb2_queue_init(dst_vq); +} + +/* + * ============================================================================ + * Device file operations + * ============================================================================ + */ +static int jpu_open(struct file *file) +{ + struct jpu *jpu = video_drvdata(file); + struct video_device *vfd = video_devdata(file); + struct jpu_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + v4l2_fh_init(&ctx->fh, vfd); + ctx->fh.ctrl_handler = &ctx->ctrl_handler; + file->private_data = &ctx->fh; + v4l2_fh_add(&ctx->fh); + + ctx->jpu = jpu; + ctx->encoder = vfd == &jpu->vfd_encoder; + + __jpu_try_fmt(ctx, &ctx->out_q.fmtinfo, &ctx->out_q.format, + V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + __jpu_try_fmt(ctx, &ctx->cap_q.fmtinfo, &ctx->cap_q.format, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + + ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(jpu->m2m_dev, ctx, jpu_queue_init); + if (IS_ERR(ctx->fh.m2m_ctx)) { + ret = PTR_ERR(ctx->fh.m2m_ctx); + goto v4l_prepare_rollback; + } + + ret = jpu_controls_create(ctx); + if (ret < 0) + goto v4l_prepare_rollback; + + if (mutex_lock_interruptible(&jpu->mutex)) { + ret = -ERESTARTSYS; + goto v4l_prepare_rollback; + } + + if (jpu->ref_count == 0) { + ret = clk_prepare_enable(jpu->clk); + if (ret < 0) + goto device_prepare_rollback; + /* ...issue software reset */ + ret = jpu_reset(jpu); + if (ret) + goto device_prepare_rollback; + } + + jpu->ref_count++; + + mutex_unlock(&jpu->mutex); + return 0; + +device_prepare_rollback: + mutex_unlock(&jpu->mutex); +v4l_prepare_rollback: + v4l2_fh_del(&ctx->fh); + v4l2_fh_exit(&ctx->fh); + kfree(ctx); + return ret; +} + +static int jpu_release(struct file *file) +{ + struct jpu *jpu = video_drvdata(file); + struct jpu_ctx *ctx = fh_to_ctx(file->private_data); + + mutex_lock(&jpu->mutex); + if (--jpu->ref_count == 0) + clk_disable_unprepare(jpu->clk); + mutex_unlock(&jpu->mutex); + + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); + v4l2_ctrl_handler_free(&ctx->ctrl_handler); + v4l2_fh_del(&ctx->fh); + v4l2_fh_exit(&ctx->fh); + kfree(ctx); + + return 0; +} + +static const struct v4l2_file_operations jpu_fops = { + .owner = THIS_MODULE, + .open = jpu_open, + .release = jpu_release, + .unlocked_ioctl = video_ioctl2, + .poll = v4l2_m2m_fop_poll, + .mmap = v4l2_m2m_fop_mmap, +}; + +/* + * ============================================================================ + * mem2mem callbacks + * ============================================================================ + */ +static void jpu_cleanup(struct jpu_ctx *ctx, bool reset) +{ + /* remove current buffers and finish job */ + struct vb2_v4l2_buffer *src_buf, *dst_buf; + unsigned long flags; + + spin_lock_irqsave(&ctx->jpu->lock, flags); + + src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); + dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); + + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); + v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); + + /* ...and give it a chance on next run */ + if (reset) + jpu_write(ctx->jpu, JCCMD_SRST, JCCMD); + + spin_unlock_irqrestore(&ctx->jpu->lock, flags); + + v4l2_m2m_job_finish(ctx->jpu->m2m_dev, ctx->fh.m2m_ctx); +} + +static void jpu_device_run(void *priv) +{ + struct jpu_ctx *ctx = priv; + struct jpu *jpu = ctx->jpu; + struct jpu_buffer *jpu_buf; + struct jpu_q_data *q_data; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + unsigned int w, h, bpl; + unsigned char num_planes, subsampling; + unsigned long flags; + + /* ...wait until module reset completes; we have mutex locked here */ + if (jpu_wait_reset(jpu)) { + jpu_cleanup(ctx, true); + return; + } + + spin_lock_irqsave(&ctx->jpu->lock, flags); + + jpu->curr = ctx; + + src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); + + if (ctx->encoder) { + jpu_buf = vb2_to_jpu_buffer(dst_buf); + q_data = &ctx->out_q; + } else { + jpu_buf = vb2_to_jpu_buffer(src_buf); + q_data = &ctx->cap_q; + } + + w = q_data->format.width; + h = q_data->format.height; + bpl = q_data->format.plane_fmt[0].bytesperline; + num_planes = q_data->fmtinfo->num_planes; + subsampling = q_data->fmtinfo->subsampling; + + if (ctx->encoder) { + unsigned long src_1_addr, src_2_addr, dst_addr; + unsigned int redu, inft; + + dst_addr = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); + src_1_addr = + vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + if (num_planes > 1) + src_2_addr = vb2_dma_contig_plane_dma_addr( + &src_buf->vb2_buf, 1); + else + src_2_addr = src_1_addr + w * h; + + jpu_buf->compr_quality = ctx->compr_quality; + + if (subsampling == JPU_JPEG_420) { + redu = JCMOD_REDU_420; + inft = JIFECNT_INFT_420; + } else { + redu = JCMOD_REDU_422; + inft = JIFECNT_INFT_422; + } + + /* only no marker mode works for encoding */ + jpu_write(jpu, JCMOD_DSP_ENC | JCMOD_PCTR | redu | + JCMOD_MSKIP_ENABLE, JCMOD); + + jpu_write(jpu, JIFECNT_SWAP_WB | inft, JIFECNT); + jpu_write(jpu, JIFDCNT_SWAP_WB, JIFDCNT); + jpu_write(jpu, JINTE_TRANSF_COMPL, JINTE); + + /* Y and C components source addresses */ + jpu_write(jpu, src_1_addr, JIFESYA1); + jpu_write(jpu, src_2_addr, JIFESCA1); + + /* memory width */ + jpu_write(jpu, bpl, JIFESMW); + + jpu_write(jpu, (w >> 8) & JCSZ_MASK, JCHSZU); + jpu_write(jpu, w & JCSZ_MASK, JCHSZD); + + jpu_write(jpu, (h >> 8) & JCSZ_MASK, JCVSZU); + jpu_write(jpu, h & JCSZ_MASK, JCVSZD); + + jpu_write(jpu, w, JIFESHSZ); + jpu_write(jpu, h, JIFESVSZ); + + jpu_write(jpu, dst_addr + JPU_JPEG_HDR_SIZE, JIFEDA1); + + jpu_write(jpu, 0 << JCQTN_SHIFT(1) | 1 << JCQTN_SHIFT(2) | + 1 << JCQTN_SHIFT(3), JCQTN); + + jpu_write(jpu, 0 << JCHTN_AC_SHIFT(1) | 0 << JCHTN_DC_SHIFT(1) | + 1 << JCHTN_AC_SHIFT(2) | 1 << JCHTN_DC_SHIFT(2) | + 1 << JCHTN_AC_SHIFT(3) | 1 << JCHTN_DC_SHIFT(3), + JCHTN); + + jpu_set_qtbl(jpu, ctx->compr_quality); + jpu_set_htbl(jpu); + } else { + unsigned long src_addr, dst_1_addr, dst_2_addr; + + if (jpu_buf->subsampling != subsampling) { + dev_err(ctx->jpu->dev, + "src and dst formats do not match.\n"); + spin_unlock_irqrestore(&ctx->jpu->lock, flags); + jpu_cleanup(ctx, false); + return; + } + + src_addr = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0); + dst_1_addr = + vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0); + if (q_data->fmtinfo->num_planes > 1) + dst_2_addr = vb2_dma_contig_plane_dma_addr( + &dst_buf->vb2_buf, 1); + else + dst_2_addr = dst_1_addr + w * h; + + /* ...set up decoder operation */ + jpu_write(jpu, JCMOD_DSP_DEC | JCMOD_PCTR, JCMOD); + jpu_write(jpu, JIFECNT_SWAP_WB, JIFECNT); + jpu_write(jpu, JIFDCNT_SWAP_WB, JIFDCNT); + + /* ...enable interrupts on transfer completion and d-g error */ + jpu_write(jpu, JINTE_TRANSF_COMPL | JINTE_ERR, JINTE); + + /* ...set source/destination addresses of encoded data */ + jpu_write(jpu, src_addr, JIFDSA1); + jpu_write(jpu, dst_1_addr, JIFDDYA1); + jpu_write(jpu, dst_2_addr, JIFDDCA1); + + jpu_write(jpu, bpl, JIFDDMW); + } + + /* ...start encoder/decoder operation */ + jpu_write(jpu, JCCMD_JSRT, JCCMD); + + spin_unlock_irqrestore(&ctx->jpu->lock, flags); +} + +static int jpu_job_ready(void *priv) +{ + return 1; +} + +static void jpu_job_abort(void *priv) +{ + struct jpu_ctx *ctx = priv; + + if (!wait_event_timeout(ctx->jpu->irq_queue, !ctx->jpu->curr, + msecs_to_jiffies(JPU_JOB_TIMEOUT))) + jpu_cleanup(ctx, true); +} + +static struct v4l2_m2m_ops jpu_m2m_ops = { + .device_run = jpu_device_run, + .job_ready = jpu_job_ready, + .job_abort = jpu_job_abort, +}; + +/* + * ============================================================================ + * IRQ handler + * ============================================================================ + */ +static irqreturn_t jpu_irq_handler(int irq, void *dev_id) +{ + struct jpu *jpu = dev_id; + struct jpu_ctx *curr_ctx; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + unsigned int int_status; + + int_status = jpu_read(jpu, JINTS); + + /* ...spurious interrupt */ + if (!((JINTS_TRANSF_COMPL | JINTS_PROCESS_COMPL | JINTS_ERR) & + int_status)) + return IRQ_NONE; + + /* ...clear interrupts */ + jpu_write(jpu, ~(int_status & JINTS_MASK), JINTS); + if (int_status & (JINTS_ERR | JINTS_PROCESS_COMPL)) + jpu_write(jpu, JCCMD_JEND, JCCMD); + + spin_lock(&jpu->lock); + + if ((int_status & JINTS_PROCESS_COMPL) && + !(int_status & JINTS_TRANSF_COMPL)) + goto handled; + + curr_ctx = v4l2_m2m_get_curr_priv(jpu->m2m_dev); + if (!curr_ctx) { + /* ...instance is not running */ + dev_err(jpu->dev, "no active context for m2m\n"); + goto handled; + } + + src_buf = v4l2_m2m_src_buf_remove(curr_ctx->fh.m2m_ctx); + dst_buf = v4l2_m2m_dst_buf_remove(curr_ctx->fh.m2m_ctx); + + if (int_status & JINTS_TRANSF_COMPL) { + if (curr_ctx->encoder) { + unsigned long payload_size = jpu_read(jpu, JCDTCU) << 16 + | jpu_read(jpu, JCDTCM) << 8 + | jpu_read(jpu, JCDTCD); + vb2_set_plane_payload(&dst_buf->vb2_buf, 0, + payload_size + JPU_JPEG_HDR_SIZE); + } + + dst_buf->field = src_buf->field; + dst_buf->timestamp = src_buf->timestamp; + if (src_buf->flags & V4L2_BUF_FLAG_TIMECODE) + dst_buf->timecode = src_buf->timecode; + dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK; + dst_buf->flags |= src_buf->flags & + V4L2_BUF_FLAG_TSTAMP_SRC_MASK; + dst_buf->flags = src_buf->flags & + (V4L2_BUF_FLAG_TIMECODE | V4L2_BUF_FLAG_KEYFRAME | + V4L2_BUF_FLAG_PFRAME | V4L2_BUF_FLAG_BFRAME | + V4L2_BUF_FLAG_TSTAMP_SRC_MASK); + + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); + v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE); + } else if (int_status & JINTS_ERR) { + unsigned char error = jpu_read(jpu, JCDERR) & JCDERR_MASK; + + dev_dbg(jpu->dev, "processing error: %#X: %s\n", error, + error_to_text[error]); + + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); + v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); + } + + jpu->curr = NULL; + + /* ...reset JPU after completion */ + jpu_write(jpu, JCCMD_SRST, JCCMD); + spin_unlock(&jpu->lock); + + v4l2_m2m_job_finish(jpu->m2m_dev, curr_ctx->fh.m2m_ctx); + + /* ...wakeup abort routine if needed */ + wake_up(&jpu->irq_queue); + + return IRQ_HANDLED; + +handled: + spin_unlock(&jpu->lock); + return IRQ_HANDLED; +} + +/* + * ============================================================================ + * Driver basic infrastructure + * ============================================================================ + */ +static const struct of_device_id jpu_dt_ids[] = { + { .compatible = "renesas,jpu-r8a7790" }, /* H2 */ + { .compatible = "renesas,jpu-r8a7791" }, /* M2-W */ + { .compatible = "renesas,jpu-r8a7792" }, /* V2H */ + { .compatible = "renesas,jpu-r8a7793" }, /* M2-N */ + { }, +}; +MODULE_DEVICE_TABLE(of, jpu_dt_ids); + +static int jpu_probe(struct platform_device *pdev) +{ + struct jpu *jpu; + struct resource *res; + int ret; + unsigned int i; + + jpu = devm_kzalloc(&pdev->dev, sizeof(*jpu), GFP_KERNEL); + if (!jpu) + return -ENOMEM; + + init_waitqueue_head(&jpu->irq_queue); + mutex_init(&jpu->mutex); + spin_lock_init(&jpu->lock); + jpu->dev = &pdev->dev; + + /* memory-mapped registers */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + jpu->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(jpu->regs)) + return PTR_ERR(jpu->regs); + + /* interrupt service routine registration */ + jpu->irq = ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_err(&pdev->dev, "cannot find IRQ\n"); + return ret; + } + + ret = devm_request_irq(&pdev->dev, jpu->irq, jpu_irq_handler, 0, + dev_name(&pdev->dev), jpu); + if (ret) { + dev_err(&pdev->dev, "cannot claim IRQ %d\n", jpu->irq); + return ret; + } + + /* clocks */ + jpu->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(jpu->clk)) { + dev_err(&pdev->dev, "cannot get clock\n"); + return PTR_ERR(jpu->clk); + } + + /* v4l2 device */ + ret = v4l2_device_register(&pdev->dev, &jpu->v4l2_dev); + if (ret) { + dev_err(&pdev->dev, "Failed to register v4l2 device\n"); + return ret; + } + + /* mem2mem device */ + jpu->m2m_dev = v4l2_m2m_init(&jpu_m2m_ops); + if (IS_ERR(jpu->m2m_dev)) { + v4l2_err(&jpu->v4l2_dev, "Failed to init mem2mem device\n"); + ret = PTR_ERR(jpu->m2m_dev); + goto device_register_rollback; + } + + jpu->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev); + if (IS_ERR(jpu->alloc_ctx)) { + v4l2_err(&jpu->v4l2_dev, "Failed to init memory allocator\n"); + ret = PTR_ERR(jpu->alloc_ctx); + goto m2m_init_rollback; + } + + /* fill in qantization and Huffman tables for encoder */ + for (i = 0; i < JPU_MAX_QUALITY; i++) + jpu_generate_hdr(i, (unsigned char *)jpeg_hdrs[i]); + + strlcpy(jpu->vfd_encoder.name, DRV_NAME, sizeof(jpu->vfd_encoder.name)); + jpu->vfd_encoder.fops = &jpu_fops; + jpu->vfd_encoder.ioctl_ops = &jpu_ioctl_ops; + jpu->vfd_encoder.minor = -1; + jpu->vfd_encoder.release = video_device_release_empty; + jpu->vfd_encoder.lock = &jpu->mutex; + jpu->vfd_encoder.v4l2_dev = &jpu->v4l2_dev; + jpu->vfd_encoder.vfl_dir = VFL_DIR_M2M; + + ret = video_register_device(&jpu->vfd_encoder, VFL_TYPE_GRABBER, -1); + if (ret) { + v4l2_err(&jpu->v4l2_dev, "Failed to register video device\n"); + goto vb2_allocator_rollback; + } + + video_set_drvdata(&jpu->vfd_encoder, jpu); + + strlcpy(jpu->vfd_decoder.name, DRV_NAME, sizeof(jpu->vfd_decoder.name)); + jpu->vfd_decoder.fops = &jpu_fops; + jpu->vfd_decoder.ioctl_ops = &jpu_ioctl_ops; + jpu->vfd_decoder.minor = -1; + jpu->vfd_decoder.release = video_device_release_empty; + jpu->vfd_decoder.lock = &jpu->mutex; + jpu->vfd_decoder.v4l2_dev = &jpu->v4l2_dev; + jpu->vfd_decoder.vfl_dir = VFL_DIR_M2M; + + ret = video_register_device(&jpu->vfd_decoder, VFL_TYPE_GRABBER, -1); + if (ret) { + v4l2_err(&jpu->v4l2_dev, "Failed to register video device\n"); + goto enc_vdev_register_rollback; + } + + video_set_drvdata(&jpu->vfd_decoder, jpu); + platform_set_drvdata(pdev, jpu); + + v4l2_info(&jpu->v4l2_dev, "encoder device registered as /dev/video%d\n", + jpu->vfd_encoder.num); + v4l2_info(&jpu->v4l2_dev, "decoder device registered as /dev/video%d\n", + jpu->vfd_decoder.num); + + return 0; + +enc_vdev_register_rollback: + video_unregister_device(&jpu->vfd_encoder); + +vb2_allocator_rollback: + vb2_dma_contig_cleanup_ctx(jpu->alloc_ctx); + +m2m_init_rollback: + v4l2_m2m_release(jpu->m2m_dev); + +device_register_rollback: + v4l2_device_unregister(&jpu->v4l2_dev); + + return ret; +} + +static int jpu_remove(struct platform_device *pdev) +{ + struct jpu *jpu = platform_get_drvdata(pdev); + + video_unregister_device(&jpu->vfd_decoder); + video_unregister_device(&jpu->vfd_encoder); + vb2_dma_contig_cleanup_ctx(jpu->alloc_ctx); + v4l2_m2m_release(jpu->m2m_dev); + v4l2_device_unregister(&jpu->v4l2_dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int jpu_suspend(struct device *dev) +{ + struct jpu *jpu = dev_get_drvdata(dev); + + if (jpu->ref_count == 0) + return 0; + + clk_disable_unprepare(jpu->clk); + + return 0; +} + +static int jpu_resume(struct device *dev) +{ + struct jpu *jpu = dev_get_drvdata(dev); + + if (jpu->ref_count == 0) + return 0; + + clk_prepare_enable(jpu->clk); + + return 0; +} +#endif + +static const struct dev_pm_ops jpu_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(jpu_suspend, jpu_resume) +}; + +static struct platform_driver jpu_driver = { + .probe = jpu_probe, + .remove = jpu_remove, + .driver = { + .of_match_table = jpu_dt_ids, + .name = DRV_NAME, + .pm = &jpu_pm_ops, + }, +}; + +module_platform_driver(jpu_driver); + +MODULE_ALIAS("platform:" DRV_NAME); +MODULE_AUTHOR("Mikhail Ulianov <mikhail.ulyanov@cogentembedded.com>"); +MODULE_DESCRIPTION("Renesas R-Car JPEG processing unit driver"); +MODULE_LICENSE("GPL v2"); |