
Add hardware decoding

leon committed 1 month ago
commit 36f2fdc22b

+ 2 - 1
Makefile

@@ -40,8 +40,9 @@ link_opencv       := opencv_core opencv_imgproc opencv_videoio opencv_imgcodecs
 link_trt          := nvinfer nvinfer_plugin nvonnxparser
 link_cuda         := cuda cublas cudart cudnn
 link_sys          := stdc++ dl
+link_ffmpeg       := avcodec avformat swresample swscale avutil
 
-link_librarys     := $(link_opencv) $(link_trt) $(link_cuda) $(link_sys)
+link_librarys     := $(link_opencv) $(link_ffmpeg) $(link_trt) $(link_cuda) $(link_sys)
 
 
 empty := 
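
Note: the new link_ffmpeg group assumes the FFmpeg development packages (libavcodec, libavformat, libswresample, libswscale, libavutil) are installed and visible to the linker. A quick sanity check of the linkage is a one-file program against libavcodec -- a sketch, not part of this commit:

// check_ffmpeg.cpp -- build with: g++ check_ffmpeg.cpp -lavcodec
extern "C" {
#include <libavcodec/avcodec.h>
}
#include <cstdio>

int main(){
    // Prints the runtime version of the linked libavcodec.
    printf("libavcodec version: %u\n", avcodec_version());
    return 0;
}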

+ 1 - 1
src/nodes/httpPush/httpPush.cpp

@@ -20,7 +20,7 @@ void HttpPushNode::work()
             // printf("Node %s get data from %s\n", name_.c_str(), input_buffer.first.c_str());
             // do something
             cv::Mat image = metaData->draw_image;
-            std::string image_name = metaData->from + "_" + getTimeString() + ".jpg";
+            std::string image_name = "result/" + metaData->from + "_" + getTimeString() + ".jpg";
             cv::imwrite(image_name, image);
         }
     }

+ 63 - 10
src/nodes/stream/streamNode.cpp

@@ -6,22 +6,27 @@ namespace Node
 
 void StreamNode::work()
 {
-    cv::VideoCapture cap(stream_url_);
-    if (!cap.isOpened())
-    {
-        std::cerr << "Error: cannot open camera" << std::endl;
-        return;
-    }
     printf("StreamNode %s\n", name_.c_str());
 printf("stream url: %s\n", stream_url_.c_str());
-    printf("fps: %f\n", cap.get(cv::CAP_PROP_FPS));
-    printf("width: %f\n", cap.get(cv::CAP_PROP_FRAME_WIDTH));
-    printf("height: %f\n", cap.get(cv::CAP_PROP_FRAME_HEIGHT));
+    printf("Decode type: %d\n", static_cast<int>(decode_type_));
+
+    if (decode_type_ == DecodeType::CPU)
+    {
+        work_cpu();
+    }
+    else
+    {
+        work_gpu();
+    }
+}
 
+
+void StreamNode::work_cpu()
+{
     while (running_)
     {
         cv::Mat frame;
-        cap >> frame;
+        *cap_ >> frame;
         frame_count_++;
         if (frame_count_ % skip_frame_ != 0)
         {
@@ -46,5 +51,53 @@ void StreamNode::work()
     }
 }
 
+void StreamNode::work_gpu()
+{
+    uint8_t* packet_data = nullptr;
+    int packet_size = 0;
+    int64_t pts = 0;
+
+    // Prime the parser with the stream extradata (SPS/PPS) before the first packet.
+    demuxer_->get_extra_data(&packet_data, &packet_size);
+    decoder_->decode(packet_data, packet_size);
+
+    printf("packet_size = %d\n", packet_size);
+
+    unsigned int frame_index = 0;
+    unsigned int total_frames = 0;
+    do{
+        demuxer_->demux(&packet_data, &packet_size, &pts);
+        int ndecoded_frame = decoder_->decode(packet_data, packet_size, pts);
+        for(int i = 0; i < ndecoded_frame; ++i){
+
+            /* The decoder was created with output_bgr = true, so get_frame() returns
+               a packed BGR buffer of height x width x 3 bytes in host memory.
+               (With output_bgr = false the frame would be YUV-NV12, stored as
+               [height * 1.5] * width bytes, and would need
+               cv::cvtColor(image, image, cv::COLOR_YUV2BGR_NV12) to become a
+               normal height x width CV_8UC3 image.)
+            */
+            cv::Mat image(decoder_->get_height(), decoder_->get_width(), CV_8UC3,
+                          decoder_->get_frame(&pts, &frame_index));
+            total_frames++;
+
+            frame_count_++;
+            if (frame_count_ % skip_frame_ != 0)
+            {
+                // printf("Skip frame %d\n", frame_count_);
+                continue;
+            }
+
+            auto metaData = std::make_shared<meta::MetaData>();
+            metaData->image = image.clone();  // clone: the decoder reuses its frame buffer
+            metaData->from = name_;
+
+            for (auto& output_buffer : output_buffers_)
+            {
+                output_buffer.second->push(metaData);
+                // printf("Node %s push data to %s\n", name_.c_str(), output_buffer.first.c_str());
+            }
+        }
+    }while(packet_size > 0 && running_);
+    printf("StreamNode %s: decoded %u frames\n", name_.c_str(), total_frames);
+}
+
 
 }   // namespace Node

+ 60 - 2
src/nodes/stream/streamNode.hpp

@@ -2,17 +2,65 @@
 #define STREAMNODE_HPP__
 
 #include "nodes/base/base.hpp"
-#include <opencv2/opencv.hpp>
+#include "opencv2/opencv.hpp"
+
+#include "stream/ffhdd/ffmpeg_demuxer.hpp"
+#include "stream/ffhdd/cuvid_decoder.hpp"
 
 namespace Node
 {
 
+enum class DecodeType
+{
+    CPU = 0,
+    GPU = 1
+};
+
+enum class StreamStatus{
+    OPENED = 0,
+    CLOSED = 1,
+    OPEN_FAILED = 2
+};
+
 class StreamNode : public BaseNode
 {
 public:
     StreamNode() = delete;
     StreamNode(const std::string& name) : BaseNode(name, NODE_TYPE::SRC_NODE) {}
-    StreamNode(const std::string& name, const std::string& url) : BaseNode(name, NODE_TYPE::SRC_NODE), stream_url_(url){}
+    StreamNode(const std::string& name, const std::string& url, int gpu_id=0, DecodeType type=DecodeType::GPU) 
+        : BaseNode(name, NODE_TYPE::SRC_NODE), stream_url_(url), gpu_id_(gpu_id), decode_type_(type)
+    {
+        if (decode_type_ == DecodeType::GPU)
+        {
+            demuxer_ = FFHDDemuxer::create_ffmpeg_demuxer(stream_url_);
+            if (demuxer_ == nullptr)
+            {
+                printf("demuxer create failed\n");
+                status_ = StreamStatus::OPEN_FAILED;
+                return;
+            }
+            decoder_ = FFHDDecoder::create_cuvid_decoder(
+                false, FFHDDecoder::ffmpeg2NvCodecId(demuxer_->get_video_codec()), -1, gpu_id, nullptr, nullptr, true
+            );
+            if (decoder_ == nullptr)
+            {
+                printf("decoder create failed\n");
+                status_ = StreamStatus::OPEN_FAILED;
+                return;
+            }
+            status_ = StreamStatus::OPENED;
+        }
+        else
+        {
+            cap_ = std::make_shared<cv::VideoCapture>(stream_url_);
+            if (!cap_->isOpened())
+            {
+                printf("cap open failed\n");
+                status_ = StreamStatus::OPEN_FAILED;
+                return;
+            }
+            status_ = StreamStatus::OPENED;
+        }
+    }
     virtual ~StreamNode() { };
 
     void set_stream_url(const std::string& stream_url)
@@ -30,11 +78,21 @@ public:
     }
 
     void work() override;
+    void work_cpu();
+    void work_gpu();
 
 private:
     std::string stream_url_;
     int skip_frame_ = 1;
     int frame_count_ = -1;
+    int gpu_id_ = 0;
+
+    std::shared_ptr<cv::VideoCapture> cap_ = nullptr;
+    std::shared_ptr<FFHDDemuxer::FFmpegDemuxer> demuxer_ = nullptr;
+    std::shared_ptr<FFHDDecoder::CUVIDDecoder> decoder_ = nullptr;
+
+    DecodeType decode_type_ = DecodeType::GPU;
+    StreamStatus status_ = StreamStatus::CLOSED;
 };
 
 }
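
Note: a minimal sketch of constructing the node in both modes (node names and URLs are illustrative; buffer wiring follows the existing BaseNode API):

// GPU path: demuxes with FFmpeg and decodes on NVDEC via CUVID
auto gpu_node = std::make_shared<Node::StreamNode>(
    "cam0", "rtsp://192.168.1.10/stream", 0, Node::DecodeType::GPU);
// CPU path: falls back to cv::VideoCapture
auto cpu_node = std::make_shared<Node::StreamNode>(
    "cam1", "rtsp://192.168.1.11/stream", 0, Node::DecodeType::CPU);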

+ 37 - 0
src/stream/ffhdd/color.cu

@@ -0,0 +1,37 @@
+#include <cuda_runtime.h>
+
+#include <stdint.h>
+static __device__ uint8_t cast(float value){
+    return value < 0 ? 0 : (value > 255 ? 255 : value);
+}
+
+static __global__ void convert_nv12_to_bgr_kernel(
+    const uint8_t* y, const uint8_t* uv, int width, int height, int linesize, uint8_t* dst_bgr, int edge
+){
+    int position = blockDim.x * blockIdx.x + threadIdx.x;
+    if (position >= edge) return;
+
+    int ox = position % width;
+    int oy = position / width;
+    const uint8_t& yvalue = y[oy * linesize + ox];
+    int offset_uv = (oy >> 1) * linesize + (ox & 0xFFFFFFFE);
+    const uint8_t& u = uv[offset_uv + 0];
+    const uint8_t& v = uv[offset_uv + 1];
+    dst_bgr[position * 3 + 0] = cast(1.164f * (yvalue - 16.0f) + 2.018f * (u - 128.0f));
+    dst_bgr[position * 3 + 1] = cast(1.164f * (yvalue - 16.0f) - 0.813f * (v - 128.0f) - 0.391f * (u - 128.0f));
+    dst_bgr[position * 3 + 2] = cast(1.164f * (yvalue - 16.0f) + 1.596f * (v - 128.0f));
+}
+
+void convert_nv12_to_bgr_invoker(
+    const uint8_t* y, const uint8_t* uv, int width, int height, int linesize, uint8_t* dst_bgr, 
+    cudaStream_t stream
+){
+    int total = width * height;
+    int block = total < 512 ? total : 512;
+    int grid = (total + block - 1) / block;
+
+    convert_nv12_to_bgr_kernel<<<grid, block, 0, stream>>>(
+        y, uv, width, height, linesize,
+        dst_bgr, total
+    );
+}
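
Note: the kernel implements the BT.601 limited-range YUV-to-BGR transform. A host-side reference for spot-checking single pixels against the GPU output -- a sketch, not part of this commit:

#include <stdint.h>
#include <algorithm>

// Same coefficients as convert_nv12_to_bgr_kernel, evaluated on the CPU.
static void nv12_pixel_to_bgr(uint8_t y, uint8_t u, uint8_t v, uint8_t bgr[3]){
    float yf = 1.164f * (y - 16.0f);
    float b  = yf + 2.018f * (u - 128.0f);
    float g  = yf - 0.813f * (v - 128.0f) - 0.391f * (u - 128.0f);
    float r  = yf + 1.596f * (v - 128.0f);
    bgr[0] = (uint8_t)std::min(255.0f, std::max(0.0f, b));
    bgr[1] = (uint8_t)std::min(255.0f, std::max(0.0f, g));
    bgr[2] = (uint8_t)std::min(255.0f, std::max(0.0f, r));
}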

+ 65 - 0
src/stream/ffhdd/cuda_tools.cpp

@@ -0,0 +1,65 @@
+/*
+ *  CUDA utility functions used across the project
+ */
+
+#include "stream/ffhdd/cuda_tools.hpp"
+#include <string>
+
+namespace CUDATools{
+    bool check_driver(CUresult e, const char* call, int line, const char *file) {
+        if (e != CUDA_SUCCESS) {
+
+            const char* message = nullptr;
+            const char* name = nullptr;
+            cuGetErrorString(e, &message);
+            cuGetErrorName(e, &name);
+            INFOE("CUDA Driver error %s # %s, code = %s [ %d ] in file %s:%d", call, message, name, e, file, line);
+            return false;
+        }
+        return true;
+    }
+
+    bool check_runtime(cudaError_t e, const char* call, int line, const char *file){
+        if (e != cudaSuccess) {
+            INFOE("CUDA Runtime error %s # %s, code = %s [ %d ] in file %s:%d", call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line);
+            return false;
+        }
+        return true;
+    }
+
+    bool check_device_id(int device_id){
+        int device_count = -1;
+        checkCudaRuntime(cudaGetDeviceCount(&device_count));
+        if(device_id < 0 || device_id >= device_count){
+            INFOE("Invalid device id: %d, count = %d", device_id, device_count);
+            return false;
+        }
+        return true;
+    }
+
+    int current_device_id(){
+        int device_id = 0;
+        checkCudaRuntime(cudaGetDevice(&device_id));
+        return device_id;
+    }
+
+    dim3 grid_dims(int numJobs) {
+        int numBlockThreads = numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
+        return dim3((numJobs + numBlockThreads - 1) / numBlockThreads);
+    }
+
+    dim3 block_dims(int numJobs) {
+        return numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS;
+    }
+
+    AutoDevice::AutoDevice(int device_id){
+        cudaGetDevice(&old_);
+        checkCudaRuntime(cudaSetDevice(device_id));
+    }
+
+    AutoDevice::~AutoDevice(){
+        checkCudaRuntime(cudaSetDevice(old_));
+    }
+}

+ 69 - 0
src/stream/ffhdd/cuda_tools.hpp

@@ -0,0 +1,69 @@
+#ifndef CUDA_TOOLS_HPP
+#define CUDA_TOOLS_HPP
+
+/*
+ *  CUDA utility functions used across the project
+ */
+ 
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include "stream/ffhdd/simple-logger.hpp"
+
+#define GPU_BLOCK_THREADS  512
+
+
+#define KernelPositionBlock											\
+	int position = (blockDim.x * blockIdx.x + threadIdx.x);		    \
+    if (position >= (edge)) return;
+
+
+#define checkCudaDriver(call)  CUDATools::check_driver(call, #call, __LINE__, __FILE__)
+#define checkCudaRuntime(call) CUDATools::check_runtime(call, #call, __LINE__, __FILE__)
+
+#define checkCudaKernel(...)                                                                         \
+    __VA_ARGS__;                                                                                     \
+    do{cudaError_t cudaStatus = cudaPeekAtLastError();                                               \
+    if (cudaStatus != cudaSuccess){                                                                  \
+        INFOE("launch failed: %s", cudaGetErrorString(cudaStatus));                                  \
+    }} while(0);
+
+
+#define Assert(op)					 \
+	do{                              \
+		bool cond = !(!(op));        \
+		if(!cond){                   \
+			INFOF("Assert failed, " #op);  \
+		}                                  \
+	}while(false)
+
+
+struct CUctx_st;
+struct CUstream_st;
+
+typedef CUstream_st* ICUStream;
+typedef CUctx_st* ICUContext;
+typedef void* ICUDeviceptr;
+typedef int DeviceID;
+
+namespace CUDATools{
+
+    bool check_driver(CUresult e, const char* call, int iLine, const char *szFile);
+    bool check_runtime(cudaError_t e, const char* call, int iLine, const char *szFile);
+    bool check_device_id(int device_id);
+    int current_device_id();
+
+    dim3 grid_dims(int numJobs);
+    dim3 block_dims(int numJobs);
+
+    class AutoDevice{
+    public:
+        AutoDevice(int device_id = 0);
+        virtual ~AutoDevice();
+    
+    private:
+        int old_ = -1;
+    };
+}
+
+
+#endif // CUDA_TOOLS_HPP
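
Note: a minimal sketch of how the check macros and AutoDevice are meant to be used (device id and buffer size are illustrative):

#include "stream/ffhdd/cuda_tools.hpp"

void example(){
    CUDATools::AutoDevice guard(0);  // switch to GPU 0; restores the old device on scope exit
    float* d_data = nullptr;
    // On failure these log through INFOE and return false instead of aborting.
    checkCudaRuntime(cudaMalloc(&d_data, 1024 * sizeof(float)));
    checkCudaRuntime(cudaFree(d_data));
}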

+ 532 - 0
src/stream/ffhdd/cuvid_decoder.cpp

@@ -0,0 +1,532 @@
+
+#include "stream/ffhdd/cuvid_decoder.hpp"
+#include "stream/ffhdd/cuda_tools.hpp"
+#include <nvcuvid.h>
+#include <mutex>
+#include <vector>
+#include <sstream>
+#include <string.h>
+#include <assert.h>
+
+using namespace std;
+
+
+void convert_nv12_to_bgr_invoker(
+    const uint8_t* y, const uint8_t* uv, int width, int height, int linesize, uint8_t* dst_bgr,
+    cudaStream_t stream
+);
+
+namespace FFHDDecoder{
+    static float GetChromaHeightFactor(cudaVideoSurfaceFormat eSurfaceFormat)
+    {
+        float factor = 0.5;
+        switch (eSurfaceFormat)
+        {
+        case cudaVideoSurfaceFormat_NV12:
+        case cudaVideoSurfaceFormat_P016:
+            factor = 0.5;
+            break;
+        case cudaVideoSurfaceFormat_YUV444:
+        case cudaVideoSurfaceFormat_YUV444_16Bit:
+            factor = 1.0;
+            break;
+        }
+
+        return factor;
+    }
+
+    static int GetChromaPlaneCount(cudaVideoSurfaceFormat eSurfaceFormat)
+    {
+        int numPlane = 1;
+        switch (eSurfaceFormat)
+        {
+        case cudaVideoSurfaceFormat_NV12:
+        case cudaVideoSurfaceFormat_P016:
+            numPlane = 1;
+            break;
+        case cudaVideoSurfaceFormat_YUV444:
+        case cudaVideoSurfaceFormat_YUV444_16Bit:
+            numPlane = 2;
+            break;
+        }
+
+        return numPlane;
+    }
+
+    IcudaVideoCodec ffmpeg2NvCodecId(int ffmpeg_codec_id) {
+        switch (ffmpeg_codec_id) {
+            /*AV_CODEC_ID_MPEG1VIDEO*/ case 1   : return cudaVideoCodec_MPEG1;        
+            /*AV_CODEC_ID_MPEG2VIDEO*/ case 2   : return cudaVideoCodec_MPEG2;        
+            /*AV_CODEC_ID_MPEG4*/ case 12       : return cudaVideoCodec_MPEG4;        
+            /*AV_CODEC_ID_VC1*/ case 70         : return cudaVideoCodec_VC1;          
+            /*AV_CODEC_ID_H264*/ case 27        : return cudaVideoCodec_H264;         
+            /*AV_CODEC_ID_HEVC*/ case 173       : return cudaVideoCodec_HEVC;         
+            /*AV_CODEC_ID_VP8*/ case 139        : return cudaVideoCodec_VP8;          
+            /*AV_CODEC_ID_VP9*/ case 167        : return cudaVideoCodec_VP9;          
+            /*AV_CODEC_ID_MJPEG*/ case 7        : return cudaVideoCodec_JPEG;         
+            default                             : return cudaVideoCodec_NumCodecs;
+        }
+    }
+
+    class CUVIDDecoderImpl : public CUVIDDecoder{
+    public:
+        bool create(bool bUseDeviceFrame, int gpu_id, cudaVideoCodec eCodec, bool bLowLatency = false,
+                const CropRect *pCropRect = nullptr, const ResizeDim *pResizeDim = nullptr, int max_cache = -1,
+                int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000, bool output_bgr=false)
+            {
+            
+            m_bUseDeviceFrame = bUseDeviceFrame;
+            m_eCodec = eCodec;
+            m_nMaxWidth = maxWidth;
+            m_nMaxHeight = maxHeight;
+            m_nMaxCache  = max_cache;
+            m_gpuID      = gpu_id;
+            m_output_bgr = output_bgr;
+
+            if(m_gpuID == -1){
+                checkCudaRuntime(cudaGetDevice(&m_gpuID));
+            }
+
+            CUDATools::AutoDevice auto_device_exchange(m_gpuID);
+            if (pCropRect) m_cropRect = *pCropRect;
+            if (pResizeDim) m_resizeDim = *pResizeDim;
+            CUcontext cuContext = nullptr;
+            checkCudaDriver(cuCtxGetCurrent(&cuContext));
+
+            if(cuContext == nullptr){
+                INFOE("Current Context is nullptr.");
+                return false;
+            }
+
+            if(!checkCudaDriver(cuvidCtxLockCreate(&m_ctxLock, cuContext))) return false;
+            if(!checkCudaRuntime(cudaStreamCreate(&m_cuvidStream))) return false;
+
+            CUVIDPARSERPARAMS videoParserParameters = {};
+            videoParserParameters.CodecType = eCodec;
+            videoParserParameters.ulMaxNumDecodeSurfaces = 1;
+            videoParserParameters.ulClockRate = clkRate;
+            videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
+            videoParserParameters.pUserData = this;
+            videoParserParameters.pfnSequenceCallback = handleVideoSequenceProc;
+            videoParserParameters.pfnDecodePicture = handlePictureDecodeProc;
+            videoParserParameters.pfnDisplayPicture = handlePictureDisplayProc;
+            if(!checkCudaDriver(cuvidCreateVideoParser(&m_hParser, &videoParserParameters))) return false;
+            return true;
+        }
+
+        int decode(const uint8_t *pData, int nSize, int64_t nTimestamp=0) override
+        {
+            m_nDecodedFrame = 0;
+            m_nDecodedFrameReturned = 0;
+            CUVIDSOURCEDATAPACKET packet = { 0 };
+            packet.payload = pData;
+            packet.payload_size = nSize;
+            packet.flags = CUVID_PKT_TIMESTAMP;
+            packet.timestamp = nTimestamp;
+            if (!pData || nSize == 0) {
+                packet.flags |= CUVID_PKT_ENDOFSTREAM;
+            }
+
+            try{
+                CUDATools::AutoDevice auto_device_exchange(m_gpuID);
+                if(!checkCudaDriver(cuvidParseVideoData(m_hParser, &packet)))
+                    return -1;
+            }catch(...){
+                return -1;
+            }
+            return m_nDecodedFrame;
+        }
+
+        static int CUDAAPI handleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) { return ((CUVIDDecoderImpl *)pUserData)->handleVideoSequence(pVideoFormat); }
+        static int CUDAAPI handlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) { return ((CUVIDDecoderImpl *)pUserData)->handlePictureDecode(pPicParams); }
+        static int CUDAAPI handlePictureDisplayProc(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) { return ((CUVIDDecoderImpl *)pUserData)->handlePictureDisplay(pDispInfo); }
+        
+        virtual int device() override{
+            return this->m_gpuID;
+        }
+
+        virtual bool is_gpu_frame() override{
+            return this->m_bUseDeviceFrame;
+        }
+
+        int handleVideoSequence(CUVIDEOFORMAT *pVideoFormat){
+            int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
+            CUVIDDECODECAPS decodecaps;
+            memset(&decodecaps, 0, sizeof(decodecaps));
+
+            decodecaps.eCodecType = pVideoFormat->codec;
+            decodecaps.eChromaFormat = pVideoFormat->chroma_format;
+            decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+
+            checkCudaDriver(cuvidGetDecoderCaps(&decodecaps));
+            if(!decodecaps.bIsSupported){
+                throw std::runtime_error("Codec not supported on this GPU");
+                return nDecodeSurface;
+            }
+
+            if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
+                (pVideoFormat->coded_height > decodecaps.nMaxHeight)){
+
+                std::ostringstream errorString;
+                errorString << std::endl
+                            << "Resolution          : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height << std::endl
+                            << "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x" << decodecaps.nMaxHeight << std::endl
+                            << "Resolution not supported on this GPU";
+
+                const std::string cErr = errorString.str();
+                throw std::runtime_error(cErr);
+                return nDecodeSurface;
+            }
+
+            if ((pVideoFormat->coded_width>>4)*(pVideoFormat->coded_height>>4) > decodecaps.nMaxMBCount){
+
+                std::ostringstream errorString;
+                errorString << std::endl
+                            << "MBCount             : " << (pVideoFormat->coded_width >> 4)*(pVideoFormat->coded_height >> 4) << std::endl
+                            << "Max Supported mbcnt : " << decodecaps.nMaxMBCount << std::endl
+                            << "MBCount not supported on this GPU";
+
+                const std::string cErr = errorString.str();
+                throw std::runtime_error(cErr);
+                return nDecodeSurface;
+            }
+
+            // eCodec has been set in the constructor (for parser). Here it's set again for potential correction
+            m_eCodec = pVideoFormat->codec;
+            m_eChromaFormat = pVideoFormat->chroma_format;
+            m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+            m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;
+
+            // Set the output surface format same as chroma format
+            if (m_eChromaFormat == cudaVideoChromaFormat_420)
+                m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
+            else if (m_eChromaFormat == cudaVideoChromaFormat_444)
+                m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
+            else if (m_eChromaFormat == cudaVideoChromaFormat_422)
+                m_eOutputFormat = cudaVideoSurfaceFormat_NV12;  // no 4:2:2 output format supported yet so make 420 default
+
+            // Check if output format supported. If not, check falback options
+            if (!(decodecaps.nOutputFormatMask & (1 << m_eOutputFormat)))
+            {
+                if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
+                    m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
+                else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
+                    m_eOutputFormat = cudaVideoSurfaceFormat_P016;
+                else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444))
+                    m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
+                else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444_16Bit))
+                    m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
+                else 
+                    throw std::runtime_error("No supported output format found");
+            }
+            m_videoFormat = *pVideoFormat;
+
+            CUVIDDECODECREATEINFO videoDecodeCreateInfo = { 0 };
+            videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
+            videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
+            videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
+            videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
+            if (pVideoFormat->progressive_sequence)
+                videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
+            else
+                videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
+            videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
+            // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware
+            videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
+            videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
+            videoDecodeCreateInfo.vidLock = m_ctxLock;
+            videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
+            videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
+            if (m_nMaxWidth < (int)pVideoFormat->coded_width)
+                m_nMaxWidth = pVideoFormat->coded_width;
+            if (m_nMaxHeight < (int)pVideoFormat->coded_height)
+                m_nMaxHeight = pVideoFormat->coded_height;
+            videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth;
+            videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight;
+
+            if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
+                m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
+                m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
+                videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
+                videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
+            } else {
+                if (m_resizeDim.w && m_resizeDim.h) {
+                    videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left;
+                    videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top;
+                    videoDecodeCreateInfo.display_area.right = pVideoFormat->display_area.right;
+                    videoDecodeCreateInfo.display_area.bottom = pVideoFormat->display_area.bottom;
+                    m_nWidth = m_resizeDim.w;
+                    m_nLumaHeight = m_resizeDim.h;
+                }
+
+                if (m_cropRect.r && m_cropRect.b) {
+                    videoDecodeCreateInfo.display_area.left = m_cropRect.l;
+                    videoDecodeCreateInfo.display_area.top = m_cropRect.t;
+                    videoDecodeCreateInfo.display_area.right = m_cropRect.r;
+                    videoDecodeCreateInfo.display_area.bottom = m_cropRect.b;
+                    m_nWidth = m_cropRect.r - m_cropRect.l;
+                    m_nLumaHeight = m_cropRect.b - m_cropRect.t;
+                }
+                videoDecodeCreateInfo.ulTargetWidth = m_nWidth;
+                videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight;
+            }
+
+            m_nChromaHeight = (int)(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
+            m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
+            m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
+            m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
+            m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
+            m_displayRect.t = videoDecodeCreateInfo.display_area.top;
+            m_displayRect.l = videoDecodeCreateInfo.display_area.left;
+            m_displayRect.r = videoDecodeCreateInfo.display_area.right;
+
+            checkCudaDriver(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
+            return nDecodeSurface;
+        }
+
+        int handlePictureDecode(CUVIDPICPARAMS *pPicParams){
+
+            if (!m_hDecoder)
+            {
+                throw std::runtime_error("Decoder not initialized.");
+                return false;
+            }
+            m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
+            checkCudaDriver(cuvidDecodePicture(m_hDecoder, pPicParams));
+            return 1;
+        }
+
+        int handlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo){
+            CUVIDPROCPARAMS videoProcessingParameters = {};
+            videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
+            videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
+            videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
+            videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
+            videoProcessingParameters.output_stream = m_cuvidStream;
+
+            CUdeviceptr dpSrcFrame = 0;
+            unsigned int nSrcPitch = 0;
+            checkCudaDriver(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index, &dpSrcFrame,
+                &nSrcPitch, &videoProcessingParameters));
+
+            CUVIDGETDECODESTATUS DecodeStatus;
+            memset(&DecodeStatus, 0, sizeof(DecodeStatus));
+
+            CUresult result = cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
+            if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error || DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed))
+            {
+                printf("Decode Error occurred for picture %d\n", m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
+            }
+
+            uint8_t *pDecodedFrame = nullptr;
+            {
+                if ((unsigned)++m_nDecodedFrame > m_vpFrame.size())
+                {
+                    /*
+                        If the cache limit has been reached, overwrite the last frame
+                    */
+                    bool need_alloc = true;
+                    if(m_nMaxCache != -1){
+                        if(m_vpFrame.size() >= m_nMaxCache){
+                            --m_nDecodedFrame;
+                            need_alloc = false;
+                        }
+                    }
+
+                    if(need_alloc){
+                        uint8_t *pFrame = nullptr;
+                        if (m_bUseDeviceFrame)
+                            //checkCudaDriver(cuMemAlloc((CUdeviceptr *)&pFrame, get_frame_bytes()));
+                            checkCudaRuntime(cudaMalloc(&pFrame, get_frame_bytes()));
+                        else
+                            checkCudaRuntime(cudaMallocHost(&pFrame, get_frame_bytes()));
+                            
+                        m_vpFrame.push_back(pFrame);
+                        m_vTimestamp.push_back(0);
+                    }
+                }
+                pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1];
+                m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
+            }
+
+            if(m_output_bgr){
+                if(m_pYUVFrame == 0){
+                    checkCudaDriver(cuMemAlloc(&m_pYUVFrame, m_nWidth * (m_nLumaHeight + m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP));
+                }
+                if(m_pBGRFrame == 0){
+                    checkCudaDriver(cuMemAlloc(&m_pBGRFrame, m_nWidth * m_nLumaHeight * 3));
+                }
+                CUDA_MEMCPY2D m = { 0 };
+                m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+                m.srcDevice = dpSrcFrame;
+                m.srcPitch = nSrcPitch; 
+                m.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+                m.dstDevice = (CUdeviceptr)(m.dstHost = (uint8_t*)m_pYUVFrame);
+                m.dstPitch = m_nWidth * m_nBPP;
+                m.WidthInBytes = m_nWidth * m_nBPP;
+                m.Height = m_nLumaHeight;
+                checkCudaDriver(cuMemcpy2DAsync(&m, m_cuvidStream));
+
+                m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * m_nSurfaceHeight);
+                m.dstDevice = (CUdeviceptr)(m.dstHost = (uint8_t*)m_pYUVFrame + m.dstPitch * m_nLumaHeight);
+                m.Height = m_nChromaHeight;
+                checkCudaDriver(cuMemcpy2DAsync(&m, m_cuvidStream));
+
+                uint8_t* y  = (uint8_t*)m_pYUVFrame;
+                uint8_t* uv = y + m_nWidth * m_nLumaHeight;
+                convert_nv12_to_bgr_invoker(y, uv, m_nWidth, m_nLumaHeight, m_nWidth, (uint8_t*)m_pBGRFrame, m_cuvidStream);
+
+                if(m_bUseDeviceFrame){
+                    checkCudaDriver(cuMemcpyDtoDAsync((CUdeviceptr)pDecodedFrame, m_pBGRFrame, m_nWidth * m_nLumaHeight * 3, m_cuvidStream));
+                }else{
+                    checkCudaDriver(cuMemcpyDtoHAsync(pDecodedFrame, m_pBGRFrame, m_nWidth * m_nLumaHeight * 3, m_cuvidStream));
+                }
+            }else{
+                CUDA_MEMCPY2D m = { 0 };
+                m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+                m.srcDevice = dpSrcFrame;
+                m.srcPitch = nSrcPitch; 
+                m.dstMemoryType = m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
+                m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
+                m.dstPitch = m_nWidth * m_nBPP;
+                m.WidthInBytes = m_nWidth * m_nBPP;
+                m.Height = m_nLumaHeight;
+                checkCudaDriver(cuMemcpy2DAsync(&m, m_cuvidStream));
+
+                m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * m_nSurfaceHeight);
+                m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
+                m.Height = m_nChromaHeight;
+                checkCudaDriver(cuMemcpy2DAsync(&m, m_cuvidStream));
+
+                if (m_nNumChromaPlanes == 2){
+                    m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * m_nSurfaceHeight * 2);
+                    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight * 2);
+                    m.Height = m_nChromaHeight;
+                    checkCudaDriver(cuMemcpy2DAsync(&m, m_cuvidStream));
+                }
+            }
+            
+            if(!m_bUseDeviceFrame){
+                // make sure the async copies have completed
+                checkCudaDriver(cuStreamSynchronize(m_cuvidStream));
+            }
+            checkCudaDriver(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
+            return 1;
+        }
+
+        virtual ICUStream get_stream() override{
+            return m_cuvidStream;
+        }
+
+        int get_frame_bytes() override { 
+            assert(m_nWidth); 
+            if(m_output_bgr){
+                return m_nWidth * m_nLumaHeight * 3; 
+            }
+            return m_nWidth * (m_nLumaHeight + m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP; 
+        }
+
+        int get_width() override { assert(m_nWidth); return m_nWidth; }
+
+        int get_height() override { assert(m_nLumaHeight); return m_nLumaHeight; }
+
+        unsigned int get_frame_index() override { return m_iFrameIndex; }
+
+        unsigned int get_num_decoded_frame() override {return m_nDecodedFrame;}
+
+        cudaVideoSurfaceFormat get_output_format() { return m_eOutputFormat; }
+
+        uint8_t* get_frame(int64_t* pTimestamp = nullptr, unsigned int* pFrameIndex = nullptr) override{
+            if (m_nDecodedFrame > 0){
+                if (pFrameIndex)
+                    *pFrameIndex = m_iFrameIndex;
+
+                if (pTimestamp)
+                    *pTimestamp = m_vTimestamp[m_nDecodedFrameReturned];
+
+                m_nDecodedFrame--;
+                m_iFrameIndex++;
+                return m_vpFrame[m_nDecodedFrameReturned++];
+            }
+            return nullptr;
+        }
+
+        virtual ~CUVIDDecoderImpl(){
+            
+            if (m_hParser) 
+                cuvidDestroyVideoParser(m_hParser);
+
+            if (m_hDecoder) 
+                cuvidDestroyDecoder(m_hDecoder);
+
+            for (uint8_t *pFrame : m_vpFrame){
+                if (m_bUseDeviceFrame)
+                    //cuMemFree((CUdeviceptr)pFrame);
+                    cudaFree(pFrame);
+                else
+                    cudaFreeHost(pFrame);
+            }
+
+            if(m_pYUVFrame){
+                cuMemFree((CUdeviceptr)m_pYUVFrame);
+                m_pYUVFrame = 0;
+            }
+
+            if(m_pBGRFrame){
+                cuMemFree((CUdeviceptr)m_pBGRFrame);
+                m_pBGRFrame = 0;
+            }
+            
+            // 2023-05-08: release the CUDA stream (Mike)
+            if (m_cuvidStream) {cudaStreamDestroy(m_cuvidStream);}
+            
+            cuvidCtxLockDestroy(m_ctxLock);
+        }
+
+    private:
+        CUvideoctxlock m_ctxLock = nullptr;
+        CUvideoparser m_hParser = nullptr;
+        CUvideodecoder m_hDecoder = nullptr;
+        bool m_bUseDeviceFrame = false;
+        // dimension of the output
+        unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
+        unsigned int m_nNumChromaPlanes = 0;
+        // height of the mapped surface 
+        int m_nSurfaceHeight = 0;
+        int m_nSurfaceWidth = 0;
+        cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs;
+        cudaVideoChromaFormat m_eChromaFormat;
+        cudaVideoSurfaceFormat m_eOutputFormat;
+        int m_nBitDepthMinus8 = 0;
+        int m_nBPP = 1;
+        CUVIDEOFORMAT m_videoFormat = {};
+        CropRect m_displayRect = {};
+        mutex m_lock;
+        // stock of frames
+        std::vector<uint8_t *> m_vpFrame;
+        CUdeviceptr m_pYUVFrame = 0;
+        CUdeviceptr m_pBGRFrame = 0;
+        // timestamps of decoded frames
+        std::vector<int64_t> m_vTimestamp;
+        int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
+        int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32];
+        CUstream m_cuvidStream = 0;
+        CropRect m_cropRect = {};
+        ResizeDim m_resizeDim = {};
+        unsigned int m_iFrameIndex = 0;
+        int m_nMaxCache = -1;
+        int m_gpuID = -1;
+        unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0;
+        bool m_output_bgr = true;
+    };
+
+    std::shared_ptr<CUVIDDecoder> create_cuvid_decoder(
+        bool bUseDeviceFrame, IcudaVideoCodec eCodec, int max_cache, int gpu_id,
+        const CropRect *pCropRect, const ResizeDim *pResizeDim, bool output_bgr){
+
+        shared_ptr<CUVIDDecoderImpl> instance(new CUVIDDecoderImpl());
+        if(!instance->create(bUseDeviceFrame, gpu_id, (cudaVideoCodec)eCodec, false, pCropRect, pResizeDim, max_cache, 0, 0, 1000, output_bgr))
+            instance.reset();
+        return instance;
+    }
+}; //FFHDDecoder

+ 49 - 0
src/stream/ffhdd/cuvid_decoder.hpp

@@ -0,0 +1,49 @@
+
+#ifndef CUVID_DECODER_HPP
+#define CUVID_DECODER_HPP
+
+#include <memory>
+#include <stdint.h>
+// forward-declare CUstream_st below so cuda_runtime.h need not be included here
+
+struct CUstream_st;
+
+namespace FFHDDecoder{
+
+    #define IcudaVideoCodec_H264            4
+
+    typedef CUstream_st* ICUStream;
+    typedef unsigned int IcudaVideoCodec;
+
+    struct CropRect {
+        int l, t, r, b;
+    };
+
+    struct ResizeDim {
+        int w, h;
+    };
+
+    class CUVIDDecoder{
+    public:
+        virtual int get_frame_bytes() = 0;
+        virtual int get_width() = 0;
+        virtual int get_height() = 0;
+        virtual unsigned int get_frame_index() = 0;
+        virtual unsigned int get_num_decoded_frame() = 0;
+        virtual uint8_t* get_frame(int64_t* pTimestamp = nullptr, unsigned int* pFrameIndex = nullptr) = 0;
+        virtual int decode(const uint8_t *pData, int nSize, int64_t nTimestamp=0) = 0;
+        virtual ICUStream get_stream() = 0;
+        virtual int device() = 0;
+        virtual bool is_gpu_frame() = 0;
+    };
+
+    IcudaVideoCodec ffmpeg2NvCodecId(int ffmpeg_codec_id);
+
+    /* max_cache = -1 means unlimited caching, sized on demand; in practice it rarely exceeds 5 frames */
+    // gpu_id = -1 means use the current device
+    std::shared_ptr<CUVIDDecoder> create_cuvid_decoder(
+        bool use_device_frame, IcudaVideoCodec codec, int max_cache = -1, int gpu_id = -1, 
+        const CropRect *crop_rect = nullptr, const ResizeDim *resize_dim = nullptr, bool output_bgr = false
+    );
+}; // FFHDDecoder
+
+#endif // CUVID_DECODER_HPP
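
Note: a minimal sketch of driving the decoder together with the FFmpeg demuxer below (the URL is illustrative; error handling trimmed):

#include "stream/ffhdd/ffmpeg_demuxer.hpp"
#include "stream/ffhdd/cuvid_decoder.hpp"

int run(){
    auto demuxer = FFHDDemuxer::create_ffmpeg_demuxer("rtsp://127.0.0.1/stream");
    if(!demuxer) return -1;
    auto decoder = FFHDDecoder::create_cuvid_decoder(
        false, FFHDDecoder::ffmpeg2NvCodecId(demuxer->get_video_codec()),
        -1, 0, nullptr, nullptr, true /* output packed BGR */);
    if(!decoder) return -1;

    uint8_t* packet = nullptr; int bytes = 0; int64_t pts = 0;
    demuxer->get_extra_data(&packet, &bytes);   // prime the parser with SPS/PPS
    decoder->decode(packet, bytes);
    do{
        demuxer->demux(&packet, &bytes, &pts);
        int n = decoder->decode(packet, bytes, pts);
        for(int i = 0; i < n; ++i){
            uint8_t* frame = decoder->get_frame(&pts);  // BGR frame in host memory
            // ... consume frame (get_width() x get_height() x 3 bytes) ...
        }
    }while(bytes > 0);
    return 0;
}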

+ 438 - 0
src/stream/ffhdd/ffmpeg_demuxer.cpp

@@ -0,0 +1,438 @@
+
+#include "ffmpeg_demuxer.hpp"
+#include <iostream>
+#include "simple-logger.hpp"
+
+extern "C"
+{
+    #include <libavcodec/avcodec.h>
+    #include <libavformat/avformat.h>
+    #include <libavutil/opt.h>
+    #include <libswscale/swscale.h>
+};
+
+using namespace std;
+
+namespace FFHDDemuxer{
+
+    static inline bool check_ffmpeg_retvalue(int e, const char* call, int iLine, const char *szFile) {
+        if (e < 0) {
+            std::cout << "FFMPEGDemuxer error " << call << ", code = " << e << " in file " << szFile << ":" << iLine << std::endl;
+            return false;
+        }
+        return true;
+    }
+
+    #define checkFFMPEG(call) check_ffmpeg_retvalue(call, #call, __LINE__, __FILE__)
+
+    static bool string_begin_with(const string& str, const string& with){
+
+        if(str.size() < with.size()) return false;
+        if(with.empty()) return true;
+
+        return memcmp(str.c_str(), with.c_str(), with.size()) == 0;
+    }
+
+    class FFmpegDemuxerImpl : public FFmpegDemuxer{
+    public:
+        bool open(const string& uri, bool auto_reboot = true, int64_t timescale = 1000 /*Hz*/){
+            this->uri_opened_ = uri;
+            this->time_scale_opened_ = timescale;
+            this->auto_reboot_ = auto_reboot;
+            return this->open(this->CreateFormatContext(uri), timescale);
+        }
+
+        bool open(shared_ptr<DataProvider> pDataProvider){
+            bool ok = this->open(this->CreateFormatContext(pDataProvider));
+            if(ok){
+                m_avioc = m_fmtc->pb;
+            }
+            return ok;
+        }
+
+        bool reopen() override{
+            if(m_pDataProvider)  return false;
+            if(!flag_is_opened_) return false;
+
+            close();
+            return this->open(this->uri_opened_, this->auto_reboot_, this->time_scale_opened_);
+        }
+
+        void close(){
+            if (!m_fmtc) 
+                return;
+
+            if (m_pkt.data) {
+                av_packet_unref(&m_pkt);
+            }
+            if (m_pktFiltered.data) {
+                av_packet_unref(&m_pktFiltered);
+            }
+
+            if (m_bsfc) {
+                av_bsf_free(&m_bsfc);
+            }
+            
+            avformat_close_input(&m_fmtc);
+
+            if (m_avioc) {
+                av_freep(&m_avioc->buffer);
+                av_freep(&m_avioc);
+            }
+
+            if (m_pDataWithHeader) {
+                av_free(m_pDataWithHeader);
+            }
+            flag_is_opened_ = false;
+        }
+
+        ~FFmpegDemuxerImpl() {
+            close();
+        }
+
+        IAVCodecID get_video_codec() override{
+            return m_eVideoCodec;
+        }
+
+        IAVPixelFormat get_chroma_format() override{
+            return m_eChromaFormat;
+        }
+
+        int get_fps() override{
+            return m_fps;
+        }
+
+        int get_total_frames() override{
+            return m_total_frames;
+        }
+
+        int get_width() override{
+            return m_nWidth;
+        }
+
+        int get_height() override{
+            return m_nHeight;
+        }
+
+        int get_bit_depth() override{
+            return m_nBitDepth;
+        }
+
+        int get_frame_size() {
+            return m_nWidth * (m_nHeight + m_nChromaHeight) * m_nBPP;
+        }
+
+        void get_extra_data(uint8_t **ppData, int *bytes) override{
+
+            // AVBitStreamFilterContext* bsfc = av_bitstream_filter_init("h264_mp4toannexb");
+            // av_bitstream_filter_filter(bsfc, m_fmtc->streams[m_iVideoStream]->codec, nullptr, ppData, bytes, *ppData, *bytes, 0);
+            // av_bitstream_filter_close(bsfc);
+            *ppData = m_fmtc->streams[m_iVideoStream]->codecpar->extradata;
+            *bytes = m_fmtc->streams[m_iVideoStream]->codecpar->extradata_size;
+        }
+
+        bool demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = nullptr, bool *iskey_frame = nullptr) override{
+            
+            *pnVideoBytes = 0;
+            *ppVideo = nullptr;
+
+            if (!m_fmtc) {
+                return false;
+            }
+
+            if (m_pkt.data) {
+                av_packet_unref(&m_pkt);
+            }
+
+            int e = 0;
+            while ((e = av_read_frame(m_fmtc, &m_pkt)) >= 0 && m_pkt.stream_index != m_iVideoStream) 
+                av_packet_unref(&m_pkt);
+
+            if(iskey_frame){
+                *iskey_frame = m_pkt.flags & AV_PKT_FLAG_KEY;
+            }
+
+            if (e < 0) {
+                if(auto_reboot_){
+                    bool open_ok = this->reopen();
+                    if(!open_ok){
+                        INFOE("Reopen failed.");
+                        return false;
+                    }
+                    is_reboot_ = true;
+                    return this->demux(ppVideo, pnVideoBytes, pts);
+                }
+                return false;
+            }
+
+            int64_t local_pts = 0;
+            if (m_bMp4H264 || m_bMp4HEVC) {
+                if (m_pktFiltered.data) {
+                    av_packet_unref(&m_pktFiltered);
+                }
+                checkFFMPEG(av_bsf_send_packet(m_bsfc, &m_pkt));
+                checkFFMPEG(av_bsf_receive_packet(m_bsfc, &m_pktFiltered));
+                *ppVideo = m_pktFiltered.data;
+                *pnVideoBytes = m_pktFiltered.size;
+                local_pts = (int64_t) (m_pktFiltered.pts * m_userTimeScale * m_timeBase);
+            } else {
+
+                if (m_bMp4MPEG4 && (m_frameCount == 0)) {
+
+                    int extraDataSize = m_fmtc->streams[m_iVideoStream]->codecpar->extradata_size;
+
+                    if (extraDataSize > 0) {
+
+                        // extradata contains start codes 00 00 01. Subtract its size
+                        m_pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + m_pkt.size - 3*sizeof(uint8_t));
+
+                        if (!m_pDataWithHeader) {
+                            INFOE("FFmpeg error, m_pDataWithHeader alloc failed");
+                            return false;
+                        }
+
+                        memcpy(m_pDataWithHeader, m_fmtc->streams[m_iVideoStream]->codecpar->extradata, extraDataSize);
+                        memcpy(m_pDataWithHeader+extraDataSize, m_pkt.data+3, m_pkt.size - 3*sizeof(uint8_t));
+
+                        *ppVideo = m_pDataWithHeader;
+                        *pnVideoBytes = extraDataSize + m_pkt.size - 3*sizeof(uint8_t);
+                    }
+
+                } else {
+                    *ppVideo = m_pkt.data;
+                    *pnVideoBytes = m_pkt.size;
+                }
+                local_pts = (int64_t)(m_pkt.pts * m_userTimeScale * m_timeBase);
+            }
+
+            if(pts)
+                *pts = local_pts;
+            m_frameCount++;
+            return true;
+        }
+
+        virtual bool isreboot() override{
+            return is_reboot_;
+        }
+
+        virtual void reset_reboot_flag() override{
+            is_reboot_ = false;
+        }
+
+        static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
+            return ((DataProvider *)opaque)->get_data(pBuf, nBuf);
+        }
+
+    private:
+        double r2d(AVRational r) const{
+            return r.num == 0 || r.den == 0 ? 0. : (double)r.num / (double)r.den;
+        }
+
+        bool open(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) {
+            if (!fmtc) {
+                INFOE("No AVFormatContext provided.");
+                return false;
+            }
+
+            this->m_fmtc = fmtc;
+            // LOG(LINFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
+
+            if(!checkFFMPEG(avformat_find_stream_info(fmtc, nullptr))) return false;
+            m_iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
+            if (m_iVideoStream < 0) {
+                INFOE("FFmpeg error: Could not find stream in input file");
+                return false;
+            }
+
+            m_frameCount = 0;
+            //fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
+            m_eVideoCodec = fmtc->streams[m_iVideoStream]->codecpar->codec_id;
+            m_nWidth = fmtc->streams[m_iVideoStream]->codecpar->width;
+            m_nHeight = fmtc->streams[m_iVideoStream]->codecpar->height;
+            m_eChromaFormat = (AVPixelFormat)fmtc->streams[m_iVideoStream]->codecpar->format;
+            AVRational rTimeBase = fmtc->streams[m_iVideoStream]->time_base;
+            m_timeBase = av_q2d(rTimeBase);
+            m_userTimeScale = timeScale;
+            m_fps = r2d(fmtc->streams[m_iVideoStream]->avg_frame_rate);
+            m_total_frames = fmtc->streams[m_iVideoStream]->nb_frames;
+
+            // Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
+            switch (m_eChromaFormat)
+            {
+            case AV_PIX_FMT_YUV420P10LE:
+                m_nBitDepth = 10;
+                m_nChromaHeight = (m_nHeight + 1) >> 1;
+                m_nBPP = 2;
+                break;
+            case AV_PIX_FMT_YUV420P12LE:
+                m_nBitDepth = 12;
+                m_nChromaHeight = (m_nHeight + 1) >> 1;
+                m_nBPP = 2;
+                break;
+            case AV_PIX_FMT_YUV444P10LE:
+                m_nBitDepth = 10;
+                m_nChromaHeight = m_nHeight << 1;
+                m_nBPP = 2;
+                break;
+            case AV_PIX_FMT_YUV444P12LE:
+                m_nBitDepth = 12;
+                m_nChromaHeight = m_nHeight << 1;
+                m_nBPP = 2;
+                break;
+            case AV_PIX_FMT_YUV444P:
+                m_nBitDepth = 8;
+                m_nChromaHeight = m_nHeight << 1;
+                m_nBPP = 1;
+                break;
+            case AV_PIX_FMT_YUV420P:
+            case AV_PIX_FMT_YUVJ420P:
+            case AV_PIX_FMT_YUVJ422P:   // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
+            case AV_PIX_FMT_YUVJ444P:   // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
+                m_nBitDepth = 8;
+                m_nChromaHeight = (m_nHeight + 1) >> 1;
+                m_nBPP = 1;
+                break;
+            default:
+                INFOW("ChromaFormat not recognized. Assuming 420");
+                m_nBitDepth = 8;
+                m_nChromaHeight = (m_nHeight + 1) >> 1;
+                m_nBPP = 1;
+            }
+
+            m_bMp4H264 = m_eVideoCodec == AV_CODEC_ID_H264 && (
+                    !strcmp(fmtc->iformat->long_name, "QuickTime / MOV") 
+                    || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") 
+                    || !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
+                );
+            m_bMp4HEVC = m_eVideoCodec == AV_CODEC_ID_HEVC && (
+                    !strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
+                    || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
+                    || !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
+                );
+
+            m_bMp4MPEG4 = m_eVideoCodec == AV_CODEC_ID_MPEG4 && (
+                    !strcmp(fmtc->iformat->long_name, "QuickTime / MOV")
+                    || !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)")
+                    || !strcmp(fmtc->iformat->long_name, "Matroska / WebM")
+                );
+
+            //Initialize packet fields with default values
+            av_init_packet(&m_pkt);
+            m_pkt.data = nullptr;
+            m_pkt.size = 0;
+            
+            av_init_packet(&m_pktFiltered);
+            m_pktFiltered.data = nullptr;
+            m_pktFiltered.size = 0;
+
+            // Initialize bitstream filter and its required resources
+            if (m_bMp4H264) {
+                const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
+                if (!bsf) {
+                    INFOE("FFmpeg error: av_bsf_get_by_name() failed");
+                    return false;
+                }
+                if(!checkFFMPEG(av_bsf_alloc(bsf, &m_bsfc))) return false;
+                avcodec_parameters_copy(m_bsfc->par_in, fmtc->streams[m_iVideoStream]->codecpar);
+                if(!checkFFMPEG(av_bsf_init(m_bsfc))) return false;
+            }
+            if (m_bMp4HEVC) {
+                const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
+                if (!bsf) {
+                    INFOE("FFmpeg error: av_bsf_get_by_name() failed");
+                    return false;
+                }
+                if(!checkFFMPEG(av_bsf_alloc(bsf, &m_bsfc))) return false;
+                avcodec_parameters_copy(m_bsfc->par_in, fmtc->streams[m_iVideoStream]->codecpar);
+                if(!checkFFMPEG(av_bsf_init(m_bsfc))) return false;
+            }
+            this->flag_is_opened_ = true;
+            return true;
+        }
+
+        AVFormatContext *CreateFormatContext(shared_ptr<DataProvider> pDataProvider) {
+            
+            AVFormatContext *ctx = nullptr;
+            if (!(ctx = avformat_alloc_context())) {
+                INFOE("FFmpeg error");
+                return nullptr;
+            }
+
+            uint8_t *avioc_buffer = nullptr;
+            int avioc_buffer_size = 8 * 1024 * 1024;
+            avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
+            if (!avioc_buffer) {
+                INFOE("FFmpeg error");
+                return nullptr;
+            }
+
+            m_pDataProvider = pDataProvider;
+            m_avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size,
+                0, pDataProvider.get(), &ReadPacket, nullptr, nullptr);
+            if (!m_avioc) {
+                INFOE("FFmpeg error");
+                return nullptr;
+            }
+            ctx->pb = m_avioc;
+
+            // If open fails, ctx is set back to nullptr
+            checkFFMPEG(avformat_open_input(&ctx, nullptr, nullptr, nullptr));
+            return ctx;
+        }
+
+        AVFormatContext *CreateFormatContext(const string& uri) {
+            avformat_network_init();
+
+            AVDictionary* options = nullptr;
+            if (string_begin_with(uri, "rtsp://")){
+                av_dict_set(&options, "rtsp_transport", "tcp", 0);
+                av_dict_set(&options, "buffer_size", "1024000", 0); /* receive buffer size; raise it for 1080p streams */
+                av_dict_set(&options, "stimeout", "2000000", 0); /* socket timeout before disconnecting, in microseconds */
+                av_dict_set(&options, "max_delay", "1000000", 0); /* maximum allowed delay, in microseconds */
+            }
+
+            // If open fails, ctx is set back to nullptr
+            AVFormatContext *ctx = nullptr;
+            checkFFMPEG(avformat_open_input(&ctx, uri.c_str(), nullptr, &options));
+            return ctx;
+        }
+
+    private:
+        shared_ptr<DataProvider> m_pDataProvider;
+        AVFormatContext *m_fmtc = nullptr;
+        AVIOContext *m_avioc = nullptr;
+        AVPacket m_pkt, m_pktFiltered; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input to decoders */
+        AVBSFContext *m_bsfc = nullptr;
+        int m_fps = 0;
+        int m_total_frames = 0;
+        int m_iVideoStream = -1;
+        bool m_bMp4H264 = false, m_bMp4HEVC = false, m_bMp4MPEG4 = false;
+        AVCodecID m_eVideoCodec;
+        AVPixelFormat m_eChromaFormat;
+        int m_nWidth, m_nHeight, m_nBitDepth, m_nBPP, m_nChromaHeight;
+        double m_timeBase = 0.0;
+        int64_t m_userTimeScale = 0; 
+        uint8_t *m_pDataWithHeader = nullptr;
+        unsigned int m_frameCount = 0;
+        string uri_opened_;
+        int64_t time_scale_opened_ = 0;
+        bool flag_is_opened_ = false;
+        bool auto_reboot_ = false;
+        bool is_reboot_ = false;
+    };
+
+
+    std::shared_ptr<FFmpegDemuxer> create_ffmpeg_demuxer(const std::string& path, bool auto_reboot){
+        std::shared_ptr<FFmpegDemuxerImpl> instance(new FFmpegDemuxerImpl());
+        if(!instance->open(path, auto_reboot))
+            instance.reset();
+        return instance;
+    }
+
+    std::shared_ptr<FFmpegDemuxer> create_ffmpeg_demuxer(std::shared_ptr<DataProvider> provider){
+        std::shared_ptr<FFmpegDemuxerImpl> instance(new FFmpegDemuxerImpl());
+        if(!instance->open(provider))
+            instance.reset();
+        return instance;
+    }
+}; // FFHDDemuxer

+ 40 - 0
src/stream/ffhdd/ffmpeg_demuxer.hpp

@@ -0,0 +1,40 @@
+
+#ifndef FFMPEG_DEMUXER_HPP
+#define FFMPEG_DEMUXER_HPP
+
+#include <stdint.h>
+#include <memory>
+#include <string>
+
+
+namespace FFHDDemuxer{
+
+    typedef int IAVCodecID;
+    typedef int IAVPixelFormat;
+
+    class DataProvider {
+    public:
+        virtual int get_data(uint8_t *pBuf, int nBuf) = 0;
+    };
+
+    class FFmpegDemuxer{
+    public:
+        virtual IAVCodecID get_video_codec() = 0;
+        virtual IAVPixelFormat get_chroma_format() = 0;
+        virtual int get_width() = 0;
+        virtual int get_height() = 0;
+        virtual int get_bit_depth() = 0;
+        virtual int get_fps() = 0;
+        virtual int get_total_frames() = 0;
+        virtual void get_extra_data(uint8_t **ppData, int *bytes) = 0;
+        virtual bool isreboot() = 0;
+        virtual void reset_reboot_flag() = 0;
+        virtual bool demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = nullptr, bool *iskey_frame = nullptr) = 0;
+        virtual bool reopen() = 0;
+    };
+
+    std::shared_ptr<FFmpegDemuxer> create_ffmpeg_demuxer(const std::string& uri, bool auto_reboot = false);
+    std::shared_ptr<FFmpegDemuxer> create_ffmpeg_demuxer(std::shared_ptr<DataProvider> provider);
+}; // namespace FFHDDemuxer
+
+#endif // FFMPEG_DEMUXER_HPP
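
Note: a minimal sketch that probes stream properties with the demuxer alone (the file name is illustrative):

#include "stream/ffhdd/ffmpeg_demuxer.hpp"
#include <cstdio>

int probe(){
    auto demuxer = FFHDDemuxer::create_ffmpeg_demuxer("test.mp4");
    if(!demuxer) return -1;   // create_ffmpeg_demuxer returns nullptr if open fails
    printf("codec id: %d, %dx%d @ %d fps, %d frames\n",
           demuxer->get_video_codec(), demuxer->get_width(),
           demuxer->get_height(), demuxer->get_fps(), demuxer->get_total_frames());
    return 0;
}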

+ 281 - 0
src/stream/ffhdd/nalu.hpp

@@ -0,0 +1,281 @@
+#ifndef NALU_HPP
+#define NALU_HPP
+
+#include <vector>
+#include <tuple>
+#include <string.h>
+
+namespace NALU{
+
+    enum class nal_unit_type_t : unsigned char{
+        unuse = 0,
+        slice_nonidr_layer_without_partitioning_rbsp = 1,
+        slice_data_partition_a_layer_rbsp = 2,
+        slice_data_partition_b_layer_rbsp = 3,
+        slice_data_partition_c_layer_rbsp = 4,
+        slice_idr_layer_without_partitioning_rbsp = 5,
+        sei_rbsp = 6,
+        seq_parameter_set_rbsp = 7,
+        pic_parameter_set_rbsp = 8,
+        access_unit_delimiter_rbsp = 9,
+        end_of_seq_rbsp = 10,
+        end_of_stream_rbsp = 11,
+        filler_data_rbsp = 12,
+        seq_parameter_set_extension_rbsp = 13,
+        reserve = 14, // .. 14..18  reserve
+        slice_layer_without_partitioning_rbsp = 19,
+        reserve2 = 20, // 20..23  reserve
+        unuse2 = 24, // 24..31 unuse
+    };
+
+    // When nal_unit_type_t == slice_idr_layer_without_partitioning_rbsp,
+    // slice_type_t must be one of I/SI/EX_I/EX_SI.
+    // When the slice_type_t value is in the range 5-9, subtract 5 from it.
+    /* described in section 7.4.3 of the H.264 spec */
+    enum class slice_type_t : unsigned char{
+        UNKNOW = 0xFF,
+        P = 0,
+        B = 1,
+        I = 2,
+        SP = 3,
+        SI = 4,
+        EX_P = 5,
+        EX_B = 6,
+        EX_I = 7,
+        EX_SP = 8,
+        EX_SI = 9
+    };
+
+    struct nal_unit_t{
+        nal_unit_type_t nal_unit_type : 5;
+        unsigned char nal_ref_idc : 2;
+        unsigned char forbidden_zero_bit : 1;
+    };
+
+    struct nal_unit_info{
+        nal_unit_t head;
+        slice_type_t slice_type;
+        int offset;
+        int flag_size;
+    };
+
+    inline const char* nal_unit_type_string(nal_unit_type_t t){
+        switch(t){
+        case nal_unit_type_t::unuse: return "unuse";
+        case nal_unit_type_t::slice_nonidr_layer_without_partitioning_rbsp: return "slice_nonidr_layer_without_partitioning_rbsp";
+        case nal_unit_type_t::slice_data_partition_a_layer_rbsp: return "slice_data_partition_a_layer_rbsp";
+        case nal_unit_type_t::slice_data_partition_b_layer_rbsp: return "slice_data_partition_b_layer_rbsp";
+        case nal_unit_type_t::slice_data_partition_c_layer_rbsp: return "slice_data_partition_c_layer_rbsp";
+        case nal_unit_type_t::slice_idr_layer_without_partitioning_rbsp: return "slice_idr_layer_without_partitioning_rbsp";
+        case nal_unit_type_t::sei_rbsp: return "sei_rbsp";
+        case nal_unit_type_t::seq_parameter_set_rbsp: return "seq_parameter_set_rbsp";
+        case nal_unit_type_t::pic_parameter_set_rbsp: return "pic_parameter_set_rbsp";
+        case nal_unit_type_t::access_unit_delimiter_rbsp: return "access_unit_delimiter_rbsp";
+        case nal_unit_type_t::end_of_seq_rbsp: return "end_of_seq_rbsp";
+        case nal_unit_type_t::end_of_stream_rbsp: return "end_of_stream_rbsp";
+        case nal_unit_type_t::filler_data_rbsp: return "filler_data_rbsp";
+        case nal_unit_type_t::seq_parameter_set_extension_rbsp: return "seq_parameter_set_extension_rbsp";
+        case nal_unit_type_t::reserve: return "reserve";
+        case nal_unit_type_t::slice_layer_without_partitioning_rbsp: return "slice_layer_without_partitioning_rbsp";
+        case nal_unit_type_t::reserve2: return "reserve2"; // 20..23  reserve
+        case nal_unit_type_t::unuse2: return "unuse2"; // 24..31 unuse
+        default: return "unknow";
+        }
+    }
+
+    inline const char* nal_unit_type_short_string(nal_unit_type_t t){
+        switch(t){
+        case nal_unit_type_t::unuse: return "unuse";
+        case nal_unit_type_t::slice_nonidr_layer_without_partitioning_rbsp: return "nonidr";
+        case nal_unit_type_t::slice_data_partition_a_layer_rbsp: return "slice_a";
+        case nal_unit_type_t::slice_data_partition_b_layer_rbsp: return "slice_b";
+        case nal_unit_type_t::slice_data_partition_c_layer_rbsp: return "slice_c";
+        case nal_unit_type_t::slice_idr_layer_without_partitioning_rbsp: return "idr";
+        case nal_unit_type_t::sei_rbsp: return "sei";
+        case nal_unit_type_t::seq_parameter_set_rbsp: return "sps";
+        case nal_unit_type_t::pic_parameter_set_rbsp: return "pps";
+        case nal_unit_type_t::access_unit_delimiter_rbsp: return "aud";
+        case nal_unit_type_t::end_of_seq_rbsp: return "eos";
+        case nal_unit_type_t::end_of_stream_rbsp: return "eostr";
+        case nal_unit_type_t::filler_data_rbsp: return "filter";
+        case nal_unit_type_t::seq_parameter_set_extension_rbsp: return "sps_ext";
+        case nal_unit_type_t::reserve: return "reserve";
+        case nal_unit_type_t::slice_layer_without_partitioning_rbsp: return "slice";
+        case nal_unit_type_t::reserve2: return "reserve2"; // 20..23  reserve
+        case nal_unit_type_t::unuse2: return "unuse2"; // 24..31 unuse
+        default: return "unknow";
+        }
+    }
+
+    inline const char* slice_type_string(slice_type_t t){
+        // SI and SP (Switch I / Switch P) are special slice types that allow efficient
+        // switching between video streams and random access by the decoder; for example,
+        // the same source encoded at several bitrates can be switched in real time
+        // according to network conditions.
+        // An SI macroblock is a special intra-coded macroblock, coded with Intra_4x4 prediction.
+        switch(t){
+        case slice_type_t::EX_P: return "P";
+        case slice_type_t::P: return "P";
+        case slice_type_t::EX_B: return "B";
+        case slice_type_t::B: return "B";
+        case slice_type_t::EX_I: return "I";
+        case slice_type_t::I: return "I";
+        case slice_type_t::EX_SP: return "SP";
+        case slice_type_t::SP: return "SP";
+        case slice_type_t::EX_SI: return "SI";
+        case slice_type_t::SI: return "SI";
+        case slice_type_t::UNKNOW: return "UNKNOW";
+        default: return "UNKNOW";
+        }
+    }
+
+    static slice_type_t get_slice_type_from_slice_header(unsigned char slice_header){
+
+        // slice_header = (
+        //     ((slice_header & 0xFF) << 24) |
+        //     ((slice_header & 0xFF00) << 8) |
+        //     ((slice_header & 0xFF0000) >> 8) |
+        //     ((slice_header & 0xFF000000) >> 24)
+        // );
+        /* If simplicity is all you need, the single line below can also classify the type,
+           but it is unreliable: it can only tell I from P */
+        // return (slice_header & 0x40) == 0 ? slice_type_t::I : slice_type_t::P;
+        // 01000000  40
+        // 11100000  E0
+        // 10111000  B8
+        // For 10111000, following the H.264 syntax (section 7.3.3), slice_header() reads:
+        // first_mb_in_slice = ue(v)
+        // slice_type = ue(v)
+        // Section 9.1: ue(v) is the Exp-Golomb code
+        /*
+            leadingZeroBits = -1;
+            for(b = 0; b == 0; leadingZeroBits++)
+                b = read_bits(1);
+
+            codeNumber = pow(2, leadingZeroBits) - 1 + read_bits(leadingZeroBits)
+        */
+        // For 10111000 (= 0xB8):
+        // first_mb_in_slice = ue(1)   = pow(2, 0) - 1 + 0 = 0
+        // slice_type        = ue(011) = pow(2, 1) - 1 + 1 = 2  -> I slice
+        // i.e. count the leading zeros as leadingZeroBits, skip the first 1 bit, then read
+        // leadingZeroBits more bits as an integer and apply the formula above
+        int state = 0;  // 0: counting leading zeros, 1: reading bits, 2: computing code_number
+        int leading_zero_bits = 0;
+        int i_code = 0;
+
+        unsigned int code_number_read_bits = 0;
+        unsigned int code_number_exponent = 0;
+        for(int bit_index = 0; bit_index < sizeof(slice_header) * 8; ++bit_index){
+            unsigned int bit_value = (slice_header >> (sizeof(slice_header) * 8 - bit_index - 1)) & 0x01;
+
+            if(state == 0){
+                if(bit_value == 0)
+                    leading_zero_bits++;
+                else{
+                    code_number_exponent = leading_zero_bits;
+                    if(leading_zero_bits == 0)
+                        state = 2;
+                    else
+                        state = 1;
+                }
+            }else if(state == 1){
+                code_number_read_bits <<= 1;
+                code_number_read_bits |= bit_value;
+
+                if(--leading_zero_bits == 0)
+                    state = 2;
+            }
+            
+            if(state == 2){
+                unsigned int code_number = (1 << code_number_exponent) - 1 + code_number_read_bits;
+                if(i_code == 1){
+                    if(code_number >= 5)
+                        code_number -= 5;
+
+                    return (slice_type_t)code_number;
+                }
+
+                state = 0;
+                leading_zero_bits = 0;
+                code_number_read_bits = 0;
+                i_code++;
+            }
+        }
+        return slice_type_t::UNKNOW;
+    }
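+
+    /* Sanity-check sketch for the worked example above (hypothetical test code,
+       not part of this commit):
+
+           assert(get_slice_type_from_slice_header(0xB8) == slice_type_t::I);
+           // 0xC0 = 11000000: first_mb_in_slice = ue(1) = 0, slice_type = ue(1) = 0 -> P
+           assert(get_slice_type_from_slice_header(0xC0) == slice_type_t::P);
+    */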
+
+    /* Search the memory at h264_data, starting from `start`, for a NALU start code
+       (0x00 0x00 0x01 or 0x00 0x00 0x00 0x01); returns (offset, start-code length)
+       when found, and (0, 0) otherwise */
+    static std::tuple<size_t, size_t> find_nalu(const uint8_t* h264_data, size_t end, size_t start = 0){
+
+        const uint8_t* ptr = h264_data;
+        const uint8_t head4[] = {0x00, 0x00, 0x00, 0x01};
+        const uint8_t head3[] = {0x00, 0x00, 0x01};
+
+        for(size_t i = start; i < end; ++i){
+            if(ptr[i] != 0x00)
+                continue;
+            if(end - i >= sizeof(head4) && memcmp(ptr + i, head4, sizeof(head4)) == 0)
+                return std::make_tuple(i, 4);
+            if(end - i >= sizeof(head3) && memcmp(ptr + i, head3, sizeof(head3)) == 0)
+                return std::make_tuple(i, 3);
+        }
+        return std::make_tuple(0, 0);
+    }
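+
+    /* Example (illustrative): for the bytes {0x00,0x00,0x00,0x01,0x67,...} this
+       returns (0, 4); a return of (0, 0) means no start code was found */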
+
+    static std::vector<nal_unit_info> find_all_nalu_info(const uint8_t* h264_data, size_t end, size_t start = 0){
+
+        size_t pos = 0, flag_size = 0;
+        size_t cursor = start;
+        std::vector<nal_unit_info> output;
+
+        do{
+            std::tie(pos, flag_size) = find_nalu(h264_data, end, cursor);
+            if(flag_size == 0)
+                break;
+                
+            nal_unit_info item;
+            memcpy(&item.head, h264_data + pos + flag_size, sizeof(item.head));
+            item.flag_size = flag_size;
+            item.offset = pos;
+
+            if(item.head.nal_unit_type == nal_unit_type_t::slice_idr_layer_without_partitioning_rbsp ||
+               item.head.nal_unit_type == nal_unit_type_t::slice_nonidr_layer_without_partitioning_rbsp){
+                item.slice_type = get_slice_type_from_slice_header(*(unsigned char*)(h264_data + pos + flag_size + 1));
+            }else{
+                item.slice_type = slice_type_t::UNKNOW;
+            }
+            output.emplace_back(item);
+            cursor = pos + flag_size + 1;
+        }while(cursor < end);
+        return output;
+    }
+
+    static std::string format_nalu_frame_type(const std::vector<nal_unit_info>& info_array){
+
+        std::string output;
+        for(size_t i = 0; i < info_array.size(); ++i){
+            auto& item = info_array[i];
+
+            if(item.head.nal_unit_type == nal_unit_type_t::slice_idr_layer_without_partitioning_rbsp ||
+                item.head.nal_unit_type == nal_unit_type_t::slice_nonidr_layer_without_partitioning_rbsp){
+                output += slice_type_string(item.slice_type);
+            }else{
+                output += nal_unit_type_short_string(item.head.nal_unit_type);
+            }
+
+            if(i + 1 < info_array.size())
+                output += ",";
+        }
+        return output;
+    }
+
+    static std::string format_nalu_type(const std::vector<nal_unit_info>& info_array){
+
+        std::string output;
+        for(size_t i = 0; i < info_array.size(); ++i){
+            auto& item = info_array[i];
+            output += nal_unit_type_short_string(item.head.nal_unit_type);
+
+            if(i + 1 < info_array.size())
+                output += ",";
+        }
+        return output;
+    }
+
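+    /* Putting it together (illustrative): inspect one Annex-B buffer, e.g. a packet
+       returned by a demuxer, and print its NALU/frame types:
+
+           auto infos = find_all_nalu_info(packet, packet_size);
+           printf("%s\n", format_nalu_frame_type(infos).c_str());   // e.g. "sps,pps,I"
+    */
+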
+}; // namespace NALU
+
+#endif // NALU_HPP

+ 97 - 0
src/stream/ffhdd/simple-logger.cpp

@@ -0,0 +1,97 @@
+
+#include "stream/ffhdd/simple-logger.hpp"
+#include <string>
+#include <stdarg.h>
+#include <time.h>
+
+using namespace std;
+
+namespace SimpleLogger{
+
+    static LogLevel g_level = LogLevel::Info;
+
+    const char* level_string(LogLevel level){
+        switch (level){
+            case LogLevel::Debug: return "debug";
+            case LogLevel::Verbose: return "verbo";
+            case LogLevel::Info: return "info";
+            case LogLevel::Warning: return "warn";
+            case LogLevel::Error: return "error";
+            case LogLevel::Fatal: return "fatal";
+            default: return "unknow";
+        }
+    }
+
+    void set_log_level(LogLevel level){
+        g_level = level;
+    }
+
+    LogLevel get_log_level(){
+        return g_level;
+    }
+
+    string file_name(const string& path, bool include_suffix){
+
+        if (path.empty()) return "";
+
+        int p = path.rfind('/');
+        p += 1;
+
+        //include suffix
+        if (include_suffix)
+            return path.substr(p);
+
+        int u = path.rfind('.');
+        if (u == -1)
+            return path.substr(p);
+
+        if (u <= p) u = path.size();
+        return path.substr(p, u - p);
+    }
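+
+    // e.g. file_name("/path/to/clip.mp4", true)  -> "clip.mp4"
+    //      file_name("/path/to/clip.mp4", false) -> "clip"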
+
+    string time_now(){
+        char time_string[20];
+        time_t timep;
+        time(&timep);
+        tm& t = *(tm*)localtime(&timep);
+
+        sprintf(time_string, "%04d-%02d-%02d %02d:%02d:%02d", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec);
+        return time_string;
+    }
+
+    void __log_func(const char* file, int line, LogLevel level, const char* fmt, ...){
+        if(level > g_level) return;
+
+        va_list vl;
+        va_start(vl, fmt);
+        
+        char buffer[2048];
+        auto now = time_now();
+        string filename = file_name(file, true);
+        int n = snprintf(buffer, sizeof(buffer), "[%s]", now.c_str());
+
+        if (level == LogLevel::Fatal || level == LogLevel::Error) {
+            n += snprintf(buffer + n, sizeof(buffer) - n, "[\033[31m%s\033[0m]", level_string(level));
+        }
+        else if (level == LogLevel::Warning) {
+            n += snprintf(buffer + n, sizeof(buffer) - n, "[\033[33m%s\033[0m]", level_string(level));
+        }
+        else if (level == LogLevel::Info) {
+            n += snprintf(buffer + n, sizeof(buffer) - n, "[\033[35m%s\033[0m]", level_string(level));
+        }
+        else if (level == LogLevel::Verbose) {
+            n += snprintf(buffer + n, sizeof(buffer) - n, "[\033[34m%s\033[0m]", level_string(level));
+        }
+        else {
+            n += snprintf(buffer + n, sizeof(buffer) - n, "[%s]", level_string(level));
+        }
+
+        n += snprintf(buffer + n, sizeof(buffer) - n, "[%s:%d]:", filename.c_str(), line);
+        vsnprintf(buffer + n, sizeof(buffer) - n, fmt, vl);
+        fprintf(stdout, "%s\n", buffer);
+
+        if(level == LogLevel::Fatal || level == LogLevel::Error){
+            fflush(stdout);
+            abort();
+        }
+    }
+};

+ 32 - 0
src/stream/ffhdd/simple-logger.hpp

@@ -0,0 +1,32 @@
+#ifndef SIMPLE_LOGGER_HPP
+#define SIMPLE_LOGGER_HPP
+
+#include <stdio.h>
+
+#define INFOD(...)			SimpleLogger::__log_func(__FILE__, __LINE__, SimpleLogger::LogLevel::Debug, __VA_ARGS__)
+#define INFOV(...)			SimpleLogger::__log_func(__FILE__, __LINE__, SimpleLogger::LogLevel::Verbose, __VA_ARGS__)
+#define INFO(...)			SimpleLogger::__log_func(__FILE__, __LINE__, SimpleLogger::LogLevel::Info, __VA_ARGS__)
+#define INFOW(...)			SimpleLogger::__log_func(__FILE__, __LINE__, SimpleLogger::LogLevel::Warning, __VA_ARGS__)
+//#define INFOE(...)			SimpleLogger::__log_func(__FILE__, __LINE__, SimpleLogger::LogLevel::Error, __VA_ARGS__)
+#define INFOE(...)			SimpleLogger::__log_func(__FILE__, __LINE__, SimpleLogger::LogLevel::Info, __VA_ARGS__) // downgraded to Info: logging at Error level crashes the Python layer. Mike 2020-05-11
+#define INFOF(...)			SimpleLogger::__log_func(__FILE__, __LINE__, SimpleLogger::LogLevel::Fatal, __VA_ARGS__)
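+
+// Usage sketch (illustrative): the macros capture __FILE__/__LINE__ automatically.
+//     SimpleLogger::set_log_level(SimpleLogger::LogLevel::Verbose);
+//     INFO("demuxer opened: %dx%d @ %d fps", width, height, fps);
+//     INFOF("unrecoverable error: %s", msg);   // Fatal (and Error) also call abort()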
+
+
+namespace SimpleLogger{
+
+    enum class LogLevel : int{
+        Debug   = 5,
+        Verbose = 4,
+        Info    = 3,
+        Warning = 2,
+        Error   = 1,
+        Fatal   = 0
+    };
+
+    void set_log_level(LogLevel level);
+    LogLevel get_log_level();
+    void __log_func(const char* file, int line, LogLevel level, const char* fmt, ...);
+
+};  // SimpleLogger
+
+#endif // SIMPLE_LOGGER_HPP

+ 99 - 0
src/stream/stream.hpp

@@ -1,5 +1,104 @@
 #ifndef STREAM_HPP__
 #define STREAM_HPP__
 
+#include <algorithm>
+
+#include <unordered_map>
+#include <memory>
+
+#include "opencv2/opencv.hpp"
+
+#include "stream/ffhdd/ffmpeg_demuxer.hpp"
+#include "stream/ffhdd/cuvid_decoder.hpp"
+#include "stream/ffhdd/cuda_tools.hpp"
+
+
+class FFmpegDemuxer 
+{ 
+public:
+    FFmpegDemuxer(std::string uri, bool auto_reboot = false){
+
+        instance_ = FFHDDemuxer::create_ffmpeg_demuxer(
+            uri, 
+            auto_reboot
+        );
+    }
+
+    bool valid(){
+        return instance_ != nullptr;
+    }
+
+    FFHDDemuxer::IAVCodecID get_video_codec() {return instance_->get_video_codec();}
+    virtual FFHDDemuxer::IAVPixelFormat get_chroma_format(){return instance_->get_chroma_format();}
+    virtual int get_width() {return instance_->get_width();}
+    virtual int get_height() {return instance_->get_height();}
+    virtual int get_bit_depth() {return instance_->get_bit_depth();}
+    virtual int get_fps() {return instance_->get_fps();}
+    virtual int get_total_frames() {return instance_->get_total_frames();}
+    virtual void get_extra_data(uint8_t **ppData, int *bytes) {
+        // forwards to the demuxer; the extradata holds codec headers (e.g. SPS/PPS)
+        instance_->get_extra_data(ppData, bytes);
+    }
+
+    virtual bool isreboot() {return instance_->isreboot();}
+    virtual void reset_reboot_flag() {instance_->reset_reboot_flag();}
+
+    virtual bool reopen() {return instance_->reopen();}
+
+private:
+    int64_t time_pts_ = 0;
+    std::shared_ptr<FFHDDemuxer::FFmpegDemuxer> instance_;
+}; // FFmpegDemuxer
+
+class CUVIDDecoder 
+{ 
+public:
+    CUVIDDecoder(bool bUseDeviceFrame, FFHDDemuxer::IAVCodecID eCodec, int max_cache, int gpu_id,
+        int cl, int ct, int cr, int cb, int rw, int rh, bool output_bgr){
+        
+        FFHDDecoder::IcudaVideoCodec codec = FFHDDecoder::ffmpeg2NvCodecId(eCodec);
+        FFHDDecoder::CropRect crop{0, 0, 0, 0};
+        FFHDDecoder::ResizeDim resize{0, 0};
+        if(cr - cl > 0 && cb - ct > 0){
+            crop.l = cl;
+            crop.t = ct;
+            crop.r = cr;
+            crop.b = cb;
+        }
+
+        if(rw > 0 && rh > 0){
+            resize.w = rw;
+            resize.h = rh;
+        }
+
+        output_bgr_ = output_bgr;
+        instance_ = FFHDDecoder::create_cuvid_decoder(
+            bUseDeviceFrame, codec, max_cache, gpu_id, &crop, &resize, output_bgr
+        );
+    }
+
+    bool valid(){
+        return instance_ != nullptr;
+    }
+
+
+    int get_frame_bytes() {return instance_->get_frame_bytes();}
+    int get_width() {return instance_->get_width();}
+    int get_height() {return instance_->get_height();}
+    unsigned int get_frame_index() {return instance_->get_frame_index();}
+    unsigned int get_num_decoded_frame() {return instance_->get_num_decoded_frame();}
+
+    int decode(const uint8_t* pData, int nSize, int64_t nTimestamp = 0) {
+        return instance_->decode(pData, nSize, nTimestamp);
+    }
+    uint64_t get_stream() {return (uint64_t)instance_->get_stream();}
+
+private:
+    std::shared_ptr<FFHDDecoder::CUVIDDecoder> instance_;
+    bool output_bgr_ = false;
+}; 
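+
+/* Combined usage sketch (illustrative; the URL and parameter values are placeholders):
+
+       FFmpegDemuxer demuxer("rtsp://example/stream");
+       if(demuxer.valid()){
+           // args: use_device_frame, codec, max_cache, gpu_id, crop l/t/r/b, resize w/h, output_bgr
+           CUVIDDecoder decoder(true, demuxer.get_video_codec(), 8, 0, 0, 0, 0, 0, 0, 0, true);
+           // feed the codec extradata first, then call decoder.decode(...) for each demuxed packet
+       }
+*/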
 
 #endif  // STREAM_HPP__