Explorar el Código

添加目标追踪

leon hace 1 mes
padre
commit
0b79eb5bbf

+ 2 - 1
.vscode/settings.json

@@ -58,6 +58,7 @@
         "iomanip": "cpp",
         "fstream": "cpp",
         "iterator": "cpp",
-        "utility": "cpp"
+        "utility": "cpp",
+        "chrono": "cpp"
     }
 }

+ 3 - 1
Makefile

@@ -13,6 +13,7 @@ project_include_path := src
 opencv_include_path  := /workspace/compile/__install/opencv490/include/opencv4
 trt_include_path     := /usr/include/x86_64-linux-gnu/
 cuda_include_path    := $(cuda_home)/include
+bt_include_path      := src\3rd\ByteTrack\include
 ffmpeg_include_path  := 
 
 python_include_path  := /usr/include/python3.10
@@ -22,7 +23,8 @@ include_paths        := $(project_include_path) \
 						$(opencv_include_path) \
 						$(trt_include_path) \
 						$(cuda_include_path) \
-						$(python_include_path)
+						$(python_include_path) \
+						$(bt_include_path)
 
 
 opencv_library_path  := /workspace/compile/__install/opencv490/lib/

+ 49 - 0
src/3rd/ByteTrack/include/BYTETracker.h

@@ -0,0 +1,49 @@
+#pragma once
+
+#include "STrack.h"
+
+struct Object
+{
+    cv::Rect_<float> rect;
+    int label;
+    float prob;
+};
+
+class BYTETracker
+{
+public:
+	BYTETracker(int frame_rate = 30, int track_buffer = 30);
+	~BYTETracker();
+
+	vector<STrack> update(const vector<Object>& objects);
+	Scalar get_color(int idx);
+
+private:
+	vector<STrack*> joint_stracks(vector<STrack*> &tlista, vector<STrack> &tlistb);
+	vector<STrack> joint_stracks(vector<STrack> &tlista, vector<STrack> &tlistb);
+
+	vector<STrack> sub_stracks(vector<STrack> &tlista, vector<STrack> &tlistb);
+	void remove_duplicate_stracks(vector<STrack> &resa, vector<STrack> &resb, vector<STrack> &stracksa, vector<STrack> &stracksb);
+
+	void linear_assignment(vector<vector<float> > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh,
+		vector<vector<int> > &matches, vector<int> &unmatched_a, vector<int> &unmatched_b);
+	vector<vector<float> > iou_distance(vector<STrack*> &atracks, vector<STrack> &btracks, int &dist_size, int &dist_size_size);
+	vector<vector<float> > iou_distance(vector<STrack> &atracks, vector<STrack> &btracks);
+	vector<vector<float> > ious(vector<vector<float> > &atlbrs, vector<vector<float> > &btlbrs);
+
+	double lapjv(const vector<vector<float> > &cost, vector<int> &rowsol, vector<int> &colsol, 
+		bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true);
+
+private:
+
+	float track_thresh;
+	float high_thresh;
+	float match_thresh;
+	int frame_id;
+	int max_time_lost;
+
+	vector<STrack> tracked_stracks;
+	vector<STrack> lost_stracks;
+	vector<STrack> removed_stracks;
+	byte_kalman::KalmanFilter kalman_filter;
+};

+ 50 - 0
src/3rd/ByteTrack/include/STrack.h

@@ -0,0 +1,50 @@
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include "kalmanFilter.h"
+
+using namespace cv;
+using namespace std;
+
+enum TrackState { New = 0, Tracked, Lost, Removed };
+
+class STrack
+{
+public:
+	STrack(vector<float> tlwh_, float score);
+	~STrack();
+
+	vector<float> static tlbr_to_tlwh(vector<float> &tlbr);
+	void static multi_predict(vector<STrack*> &stracks, byte_kalman::KalmanFilter &kalman_filter);
+	void static_tlwh();
+	void static_tlbr();
+	vector<float> tlwh_to_xyah(vector<float> tlwh_tmp);
+	vector<float> to_xyah();
+	void mark_lost();
+	void mark_removed();
+	int next_id();
+	int end_frame();
+	
+	void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id);
+	void re_activate(STrack &new_track, int frame_id, bool new_id = false);
+	void update(STrack &new_track, int frame_id);
+
+public:
+	bool is_activated;
+	int track_id;
+	int state;
+
+	vector<float> _tlwh;
+	vector<float> tlwh;
+	vector<float> tlbr;
+	int frame_id;
+	int tracklet_len;
+	int start_frame;
+
+	KAL_MEAN mean;
+	KAL_COVA covariance;
+	float score;
+
+private:
+	byte_kalman::KalmanFilter kalman_filter;
+};

+ 36 - 0
src/3rd/ByteTrack/include/dataType.h

@@ -0,0 +1,36 @@
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include <Eigen/Core>
+#include <Eigen/Dense>
+typedef Eigen::Matrix<float, 1, 4, Eigen::RowMajor> DETECTBOX;
+typedef Eigen::Matrix<float, -1, 4, Eigen::RowMajor> DETECTBOXSS;
+typedef Eigen::Matrix<float, 1, 128, Eigen::RowMajor> FEATURE;
+typedef Eigen::Matrix<float, Eigen::Dynamic, 128, Eigen::RowMajor> FEATURESS;
+//typedef std::vector<FEATURE> FEATURESS;
+
+//Kalmanfilter
+//typedef Eigen::Matrix<float, 8, 8, Eigen::RowMajor> KAL_FILTER;
+typedef Eigen::Matrix<float, 1, 8, Eigen::RowMajor> KAL_MEAN;
+typedef Eigen::Matrix<float, 8, 8, Eigen::RowMajor> KAL_COVA;
+typedef Eigen::Matrix<float, 1, 4, Eigen::RowMajor> KAL_HMEAN;
+typedef Eigen::Matrix<float, 4, 4, Eigen::RowMajor> KAL_HCOVA;
+using KAL_DATA = std::pair<KAL_MEAN, KAL_COVA>;
+using KAL_HDATA = std::pair<KAL_HMEAN, KAL_HCOVA>;
+
+//main
+using RESULT_DATA = std::pair<int, DETECTBOX>;
+
+//tracker:
+using TRACKER_DATA = std::pair<int, FEATURESS>;
+using MATCH_DATA = std::pair<int, int>;
+typedef struct t {
+	std::vector<MATCH_DATA> matches;
+	std::vector<int> unmatched_tracks;
+	std::vector<int> unmatched_detections;
+}TRACHER_MATCHD;
+
+//linear_assignment:
+typedef Eigen::Matrix<float, -1, -1, Eigen::RowMajor> DYNAMICM;

+ 31 - 0
src/3rd/ByteTrack/include/kalmanFilter.h

@@ -0,0 +1,31 @@
+#pragma once
+
+#include "dataType.h"
+
+namespace byte_kalman
+{
+	class KalmanFilter
+	{
+	public:
+		static const double chi2inv95[10];
+		KalmanFilter();
+		KAL_DATA initiate(const DETECTBOX& measurement);
+		void predict(KAL_MEAN& mean, KAL_COVA& covariance);
+		KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance);
+		KAL_DATA update(const KAL_MEAN& mean,
+			const KAL_COVA& covariance,
+			const DETECTBOX& measurement);
+
+		Eigen::Matrix<float, 1, -1> gating_distance(
+			const KAL_MEAN& mean,
+			const KAL_COVA& covariance,
+			const std::vector<DETECTBOX>& measurements,
+			bool only_position = false);
+
+	private:
+		Eigen::Matrix<float, 8, 8, Eigen::RowMajor> _motion_mat;
+		Eigen::Matrix<float, 4, 8, Eigen::RowMajor> _update_mat;
+		float _std_weight_position;
+		float _std_weight_velocity;
+	};
+}

+ 63 - 0
src/3rd/ByteTrack/include/lapjv.h

@@ -0,0 +1,63 @@
+#ifndef LAPJV_H
+#define LAPJV_H
+
+#define LARGE 1000000
+
+#if !defined TRUE
+#define TRUE 1
+#endif
+#if !defined FALSE
+#define FALSE 0
+#endif
+
+#define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; }
+#define FREE(x) if (x != 0) { free(x); x = 0; }
+#define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; }
+
+#if 0
+#include <assert.h>
+#define ASSERT(cond) assert(cond)
+#define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#define PRINT_COST_ARRAY(a, n) \
+    while (1) { \
+        printf(#a" = ["); \
+        if ((n) > 0) { \
+            printf("%f", (a)[0]); \
+            for (uint_t j = 1; j < n; j++) { \
+                printf(", %f", (a)[j]); \
+            } \
+        } \
+        printf("]\n"); \
+        break; \
+    }
+#define PRINT_INDEX_ARRAY(a, n) \
+    while (1) { \
+        printf(#a" = ["); \
+        if ((n) > 0) { \
+            printf("%d", (a)[0]); \
+            for (uint_t j = 1; j < n; j++) { \
+                printf(", %d", (a)[j]); \
+            } \
+        } \
+        printf("]\n"); \
+        break; \
+    }
+#else
+#define ASSERT(cond)
+#define PRINTF(fmt, ...)
+#define PRINT_COST_ARRAY(a, n)
+#define PRINT_INDEX_ARRAY(a, n)
+#endif
+
+
+typedef signed int int_t;
+typedef unsigned int uint_t;
+typedef double cost_t;
+typedef char boolean;
+typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t;
+
+extern int_t lapjv_internal(
+	const uint_t n, cost_t *cost[],
+	int_t *x, int_t *y);
+
+#endif // LAPJV_H

+ 503 - 0
src/3rd/ByteTrack/include/logging.h

@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_LOGGING_H
+#define TENSORRT_LOGGING_H
+
+#include "NvInferRuntimeCommon.h"
+#include <cassert>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+using Severity = nvinfer1::ILogger::Severity;
+
+class LogStreamConsumerBuffer : public std::stringbuf
+{
+public:
+    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
+        : mOutput(stream)
+        , mPrefix(prefix)
+        , mShouldLog(shouldLog)
+    {
+    }
+
+    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
+        : mOutput(other.mOutput)
+    {
+    }
+
+    ~LogStreamConsumerBuffer()
+    {
+        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
+        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
+        // if the pointer to the beginning is not equal to the pointer to the current position,
+        // call putOutput() to log the output to the stream
+        if (pbase() != pptr())
+        {
+            putOutput();
+        }
+    }
+
+    // synchronizes the stream buffer and returns 0 on success
+    // synchronizing the stream buffer consists of inserting the buffer contents into the stream,
+    // resetting the buffer and flushing the stream
+    virtual int sync()
+    {
+        putOutput();
+        return 0;
+    }
+
+    void putOutput()
+    {
+        if (mShouldLog)
+        {
+            // prepend timestamp
+            std::time_t timestamp = std::time(nullptr);
+            tm* tm_local = std::localtime(&timestamp);
+            std::cout << "[";
+            std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
+            std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
+            std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
+            // std::stringbuf::str() gets the string contents of the buffer
+            // insert the buffer contents pre-appended by the appropriate prefix into the stream
+            mOutput << mPrefix << str();
+            // set the buffer to empty
+            str("");
+            // flush the stream
+            mOutput.flush();
+        }
+    }
+
+    void setShouldLog(bool shouldLog)
+    {
+        mShouldLog = shouldLog;
+    }
+
+private:
+    std::ostream& mOutput;
+    std::string mPrefix;
+    bool mShouldLog;
+};
+
+//!
+//! \class LogStreamConsumerBase
+//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
+//!
+class LogStreamConsumerBase
+{
+public:
+    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
+        : mBuffer(stream, prefix, shouldLog)
+    {
+    }
+
+protected:
+    LogStreamConsumerBuffer mBuffer;
+};
+
+//!
+//! \class LogStreamConsumer
+//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
+//!  Order of base classes is LogStreamConsumerBase and then std::ostream.
+//!  This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
+//!  in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
+//!  This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
+//!  Please do not change the order of the parent classes.
+//!
+class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
+{
+public:
+    //! \brief Creates a LogStreamConsumer which logs messages with level severity.
+    //!  Reportable severity determines if the messages are severe enough to be logged.
+    LogStreamConsumer(Severity reportableSeverity, Severity severity)
+        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
+        , std::ostream(&mBuffer) // links the stream buffer with the stream
+        , mShouldLog(severity <= reportableSeverity)
+        , mSeverity(severity)
+    {
+    }
+
+    LogStreamConsumer(LogStreamConsumer&& other)
+        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
+        , std::ostream(&mBuffer) // links the stream buffer with the stream
+        , mShouldLog(other.mShouldLog)
+        , mSeverity(other.mSeverity)
+    {
+    }
+
+    void setReportableSeverity(Severity reportableSeverity)
+    {
+        mShouldLog = mSeverity <= reportableSeverity;
+        mBuffer.setShouldLog(mShouldLog);
+    }
+
+private:
+    static std::ostream& severityOstream(Severity severity)
+    {
+        return severity >= Severity::kINFO ? std::cout : std::cerr;
+    }
+
+    static std::string severityPrefix(Severity severity)
+    {
+        switch (severity)
+        {
+        case Severity::kINTERNAL_ERROR: return "[F] ";
+        case Severity::kERROR: return "[E] ";
+        case Severity::kWARNING: return "[W] ";
+        case Severity::kINFO: return "[I] ";
+        case Severity::kVERBOSE: return "[V] ";
+        default: assert(0); return "";
+        }
+    }
+
+    bool mShouldLog;
+    Severity mSeverity;
+};
+
+//! \class Logger
+//!
+//! \brief Class which manages logging of TensorRT tools and samples
+//!
+//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
+//! and supports logging two types of messages:
+//!
+//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
+//! - Test pass/fail messages
+//!
+//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
+//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
+//!
+//! In the future, this class could be extended to support dumping test results to a file in some standard format
+//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
+//!
+//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
+//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
+//! library and messages coming from the sample.
+//!
+//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
+//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
+//! object.
+
+class Logger : public nvinfer1::ILogger
+{
+public:
+    Logger(Severity severity = Severity::kWARNING)
+        : mReportableSeverity(severity)
+    {
+    }
+
+    //!
+    //! \enum TestResult
+    //! \brief Represents the state of a given test
+    //!
+    enum class TestResult
+    {
+        kRUNNING, //!< The test is running
+        kPASSED,  //!< The test passed
+        kFAILED,  //!< The test failed
+        kWAIVED   //!< The test was waived
+    };
+
+    //!
+    //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
+    //! \return The nvinfer1::ILogger associated with this Logger
+    //!
+    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
+    //! we can eliminate the inheritance of Logger from ILogger
+    //!
+    nvinfer1::ILogger& getTRTLogger()
+    {
+        return *this;
+    }
+
+    //!
+    //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
+    //!
+    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
+    //! inheritance from nvinfer1::ILogger
+    //!
+    void log(Severity severity, const char* msg) noexcept override
+    {
+        LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
+    }
+
+    //!
+    //! \brief Method for controlling the verbosity of logging output
+    //!
+    //! \param severity The logger will only emit messages that have severity of this level or higher.
+    //!
+    void setReportableSeverity(Severity severity)
+    {
+        mReportableSeverity = severity;
+    }
+
+    //!
+    //! \brief Opaque handle that holds logging information for a particular test
+    //!
+    //! This object is an opaque handle to information used by the Logger to print test results.
+    //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
+    //! with Logger::reportTest{Start,End}().
+    //!
+    class TestAtom
+    {
+    public:
+        TestAtom(TestAtom&&) = default;
+
+    private:
+        friend class Logger;
+
+        TestAtom(bool started, const std::string& name, const std::string& cmdline)
+            : mStarted(started)
+            , mName(name)
+            , mCmdline(cmdline)
+        {
+        }
+
+        bool mStarted;
+        std::string mName;
+        std::string mCmdline;
+    };
+
+    //!
+    //! \brief Define a test for logging
+    //!
+    //! \param[in] name The name of the test.  This should be a string starting with
+    //!                  "TensorRT" and containing dot-separated strings containing
+    //!                  the characters [A-Za-z0-9_].
+    //!                  For example, "TensorRT.sample_googlenet"
+    //! \param[in] cmdline The command line used to reproduce the test
+    //
+    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
+    //!
+    static TestAtom defineTest(const std::string& name, const std::string& cmdline)
+    {
+        return TestAtom(false, name, cmdline);
+    }
+
+    //!
+    //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
+    //!        as input
+    //!
+    //! \param[in] name The name of the test
+    //! \param[in] argc The number of command-line arguments
+    //! \param[in] argv The array of command-line arguments (given as C strings)
+    //!
+    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
+    static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
+    {
+        auto cmdline = genCmdlineString(argc, argv);
+        return defineTest(name, cmdline);
+    }
+
+    //!
+    //! \brief Report that a test has started.
+    //!
+    //! \pre reportTestStart() has not been called yet for the given testAtom
+    //!
+    //! \param[in] testAtom The handle to the test that has started
+    //!
+    static void reportTestStart(TestAtom& testAtom)
+    {
+        reportTestResult(testAtom, TestResult::kRUNNING);
+        assert(!testAtom.mStarted);
+        testAtom.mStarted = true;
+    }
+
+    //!
+    //! \brief Report that a test has ended.
+    //!
+    //! \pre reportTestStart() has been called for the given testAtom
+    //!
+    //! \param[in] testAtom The handle to the test that has ended
+    //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
+    //!                   TestResult::kFAILED, TestResult::kWAIVED
+    //!
+    static void reportTestEnd(const TestAtom& testAtom, TestResult result)
+    {
+        assert(result != TestResult::kRUNNING);
+        assert(testAtom.mStarted);
+        reportTestResult(testAtom, result);
+    }
+
+    static int reportPass(const TestAtom& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kPASSED);
+        return EXIT_SUCCESS;
+    }
+
+    static int reportFail(const TestAtom& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kFAILED);
+        return EXIT_FAILURE;
+    }
+
+    static int reportWaive(const TestAtom& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kWAIVED);
+        return EXIT_SUCCESS;
+    }
+
+    static int reportTest(const TestAtom& testAtom, bool pass)
+    {
+        return pass ? reportPass(testAtom) : reportFail(testAtom);
+    }
+
+    Severity getReportableSeverity() const
+    {
+        return mReportableSeverity;
+    }
+
+private:
+    //!
+    //! \brief returns an appropriate string for prefixing a log message with the given severity
+    //!
+    static const char* severityPrefix(Severity severity)
+    {
+        switch (severity)
+        {
+        case Severity::kINTERNAL_ERROR: return "[F] ";
+        case Severity::kERROR: return "[E] ";
+        case Severity::kWARNING: return "[W] ";
+        case Severity::kINFO: return "[I] ";
+        case Severity::kVERBOSE: return "[V] ";
+        default: assert(0); return "";
+        }
+    }
+
+    //!
+    //! \brief returns an appropriate string for prefixing a test result message with the given result
+    //!
+    static const char* testResultString(TestResult result)
+    {
+        switch (result)
+        {
+        case TestResult::kRUNNING: return "RUNNING";
+        case TestResult::kPASSED: return "PASSED";
+        case TestResult::kFAILED: return "FAILED";
+        case TestResult::kWAIVED: return "WAIVED";
+        default: assert(0); return "";
+        }
+    }
+
+    //!
+    //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
+    //!
+    static std::ostream& severityOstream(Severity severity)
+    {
+        return severity >= Severity::kINFO ? std::cout : std::cerr;
+    }
+
+    //!
+    //! \brief method that implements logging test results
+    //!
+    static void reportTestResult(const TestAtom& testAtom, TestResult result)
+    {
+        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
+                                         << testAtom.mCmdline << std::endl;
+    }
+
+    //!
+    //! \brief generate a command line string from the given (argc, argv) values
+    //!
+    static std::string genCmdlineString(int argc, char const* const* argv)
+    {
+        std::stringstream ss;
+        for (int i = 0; i < argc; i++)
+        {
+            if (i > 0)
+                ss << " ";
+            ss << argv[i];
+        }
+        return ss.str();
+    }
+
+    Severity mReportableSeverity;
+};
+
+namespace
+{
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
+//!
+//! Example usage:
+//!
+//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
+//!
+//! Example usage:
+//!
+//!     LOG_INFO(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_INFO(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
+//!
+//! Example usage:
+//!
+//!     LOG_WARN(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_WARN(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
+//!
+//! Example usage:
+//!
+//!     LOG_ERROR(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_ERROR(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
+}
+
+//!
+//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
+//         ("fatal" severity)
+//!
+//! Example usage:
+//!
+//!     LOG_FATAL(logger) << "hello world" << std::endl;
+//!
+inline LogStreamConsumer LOG_FATAL(const Logger& logger)
+{
+    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
+}
+
+} // anonymous namespace
+
+#endif // TENSORRT_LOGGING_H

+ 241 - 0
src/3rd/ByteTrack/src/BYTETracker.cpp

@@ -0,0 +1,241 @@
+#include "BYTETracker.h"
+#include <fstream>
+
+BYTETracker::BYTETracker(int frame_rate, int track_buffer)
+{
+	track_thresh = 0.5;
+	high_thresh = 0.6;
+	match_thresh = 0.8;
+
+	frame_id = 0;
+	max_time_lost = int(frame_rate / 30.0 * track_buffer);
+	cout << "Init ByteTrack!" << endl;
+}
+
+BYTETracker::~BYTETracker()
+{
+}
+
+vector<STrack> BYTETracker::update(const vector<Object>& objects)
+{
+
+	////////////////// Step 1: Get detections //////////////////
+	this->frame_id++;
+	vector<STrack> activated_stracks;
+	vector<STrack> refind_stracks;
+	vector<STrack> removed_stracks;
+	vector<STrack> lost_stracks;
+	vector<STrack> detections;
+	vector<STrack> detections_low;
+
+	vector<STrack> detections_cp;
+	vector<STrack> tracked_stracks_swap;
+	vector<STrack> resa, resb;
+	vector<STrack> output_stracks;
+
+	vector<STrack*> unconfirmed;
+	vector<STrack*> tracked_stracks;
+	vector<STrack*> strack_pool;
+	vector<STrack*> r_tracked_stracks;
+
+	if (objects.size() > 0)
+	{
+		for (int i = 0; i < objects.size(); i++)
+		{
+			vector<float> tlbr_;
+			tlbr_.resize(4);
+			tlbr_[0] = objects[i].rect.x;
+			tlbr_[1] = objects[i].rect.y;
+			tlbr_[2] = objects[i].rect.x + objects[i].rect.width;
+			tlbr_[3] = objects[i].rect.y + objects[i].rect.height;
+
+			float score = objects[i].prob;
+
+			STrack strack(STrack::tlbr_to_tlwh(tlbr_), score);
+			if (score >= track_thresh)
+			{
+				detections.push_back(strack);
+			}
+			else
+			{
+				detections_low.push_back(strack);
+			}
+			
+		}
+	}
+
+	// Add newly detected tracklets to tracked_stracks
+	for (int i = 0; i < this->tracked_stracks.size(); i++)
+	{
+		if (!this->tracked_stracks[i].is_activated)
+			unconfirmed.push_back(&this->tracked_stracks[i]);
+		else
+			tracked_stracks.push_back(&this->tracked_stracks[i]);
+	}
+
+	////////////////// Step 2: First association, with IoU //////////////////
+	strack_pool = joint_stracks(tracked_stracks, this->lost_stracks);
+	STrack::multi_predict(strack_pool, this->kalman_filter);
+
+	vector<vector<float> > dists;
+	int dist_size = 0, dist_size_size = 0;
+	dists = iou_distance(strack_pool, detections, dist_size, dist_size_size);
+
+	vector<vector<int> > matches;
+	vector<int> u_track, u_detection;
+	linear_assignment(dists, dist_size, dist_size_size, match_thresh, matches, u_track, u_detection);
+
+	for (int i = 0; i < matches.size(); i++)
+	{
+		STrack *track = strack_pool[matches[i][0]];
+		STrack *det = &detections[matches[i][1]];
+		if (track->state == TrackState::Tracked)
+		{
+			track->update(*det, this->frame_id);
+			activated_stracks.push_back(*track);
+		}
+		else
+		{
+			track->re_activate(*det, this->frame_id, false);
+			refind_stracks.push_back(*track);
+		}
+	}
+
+	////////////////// Step 3: Second association, using low score dets //////////////////
+	for (int i = 0; i < u_detection.size(); i++)
+	{
+		detections_cp.push_back(detections[u_detection[i]]);
+	}
+	detections.clear();
+	detections.assign(detections_low.begin(), detections_low.end());
+	
+	for (int i = 0; i < u_track.size(); i++)
+	{
+		if (strack_pool[u_track[i]]->state == TrackState::Tracked)
+		{
+			r_tracked_stracks.push_back(strack_pool[u_track[i]]);
+		}
+	}
+
+	dists.clear();
+	dists = iou_distance(r_tracked_stracks, detections, dist_size, dist_size_size);
+
+	matches.clear();
+	u_track.clear();
+	u_detection.clear();
+	linear_assignment(dists, dist_size, dist_size_size, 0.5, matches, u_track, u_detection);
+
+	for (int i = 0; i < matches.size(); i++)
+	{
+		STrack *track = r_tracked_stracks[matches[i][0]];
+		STrack *det = &detections[matches[i][1]];
+		if (track->state == TrackState::Tracked)
+		{
+			track->update(*det, this->frame_id);
+			activated_stracks.push_back(*track);
+		}
+		else
+		{
+			track->re_activate(*det, this->frame_id, false);
+			refind_stracks.push_back(*track);
+		}
+	}
+
+	for (int i = 0; i < u_track.size(); i++)
+	{
+		STrack *track = r_tracked_stracks[u_track[i]];
+		if (track->state != TrackState::Lost)
+		{
+			track->mark_lost();
+			lost_stracks.push_back(*track);
+		}
+	}
+
+	// Deal with unconfirmed tracks, usually tracks with only one beginning frame
+	detections.clear();
+	detections.assign(detections_cp.begin(), detections_cp.end());
+
+	dists.clear();
+	dists = iou_distance(unconfirmed, detections, dist_size, dist_size_size);
+
+	matches.clear();
+	vector<int> u_unconfirmed;
+	u_detection.clear();
+	linear_assignment(dists, dist_size, dist_size_size, 0.7, matches, u_unconfirmed, u_detection);
+
+	for (int i = 0; i < matches.size(); i++)
+	{
+		unconfirmed[matches[i][0]]->update(detections[matches[i][1]], this->frame_id);
+		activated_stracks.push_back(*unconfirmed[matches[i][0]]);
+	}
+
+	for (int i = 0; i < u_unconfirmed.size(); i++)
+	{
+		STrack *track = unconfirmed[u_unconfirmed[i]];
+		track->mark_removed();
+		removed_stracks.push_back(*track);
+	}
+
+	////////////////// Step 4: Init new stracks //////////////////
+	for (int i = 0; i < u_detection.size(); i++)
+	{
+		STrack *track = &detections[u_detection[i]];
+		if (track->score < this->high_thresh)
+			continue;
+		track->activate(this->kalman_filter, this->frame_id);
+		activated_stracks.push_back(*track);
+	}
+
+	////////////////// Step 5: Update state //////////////////
+	for (int i = 0; i < this->lost_stracks.size(); i++)
+	{
+		if (this->frame_id - this->lost_stracks[i].end_frame() > this->max_time_lost)
+		{
+			this->lost_stracks[i].mark_removed();
+			removed_stracks.push_back(this->lost_stracks[i]);
+		}
+	}
+	
+	for (int i = 0; i < this->tracked_stracks.size(); i++)
+	{
+		if (this->tracked_stracks[i].state == TrackState::Tracked)
+		{
+			tracked_stracks_swap.push_back(this->tracked_stracks[i]);
+		}
+	}
+	this->tracked_stracks.clear();
+	this->tracked_stracks.assign(tracked_stracks_swap.begin(), tracked_stracks_swap.end());
+
+	this->tracked_stracks = joint_stracks(this->tracked_stracks, activated_stracks);
+	this->tracked_stracks = joint_stracks(this->tracked_stracks, refind_stracks);
+
+	//std::cout << activated_stracks.size() << std::endl;
+
+	this->lost_stracks = sub_stracks(this->lost_stracks, this->tracked_stracks);
+	for (int i = 0; i < lost_stracks.size(); i++)
+	{
+		this->lost_stracks.push_back(lost_stracks[i]);
+	}
+
+	this->lost_stracks = sub_stracks(this->lost_stracks, this->removed_stracks);
+	for (int i = 0; i < removed_stracks.size(); i++)
+	{
+		this->removed_stracks.push_back(removed_stracks[i]);
+	}
+	
+	remove_duplicate_stracks(resa, resb, this->tracked_stracks, this->lost_stracks);
+
+	this->tracked_stracks.clear();
+	this->tracked_stracks.assign(resa.begin(), resa.end());
+	this->lost_stracks.clear();
+	this->lost_stracks.assign(resb.begin(), resb.end());
+	
+	for (int i = 0; i < this->tracked_stracks.size(); i++)
+	{
+		if (this->tracked_stracks[i].is_activated)
+		{
+			output_stracks.push_back(this->tracked_stracks[i]);
+		}
+	}
+	return output_stracks;
+}

+ 194 - 0
src/3rd/ByteTrack/src/STrack.cpp

@@ -0,0 +1,194 @@
+#include "STrack.h"
+
+STrack::STrack(vector<float> tlwh_, float score)
+{
+	_tlwh.resize(4);
+	_tlwh.assign(tlwh_.begin(), tlwh_.end());
+
+	is_activated = false;
+	track_id = 0;
+	state = TrackState::New;
+	
+	tlwh.resize(4);
+	tlbr.resize(4);
+
+	static_tlwh();
+	static_tlbr();
+	frame_id = 0;
+	tracklet_len = 0;
+	this->score = score;
+	start_frame = 0;
+}
+
+STrack::~STrack()
+{
+}
+
+void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id)
+{
+	this->kalman_filter = kalman_filter;
+	this->track_id = this->next_id();
+
+	vector<float> _tlwh_tmp(4);
+	_tlwh_tmp[0] = this->_tlwh[0];
+	_tlwh_tmp[1] = this->_tlwh[1];
+	_tlwh_tmp[2] = this->_tlwh[2];
+	_tlwh_tmp[3] = this->_tlwh[3];
+	vector<float> xyah = tlwh_to_xyah(_tlwh_tmp);
+	DETECTBOX xyah_box;
+	xyah_box[0] = xyah[0];
+	xyah_box[1] = xyah[1];
+	xyah_box[2] = xyah[2];
+	xyah_box[3] = xyah[3];
+	auto mc = this->kalman_filter.initiate(xyah_box);
+	this->mean = mc.first;
+	this->covariance = mc.second;
+
+	static_tlwh();
+	static_tlbr();
+
+	this->tracklet_len = 0;
+	this->state = TrackState::Tracked;
+	if (frame_id == 1)
+	{
+		this->is_activated = true;
+	}
+	//this->is_activated = true;
+	this->frame_id = frame_id;
+	this->start_frame = frame_id;
+}
+
+void STrack::re_activate(STrack &new_track, int frame_id, bool new_id)
+{
+	vector<float> xyah = tlwh_to_xyah(new_track.tlwh);
+	DETECTBOX xyah_box;
+	xyah_box[0] = xyah[0];
+	xyah_box[1] = xyah[1];
+	xyah_box[2] = xyah[2];
+	xyah_box[3] = xyah[3];
+	auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box);
+	this->mean = mc.first;
+	this->covariance = mc.second;
+
+	static_tlwh();
+	static_tlbr();
+
+	this->tracklet_len = 0;
+	this->state = TrackState::Tracked;
+	this->is_activated = true;
+	this->frame_id = frame_id;
+	this->score = new_track.score;
+	if (new_id)
+		this->track_id = next_id();
+}
+
+void STrack::update(STrack &new_track, int frame_id)
+{
+	this->frame_id = frame_id;
+	this->tracklet_len++;
+
+	vector<float> xyah = tlwh_to_xyah(new_track.tlwh);
+	DETECTBOX xyah_box;
+	xyah_box[0] = xyah[0];
+	xyah_box[1] = xyah[1];
+	xyah_box[2] = xyah[2];
+	xyah_box[3] = xyah[3];
+
+	auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box);
+	this->mean = mc.first;
+	this->covariance = mc.second;
+
+	static_tlwh();
+	static_tlbr();
+
+	this->state = TrackState::Tracked;
+	this->is_activated = true;
+
+	this->score = new_track.score;
+}
+
+void STrack::static_tlwh()
+{
+	if (this->state == TrackState::New)
+	{
+		tlwh[0] = _tlwh[0];
+		tlwh[1] = _tlwh[1];
+		tlwh[2] = _tlwh[2];
+		tlwh[3] = _tlwh[3];
+		return;
+	}
+
+	tlwh[0] = mean[0];
+	tlwh[1] = mean[1];
+	tlwh[2] = mean[2];
+	tlwh[3] = mean[3];
+
+	tlwh[2] *= tlwh[3];
+	tlwh[0] -= tlwh[2] / 2;
+	tlwh[1] -= tlwh[3] / 2;
+}
+
+void STrack::static_tlbr()
+{
+	tlbr.clear();
+	tlbr.assign(tlwh.begin(), tlwh.end());
+	tlbr[2] += tlbr[0];
+	tlbr[3] += tlbr[1];
+}
+
+vector<float> STrack::tlwh_to_xyah(vector<float> tlwh_tmp)
+{
+	vector<float> tlwh_output = tlwh_tmp;
+	tlwh_output[0] += tlwh_output[2] / 2;
+	tlwh_output[1] += tlwh_output[3] / 2;
+	tlwh_output[2] /= tlwh_output[3];
+	return tlwh_output;
+}
+
+vector<float> STrack::to_xyah()
+{
+	return tlwh_to_xyah(tlwh);
+}
+
+vector<float> STrack::tlbr_to_tlwh(vector<float> &tlbr)
+{
+	tlbr[2] -= tlbr[0];
+	tlbr[3] -= tlbr[1];
+	return tlbr;
+}
+
+void STrack::mark_lost()
+{
+	state = TrackState::Lost;
+}
+
+void STrack::mark_removed()
+{
+	state = TrackState::Removed;
+}
+
+int STrack::next_id()
+{
+	static int _count = 0;
+	_count++;
+	return _count;
+}
+
+int STrack::end_frame()
+{
+	return this->frame_id;
+}
+
+void STrack::multi_predict(vector<STrack*> &stracks, byte_kalman::KalmanFilter &kalman_filter)
+{
+	for (int i = 0; i < stracks.size(); i++)
+	{
+		if (stracks[i]->state != TrackState::Tracked)
+		{
+			stracks[i]->mean[7] = 0;
+		}
+		kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance);
+		stracks[i]->static_tlwh();
+		stracks[i]->static_tlbr();
+	}
+}

+ 507 - 0
src/3rd/ByteTrack/src/bytetrack.cpp

@@ -0,0 +1,507 @@
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <numeric>
+#include <chrono>
+#include <vector>
+#include <opencv2/opencv.hpp>
+#include <dirent.h>
+#include "NvInfer.h"
+#include "cuda_runtime_api.h"
+#include "logging.h"
+#include "BYTETracker.h"
+
+#define CHECK(status) \
+    do\
+    {\
+        auto ret = (status);\
+        if (ret != 0)\
+        {\
+            cerr << "Cuda failure: " << ret << endl;\
+            abort();\
+        }\
+    } while (0)
+
+#define DEVICE 0  // GPU id
+#define NMS_THRESH 0.7
+#define BBOX_CONF_THRESH 0.1
+
+using namespace nvinfer1;
+
+// stuff we know about the network and the input/output blobs
+static const int INPUT_W = 1088;
+static const int INPUT_H = 608;
+const char* INPUT_BLOB_NAME = "input_0";
+const char* OUTPUT_BLOB_NAME = "output_0";
+static Logger gLogger;
+
+Mat static_resize(Mat& img) {
+    float r = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
+    // r = std::min(r, 1.0f);
+    int unpad_w = r * img.cols;
+    int unpad_h = r * img.rows;
+    Mat re(unpad_h, unpad_w, CV_8UC3);
+    resize(img, re, re.size());
+    Mat out(INPUT_H, INPUT_W, CV_8UC3, Scalar(114, 114, 114));
+    re.copyTo(out(Rect(0, 0, re.cols, re.rows)));
+    return out;
+}
+
+struct GridAndStride
+{
+    int grid0;
+    int grid1;
+    int stride;
+};
+
+static void generate_grids_and_stride(const int target_w, const int target_h, vector<int>& strides, vector<GridAndStride>& grid_strides)
+{
+    for (auto stride : strides)
+    {
+        int num_grid_w = target_w / stride;
+        int num_grid_h = target_h / stride;
+        for (int g1 = 0; g1 < num_grid_h; g1++)
+        {
+            for (int g0 = 0; g0 < num_grid_w; g0++)
+            {
+                grid_strides.push_back((GridAndStride){g0, g1, stride});
+            }
+        }
+    }
+}
+
+static inline float intersection_area(const Object& a, const Object& b)
+{
+    Rect_<float> inter = a.rect & b.rect;
+    return inter.area();
+}
+
+static void qsort_descent_inplace(vector<Object>& faceobjects, int left, int right)
+{
+    int i = left;
+    int j = right;
+    float p = faceobjects[(left + right) / 2].prob;
+
+    while (i <= j)
+    {
+        while (faceobjects[i].prob > p)
+            i++;
+
+        while (faceobjects[j].prob < p)
+            j--;
+
+        if (i <= j)
+        {
+            // swap
+            swap(faceobjects[i], faceobjects[j]);
+
+            i++;
+            j--;
+        }
+    }
+
+    #pragma omp parallel sections
+    {
+        #pragma omp section
+        {
+            if (left < j) qsort_descent_inplace(faceobjects, left, j);
+        }
+        #pragma omp section
+        {
+            if (i < right) qsort_descent_inplace(faceobjects, i, right);
+        }
+    }
+}
+
+static void qsort_descent_inplace(vector<Object>& objects)
+{
+    if (objects.empty())
+        return;
+
+    qsort_descent_inplace(objects, 0, objects.size() - 1);
+}
+
+static void nms_sorted_bboxes(const vector<Object>& faceobjects, vector<int>& picked, float nms_threshold)
+{
+    picked.clear();
+
+    const int n = faceobjects.size();
+
+    vector<float> areas(n);
+    for (int i = 0; i < n; i++)
+    {
+        areas[i] = faceobjects[i].rect.area();
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        const Object& a = faceobjects[i];
+
+        int keep = 1;
+        for (int j = 0; j < (int)picked.size(); j++)
+        {
+            const Object& b = faceobjects[picked[j]];
+
+            // intersection over union
+            float inter_area = intersection_area(a, b);
+            float union_area = areas[i] + areas[picked[j]] - inter_area;
+            // float IoU = inter_area / union_area
+            if (inter_area / union_area > nms_threshold)
+                keep = 0;
+        }
+
+        if (keep)
+            picked.push_back(i);
+    }
+}
+
+
+static void generate_yolox_proposals(vector<GridAndStride> grid_strides, float* feat_blob, float prob_threshold, vector<Object>& objects)
+{
+    const int num_class = 1;
+
+    const int num_anchors = grid_strides.size();
+
+    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++)
+    {
+        const int grid0 = grid_strides[anchor_idx].grid0;
+        const int grid1 = grid_strides[anchor_idx].grid1;
+        const int stride = grid_strides[anchor_idx].stride;
+
+        const int basic_pos = anchor_idx * (num_class + 5);
+
+        // yolox/models/yolo_head.py decode logic
+        float x_center = (feat_blob[basic_pos+0] + grid0) * stride;
+        float y_center = (feat_blob[basic_pos+1] + grid1) * stride;
+        float w = exp(feat_blob[basic_pos+2]) * stride;
+        float h = exp(feat_blob[basic_pos+3]) * stride;
+        float x0 = x_center - w * 0.5f;
+        float y0 = y_center - h * 0.5f;
+
+        float box_objectness = feat_blob[basic_pos+4];
+        for (int class_idx = 0; class_idx < num_class; class_idx++)
+        {
+            float box_cls_score = feat_blob[basic_pos + 5 + class_idx];
+            float box_prob = box_objectness * box_cls_score;
+            if (box_prob > prob_threshold)
+            {
+                Object obj;
+                obj.rect.x = x0;
+                obj.rect.y = y0;
+                obj.rect.width = w;
+                obj.rect.height = h;
+                obj.label = class_idx;
+                obj.prob = box_prob;
+
+                objects.push_back(obj);
+            }
+
+        } // class loop
+
+    } // point anchor loop
+}
+
+float* blobFromImage(Mat& img){
+    cvtColor(img, img, COLOR_BGR2RGB);
+
+    float* blob = new float[img.total()*3];
+    int channels = 3;
+    int img_h = img.rows;
+    int img_w = img.cols;
+    vector<float> mean = {0.485, 0.456, 0.406};
+    vector<float> std = {0.229, 0.224, 0.225};
+    for (size_t c = 0; c < channels; c++) 
+    {
+        for (size_t  h = 0; h < img_h; h++) 
+        {
+            for (size_t w = 0; w < img_w; w++) 
+            {
+                blob[c * img_w * img_h + h * img_w + w] =
+                    (((float)img.at<Vec3b>(h, w)[c]) / 255.0f - mean[c]) / std[c];
+            }
+        }
+    }
+    return blob;
+}
+
+
+static void decode_outputs(float* prob, vector<Object>& objects, float scale, const int img_w, const int img_h) {
+        vector<Object> proposals;
+        vector<int> strides = {8, 16, 32};
+        vector<GridAndStride> grid_strides;
+        generate_grids_and_stride(INPUT_W, INPUT_H, strides, grid_strides);
+        generate_yolox_proposals(grid_strides, prob,  BBOX_CONF_THRESH, proposals);
+        //std::cout << "num of boxes before nms: " << proposals.size() << std::endl;
+
+        qsort_descent_inplace(proposals);
+
+        vector<int> picked;
+        nms_sorted_bboxes(proposals, picked, NMS_THRESH);
+
+
+        int count = picked.size();
+
+        //std::cout << "num of boxes: " << count << std::endl;
+
+        objects.resize(count);
+        for (int i = 0; i < count; i++)
+        {
+            objects[i] = proposals[picked[i]];
+
+            // adjust offset to original unpadded
+            float x0 = (objects[i].rect.x) / scale;
+            float y0 = (objects[i].rect.y) / scale;
+            float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;
+            float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;
+
+            // clip
+            // x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
+            // y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
+            // x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
+            // y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
+
+            objects[i].rect.x = x0;
+            objects[i].rect.y = y0;
+            objects[i].rect.width = x1 - x0;
+            objects[i].rect.height = y1 - y0;
+        }
+}
+
+const float color_list[80][3] =
+{
+    {0.000, 0.447, 0.741},
+    {0.850, 0.325, 0.098},
+    {0.929, 0.694, 0.125},
+    {0.494, 0.184, 0.556},
+    {0.466, 0.674, 0.188},
+    {0.301, 0.745, 0.933},
+    {0.635, 0.078, 0.184},
+    {0.300, 0.300, 0.300},
+    {0.600, 0.600, 0.600},
+    {1.000, 0.000, 0.000},
+    {1.000, 0.500, 0.000},
+    {0.749, 0.749, 0.000},
+    {0.000, 1.000, 0.000},
+    {0.000, 0.000, 1.000},
+    {0.667, 0.000, 1.000},
+    {0.333, 0.333, 0.000},
+    {0.333, 0.667, 0.000},
+    {0.333, 1.000, 0.000},
+    {0.667, 0.333, 0.000},
+    {0.667, 0.667, 0.000},
+    {0.667, 1.000, 0.000},
+    {1.000, 0.333, 0.000},
+    {1.000, 0.667, 0.000},
+    {1.000, 1.000, 0.000},
+    {0.000, 0.333, 0.500},
+    {0.000, 0.667, 0.500},
+    {0.000, 1.000, 0.500},
+    {0.333, 0.000, 0.500},
+    {0.333, 0.333, 0.500},
+    {0.333, 0.667, 0.500},
+    {0.333, 1.000, 0.500},
+    {0.667, 0.000, 0.500},
+    {0.667, 0.333, 0.500},
+    {0.667, 0.667, 0.500},
+    {0.667, 1.000, 0.500},
+    {1.000, 0.000, 0.500},
+    {1.000, 0.333, 0.500},
+    {1.000, 0.667, 0.500},
+    {1.000, 1.000, 0.500},
+    {0.000, 0.333, 1.000},
+    {0.000, 0.667, 1.000},
+    {0.000, 1.000, 1.000},
+    {0.333, 0.000, 1.000},
+    {0.333, 0.333, 1.000},
+    {0.333, 0.667, 1.000},
+    {0.333, 1.000, 1.000},
+    {0.667, 0.000, 1.000},
+    {0.667, 0.333, 1.000},
+    {0.667, 0.667, 1.000},
+    {0.667, 1.000, 1.000},
+    {1.000, 0.000, 1.000},
+    {1.000, 0.333, 1.000},
+    {1.000, 0.667, 1.000},
+    {0.333, 0.000, 0.000},
+    {0.500, 0.000, 0.000},
+    {0.667, 0.000, 0.000},
+    {0.833, 0.000, 0.000},
+    {1.000, 0.000, 0.000},
+    {0.000, 0.167, 0.000},
+    {0.000, 0.333, 0.000},
+    {0.000, 0.500, 0.000},
+    {0.000, 0.667, 0.000},
+    {0.000, 0.833, 0.000},
+    {0.000, 1.000, 0.000},
+    {0.000, 0.000, 0.167},
+    {0.000, 0.000, 0.333},
+    {0.000, 0.000, 0.500},
+    {0.000, 0.000, 0.667},
+    {0.000, 0.000, 0.833},
+    {0.000, 0.000, 1.000},
+    {0.000, 0.000, 0.000},
+    {0.143, 0.143, 0.143},
+    {0.286, 0.286, 0.286},
+    {0.429, 0.429, 0.429},
+    {0.571, 0.571, 0.571},
+    {0.714, 0.714, 0.714},
+    {0.857, 0.857, 0.857},
+    {0.000, 0.447, 0.741},
+    {0.314, 0.717, 0.741},
+    {0.50, 0.5, 0}
+};
+
+void doInference(IExecutionContext& context, float* input, float* output, const int output_size, Size input_shape) {
+    const ICudaEngine& engine = context.getEngine();
+
+    // Pointers to input and output device buffers to pass to engine.
+    // Engine requires exactly IEngine::getNbBindings() number of buffers.
+    assert(engine.getNbBindings() == 2);
+    void* buffers[2];
+
+    // In order to bind the buffers, we need to know the names of the input and output tensors.
+    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
+    const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME);
+
+    assert(engine.getBindingDataType(inputIndex) == nvinfer1::DataType::kFLOAT);
+    const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME);
+    assert(engine.getBindingDataType(outputIndex) == nvinfer1::DataType::kFLOAT);
+    int mBatchSize = engine.getMaxBatchSize();
+
+    // Create GPU buffers on device
+    CHECK(cudaMalloc(&buffers[inputIndex], 3 * input_shape.height * input_shape.width * sizeof(float)));
+    CHECK(cudaMalloc(&buffers[outputIndex], output_size*sizeof(float)));
+
+    // Create stream
+    cudaStream_t stream;
+    CHECK(cudaStreamCreate(&stream));
+
+    // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host
+    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, 3 * input_shape.height * input_shape.width * sizeof(float), cudaMemcpyHostToDevice, stream));
+    context.enqueue(1, buffers, stream, nullptr);
+    CHECK(cudaMemcpyAsync(output, buffers[outputIndex], output_size * sizeof(float), cudaMemcpyDeviceToHost, stream));
+    cudaStreamSynchronize(stream);
+
+    // Release stream and buffers
+    cudaStreamDestroy(stream);
+    CHECK(cudaFree(buffers[inputIndex]));
+    CHECK(cudaFree(buffers[outputIndex]));
+}
+
+
+int main(int argc, char** argv) {
+    cudaSetDevice(DEVICE);
+    
+    // create a model using the API directly and serialize it to a stream
+    char *trtModelStream{nullptr};
+    size_t size{0};
+
+    if (argc == 4 && string(argv[2]) == "-i") {
+        const string engine_file_path {argv[1]};
+        ifstream file(engine_file_path, ios::binary);
+        if (file.good()) {
+            file.seekg(0, file.end);
+            size = file.tellg();
+            file.seekg(0, file.beg);
+            trtModelStream = new char[size];
+            assert(trtModelStream);
+            file.read(trtModelStream, size);
+            file.close();
+        }
+    } else {
+        cerr << "arguments not right!" << endl;
+        cerr << "run 'python3 tools/trt.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar' to serialize model first!" << std::endl;
+        cerr << "Then use the following command:" << endl;
+        cerr << "cd demo/TensorRT/cpp/build" << endl;
+        cerr << "./bytetrack ../../../../YOLOX_outputs/yolox_s_mix_det/model_trt.engine -i ../../../../videos/palace.mp4  // deserialize file and run inference" << std::endl;
+        return -1;
+    }
+    const string input_video_path {argv[3]};
+
+    IRuntime* runtime = createInferRuntime(gLogger);
+    assert(runtime != nullptr);
+    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
+    assert(engine != nullptr); 
+    IExecutionContext* context = engine->createExecutionContext();
+    assert(context != nullptr);
+    delete[] trtModelStream;
+    auto out_dims = engine->getBindingDimensions(1);
+    auto output_size = 1;
+    for(int j=0;j<out_dims.nbDims;j++) {
+        output_size *= out_dims.d[j];
+    }
+    static float* prob = new float[output_size];
+
+    VideoCapture cap(input_video_path);
+	if (!cap.isOpened())
+		return 0;
+
+	int img_w = cap.get(CAP_PROP_FRAME_WIDTH);
+	int img_h = cap.get(CAP_PROP_FRAME_HEIGHT);
+    int fps = cap.get(CAP_PROP_FPS);
+    long nFrame = static_cast<long>(cap.get(CAP_PROP_FRAME_COUNT));
+    cout << "Total frames: " << nFrame << endl;
+
+    VideoWriter writer("demo.mp4", VideoWriter::fourcc('m', 'p', '4', 'v'), fps, Size(img_w, img_h));
+
+    Mat img;
+    BYTETracker tracker(fps, 30);
+    int num_frames = 0;
+    int total_ms = 0;
+	while (true)
+    {
+        if(!cap.read(img))
+            break;
+        num_frames ++;
+        if (num_frames % 20 == 0)
+        {
+            cout << "Processing frame " << num_frames << " (" << num_frames * 1000000 / total_ms << " fps)" << endl;
+        }
+		if (img.empty())
+			break;
+        Mat pr_img = static_resize(img);
+        
+        float* blob;
+        blob = blobFromImage(pr_img);
+        float scale = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0));
+        
+        // run inference
+        auto start = chrono::system_clock::now();
+        doInference(*context, blob, prob, output_size, pr_img.size());
+        vector<Object> objects;
+        decode_outputs(prob, objects, scale, img_w, img_h);
+        vector<STrack> output_stracks = tracker.update(objects);
+        auto end = chrono::system_clock::now();
+        total_ms = total_ms + chrono::duration_cast<chrono::microseconds>(end - start).count();
+
+        for (int i = 0; i < output_stracks.size(); i++)
+		{
+			vector<float> tlwh = output_stracks[i].tlwh;
+			bool vertical = tlwh[2] / tlwh[3] > 1.6;
+			if (tlwh[2] * tlwh[3] > 20 && !vertical)
+			{
+				Scalar s = tracker.get_color(output_stracks[i].track_id);
+				putText(img, format("%d", output_stracks[i].track_id), Point(tlwh[0], tlwh[1] - 5), 
+                        0, 0.6, Scalar(0, 0, 255), 2, LINE_AA);
+                rectangle(img, Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), s, 2);
+			}
+		}
+        putText(img, format("frame: %d fps: %d num: %d", num_frames, num_frames * 1000000 / total_ms, output_stracks.size()), 
+                Point(0, 30), 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA);
+        writer.write(img);
+
+        delete blob;
+        char c = waitKey(1);
+        if (c > 0)
+        {
+            break;
+        }
+    }
+    cap.release();
+    cout << "FPS: " << num_frames * 1000000 / total_ms << endl;
+    // destroy the engine
+    context->destroy();
+    engine->destroy();
+    runtime->destroy();
+    return 0;
+}

+ 152 - 0
src/3rd/ByteTrack/src/kalmanFilter.cpp

@@ -0,0 +1,152 @@
+#include "kalmanFilter.h"
+#include <Eigen/Cholesky>
+
+namespace byte_kalman
+{
+	const double KalmanFilter::chi2inv95[10] = {
+	0,
+	3.8415,
+	5.9915,
+	7.8147,
+	9.4877,
+	11.070,
+	12.592,
+	14.067,
+	15.507,
+	16.919
+	};
+	KalmanFilter::KalmanFilter()
+	{
+		int ndim = 4;
+		double dt = 1.;
+
+		_motion_mat = Eigen::MatrixXf::Identity(8, 8);
+		for (int i = 0; i < ndim; i++) {
+			_motion_mat(i, ndim + i) = dt;
+		}
+		_update_mat = Eigen::MatrixXf::Identity(4, 8);
+
+		this->_std_weight_position = 1. / 20;
+		this->_std_weight_velocity = 1. / 160;
+	}
+
+	KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement)
+	{
+		DETECTBOX mean_pos = measurement;
+		DETECTBOX mean_vel;
+		for (int i = 0; i < 4; i++) mean_vel(i) = 0;
+
+		KAL_MEAN mean;
+		for (int i = 0; i < 8; i++) {
+			if (i < 4) mean(i) = mean_pos(i);
+			else mean(i) = mean_vel(i - 4);
+		}
+
+		KAL_MEAN std;
+		std(0) = 2 * _std_weight_position * measurement[3];
+		std(1) = 2 * _std_weight_position * measurement[3];
+		std(2) = 1e-2;
+		std(3) = 2 * _std_weight_position * measurement[3];
+		std(4) = 10 * _std_weight_velocity * measurement[3];
+		std(5) = 10 * _std_weight_velocity * measurement[3];
+		std(6) = 1e-5;
+		std(7) = 10 * _std_weight_velocity * measurement[3];
+
+		KAL_MEAN tmp = std.array().square();
+		KAL_COVA var = tmp.asDiagonal();
+		return std::make_pair(mean, var);
+	}
+
+	void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance)
+	{
+		//revise the data;
+		DETECTBOX std_pos;
+		std_pos << _std_weight_position * mean(3),
+			_std_weight_position * mean(3),
+			1e-2,
+			_std_weight_position * mean(3);
+		DETECTBOX std_vel;
+		std_vel << _std_weight_velocity * mean(3),
+			_std_weight_velocity * mean(3),
+			1e-5,
+			_std_weight_velocity * mean(3);
+		KAL_MEAN tmp;
+		tmp.block<1, 4>(0, 0) = std_pos;
+		tmp.block<1, 4>(0, 4) = std_vel;
+		tmp = tmp.array().square();
+		KAL_COVA motion_cov = tmp.asDiagonal();
+		KAL_MEAN mean1 = this->_motion_mat * mean.transpose();
+		KAL_COVA covariance1 = this->_motion_mat * covariance *(_motion_mat.transpose());
+		covariance1 += motion_cov;
+
+		mean = mean1;
+		covariance = covariance1;
+	}
+
+	KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance)
+	{
+		DETECTBOX std;
+		std << _std_weight_position * mean(3), _std_weight_position * mean(3),
+			1e-1, _std_weight_position * mean(3);
+		KAL_HMEAN mean1 = _update_mat * mean.transpose();
+		KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose());
+		Eigen::Matrix<float, 4, 4> diag = std.asDiagonal();
+		diag = diag.array().square().matrix();
+		covariance1 += diag;
+		//    covariance1.diagonal() << diag;
+		return std::make_pair(mean1, covariance1);
+	}
+
+	KAL_DATA
+		KalmanFilter::update(
+			const KAL_MEAN &mean,
+			const KAL_COVA &covariance,
+			const DETECTBOX &measurement)
+	{
+		KAL_HDATA pa = project(mean, covariance);
+		KAL_HMEAN projected_mean = pa.first;
+		KAL_HCOVA projected_cov = pa.second;
+
+		//chol_factor, lower =
+		//scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
+		//kalmain_gain =
+		//scipy.linalg.cho_solve((cho_factor, lower),
+		//np.dot(covariance, self._upadte_mat.T).T,
+		//check_finite=False).T
+		Eigen::Matrix<float, 4, 8> B = (covariance * (_update_mat.transpose())).transpose();
+		Eigen::Matrix<float, 8, 4> kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4
+		Eigen::Matrix<float, 1, 4> innovation = measurement - projected_mean; //eg.1x4
+		auto tmp = innovation * (kalman_gain.transpose());
+		KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix();
+		KAL_COVA new_covariance = covariance - kalman_gain * projected_cov*(kalman_gain.transpose());
+		return std::make_pair(new_mean, new_covariance);
+	}
+
+	Eigen::Matrix<float, 1, -1>
+		KalmanFilter::gating_distance(
+			const KAL_MEAN &mean,
+			const KAL_COVA &covariance,
+			const std::vector<DETECTBOX> &measurements,
+			bool only_position)
+	{
+		KAL_HDATA pa = this->project(mean, covariance);
+		if (only_position) {
+			printf("not implement!");
+			exit(0);
+		}
+		KAL_HMEAN mean1 = pa.first;
+		KAL_HCOVA covariance1 = pa.second;
+
+		//    Eigen::Matrix<float, -1, 4, Eigen::RowMajor> d(size, 4);
+		DETECTBOXSS d(measurements.size(), 4);
+		int pos = 0;
+		for (DETECTBOX box : measurements) {
+			d.row(pos++) = box - mean1;
+		}
+		Eigen::Matrix<float, -1, -1, Eigen::RowMajor> factor = covariance1.llt().matrixL();
+		Eigen::Matrix<float, -1, -1> z = factor.triangularView<Eigen::Lower>().solve<Eigen::OnTheRight>(d).transpose();
+		auto zz = ((z.array())*(z.array())).matrix();
+		auto square_maha = zz.colwise().sum();
+		return square_maha;
+	}
+}

+ 343 - 0
src/3rd/ByteTrack/src/lapjv.cpp

@@ -0,0 +1,343 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lapjv.h"
+
+/** Column-reduction and reduction transfer for a dense cost matrix.
+ */
+int_t _ccrrt_dense(const uint_t n, cost_t *cost[],
+	int_t *free_rows, int_t *x, int_t *y, cost_t *v)
+{
+	int_t n_free_rows;
+	boolean *unique;
+
+	for (uint_t i = 0; i < n; i++) {
+		x[i] = -1;
+		v[i] = LARGE;
+		y[i] = 0;
+	}
+	for (uint_t i = 0; i < n; i++) {
+		for (uint_t j = 0; j < n; j++) {
+			const cost_t c = cost[i][j];
+			if (c < v[j]) {
+				v[j] = c;
+				y[j] = i;
+			}
+			PRINTF("i=%d, j=%d, c[i,j]=%f, v[j]=%f y[j]=%d\n", i, j, c, v[j], y[j]);
+		}
+	}
+	PRINT_COST_ARRAY(v, n);
+	PRINT_INDEX_ARRAY(y, n);
+	NEW(unique, boolean, n);
+	memset(unique, TRUE, n);
+	{
+		int_t j = n;
+		do {
+			j--;
+			const int_t i = y[j];
+			if (x[i] < 0) {
+				x[i] = j;
+			}
+			else {
+				unique[i] = FALSE;
+				y[j] = -1;
+			}
+		} while (j > 0);
+	}
+	n_free_rows = 0;
+	for (uint_t i = 0; i < n; i++) {
+		if (x[i] < 0) {
+			free_rows[n_free_rows++] = i;
+		}
+		else if (unique[i]) {
+			const int_t j = x[i];
+			cost_t min = LARGE;
+			for (uint_t j2 = 0; j2 < n; j2++) {
+				if (j2 == (uint_t)j) {
+					continue;
+				}
+				const cost_t c = cost[i][j2] - v[j2];
+				if (c < min) {
+					min = c;
+				}
+			}
+			PRINTF("v[%d] = %f - %f\n", j, v[j], min);
+			v[j] -= min;
+		}
+	}
+	FREE(unique);
+	return n_free_rows;
+}
+
+
+/** Augmenting row reduction for a dense cost matrix.
+ */
+int_t _carr_dense(
+	const uint_t n, cost_t *cost[],
+	const uint_t n_free_rows,
+	int_t *free_rows, int_t *x, int_t *y, cost_t *v)
+{
+	uint_t current = 0;
+	int_t new_free_rows = 0;
+	uint_t rr_cnt = 0;
+	PRINT_INDEX_ARRAY(x, n);
+	PRINT_INDEX_ARRAY(y, n);
+	PRINT_COST_ARRAY(v, n);
+	PRINT_INDEX_ARRAY(free_rows, n_free_rows);
+	while (current < n_free_rows) {
+		int_t i0;
+		int_t j1, j2;
+		cost_t v1, v2, v1_new;
+		boolean v1_lowers;
+
+		rr_cnt++;
+		PRINTF("current = %d rr_cnt = %d\n", current, rr_cnt);
+		const int_t free_i = free_rows[current++];
+		j1 = 0;
+		v1 = cost[free_i][0] - v[0];
+		j2 = -1;
+		v2 = LARGE;
+		for (uint_t j = 1; j < n; j++) {
+			PRINTF("%d = %f %d = %f\n", j1, v1, j2, v2);
+			const cost_t c = cost[free_i][j] - v[j];
+			if (c < v2) {
+				if (c >= v1) {
+					v2 = c;
+					j2 = j;
+				}
+				else {
+					v2 = v1;
+					v1 = c;
+					j2 = j1;
+					j1 = j;
+				}
+			}
+		}
+		i0 = y[j1];
+		v1_new = v[j1] - (v2 - v1);
+		v1_lowers = v1_new < v[j1];
+		PRINTF("%d %d 1=%d,%f 2=%d,%f v1'=%f(%d,%g) \n", free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers, v[j1] - v1_new);
+		if (rr_cnt < current * n) {
+			if (v1_lowers) {
+				v[j1] = v1_new;
+			}
+			else if (i0 >= 0 && j2 >= 0) {
+				j1 = j2;
+				i0 = y[j2];
+			}
+			if (i0 >= 0) {
+				if (v1_lowers) {
+					free_rows[--current] = i0;
+				}
+				else {
+					free_rows[new_free_rows++] = i0;
+				}
+			}
+		}
+		else {
+			PRINTF("rr_cnt=%d >= %d (current=%d * n=%d)\n", rr_cnt, current * n, current, n);
+			if (i0 >= 0) {
+				free_rows[new_free_rows++] = i0;
+			}
+		}
+		x[free_i] = j1;
+		y[j1] = free_i;
+	}
+	return new_free_rows;
+}
+
+
+/** Find columns with minimum d[j] and put them on the SCAN list.
+ */
+uint_t _find_dense(const uint_t n, uint_t lo, cost_t *d, int_t *cols, int_t *y)
+{
+	uint_t hi = lo + 1;
+	cost_t mind = d[cols[lo]];
+	for (uint_t k = hi; k < n; k++) {
+		int_t j = cols[k];
+		if (d[j] <= mind) {
+			if (d[j] < mind) {
+				hi = lo;
+				mind = d[j];
+			}
+			cols[k] = cols[hi];
+			cols[hi++] = j;
+		}
+	}
+	return hi;
+}
+
+
+// Scan all columns in TODO starting from arbitrary column in SCAN
+// and try to decrease d of the TODO columns using the SCAN column.
+int_t _scan_dense(const uint_t n, cost_t *cost[],
+	uint_t *plo, uint_t*phi,
+	cost_t *d, int_t *cols, int_t *pred,
+	int_t *y, cost_t *v)
+{
+	uint_t lo = *plo;
+	uint_t hi = *phi;
+	cost_t h, cred_ij;
+
+	while (lo != hi) {
+		int_t j = cols[lo++];
+		const int_t i = y[j];
+		const cost_t mind = d[j];
+		h = cost[i][j] - v[j] - mind;
+		PRINTF("i=%d j=%d h=%f\n", i, j, h);
+		// For all columns in TODO
+		for (uint_t k = hi; k < n; k++) {
+			j = cols[k];
+			cred_ij = cost[i][j] - v[j] - h;
+			if (cred_ij < d[j]) {
+				d[j] = cred_ij;
+				pred[j] = i;
+				if (cred_ij == mind) {
+					if (y[j] < 0) {
+						return j;
+					}
+					cols[k] = cols[hi];
+					cols[hi++] = j;
+				}
+			}
+		}
+	}
+	*plo = lo;
+	*phi = hi;
+	return -1;
+}
+
+
+/** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper.
+ *
+ * This is a dense matrix version.
+ *
+ * \return The closest free column index.
+ */
+int_t find_path_dense(
+	const uint_t n, cost_t *cost[],
+	const int_t start_i,
+	int_t *y, cost_t *v,
+	int_t *pred)
+{
+	uint_t lo = 0, hi = 0;
+	int_t final_j = -1;
+	uint_t n_ready = 0;
+	int_t *cols;
+	cost_t *d;
+
+	NEW(cols, int_t, n);
+	NEW(d, cost_t, n);
+
+	for (uint_t i = 0; i < n; i++) {
+		cols[i] = i;
+		pred[i] = start_i;
+		d[i] = cost[start_i][i] - v[i];
+	}
+	PRINT_COST_ARRAY(d, n);
+	while (final_j == -1) {
+		// No columns left on the SCAN list.
+		if (lo == hi) {
+			PRINTF("%d..%d -> find\n", lo, hi);
+			n_ready = lo;
+			hi = _find_dense(n, lo, d, cols, y);
+			PRINTF("check %d..%d\n", lo, hi);
+			PRINT_INDEX_ARRAY(cols, n);
+			for (uint_t k = lo; k < hi; k++) {
+				const int_t j = cols[k];
+				if (y[j] < 0) {
+					final_j = j;
+				}
+			}
+		}
+		if (final_j == -1) {
+			PRINTF("%d..%d -> scan\n", lo, hi);
+			final_j = _scan_dense(
+				n, cost, &lo, &hi, d, cols, pred, y, v);
+			PRINT_COST_ARRAY(d, n);
+			PRINT_INDEX_ARRAY(cols, n);
+			PRINT_INDEX_ARRAY(pred, n);
+		}
+	}
+
+	PRINTF("found final_j=%d\n", final_j);
+	PRINT_INDEX_ARRAY(cols, n);
+	{
+		const cost_t mind = d[cols[lo]];
+		for (uint_t k = 0; k < n_ready; k++) {
+			const int_t j = cols[k];
+			v[j] += d[j] - mind;
+		}
+	}
+
+	FREE(cols);
+	FREE(d);
+
+	return final_j;
+}
+
+
+/** Augment for a dense cost matrix.
+ */
+int_t _ca_dense(
+	const uint_t n, cost_t *cost[],
+	const uint_t n_free_rows,
+	int_t *free_rows, int_t *x, int_t *y, cost_t *v)
+{
+	int_t *pred;
+
+	NEW(pred, int_t, n);
+
+	for (int_t *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) {
+		int_t i = -1, j;
+		uint_t k = 0;
+
+		PRINTF("looking at free_i=%d\n", *pfree_i);
+		j = find_path_dense(n, cost, *pfree_i, y, v, pred);
+		ASSERT(j >= 0);
+		ASSERT(j < n);
+		while (i != *pfree_i) {
+			PRINTF("augment %d\n", j);
+			PRINT_INDEX_ARRAY(pred, n);
+			i = pred[j];
+			PRINTF("y[%d]=%d -> %d\n", j, y[j], i);
+			y[j] = i;
+			PRINT_INDEX_ARRAY(x, n);
+			SWAP_INDICES(j, x[i]);
+			k++;
+			if (k >= n) {
+				ASSERT(FALSE);
+			}
+		}
+	}
+	FREE(pred);
+	return 0;
+}
+
+
+/** Solve dense sparse LAP.
+ */
+int lapjv_internal(
+	const uint_t n, cost_t *cost[],
+	int_t *x, int_t *y)
+{
+	int ret;
+	int_t *free_rows;
+	cost_t *v;
+
+	NEW(free_rows, int_t, n);
+	NEW(v, cost_t, n);
+	ret = _ccrrt_dense(n, cost, free_rows, x, y, v);
+	int i = 0;
+	while (ret > 0 && i < 2) {
+		ret = _carr_dense(n, cost, ret, free_rows, x, y, v);
+		i++;
+	}
+	if (ret > 0) {
+		ret = _ca_dense(n, cost, ret, free_rows, x, y, v);
+	}
+	FREE(v);
+	FREE(free_rows);
+	return ret;
+}

+ 429 - 0
src/3rd/ByteTrack/src/utils.cpp

@@ -0,0 +1,429 @@
+#include "BYTETracker.h"
+#include "lapjv.h"
+
+vector<STrack*> BYTETracker::joint_stracks(vector<STrack*> &tlista, vector<STrack> &tlistb)
+{
+	map<int, int> exists;
+	vector<STrack*> res;
+	for (int i = 0; i < tlista.size(); i++)
+	{
+		exists.insert(pair<int, int>(tlista[i]->track_id, 1));
+		res.push_back(tlista[i]);
+	}
+	for (int i = 0; i < tlistb.size(); i++)
+	{
+		int tid = tlistb[i].track_id;
+		if (!exists[tid] || exists.count(tid) == 0)
+		{
+			exists[tid] = 1;
+			res.push_back(&tlistb[i]);
+		}
+	}
+	return res;
+}
+
+vector<STrack> BYTETracker::joint_stracks(vector<STrack> &tlista, vector<STrack> &tlistb)
+{
+	map<int, int> exists;
+	vector<STrack> res;
+	for (int i = 0; i < tlista.size(); i++)
+	{
+		exists.insert(pair<int, int>(tlista[i].track_id, 1));
+		res.push_back(tlista[i]);
+	}
+	for (int i = 0; i < tlistb.size(); i++)
+	{
+		int tid = tlistb[i].track_id;
+		if (!exists[tid] || exists.count(tid) == 0)
+		{
+			exists[tid] = 1;
+			res.push_back(tlistb[i]);
+		}
+	}
+	return res;
+}
+
+vector<STrack> BYTETracker::sub_stracks(vector<STrack> &tlista, vector<STrack> &tlistb)
+{
+	map<int, STrack> stracks;
+	for (int i = 0; i < tlista.size(); i++)
+	{
+		stracks.insert(pair<int, STrack>(tlista[i].track_id, tlista[i]));
+	}
+	for (int i = 0; i < tlistb.size(); i++)
+	{
+		int tid = tlistb[i].track_id;
+		if (stracks.count(tid) != 0)
+		{
+			stracks.erase(tid);
+		}
+	}
+
+	vector<STrack> res;
+	std::map<int, STrack>::iterator  it;
+	for (it = stracks.begin(); it != stracks.end(); ++it)
+	{
+		res.push_back(it->second);
+	}
+
+	return res;
+}
+
+void BYTETracker::remove_duplicate_stracks(vector<STrack> &resa, vector<STrack> &resb, vector<STrack> &stracksa, vector<STrack> &stracksb)
+{
+	vector<vector<float> > pdist = iou_distance(stracksa, stracksb);
+	vector<pair<int, int> > pairs;
+	for (int i = 0; i < pdist.size(); i++)
+	{
+		for (int j = 0; j < pdist[i].size(); j++)
+		{
+			if (pdist[i][j] < 0.15)
+			{
+				pairs.push_back(pair<int, int>(i, j));
+			}
+		}
+	}
+
+	vector<int> dupa, dupb;
+	for (int i = 0; i < pairs.size(); i++)
+	{
+		int timep = stracksa[pairs[i].first].frame_id - stracksa[pairs[i].first].start_frame;
+		int timeq = stracksb[pairs[i].second].frame_id - stracksb[pairs[i].second].start_frame;
+		if (timep > timeq)
+			dupb.push_back(pairs[i].second);
+		else
+			dupa.push_back(pairs[i].first);
+	}
+
+	for (int i = 0; i < stracksa.size(); i++)
+	{
+		vector<int>::iterator iter = find(dupa.begin(), dupa.end(), i);
+		if (iter == dupa.end())
+		{
+			resa.push_back(stracksa[i]);
+		}
+	}
+
+	for (int i = 0; i < stracksb.size(); i++)
+	{
+		vector<int>::iterator iter = find(dupb.begin(), dupb.end(), i);
+		if (iter == dupb.end())
+		{
+			resb.push_back(stracksb[i]);
+		}
+	}
+}
+
+void BYTETracker::linear_assignment(vector<vector<float> > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh,
+	vector<vector<int> > &matches, vector<int> &unmatched_a, vector<int> &unmatched_b)
+{
+	if (cost_matrix.size() == 0)
+	{
+		for (int i = 0; i < cost_matrix_size; i++)
+		{
+			unmatched_a.push_back(i);
+		}
+		for (int i = 0; i < cost_matrix_size_size; i++)
+		{
+			unmatched_b.push_back(i);
+		}
+		return;
+	}
+
+	vector<int> rowsol; vector<int> colsol;
+	float c = lapjv(cost_matrix, rowsol, colsol, true, thresh);
+	for (int i = 0; i < rowsol.size(); i++)
+	{
+		if (rowsol[i] >= 0)
+		{
+			vector<int> match;
+			match.push_back(i);
+			match.push_back(rowsol[i]);
+			matches.push_back(match);
+		}
+		else
+		{
+			unmatched_a.push_back(i);
+		}
+	}
+
+	for (int i = 0; i < colsol.size(); i++)
+	{
+		if (colsol[i] < 0)
+		{
+			unmatched_b.push_back(i);
+		}
+	}
+}
+
+vector<vector<float> > BYTETracker::ious(vector<vector<float> > &atlbrs, vector<vector<float> > &btlbrs)
+{
+	vector<vector<float> > ious;
+	if (atlbrs.size()*btlbrs.size() == 0)
+		return ious;
+
+	ious.resize(atlbrs.size());
+	for (int i = 0; i < ious.size(); i++)
+	{
+		ious[i].resize(btlbrs.size());
+	}
+
+	//bbox_ious
+	for (int k = 0; k < btlbrs.size(); k++)
+	{
+		vector<float> ious_tmp;
+		float box_area = (btlbrs[k][2] - btlbrs[k][0] + 1)*(btlbrs[k][3] - btlbrs[k][1] + 1);
+		for (int n = 0; n < atlbrs.size(); n++)
+		{
+			float iw = min(atlbrs[n][2], btlbrs[k][2]) - max(atlbrs[n][0], btlbrs[k][0]) + 1;
+			if (iw > 0)
+			{
+				float ih = min(atlbrs[n][3], btlbrs[k][3]) - max(atlbrs[n][1], btlbrs[k][1]) + 1;
+				if(ih > 0)
+				{
+					float ua = (atlbrs[n][2] - atlbrs[n][0] + 1)*(atlbrs[n][3] - atlbrs[n][1] + 1) + box_area - iw * ih;
+					ious[n][k] = iw * ih / ua;
+				}
+				else
+				{
+					ious[n][k] = 0.0;
+				}
+			}
+			else
+			{
+				ious[n][k] = 0.0;
+			}
+		}
+	}
+
+	return ious;
+}
+
+vector<vector<float> > BYTETracker::iou_distance(vector<STrack*> &atracks, vector<STrack> &btracks, int &dist_size, int &dist_size_size)
+{
+	vector<vector<float> > cost_matrix;
+	if (atracks.size() * btracks.size() == 0)
+	{
+		dist_size = atracks.size();
+		dist_size_size = btracks.size();
+		return cost_matrix;
+	}
+	vector<vector<float> > atlbrs, btlbrs;
+	for (int i = 0; i < atracks.size(); i++)
+	{
+		atlbrs.push_back(atracks[i]->tlbr);
+	}
+	for (int i = 0; i < btracks.size(); i++)
+	{
+		btlbrs.push_back(btracks[i].tlbr);
+	}
+
+	dist_size = atracks.size();
+	dist_size_size = btracks.size();
+
+	vector<vector<float> > _ious = ious(atlbrs, btlbrs);
+	
+	for (int i = 0; i < _ious.size();i++)
+	{
+		vector<float> _iou;
+		for (int j = 0; j < _ious[i].size(); j++)
+		{
+			_iou.push_back(1 - _ious[i][j]);
+		}
+		cost_matrix.push_back(_iou);
+	}
+
+	return cost_matrix;
+}
+
+vector<vector<float> > BYTETracker::iou_distance(vector<STrack> &atracks, vector<STrack> &btracks)
+{
+	vector<vector<float> > atlbrs, btlbrs;
+	for (int i = 0; i < atracks.size(); i++)
+	{
+		atlbrs.push_back(atracks[i].tlbr);
+	}
+	for (int i = 0; i < btracks.size(); i++)
+	{
+		btlbrs.push_back(btracks[i].tlbr);
+	}
+
+	vector<vector<float> > _ious = ious(atlbrs, btlbrs);
+	vector<vector<float> > cost_matrix;
+	for (int i = 0; i < _ious.size(); i++)
+	{
+		vector<float> _iou;
+		for (int j = 0; j < _ious[i].size(); j++)
+		{
+			_iou.push_back(1 - _ious[i][j]);
+		}
+		cost_matrix.push_back(_iou);
+	}
+
+	return cost_matrix;
+}
+
+double BYTETracker::lapjv(const vector<vector<float> > &cost, vector<int> &rowsol, vector<int> &colsol,
+	bool extend_cost, float cost_limit, bool return_cost)
+{
+	vector<vector<float> > cost_c;
+	cost_c.assign(cost.begin(), cost.end());
+
+	vector<vector<float> > cost_c_extended;
+
+	int n_rows = cost.size();
+	int n_cols = cost[0].size();
+	rowsol.resize(n_rows);
+	colsol.resize(n_cols);
+
+	int n = 0;
+	if (n_rows == n_cols)
+	{
+		n = n_rows;
+	}
+	else
+	{
+		if (!extend_cost)
+		{
+			cout << "set extend_cost=True" << endl;
+			system("pause");
+			exit(0);
+		}
+	}
+		
+	if (extend_cost || cost_limit < LONG_MAX)
+	{
+		n = n_rows + n_cols;
+		cost_c_extended.resize(n);
+		for (int i = 0; i < cost_c_extended.size(); i++)
+			cost_c_extended[i].resize(n);
+
+		if (cost_limit < LONG_MAX)
+		{
+			for (int i = 0; i < cost_c_extended.size(); i++)
+			{
+				for (int j = 0; j < cost_c_extended[i].size(); j++)
+				{
+					cost_c_extended[i][j] = cost_limit / 2.0;
+				}
+			}
+		}
+		else
+		{
+			float cost_max = -1;
+			for (int i = 0; i < cost_c.size(); i++)
+			{
+				for (int j = 0; j < cost_c[i].size(); j++)
+				{
+					if (cost_c[i][j] > cost_max)
+						cost_max = cost_c[i][j];
+				}
+			}
+			for (int i = 0; i < cost_c_extended.size(); i++)
+			{
+				for (int j = 0; j < cost_c_extended[i].size(); j++)
+				{
+					cost_c_extended[i][j] = cost_max + 1;
+				}
+			}
+		}
+
+		for (int i = n_rows; i < cost_c_extended.size(); i++)
+		{
+			for (int j = n_cols; j < cost_c_extended[i].size(); j++)
+			{
+				cost_c_extended[i][j] = 0;
+			}
+		}
+		for (int i = 0; i < n_rows; i++)
+		{
+			for (int j = 0; j < n_cols; j++)
+			{
+				cost_c_extended[i][j] = cost_c[i][j];
+			}
+		}
+
+		cost_c.clear();
+		cost_c.assign(cost_c_extended.begin(), cost_c_extended.end());
+	}
+
+	double **cost_ptr;
+	cost_ptr = new double *[sizeof(double *) * n];
+	for (int i = 0; i < n; i++)
+		cost_ptr[i] = new double[sizeof(double) * n];
+
+	for (int i = 0; i < n; i++)
+	{
+		for (int j = 0; j < n; j++)
+		{
+			cost_ptr[i][j] = cost_c[i][j];
+		}
+	}
+
+	int* x_c = new int[sizeof(int) * n];
+	int *y_c = new int[sizeof(int) * n];
+
+	int ret = lapjv_internal(n, cost_ptr, x_c, y_c);
+	if (ret != 0)
+	{
+		cout << "Calculate Wrong!" << endl;
+		system("pause");
+		exit(0);
+	}
+
+	double opt = 0.0;
+
+	if (n != n_rows)
+	{
+		for (int i = 0; i < n; i++)
+		{
+			if (x_c[i] >= n_cols)
+				x_c[i] = -1;
+			if (y_c[i] >= n_rows)
+				y_c[i] = -1;
+		}
+		for (int i = 0; i < n_rows; i++)
+		{
+			rowsol[i] = x_c[i];
+		}
+		for (int i = 0; i < n_cols; i++)
+		{
+			colsol[i] = y_c[i];
+		}
+
+		if (return_cost)
+		{
+			for (int i = 0; i < rowsol.size(); i++)
+			{
+				if (rowsol[i] != -1)
+				{
+					//cout << i << "\t" << rowsol[i] << "\t" << cost_ptr[i][rowsol[i]] << endl;
+					opt += cost_ptr[i][rowsol[i]];
+				}
+			}
+		}
+	}
+	else if (return_cost)
+	{
+		for (int i = 0; i < rowsol.size(); i++)
+		{
+			opt += cost_ptr[i][rowsol[i]];
+		}
+	}
+
+	for (int i = 0; i < n; i++)
+	{
+		delete[]cost_ptr[i];
+	}
+	delete[]cost_ptr;
+	delete[]x_c;
+	delete[]y_c;
+
+	return opt;
+}
+
+Scalar BYTETracker::get_color(int idx)
+{
+	idx += 3;
+	return Scalar(37 * idx % 255, 17 * idx % 255, 29 * idx % 255);
+}

+ 7 - 2
src/common/data.hpp

@@ -34,6 +34,7 @@ struct Box
 {
     float left, top, right, bottom, score;
     int class_id;
+    int track_id = -1;
     std::string label;
     std::vector<Point> keypoints;
     Box() : left(0), top(0), right(0), bottom(0), score(0), class_id(0), label("") {}
@@ -41,8 +42,12 @@ struct Box
         : left(left), top(top), right(right), bottom(bottom), score(score), class_id(class_id), label("") {}
     Box(float left, float top, float right, float bottom, float score, int class_id,  const std::string& label) 
         : left(left), top(top), right(right), bottom(bottom), score(score), class_id(class_id), label(label) {}
-    Box(const Box& b) : left(b.left), top(b.top), right(b.right), bottom(b.bottom), score(b.score), class_id(b.class_id), label(b.label), keypoints(b.keypoints) {}
-    Box(const Box&& b) : left(b.left), top(b.top), right(b.right), bottom(b.bottom), score(b.score), class_id(b.class_id), label(b.label), keypoints(b.keypoints) {}
+    Box(const Box& b) : 
+        left(b.left), top(b.top), right(b.right), bottom(b.bottom), score(b.score), 
+        class_id(b.class_id), label(b.label), track_id(b.track_id), keypoints(b.keypoints) {}
+    Box(const Box&& b) : 
+        left(b.left), top(b.top), right(b.right), bottom(b.bottom), score(b.score), 
+        class_id(b.class_id), label(b.label), track_id(b.track_id), keypoints(b.keypoints) {}
     Box& operator=(const Box& b)
     {
         left = b.left;

+ 3 - 0
src/common/utils.cpp

@@ -3,6 +3,9 @@
 #include <iomanip>
 #include <sstream>
 
+#include "ByteTrack.h"
+#include "common/data.hpp"
+
 std::string getTimeString()
 {
     auto now = std::chrono::system_clock::now();

+ 4 - 1
src/main.cpp

@@ -109,4 +109,7 @@ int main()
 // YOLO11 seg 
 // 通过配置文件创建 pipeline
 // 日志
-// 设置电子围栏
+// 设置电子围栏
+
+
+python3 detect.py --weights carperson/carperson.pt --source carperson/carperson.mp4

+ 63 - 0
src/nodes/track/trackNode.cpp

@@ -0,0 +1,63 @@
+#include "nodes/track/trackNode.hpp"
+
+namespace Node
+{
+
+
+void TrackNode::work()
+{
+    printf("TrackNode %s\n", name_.c_str());
+    while (running_)
+    {
+        bool has_data = false;
+        for (auto& input_buffer : input_buffers_)
+        {
+            std::shared_ptr<meta::MetaData> metaData;
+            if (!input_buffer.second->try_pop(metaData))
+            {
+                continue;
+            }
+            has_data = true;
+            // printf("Node %s get data from %s\n", name_.c_str(), input_buffer.first.c_str());
+
+            // auto res = model_->forward(tensor::cvimg(image), image.cols, image.rows, 0.0f, 0.0f);
+            if (!track_)
+            {
+                printf("track is nullptr\n");
+                continue;
+            }
+            std::vector<Object> objects;
+            std::transform(metaData->boxes.begin(), metaData->boxes.end(), objects.begin(), [](data::Box& box) {
+                Object obj;
+                obj.rect.x = box.left;
+                obj.rect.y = box.top;
+                obj.rect.width = box.right - box.left;
+                obj.rect.height = box.bottom - box.top;
+                obj.label = box.class_id;
+                obj.prob = box.score;
+                return obj;
+            });
+            std::vector<STrack> output_stracks = tracker_->update(objects);
+            for (int i = 0; i < output_stracks.size(); i++)
+            {
+                vector<float> tlwh = output_stracks[i].tlwh;
+                int track_id = output_stracks[i].track_id;
+                std::for_each(metaData->boxes.begin(), metaData->boxes.end(), [track_id, tlwh](data::Box& box) {
+                    if (box.left == tlwh[0] && box.top == tlwh[1] && box.right - box.left == tlwh[2] && box.bottom - box.top == tlwh[3])
+                    {
+                        box.track_id = track_id;
+                    }
+                });
+            }
+        }
+        if (!has_data)
+        {
+            std::unique_lock<std::mutex> lock(mutex_);
+            cond_var_->wait_for(lock, std::chrono::milliseconds(100), [this] {
+                return !running_;  // 等待时检查退出条件
+            });
+        }
+    }
+};
+
+}   // namespace Node

+ 31 - 0
src/nodes/track/trackNode.hpp

@@ -0,0 +1,31 @@
+#ifndef TRACKNODE_HPP__
+#define TRACKNODE_HPP__
+
+#include "nodes/base/base.hpp"
+#include <opencv2/opencv.hpp>
+#include "common/meta.hpp"
+#include "common/data.hpp"
+#include "BYTETracker.h"
+#include "infer/infer.hpp"
+
+namespace Node
+{
+
+class TrackNode : public BaseNode
+{
+public:
+    TrackNode() = delete;
+    TrackNode(const std::string& name, int frame_rate, int track_buffer) : BaseNode(name, NODE_TYPE::MID_NODE) 
+    {
+        track_ = std::make_shared<BYTETracker>(frame_rate, track_buffer);
+    }
+    virtual ~TrackNode()  { stop(); };
+    void work() override;
+
+private:
+    std::shared_ptr<BYTETracker> track_ = nullptr;
+};
+
+}
+
+#endif