From 588f2e6bc84adda2f0407cd475c3e21feab175b3 Mon Sep 17 00:00:00 2001 From: Charles Parker Date: Sun, 14 Apr 2024 19:53:18 -0700 Subject: [PATCH] fix: android inference with faster yuv->rgb conversion --- .clang-format | 26 ++ .eslintrc.js | 2 - .gitignore | 7 - .gitmodules | 3 + android/CMakeLists.txt | 36 ++ android/build.gradle | 42 ++ android/src/main/cpp/JImage.cpp | 33 ++ android/src/main/cpp/JImage.h | 27 ++ android/src/main/cpp/JImagePlane.cpp | 34 ++ android/src/main/cpp/JImagePlane.h | 27 ++ android/src/main/cpp/ResizeConvert.cpp | 421 ++++++++++++++++++ android/src/main/cpp/ResizeConvert.h | 89 ++++ android/src/main/cpp/ResizeConvertLib.cpp | 11 + .../ObjectDetectionFrameProcessorPlugin.kt | 172 ++++++- .../objectdetection/ObjectDetectionModule.kt | 7 +- .../objectdetection/ObjectDetectorHelper.kt | 176 +------- .../shared/ResizeConvert.kt | 39 ++ examples/objectdetection/src/App.tsx | 3 +- libyuv | 1 + package.json | 9 +- src/objectDetection/index.ts | 10 +- tsconfig.build.json | 1 - tsconfig.json | 1 - turbo.json | 20 +- yarn.lock | 2 +- 25 files changed, 992 insertions(+), 207 deletions(-) create mode 100644 .clang-format create mode 100644 .gitmodules create mode 100644 android/CMakeLists.txt create mode 100644 android/src/main/cpp/JImage.cpp create mode 100644 android/src/main/cpp/JImage.h create mode 100644 android/src/main/cpp/JImagePlane.cpp create mode 100644 android/src/main/cpp/JImagePlane.h create mode 100644 android/src/main/cpp/ResizeConvert.cpp create mode 100644 android/src/main/cpp/ResizeConvert.h create mode 100644 android/src/main/cpp/ResizeConvertLib.cpp create mode 100644 android/src/main/java/com/reactnativemediapipe/shared/ResizeConvert.kt create mode 160000 libyuv diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..f6fcde4 --- /dev/null +++ b/.clang-format @@ -0,0 +1,26 @@ +# Config for clang-format version 16 + +# Standard +BasedOnStyle: llvm +Standard: c++14 + +# Indentation +IndentWidth: 2 
+ColumnLimit: 140 + +# Includes +SortIncludes: true +SortUsingDeclarations: true + +# Pointer and reference alignment +PointerAlignment: Left +ReferenceAlignment: Left +ReflowComments: true + +# Line breaking options +BreakBeforeBraces: Attach +BreakConstructorInitializers: BeforeColon +AllowShortFunctionsOnASingleLine: Empty +IndentCaseLabels: true +NamespaceIndentation: Inner + diff --git a/.eslintrc.js b/.eslintrc.js index 93fb984..225acb4 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -7,7 +7,6 @@ module.exports = { "./tsconfig.json", "docsite/tsconfig.json", "examples/objectdetection/tsconfig.json", - "example/tsconfig.json", ], ecmaFeatures: { jsx: true, @@ -25,7 +24,6 @@ module.exports = { "*.config.js", "jest.setup.js", "coverage", - "example/index.js", ], plugins: ["@typescript-eslint"], extends: [ diff --git a/.gitignore b/.gitignore index 1ceb4e9..fca61ae 100644 --- a/.gitignore +++ b/.gitignore @@ -44,13 +44,6 @@ project.xcworkspace local.properties android.iml -# Cocoapods -# -example/ios/Pods - -# Ruby -example/vendor/ - # node.js # node_modules/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..5e9e16c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "libyuv"] + path = libyuv + url = https://android.googlesource.com/platform/external/libyuv/ diff --git a/android/CMakeLists.txt b/android/CMakeLists.txt new file mode 100644 index 0000000..0435059 --- /dev/null +++ b/android/CMakeLists.txt @@ -0,0 +1,37 @@ +# cmake_minimum_required() has to run before project() so that policies are set before the toolchain is configured. 
+cmake_minimum_required(VERSION 3.9.0) +project(ResizeConvertLib) + +set (PACKAGE_NAME "ResizeConvertLib") +set (BUILD_DIR ${CMAKE_SOURCE_DIR}/build) +set (CMAKE_VERBOSE_MAKEFILE ON) +set (CMAKE_CXX_STANDARD 17) + +# Third party libraries (Prefabs) +find_package(fbjni REQUIRED CONFIG) +find_library(LOG_LIB log) + +# libyuv +add_subdirectory(../libyuv libyuv) + +add_library(${PACKAGE_NAME} SHARED + src/main/cpp/ResizeConvert.cpp + src/main/cpp/JImage.cpp + src/main/cpp/JImagePlane.cpp + 
src/main/cpp/ResizeConvertLib.cpp +) + +# Specifies a path to native header files. +target_include_directories( + ${PACKAGE_NAME} PRIVATE + src/main/cpp + ../libyuv/include +) + +target_link_libraries( + ${PACKAGE_NAME} + ${LOG_LIB} # <-- Logcat logger + android # <-- Android JNI core + fbjni::fbjni # <-- fbjni + yuv # <-- libyuv +) diff --git a/android/build.gradle b/android/build.gradle index 0887830..18bd321 100644 --- a/android/build.gradle +++ b/android/build.gradle @@ -25,6 +25,11 @@ if (isNewArchitectureEnabled()) { apply plugin: "com.facebook.react" } +def reactNativeArchitectures() { + def value = project.getProperties().get("reactNativeArchitectures") + return value ? value.split(",") : ["armeabi-v7a", "x86", "x86_64", "arm64-v8a"] +} + def getExtOrDefault(name) { return rootProject.ext.has(name) ? rootProject.ext.get(name) : project.properties["Mediapipe_" + name] } @@ -53,12 +58,30 @@ android { } } + buildFeatures { + prefab true + } + + ndkVersion getExtOrDefault("ndkVersion") compileSdkVersion getExtOrIntegerDefault("compileSdkVersion") defaultConfig { minSdkVersion getExtOrIntegerDefault("minSdkVersion") targetSdkVersion getExtOrIntegerDefault("targetSdkVersion") + externalNativeBuild { + cmake { + cppFlags "-O2 -frtti -fexceptions -Wall -fstack-protector-all" + abiFilters (*reactNativeArchitectures()) + arguments "-DANDROID_STL=c++_shared" + } + } + } + + externalNativeBuild { + cmake { + path "CMakeLists.txt" + } } buildTypes { @@ -75,6 +98,25 @@ android { sourceCompatibility JavaVersion.VERSION_1_8 targetCompatibility JavaVersion.VERSION_1_8 } + + // packagingOptions { + // excludes = [ + // "**/libc++_shared.so", + // "**/libfbjni.so", + // "**/libjsi.so", + // "**/libfolly_json.so", + // "**/libfolly_runtime.so", + // "**/libglog.so", + // "**/libhermes.so", + // "**/libhermes-executor-debug.so", + // "**/libhermes_executor.so", + // "**/libreactnativejni.so", + // "**/libturbomodulejsijni.so", + // "**/libreact_nativemodule_core.so", + // 
"**/libjscexecutor.so" + // ] + // } + } repositories { diff --git a/android/src/main/cpp/JImage.cpp b/android/src/main/cpp/JImage.cpp new file mode 100644 index 0000000..37532e6 --- /dev/null +++ b/android/src/main/cpp/JImage.cpp @@ -0,0 +1,33 @@ +// +// Created by Marc Rousavy on 25.01.24. +// + +#include "JImage.h" + +#include +#include + +namespace resizeconvert { + +using namespace facebook; +using namespace jni; + +int JImage::getWidth() const { + auto method = getClass()->getMethod("getWidth"); + auto result = method(self()); + return result; +} + +int JImage::getHeight() const { + auto method = getClass()->getMethod("getHeight"); + auto result = method(self()); + return result; +} + +jni::local_ref> JImage::getPlanes() const { + auto method = getClass()->getMethod()>("getPlanes"); + auto result = method(self()); + return result; +} + +} // namespace resizeconvert \ No newline at end of file diff --git a/android/src/main/cpp/JImage.h b/android/src/main/cpp/JImage.h new file mode 100644 index 0000000..b9acab0 --- /dev/null +++ b/android/src/main/cpp/JImage.h @@ -0,0 +1,27 @@ +// +// Created by Marc Rousavy on 25.01.24. +// + +#pragma once + +#include "JImagePlane.h" +#include +#include + +namespace resizeconvert +{ + + using namespace facebook; + using namespace jni; + + struct JImage : public JavaClass + { + static constexpr auto kJavaDescriptor = "Landroid/media/Image;"; + + public: + int getWidth() const; + int getHeight() const; + jni::local_ref> getPlanes() const; + }; + +} // namespace resizeconvert diff --git a/android/src/main/cpp/JImagePlane.cpp b/android/src/main/cpp/JImagePlane.cpp new file mode 100644 index 0000000..b260dd6 --- /dev/null +++ b/android/src/main/cpp/JImagePlane.cpp @@ -0,0 +1,34 @@ +// +// Created by Marc Rousavy on 25.01.24. 
+// + +#include "JImagePlane.h" + +namespace resizeconvert +{ + + using namespace facebook; + using namespace jni; + + int JImagePlane::getPixelStride() const + { + auto method = getClass()->getMethod("getPixelStride"); + auto result = method(self()); + return result; + } + + int JImagePlane::getRowStride() const + { + auto method = getClass()->getMethod("getRowStride"); + auto result = method(self()); + return result; + } + + jni::local_ref JImagePlane::getBuffer() const + { + auto method = getClass()->getMethod("getBuffer"); + auto result = method(self()); + return result; + } + +} // namespace resizeconvert \ No newline at end of file diff --git a/android/src/main/cpp/JImagePlane.h b/android/src/main/cpp/JImagePlane.h new file mode 100644 index 0000000..cc7b70e --- /dev/null +++ b/android/src/main/cpp/JImagePlane.h @@ -0,0 +1,27 @@ +// +// Created by Marc Rousavy on 25.01.24. +// + +#pragma once + +#include +#include +#include + +namespace resizeconvert +{ + + using namespace facebook; + using namespace jni; + + struct JImagePlane : public JavaClass + { + static constexpr auto kJavaDescriptor = "Landroid/media/Image$Plane;"; + + public: + jni::local_ref getBuffer() const; + int getPixelStride() const; + int getRowStride() const; + }; + +} // namespace resizeconvert diff --git a/android/src/main/cpp/ResizeConvert.cpp b/android/src/main/cpp/ResizeConvert.cpp new file mode 100644 index 0000000..ead2b61 --- /dev/null +++ b/android/src/main/cpp/ResizeConvert.cpp @@ -0,0 +1,421 @@ +// +// Created by Marc Rousavy on 25.01.24 +// + +#include "ResizeConvert.h" +#include "libyuv.h" +#include +#include +#include +#include + +namespace resizeconvert { + + using namespace facebook; + using namespace facebook::jni; + + void ResizeConvert::registerNatives() { + registerHybrid({ + makeNativeMethod("initHybrid", ResizeConvert::initHybrid), + makeNativeMethod("resize", ResizeConvert::resize), + }); + } + + ResizeConvert::ResizeConvert(const jni::alias_ref &javaThis) { + _javaThis 
= jni::make_global(javaThis); + } + + int getChannelCount(PixelFormat pixelFormat) { + switch (pixelFormat) { + case RGB: + case BGR: + return 3; + case ARGB: + case RGBA: + case BGRA: + case ABGR: + return 4; + } + } + + int getBytesPerChannel(DataType type) { + switch (type) { + case UINT8: + return sizeof(uint8_t); + case FLOAT32: + return sizeof(float_t); + } + } + + int getBytesPerPixel(PixelFormat pixelFormat, DataType type) { + return getChannelCount(pixelFormat) * getBytesPerChannel(type); + } + + int FrameBuffer::bytesPerRow() { + size_t bytesPerPixel = getBytesPerPixel(pixelFormat, dataType); + return width * bytesPerPixel; + } + + uint8_t *FrameBuffer::data() { + return buffer->getDirectBytes(); + } + + global_ref ResizeConvert::allocateBuffer(size_t size, std::string debugName) { + __android_log_print(ANDROID_LOG_INFO, TAG, "Allocating %s Buffer with size %zu...", + debugName.c_str(), size); + local_ref buffer = JByteBuffer::allocateDirect(size); + buffer->order(JByteOrder::nativeOrder()); + return make_global(buffer); + } + + FrameBuffer ResizeConvert::imageToFrameBuffer(alias_ref image) { + __android_log_write(ANDROID_LOG_INFO, TAG, "Converting YUV 4:2:0 -> ARGB 8888..."); + + jni::local_ref> planes = image->getPlanes(); + + jni::local_ref yPlane = planes->getElement(0); + jni::local_ref yBuffer = yPlane->getBuffer(); + jni::local_ref uPlane = planes->getElement(1); + jni::local_ref uBuffer = uPlane->getBuffer(); + jni::local_ref vPlane = planes->getElement(2); + jni::local_ref vBuffer = vPlane->getBuffer(); + + size_t uvPixelStride = uPlane->getPixelStride(); + if (uPlane->getPixelStride() != vPlane->getPixelStride()) { + throw std::runtime_error( + "U and V planes do not have the same pixel stride! 
Are you sure this is a 4:2:0 YUV format?"); + } + + int width = image->getWidth(); + int height = image->getHeight(); + + size_t channels = getChannelCount(PixelFormat::ARGB); + size_t channelSize = getBytesPerChannel(DataType::UINT8); + size_t argbSize = width * height * channels * channelSize; + if (_argbBuffer == nullptr || _argbBuffer->getDirectSize() != argbSize) { + _argbBuffer = allocateBuffer(argbSize, "_argbBuffer"); + } + FrameBuffer destination = { + .width = width, + .height = height, + .pixelFormat = PixelFormat::ARGB, + .dataType = DataType::UINT8, + .buffer = _argbBuffer, + }; + + // 1. Convert from YUV -> ARGB + int status = libyuv::Android420ToARGB(yBuffer->getDirectBytes(), yPlane->getRowStride(), + uBuffer->getDirectBytes(), + uPlane->getRowStride(), vBuffer->getDirectBytes(), + vPlane->getRowStride(), uvPixelStride, + destination.data(), width * channels * channelSize, + width, height); + + if (status != 0) { + throw std::runtime_error( + "Failed to convert YUV 4:2:0 to ARGB! Error: " + std::to_string(status)); + } + + return destination; + } + + std::string rectToString(int x, int y, int width, int height) { + return std::to_string(x) + ", " + std::to_string(y) + " @ " + std::to_string(width) + "x" + + std::to_string(height); + } + + FrameBuffer + ResizeConvert::cropARGBBuffer(resizeconvert::FrameBuffer frameBuffer, int x, int y, int width, + int height) { + if (width == frameBuffer.width && height == frameBuffer.height && x == 0 && y == 0) { + // already in correct size. 
+ return frameBuffer; + } + + auto rectString = rectToString(0, 0, frameBuffer.width, frameBuffer.height); + auto targetString = rectToString(x, y, width, height); + __android_log_print(ANDROID_LOG_INFO, TAG, "Cropping [%s] ARGB buffer to [%s]...", + rectString.c_str(), targetString.c_str()); + + size_t channels = getChannelCount(PixelFormat::ARGB); + size_t channelSize = getBytesPerChannel(DataType::UINT8); + size_t argbSize = width * height * channels * channelSize; + if (_cropBuffer == nullptr || _cropBuffer->getDirectSize() != argbSize) { + _cropBuffer = allocateBuffer(argbSize, "_cropBuffer"); + } + FrameBuffer destination = { + .width = width, + .height = height, + .pixelFormat = PixelFormat::ARGB, + .dataType = DataType::UINT8, + .buffer = _cropBuffer, + }; + + int status = libyuv::ConvertToARGB(frameBuffer.data(), + frameBuffer.height * frameBuffer.bytesPerRow(), + destination.data(), + destination.bytesPerRow(), x, y, frameBuffer.width, + frameBuffer.height, width, height, + libyuv::kRotate0, libyuv::FOURCC_ARGB); + if (status != 0) { + throw std::runtime_error( + "Failed to crop ARGB Buffer! 
Status: " + std::to_string(status)); + } + + return destination; + } + + FrameBuffer ResizeConvert::mirrorARGBBuffer(FrameBuffer frameBuffer, bool mirror) { + if (!mirror) { + return frameBuffer; + } + + __android_log_print(ANDROID_LOG_INFO, TAG, "Mirroring ARGB buffer..."); + + size_t channels = getChannelCount(PixelFormat::ARGB); + size_t channelSize = getBytesPerChannel(DataType::UINT8); + size_t argbSize = frameBuffer.width * frameBuffer.height * channels * channelSize; + if (_mirrorBuffer == nullptr || _mirrorBuffer->getDirectSize() != argbSize) { + _mirrorBuffer = allocateBuffer(argbSize, "_mirrorBuffer"); + } + FrameBuffer destination = { + .width = frameBuffer.width, + .height = frameBuffer.height, + .pixelFormat = PixelFormat::ARGB, + .dataType = DataType::UINT8, + .buffer = _mirrorBuffer, + }; + + int status = libyuv::ARGBMirror(frameBuffer.data(), frameBuffer.bytesPerRow(), + destination.data(), destination.bytesPerRow(), + frameBuffer.width, frameBuffer.height); + if (status != 0) { + throw std::runtime_error( + "Failed to mirror ARGB Buffer! Status: " + std::to_string(status)); + } + + return destination; + } + + FrameBuffer ResizeConvert::rotateARGBBuffer(FrameBuffer frameBuffer, int rotation) { + if (rotation == 0) { + return frameBuffer; + } + + int rotatedWidth = frameBuffer.width; + int rotatedHeight = frameBuffer.height; + if (rotation == 90 || rotation == 270) { + std::swap(rotatedWidth, rotatedHeight); + } + + size_t channels = getChannelCount(PixelFormat::ARGB); + size_t channelSize = getBytesPerChannel(DataType::UINT8); + size_t destinationStride = + rotation == 90 || rotation == 270 ? 
rotatedWidth * channels * channelSize + : frameBuffer.bytesPerRow(); + size_t argbSize = rotatedWidth * rotatedHeight * channels * channelSize; + + if (_rotatedBuffer == nullptr || _rotatedBuffer->getDirectSize() != argbSize) { + _rotatedBuffer = allocateBuffer(argbSize, "_rotatedBuffer"); + } + + FrameBuffer destination = { + .width = rotatedWidth, + .height = rotatedHeight, + .pixelFormat = PixelFormat::ARGB, + .dataType = DataType::UINT8, + .buffer = _rotatedBuffer, + }; + + int status = libyuv::ARGBRotate(frameBuffer.data(), frameBuffer.bytesPerRow(), + destination.data(), destinationStride, frameBuffer.width, + frameBuffer.height, + static_cast(rotation)); + if (status != 0) { + throw std::runtime_error( + "Failed to rotate ARGB Buffer! Status: " + std::to_string(status)); + } + + return destination; + } + + FrameBuffer + ResizeConvert::scaleARGBBuffer(resizeconvert::FrameBuffer frameBuffer, int width, int height) { + if (width == frameBuffer.width && height == frameBuffer.height) { + // already in correct size. 
+ return frameBuffer; + } + auto rectString = rectToString(0, 0, frameBuffer.width, frameBuffer.height); + auto targetString = rectToString(0, 0, width, height); + __android_log_print(ANDROID_LOG_INFO, TAG, "Scaling [%s] ARGB buffer to [%s]...", + rectString.c_str(), targetString.c_str()); + + size_t channels = getChannelCount(PixelFormat::ARGB); + size_t channelSize = getBytesPerChannel(DataType::UINT8); + size_t argbSize = width * height * channels * channelSize; + if (_scaleBuffer == nullptr || _scaleBuffer->getDirectSize() != argbSize) { + _scaleBuffer = allocateBuffer(argbSize, "_scaleBuffer"); + } + FrameBuffer destination = { + .width = width, + .height = height, + .pixelFormat = PixelFormat::ARGB, + .dataType = DataType::UINT8, + .buffer = _scaleBuffer, + }; + + int status = libyuv::ARGBScale(frameBuffer.data(), frameBuffer.bytesPerRow(), + frameBuffer.width, frameBuffer.height, destination.data(), + destination.bytesPerRow(), width, height, + libyuv::FilterMode::kFilterBilinear); + if (status != 0) { + throw std::runtime_error( + "Failed to scale ARGB Buffer! Status: " + std::to_string(status)); + } + + return destination; + } + + FrameBuffer + ResizeConvert::convertARGBBufferTo(FrameBuffer frameBuffer, PixelFormat pixelFormat) { + if (frameBuffer.pixelFormat == pixelFormat) { + // Already in the correct format. 
+ return frameBuffer; + } + + __android_log_print(ANDROID_LOG_INFO, TAG, "Converting ARGB Buffer to Pixel Format %zu...", + pixelFormat); + + size_t bytesPerPixel = getBytesPerPixel(pixelFormat, frameBuffer.dataType); + size_t targetBufferSize = frameBuffer.width * frameBuffer.height * bytesPerPixel; + if (_customFormatBuffer == nullptr || + _customFormatBuffer->getDirectSize() != targetBufferSize) { + _customFormatBuffer = allocateBuffer(targetBufferSize, "_customFormatBuffer"); + } + FrameBuffer destination = { + .width = frameBuffer.width, + .height = frameBuffer.height, + .pixelFormat = pixelFormat, + .dataType = frameBuffer.dataType, + .buffer = _customFormatBuffer, + }; + + int error = 0; + switch (pixelFormat) { + case PixelFormat::ARGB: + // do nothing, we're already in ARGB + return frameBuffer; + case RGB: + error = libyuv::ARGBToRGB24(frameBuffer.data(), frameBuffer.bytesPerRow(), + destination.data(), destination.bytesPerRow(), + destination.width, destination.height); + break; + case BGR: + throw std::runtime_error("BGR is not supported on Android!"); + case RGBA: + error = libyuv::ARGBToRGBA(frameBuffer.data(), frameBuffer.bytesPerRow(), + destination.data(), destination.bytesPerRow(), + destination.width, destination.height); + break; + case BGRA: + error = libyuv::ARGBToBGRA(frameBuffer.data(), frameBuffer.bytesPerRow(), + destination.data(), destination.bytesPerRow(), + destination.width, destination.height); + break; + case ABGR: + error = libyuv::ARGBToABGR(frameBuffer.data(), frameBuffer.bytesPerRow(), + destination.data(), destination.bytesPerRow(), + destination.width, destination.height); + break; + } + + if (error != 0) { + throw std::runtime_error( + "Failed to convert ARGB Buffer to target Pixel Format! 
Error: " + + std::to_string(error)); + } + + return destination; + } + + FrameBuffer ResizeConvert::convertBufferToDataType(FrameBuffer frameBuffer, DataType dataType) { + if (frameBuffer.dataType == dataType) { + // Already in correct data-type + return frameBuffer; + } + + __android_log_print(ANDROID_LOG_INFO, TAG, "Converting ARGB Buffer to Data Type %zu...", + dataType); + + size_t targetSize = frameBuffer.width * frameBuffer.height * + getBytesPerPixel(frameBuffer.pixelFormat, dataType); + if (_customTypeBuffer == nullptr || _customTypeBuffer->getDirectSize() != targetSize) { + _customTypeBuffer = allocateBuffer(targetSize, "_customTypeBuffer"); + } + size_t size = frameBuffer.buffer->getDirectSize(); + FrameBuffer destination = { + .width = frameBuffer.width, + .height = frameBuffer.height, + .pixelFormat = frameBuffer.pixelFormat, + .dataType = dataType, + .buffer = _customTypeBuffer, + }; + + int status = 0; + switch (dataType) { + case UINT8: + // it's already uint8 + return frameBuffer; + case FLOAT32: { + float *floatData = reinterpret_cast(destination.data()); + status = libyuv::ByteToFloat(frameBuffer.data(), floatData, 1.0f / 255.0f, size); + break; + } + } + + if (status != 0) { + throw std::runtime_error("Failed to convert Buffer to target Data Type! Error: " + + std::to_string(status)); + } + + return destination; + } + + jni::global_ref + ResizeConvert::resize(jni::alias_ref image, int cropX, int cropY, int cropWidth, + int cropHeight, + int scaleWidth, int scaleHeight, int rotationOrdinal, bool mirror, + int /* PixelFormat */ pixelFormatOrdinal, + int /* DataType */ dataTypeOrdinal) { + PixelFormat pixelFormat = static_cast(pixelFormatOrdinal); + DataType dataType = static_cast(dataTypeOrdinal); + + // 1. Convert from YUV -> ARGB + FrameBuffer result = imageToFrameBuffer(image); + + // 2. Crop ARGB + result = cropARGBBuffer(result, cropX, cropY, cropWidth, cropHeight); + + // 3. 
Scale ARGB + result = scaleARGBBuffer(result, scaleWidth, scaleHeight); + + // 4. Rotate ARGB + result = rotateARGBBuffer(result, rotationOrdinal); + + // 5 Mirror ARGB if needed + result = mirrorARGBBuffer(result, mirror); + + // 6. Convert from ARGB -> ???? + result = convertARGBBufferTo(result, pixelFormat); + + // 7. Convert from data type to other data type + result = convertBufferToDataType(result, dataType); + + return result.buffer; + } + + jni::local_ref ResizeConvert::initHybrid(jni::alias_ref javaThis) { + return makeCxxInstance(javaThis); + } + +} // namespace resizeconvert diff --git a/android/src/main/cpp/ResizeConvert.h b/android/src/main/cpp/ResizeConvert.h new file mode 100644 index 0000000..dc658a1 --- /dev/null +++ b/android/src/main/cpp/ResizeConvert.h @@ -0,0 +1,89 @@ +// +// Created by Marc Rousavy on 25.01.24 +// + +#pragma once + +#include +#include +#include +#include +#include + +#include "JImage.h" + +namespace resizeconvert { + + using namespace facebook; + using namespace jni; + + enum PixelFormat { + RGB, BGR, ARGB, RGBA, BGRA, ABGR + }; + + enum DataType { + UINT8, FLOAT32 + }; + + struct FrameBuffer { + int width; + int height; + PixelFormat pixelFormat; + DataType dataType; + global_ref buffer; + + uint8_t *data(); + + int bytesPerRow(); + }; + + struct ResizeConvert : public HybridClass { + public: + static auto constexpr kJavaDescriptor = "Lcom/reactnativemediapipe/shared/ResizeConvert;"; + static void registerNatives(); + + private: + explicit ResizeConvert(const alias_ref &javaThis); + + global_ref + resize(alias_ref image, int cropX, int cropY, int cropWidth, int cropHeight, + int scaleWidth, + int scaleHeight, int rotation, bool mirror, int /* PixelFormat */ pixelFormat, + int /* DataType */ dataType); + + FrameBuffer imageToFrameBuffer(alias_ref image); + + FrameBuffer cropARGBBuffer(FrameBuffer frameBuffer, int x, int y, int width, int height); + + FrameBuffer scaleARGBBuffer(FrameBuffer frameBuffer, int width, int height); 
+ + FrameBuffer convertARGBBufferTo(FrameBuffer frameBuffer, PixelFormat toFormat); + + FrameBuffer convertBufferToDataType(FrameBuffer frameBuffer, DataType dataType); + + FrameBuffer rotateARGBBuffer(FrameBuffer frameBuffer, int rotation); + + FrameBuffer mirrorARGBBuffer(FrameBuffer frameBuffer, bool mirror); + + global_ref allocateBuffer(size_t size, std::string debugName); + + private: + static auto constexpr TAG = "ResizeConvert"; + friend HybridBase; + global_ref _javaThis; + // YUV (?x?) -> ARGB (?x?) + global_ref _argbBuffer; + // ARGB (?x?) -> ARGB (!x!) + global_ref _cropBuffer; + global_ref _scaleBuffer; + global_ref _rotatedBuffer; + global_ref _mirrorBuffer; + // ARGB (?x?) -> !!!! (?x?) + global_ref _customFormatBuffer; + // Custom Data Type (e.g. float32) + global_ref _customTypeBuffer; + + static local_ref initHybrid(alias_ref javaThis); + }; + +} // namespace resizeconvert diff --git a/android/src/main/cpp/ResizeConvertLib.cpp b/android/src/main/cpp/ResizeConvertLib.cpp new file mode 100644 index 0000000..816d0f0 --- /dev/null +++ b/android/src/main/cpp/ResizeConvertLib.cpp @@ -0,0 +1,11 @@ +// +// Created by Marc Rousavy on 25.01.24 +// + +#include "ResizeConvert.h" +#include +#include + +JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) { + return facebook::jni::initialize(vm, [] { resizeconvert::ResizeConvert::registerNatives(); }); +} diff --git a/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionFrameProcessorPlugin.kt b/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionFrameProcessorPlugin.kt index 2a54339..9c3ede6 100644 --- a/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionFrameProcessorPlugin.kt +++ b/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionFrameProcessorPlugin.kt @@ -1,18 +1,186 @@ package com.reactnativemediapipe.objectdetection +import android.graphics.ImageFormat +import android.util.Log +import 
com.google.mediapipe.framework.image.ByteBufferImageBuilder +import com.google.mediapipe.framework.image.MPImage import com.mrousavy.camera.frameprocessor.Frame import com.mrousavy.camera.frameprocessor.FrameProcessorPlugin import com.mrousavy.camera.frameprocessor.VisionCameraProxy +import com.reactnativemediapipe.shared.ResizeConvert class ObjectDetectionFrameProcessorPlugin() : FrameProcessorPlugin() { + companion object { + private const val TAG = "ObjectDetectionFrameProcessorPlugin" + } + private val resizeConvert: ResizeConvert = ResizeConvert() + override fun callback(frame: Frame, params: MutableMap?): Any? { - val detectorHandle:Double = params!!["detectorHandle"] as Double + val detectorHandle: Double = params!!["detectorHandle"] as Double val detector = ObjectDetectorMap.detectorMap[detectorHandle.toInt()] ?: return false + var cropWidth = frame.width + var cropHeight = frame.height + var cropX = 0 + var cropY = 0 + var scaleWidth = frame.width + var scaleHeight = frame.height + + val rotationParam = params["rotation"] + val rotation: Rotation + if (rotationParam is String) { + rotation = Rotation.fromString(rotationParam) + Log.i(TAG, "Rotation: ${rotation.degrees}") + } else { + rotation = Rotation.Rotation0 + Log.i(TAG, "Rotation not specified, defaulting to: ${rotation.degrees}") + } + + val mirrorParam = params["mirror"] + val mirror: Boolean + if (mirrorParam is Boolean) { + mirror = mirrorParam + Log.i(TAG, "Mirror: $mirror") + } else { + mirror = false + Log.i(TAG, "Mirror not specified, defaulting to: $mirror") + } + + val scale = params["scale"] + if (scale != null) { + if (scale is Map<*, *>) { + val scaleWidthDouble = scale["width"] as? Double + val scaleHeightDouble = scale["height"] as? 
Double + if (scaleWidthDouble != null && scaleHeightDouble != null) { + scaleWidth = scaleWidthDouble.toInt() + scaleHeight = scaleHeightDouble.toInt() + } else { + throw Error("Failed to parse values in scale dictionary!") + } + Log.i(TAG, "Target scale: $scaleWidth x $scaleHeight") + } else if (scale is Double) { + scaleWidth = (scale * frame.width).toInt() + scaleHeight = (scale * frame.height).toInt() + Log.i(TAG, "Uniform scale factor applied: $scaleWidth x $scaleHeight") + } else { + throw Error("Scale must be either a map with width and height or a double value!") + } + } + + val crop = params["crop"] as? Map<*, *> + if (crop != null) { + val cropWidthDouble = crop["width"] as? Double + val cropHeightDouble = crop["height"] as? Double + val cropXDouble = crop["x"] as? Double + val cropYDouble = crop["y"] as? Double + if (cropWidthDouble != null && cropHeightDouble != null && cropXDouble != null && cropYDouble != null) { + cropWidth = cropWidthDouble.toInt() + cropHeight = cropHeightDouble.toInt() + cropX = cropXDouble.toInt() + cropY = cropYDouble.toInt() + Log.i(TAG, "Target size: $cropWidth x $cropHeight") + } else { + throw Error("Failed to parse values in crop dictionary!") + } + } else { + if (scale != null) { + val aspectRatio = frame.width.toDouble() / frame.height.toDouble() + val targetAspectRatio = scaleWidth.toDouble() / scaleHeight.toDouble() + + if (aspectRatio > targetAspectRatio) { + cropWidth = (frame.height * targetAspectRatio).toInt() + cropHeight = frame.height + } else { + cropWidth = frame.width + cropHeight = (frame.width / targetAspectRatio).toInt() + } + cropX = (frame.width / 2) - (cropWidth / 2) + cropY = (frame.height / 2) - (cropHeight / 2) + Log.i(TAG, "Cropping to $cropWidth x $cropHeight at ($cropX, $cropY)") + } else { + Log.i(TAG, "Both scale and crop are null, using Frame's original dimensions.") + } + } + val image = frame.image - detector.detectLivestreamFrame(image) + + if (image.format != ImageFormat.YUV_420_888) { + 
throw Error("Frame has invalid PixelFormat! Only YUV_420_888 is supported. Did you set pixelFormat=\"yuv\"?") + } + + val resized = resizeConvert.resize( + image, + cropX, cropY, + cropWidth, cropHeight, + scaleWidth, scaleHeight, + rotation.degrees, + mirror, + PixelFormat.RGB.ordinal, + DataType.UINT8.ordinal + ) + + // The native side rotates after scaling, and a 90/270 degree rotation swaps width and height, + // so the MPImage has to be described with the rotated dimensions. + val swapsDimensions = rotation == Rotation.Rotation90 || rotation == Rotation.Rotation270 + val outputWidth = if (swapsDimensions) scaleHeight else scaleWidth + val outputHeight = if (swapsDimensions) scaleWidth else scaleHeight + + val mpImage = + ByteBufferImageBuilder(resized, outputWidth, outputHeight, MPImage.IMAGE_FORMAT_RGB).build() + + detector.detectLivestreamFrame(mpImage) return true } + + private enum class PixelFormat { + // Integer-Values (ordinals) to be in sync with ResizeConvert.h + RGB, + BGR, + ARGB, + RGBA, + BGRA, + ABGR; + + companion object { + fun fromString(string: String): PixelFormat = + when (string) { + "rgb" -> RGB + "rgba" -> RGBA + "argb" -> ARGB + "bgra" -> BGRA + "bgr" -> BGR + "abgr" -> ABGR + else -> throw Error("Invalid PixelFormat! ($string)") + } + } + } + + private enum class DataType { + // Integer-Values (ordinals) to be in sync with ResizeConvert.h + UINT8, + FLOAT32; + + companion object { + fun fromString(string: String): DataType = + when (string) { + "uint8" -> UINT8 + "float32" -> FLOAT32 + else -> throw Error("Invalid DataType! 
($string)") + } + } + } +} + +private enum class Rotation(val degrees: Int) { + Rotation0(0), + Rotation90(90), + Rotation180(180), + Rotation270(270); + + companion object { + fun fromString(value: String): Rotation = + when (value) { + "0deg" -> Rotation0 + "90deg" -> Rotation90 + "180deg" -> Rotation180 + "270deg" -> Rotation270 + else -> throw Error("Invalid rotation value! 
($value)") + } + } } diff --git a/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionModule.kt b/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionModule.kt index 34e5839..abc6798 100644 --- a/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionModule.kt +++ b/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectionModule.kt @@ -7,11 +7,13 @@ import com.facebook.react.bridge.ReactContextBaseJavaModule import com.facebook.react.bridge.ReactMethod import com.facebook.react.modules.core.DeviceEventManagerModule import com.google.mediapipe.tasks.vision.core.RunningMode +import com.mrousavy.camera.frameprocessor.SharedArray object ObjectDetectorMap { internal val detectorMap = mutableMapOf() } + class ObjectDetectionModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) { @@ -52,14 +54,14 @@ class ObjectDetectionModule(reactContext: ReactApplicationContext) : currentModel = model, runningMode = enumValues().first { it.ordinal == runningMode }, context = reactApplicationContext.applicationContext, - objectDetectorListener = ObjectDetectorListener(this,id) + objectDetectorListener = ObjectDetectorListener(this, id) ) ObjectDetectorMap.detectorMap[id] = helper promise.resolve(id) } @ReactMethod - fun releaseDetector(handle: Int,promise: Promise) { + fun releaseDetector(handle: Int, promise: Promise) { val entry = ObjectDetectorMap.detectorMap[handle] if (entry != null) { entry.clearObjectDetector() @@ -77,6 +79,7 @@ class ObjectDetectionModule(reactContext: ReactApplicationContext) : fun removeListeners(count: Int?) { /* Required for RN built-in Event Emitter Calls. 
*/ } + private fun sendErrorEvent(handle: Int, message: String, code: Int) { val errorArgs = Arguments.makeNativeMap(mapOf("handle" to handle, "message" to message, "code" to code)) diff --git a/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectorHelper.kt b/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectorHelper.kt index 676c1d1..07a770e 100644 --- a/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectorHelper.kt +++ b/android/src/main/java/com/reactnativemediapipe/objectdetection/ObjectDetectorHelper.kt @@ -18,6 +18,7 @@ import android.os.Looper import androidx.core.math.MathUtils.clamp import com.facebook.react.common.annotations.VisibleForTesting import com.google.mediapipe.framework.image.BitmapImageBuilder +import com.google.mediapipe.framework.image.ByteBufferImageBuilder import com.google.mediapipe.framework.image.MPImage import com.google.mediapipe.tasks.core.BaseOptions import com.google.mediapipe.tasks.core.Delegate @@ -145,172 +146,6 @@ class ObjectDetectorHelper( return objectDetector == null } - - private fun yuv420ToBitmapRS(image: Image, context: Context?): Bitmap? { - val rs = RenderScript.create(context) - val script = ScriptIntrinsicYuvToRGB.create( - rs, Element.U8_4(rs) - ) - - // Refer the logic in a section below on how to convert a YUV_420_888 image - // to single channel flat 1D array. For sake of this example I'll abstract it - // as a method. -// val yuvByteArray: ByteArray = yuv420ToByteArray(image) - val planes = image.planes - val yPlane = planes[0].buffer - val uPlane = planes[1].buffer - val vPlane = planes[2].buffer - - val ySize = yPlane.remaining() - val uSize = uPlane.remaining() - val vSize = vPlane.remaining() - - // Assuming YUV_420_888 format, which means chroma planes have half the width and height of the luma plane. 
- val width = image.width - val height = image.height - - // Temporary storage for the YUV data - val yuvBytes = ByteArray(ySize + uSize + vSize) - - // Copy the YUV data into yuvBytes - yPlane.get(yuvBytes, 0, ySize) - uPlane.get(yuvBytes, ySize, uSize) - vPlane.get(yuvBytes, ySize + uSize, vSize) - - val yuvType: Type.Builder = Type.Builder(rs, Element.U8(rs)) - .setX(yuvBytes.size) - val `in` = Allocation.createTyped( - rs, yuvType.create(), Allocation.USAGE_SCRIPT - ) - val rgbaType: Type.Builder = Type.Builder(rs, Element.RGBA_8888(rs)) - .setX(image.width) - .setY(image.height) - val out = Allocation.createTyped( - rs, rgbaType.create(), Allocation.USAGE_SCRIPT - ) - - if (image.format != ImageFormat.YUV_420_888) { - throw IllegalArgumentException("Only YUV_420_888 format can be processed.") - } - - // The allocations above "should" be cached if you are going to perform - // repeated conversion of YUV_420_888 to Bitmap. - `in`.copyFrom(yuvBytes) - script.setInput(`in`) - script.forEach(out) - val bitmap = Bitmap.createBitmap( - image.width, image.height, Bitmap.Config.ARGB_8888 - ) - out.copyTo(bitmap) - return bitmap - } - - private fun yuv420ToBitmap(image: Image): Bitmap? { - require(image.format == ImageFormat.YUV_420_888) { "Invalid image format" } - val imageWidth = image.width - val imageHeight = image.height - // ARGB array needed by Bitmap static factory method I use below. - val argbArray = IntArray(imageWidth * imageHeight) - val yBuffer = image.planes[0].buffer - yBuffer.position(0) - - // A YUV Image could be implemented with planar or semi planar layout. - // A planar YUV image would have following structure: - // YYYYYYYYYYYYYYYY - // ................ - // UUUUUUUU - // ........ - // VVVVVVVV - // ........ - // - // While a semi-planar YUV image would have layout like this: - // YYYYYYYYYYYYYYYY - // ................ - // UVUVUVUVUVUVUVUV <-- Interleaved UV channel - // ................ 
- // This is defined by row stride and pixel strides in the planes of the - // image. - - // Plane 1 is always U & plane 2 is always V - // https://developer.android.com/reference/android/graphics/ImageFormat#YUV_420_888 - val uBuffer = image.planes[1].buffer - uBuffer.position(0) - val vBuffer = image.planes[2].buffer - vBuffer.position(0) - - // The U/V planes are guaranteed to have the same row stride and pixel - // stride. - val yRowStride = image.planes[0].rowStride - val yPixelStride = image.planes[0].pixelStride - val uvRowStride = image.planes[1].rowStride - val uvPixelStride = image.planes[1].pixelStride - var r: Int - var g: Int - var b: Int - var yValue: Int - var uValue: Int - var vValue: Int - for (y in 0 until imageHeight) { - for (x in 0 until imageWidth) { - val yIndex = y * yRowStride + x * yPixelStride - // Y plane should have positive values belonging to [0...255] - yValue = yBuffer[yIndex].toInt() and 0xff - val uvx = x / 2 - val uvy = y / 2 - // U/V Values are subsampled i.e. each pixel in U/V chanel in a - // YUV_420 image act as chroma value for 4 neighbouring pixels - val uvIndex = uvy * uvRowStride + uvx * uvPixelStride - - // U/V values ideally fall under [-0.5, 0.5] range. To fit them into - // [0, 255] range they are scaled up and centered to 128. - // Operation below brings U/V values to [-128, 127]. - uValue = (uBuffer[uvIndex].toInt() and 0xff) - 128 - vValue = (vBuffer[uvIndex].toInt() and 0xff) - 128 - - // Compute RGB values per formula above. - r = (yValue + 1.370705f * vValue).toInt() - g = (yValue - 0.698001f * vValue - 0.337633f * uValue).toInt() - b = (yValue + 1.732446f * uValue).toInt() - r = clamp(r, 0, 255) - g = clamp(g, 0, 255) - b = clamp(b, 0, 255) - - // Use 255 for alpha value, no transparency. 
ARGB values are - // positioned in each byte of a single 4 byte integer - // [AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB] - val argbIndex = y * imageWidth + x - argbArray[argbIndex] = 255 shl 24 or (r and 255 shl 16) or (g and 255 shl 8) or (b and 255) - } - } - if (argbArray.size != imageHeight * imageWidth) { - throw IllegalArgumentException("Byte array length must be a multiple of 4") - } - val bitmap = Bitmap.createBitmap(imageWidth, imageHeight, Bitmap.Config.ARGB_8888) - bitmap.setPixels(argbArray, 0, imageWidth, 0, 0, imageWidth, imageHeight) - return bitmap - } - -// private fun toBitmap(image: Image): Bitmap? { -// try { -// return yuv420ToBitmap(image) -// -//// val planes = image.planes -//// val buffer = planes[0].buffer -//// val pixelStride = planes[0].pixelStride -//// val rowStride = planes[0].rowStride -//// val rowPadding = rowStride - pixelStride * image.width -//// val bitmap = Bitmap.createBitmap( -//// image.width + rowPadding / pixelStride, -//// image.height, Bitmap.Config.ARGB_8888 -//// ) -//// bitmap.copyPixelsFromBuffer(buffer) -//// return bitmap -// } catch (e: Exception) { -// objectDetectorListener?.onError(error = e.toString(), code = 666) -// -// } -// } - // Accepts the URI for a video file loaded from the user's gallery and attempts to run // object detection inference on the video. This process will evaluate every frame in // the video and attach the results to a bundle that will be returned. @@ -400,7 +235,7 @@ class ObjectDetectorHelper( // Runs object detection on live streaming cameras frame-by-frame and returns the results // asynchronously to the caller. 
- fun detectLivestreamFrame(image: Image) { + fun detectLivestreamFrame(mpImage: MPImage) { if (runningMode != RunningMode.LIVE_STREAM) { throw IllegalArgumentException( @@ -410,13 +245,6 @@ class ObjectDetectorHelper( val frameTime = SystemClock.uptimeMillis() - // Convert the input Bitmap object to an MPImage object to run inference -// val bitmap = toBitmap(image) -// val bitmap = yuv420ToBitmap(image) - val bitmap = yuv420ToBitmapRS(image, context) -// val mpImage = MediaImageBuilder(image).build() - val mpImage = BitmapImageBuilder(bitmap).build() - detectAsync(mpImage, frameTime) } diff --git a/android/src/main/java/com/reactnativemediapipe/shared/ResizeConvert.kt b/android/src/main/java/com/reactnativemediapipe/shared/ResizeConvert.kt new file mode 100644 index 0000000..9ca38d7 --- /dev/null +++ b/android/src/main/java/com/reactnativemediapipe/shared/ResizeConvert.kt @@ -0,0 +1,39 @@ +package com.reactnativemediapipe.shared + +import android.media.Image +import androidx.annotation.Keep +import com.facebook.jni.HybridData +import com.facebook.jni.annotations.DoNotStrip +import java.nio.ByteBuffer + +class ResizeConvert { + @DoNotStrip + @Keep + private val mHybridData: HybridData + init { + mHybridData = initHybrid() + } + private external fun initHybrid(): HybridData + + companion object { + // Load the native library once, shared by all instances + init { + System.loadLibrary("ResizeConvertLib") + } + } + + // Native methods are instance methods now + external fun resize( + image: Image, + cropX: Int, + cropY: Int, + cropWidth: Int, + cropHeight: Int, + scaleWidth: Int, + scaleHeight: Int, + rotationDegrees: Int, + mirror: Boolean, + pixelFormat: Int, + dataType: Int + ): ByteBuffer +} diff --git a/examples/objectdetection/src/App.tsx b/examples/objectdetection/src/App.tsx index 1698608..df280d2 100644 --- a/examples/objectdetection/src/App.tsx +++ b/examples/objectdetection/src/App.tsx @@ -64,9 +64,10 @@ export default function App(): React.ReactElement | 
null { const frameProcessor = useObjectDetection( (results) => { - console.log(results); + console.log("inference time", results.inferenceTime); const firstResult = results.results[0]; const detections = firstResult?.detections ?? []; + console.log("detections", JSON.stringify(detections)); setObjectFrames( detections.map((detection) => { return { diff --git a/libyuv b/libyuv new file mode 160000 index 0000000..488a2af --- /dev/null +++ b/libyuv @@ -0,0 +1 @@ +Subproject commit 488a2af021e3e7473f083a9435b1472c0d411f3d diff --git a/package.json b/package.json index 445f7b8..8cbda09 100644 --- a/package.json +++ b/package.json @@ -29,8 +29,9 @@ "test": "jest", "typecheck": "tsc --noEmit", "lint": "eslint \"**/*.{js,ts,tsx}\"", - "clean": "del-cli android/build example/android/build example/android/app/build example/ios/build lib", - "prepare": "bob build", + "clean": "del-cli android/build examples/*/android/build examples/*/android/app/build examples/*/ios/build lib", + "prepare": "git submodule update --init --recursive && bob build", + "update-submodule": "git submodule update --remote --merge", "release": "release-it" }, "keywords": [ @@ -69,7 +70,7 @@ "react": "18.2.0", "react-native": "0.73.4", "react-native-builder-bob": "^0.23.2", - "react-native-vision-camera": "^3.8.2", + "react-native-vision-camera": "^3.9.2", "react-native-worklets-core": "^0.3.0", "release-it": "^15.0.0", "turbo": "^1.10.7", @@ -88,14 +89,12 @@ }, "workspaces": [ "docsite", - "example", "examples/*" ], "packageManager": "yarn@3.6.1", "jest": { "preset": "react-native", "modulePathIgnorePatterns": [ - "/example/node_modules", "/examples/objectdetection/node_modules", "/lib/" ] diff --git a/src/objectDetection/index.ts b/src/objectDetection/index.ts index 60799b1..0516a7e 100644 --- a/src/objectDetection/index.ts +++ b/src/objectDetection/index.ts @@ -93,6 +93,7 @@ export interface ObjectDetectionOptions { threshold: number; maxResults: number; delegate: Delegate; + resize: { scale: 
number; aspect: "preserve" | "default" | number }; } export interface ObjectDetectionCallbacks { onResults: (result: ResultBundleMap) => void; @@ -144,7 +145,7 @@ export function useObjectDetection( .createDetector( options?.threshold ?? 0.5, options?.maxResults ?? 3, - options?.delegate ?? Delegate.CPU, + options?.delegate ?? Delegate.GPU, model, runningMode ) @@ -178,8 +179,15 @@ export function useObjectDetection( const frameProcessor = useFrameProcessor( (frame) => { "worklet"; + plugin?.call(frame, { detectorHandle, + scale: { + width: frame.width * 0.5, + height: frame.height * 0.5, + }, + pixelFormat: "rgb", + dataType: "uint8", }); }, [detectorHandle] diff --git a/tsconfig.build.json b/tsconfig.build.json index ed83890..b412bff 100644 --- a/tsconfig.build.json +++ b/tsconfig.build.json @@ -1,7 +1,6 @@ { "extends": "./tsconfig", "exclude": [ - "example", "docsite" ] } \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json index a3f369d..95e5967 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -14,6 +14,5 @@ "babel.config.js", "react-native.config.js", "docsite", - "example", ] } \ No newline at end of file diff --git a/turbo.json b/turbo.json index 331e289..096b0c7 100644 --- a/turbo.json +++ b/turbo.json @@ -8,11 +8,11 @@ "!android/build", "src/*.ts", "src/*.tsx", - "example/package.json", - "example/android", - "!example/android/.gradle", - "!example/android/build", - "!example/android/app/build" + "examples/*/package.json", + "examples/*/android", + "!examples/*/android/.gradle", + "!examples/*/android/build", + "!examples/*/android/app/build" ], "outputs": [] }, @@ -23,12 +23,12 @@ "ios", "src/*.ts", "src/*.tsx", - "example/package.json", - "example/ios", - "!example/ios/build", - "!example/ios/Pods" + "examples/*/package.json", + "examples/*/ios", + "!examples/*/ios/build", + "!examples/*/ios/Pods" ], "outputs": [] } } -} +} \ No newline at end of file diff --git a/yarn.lock b/yarn.lock index 7f49149..2fa7c97 100644 --- a/yarn.lock +++ 
b/yarn.lock @@ -17443,7 +17443,7 @@ __metadata: react: 18.2.0 react-native: 0.73.4 react-native-builder-bob: ^0.23.2 - react-native-vision-camera: ^3.8.2 + react-native-vision-camera: ^3.9.2 react-native-worklets-core: ^0.3.0 release-it: ^15.0.0 turbo: ^1.10.7