Pose Tracking

API Interface of the iOS Framework

Define the holistic pose model variable

private var holisticPoseModel: HolisticPoseModel?

Configure and instantiate the model with HolisticPoseModelBuilder

@interface HolisticPoseModelBuilder : NSObject

- (instancetype _Nonnull)init;

/**
 * \brief Sets the complexity of the used pose landmark model.
 *
 * \param complexity New pose landmark model complexity.
 *                   Higher model complexity corresponds to higher accuracy of
 *                   the landmarks, but also increases inference latency.
 *                   Defaults to \a PoseModelComplexity_Normal.
 *
 * \returns Pointer to the \a HolisticPoseModelBuilder.
 */
- (HolisticPoseModelBuilder* _Nonnull)setPoseModelComplexity:(PoseModelComplexity)complexity;

/**
 * \brief Enables pose landmarks smoothing to reduce jitter between consecutive
 * input frames.
 *
 * \param enable If set to \a true, the solution filters pose landmarks across
 *               different input images to reduce jitter. Defaults to \a true.
 *
 * \returns Pointer to the \a HolisticPoseModelBuilder.
 */
- (HolisticPoseModelBuilder* _Nonnull)enablePoseLandmarksSmoothing:(bool)enable;

/**
 * \brief Enables face landmarks refinement.
 *
 * \param enable If set to \a true, the solution refines face landmark
 *               coordinates around the eyes and lips, and outputs additional
 *               landmarks around the irises.
 *               Defaults to \a false.
 *
 * \returns Pointer to the \a HolisticPoseModelBuilder.
 */
- (HolisticPoseModelBuilder* _Nonnull)enableFaceLandmarksRefinement:(bool)enable;

/**
 * \brief Creates a new instance of \a HolisticPoseModel.
 *
 * \param error Object containing error information if model instantiation fails.
 *
 * \returns Pointer to the new instance of \a HolisticPoseModel if instantiation
 * is successful, \a nil otherwise.
 *
 * \note Model instantiation is a blocking call that can take some time, so it
 * should be performed on a separate serial dispatch queue. This keeps the main
 * queue free and the UI responsive.
 */
- (HolisticPoseModel* _Nullable)build:(NSError* _Nullable* _Nonnull)error;
@end

Example:

do {
    self.holisticPoseModel = try HolisticPoseModelBuilder()
        .build()
} catch {
    fatalError(
        "Failed to instantiate holistic pose model: \(error.localizedDescription)"
    )
}

Model instantiation is a blocking call that can take some time, so it should be performed on a separate serial dispatch queue. This keeps the main queue free and the UI responsive.
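For instance, a minimal sketch of off-main-queue instantiation (the modelQueue label and the loadModel name are illustrative choices, not part of the framework API):

private let modelQueue = DispatchQueue(label: "com.example.holistic-pose-model")

func loadModel() {
    modelQueue.async {
        do {
            // The builder options documented above can be chained before build().
            let model = try HolisticPoseModelBuilder()
                .enablePoseLandmarksSmoothing(true)
                .build()
            // Hand the finished model back to the main queue for UI-related state.
            DispatchQueue.main.async {
                self.holisticPoseModel = model
            }
        } catch {
            NSLog("Failed to instantiate holistic pose model: \(error.localizedDescription)")
        }
    }
}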

Once the model is instantiated, schedule detection tasks with the HolisticPoseModel.detect method

func captureOutput(_ output: AVCaptureOutput,
                   didOutput sampleBuffer: CMSampleBuffer,
                   from connection: AVCaptureConnection) {
    // Skip frames without an image buffer or before the model is ready.
    guard let imageBuffer = sampleBuffer.imageBuffer,
          let holisticPoseModel = holisticPoseModel else {
        return
    }

    do {
        try holisticPoseModel.detect(on: imageBuffer,
                                     at: sampleBuffer.outputPresentationTimeStamp)
    } catch {
        NSLog("Failed to submit holistic pose detection task: \(error.localizedDescription)")
    }
}
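The callback above is AVFoundation's AVCaptureVideoDataOutputSampleBufferDelegate method. A minimal capture setup that feeds it could look like the following sketch; the camera choice, queue label, and configureCapture name are illustrative assumptions, and self is assumed to conform to AVCaptureVideoDataOutputSampleBufferDelegate:

import AVFoundation

let session = AVCaptureSession()
let videoOutput = AVCaptureVideoDataOutput()
let videoQueue = DispatchQueue(label: "com.example.video-capture")

func configureCapture() {
    guard let device = AVCaptureDevice.default(.builtInWideAngleCamera,
                                               for: .video,
                                               position: .front),
          let input = try? AVCaptureDeviceInput(device: device),
          session.canAddInput(input),
          session.canAddOutput(videoOutput) else {
        return
    }
    session.addInput(input)
    // Frames are delivered to captureOutput(_:didOutput:from:) on videoQueue.
    videoOutput.setSampleBufferDelegate(self, queue: videoQueue)
    session.addOutput(videoOutput)
    session.startRunning()
}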

HolisticPoseModel returns its results through the HolisticPoseDelegate

@protocol HolisticPoseDelegate <NSObject>

/**
 * \brief Callback triggered whenever the \a HolisticPoseModel completes the
 * processing of the passed frame.
 *
 * \param model The \a HolisticPoseModel that produced the provided detection.
 * \param detection Optional \a HolisticPose prediction. \a nil corresponds
 *                  to the situation when none of the \a HolisticPose parts
 *                  are detected.
 */
@optional
- (void)holisticPoseModel:(HolisticPoseModel* _Nonnull)model
       didOutputDetection:(HolisticPose* _Nullable)detection;

@end

Example:

func holisticPoseModel(_ model: HolisticPoseModel,
                       didOutputDetection detection: HolisticPose?) {
    cameraPreviewView.draw(detection)
}
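The section does not state which queue the delegate callback arrives on; if it is a background queue, UI work such as drawing should hop to the main queue, as in this hedged variation:

func holisticPoseModel(_ model: HolisticPoseModel,
                       didOutputDetection detection: HolisticPose?) {
    // Dispatch to the main queue in case the delegate is called off it.
    DispatchQueue.main.async {
        self.cameraPreviewView.draw(detection)
    }
}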

Each HolisticPose instance is represented by the following class

@interface HolisticPose: NSObject
/**
 * \brief Collection of 468 face landmarks with normalized coordinates.
 * Reference distribution of the landmarks:
 * https://raw.githubusercontent.com/google/mediapipe/master/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png
 *
 * Depth is predicted relative to the center of the head being the origin.
 * \a nil corresponds to the situation when face detection has confidence lower
 * than the threshold.
 */
@property (readonly, nonatomic) NSArray<Landmark*>* _Nullable faceLandmarks;

/**
 * \brief Collection of 16 pose landmarks with normalized coordinates.
 *
 * Landmarks related to the face and hands are removed in favor of the more
 * accurate dedicated face and hand predictions.
 *
 * Depth is predicted relative to the midpoint of the hips being the origin.
 * \a nil corresponds to the situation when pose detection has confidence lower
 * than the threshold.
 */
@property (readonly, nonatomic) NSArray<Landmark*>* _Nullable mergedPoseLandmarks;

/**
 * \brief Collection of 33 pose landmarks with normalized coordinates.
 *
 * Depth is predicted relative to the midpoint of the hips being the origin.
 * \a nil corresponds to the situation when pose detection has confidence lower
 * than the threshold.
 */
@property (readonly, nonatomic) NSArray<Landmark*>* _Nullable poseLandmarks;

/**
 * \brief Collection of 33 pose landmarks with real-world 3D coordinates in
 * meters, with the origin at the midpoint between the hips.
 *
 * \a nil corresponds to the situation when pose detection has confidence lower
 * than the threshold.
 */
@property (readonly, nonatomic) NSArray<Landmark*>* _Nullable poseWorldLandmarks;

/**
 * \brief Collection of 21 hand landmarks on the left hand (regardless of
 * the actual camera position).
 *
 * Depth is predicted relative to the wrist being the origin.
 * \a nil corresponds to the situation when hand detection has confidence lower
 * than the threshold.
 */
@property (readonly, nonatomic) NSArray<Landmark*>* _Nullable leftHandLandmarks;

/**
 * \brief Collection of 21 hand landmarks on the right hand (regardless of
 * the actual camera position).
 *
 * Depth is predicted relative to the wrist being the origin.
 * \a nil corresponds to the situation when hand detection has confidence lower
 * than the threshold.
 */
@property (readonly, nonatomic) NSArray<Landmark*>* _Nullable rightHandLandmarks;

@end
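As a quick illustration of consuming a detection, the sketch below uses only the collection properties documented above; per-Landmark fields are not covered in this section, so it sticks to presence checks and counts (logDetection is a hypothetical helper):

func logDetection(_ detection: HolisticPose?) {
    guard let detection = detection else {
        NSLog("No holistic pose parts detected")
        return
    }
    // Each property is nil when the corresponding detection confidence is
    // below the threshold; counts match the sizes documented above.
    NSLog("Face landmarks: \(detection.faceLandmarks?.count ?? 0)")            // 468
    NSLog("Pose landmarks: \(detection.poseLandmarks?.count ?? 0)")            // 33
    NSLog("Left hand landmarks: \(detection.leftHandLandmarks?.count ?? 0)")   // 21
    NSLog("Right hand landmarks: \(detection.rightHandLandmarks?.count ?? 0)") // 21
}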
