Character Remover from Video in Next.js

Eugene Musebe

Introduction

This article demonstrates how a human character can be removed from a video using Next.js and TensorFlow's BodyPix model.

Codesandbox

Check the sandbox demo on CodeSandbox.

You can also find the full project in the GitHub repository.

Prerequisites

Entry-level JavaScript and React/Next.js knowledge.

Setting Up the Sample Project

In your preferred folder, create a new Next.js app by running npx create-next-app removePerson in your terminal, then head into the project's root directory: cd removePerson.

We will begin by setting up the Cloudinary integration in our Next.js backend. We will use it to handle the media file upload to Cloudinary.

Create your own Cloudinary account and log into it. Your account dashboard contains the environment variable keys that are necessary for the Cloudinary integration in our project.

In your project directory, start by adding Cloudinary to your project dependencies: npm install cloudinary. Then create a new file named .env and paste in the following, filling in the blanks with the environment variables from your Cloudinary dashboard.

CLOUDINARY_CLOUD_NAME =
CLOUDINARY_API_KEY =
CLOUDINARY_API_SECRET =

Restart your project: npm run dev.

In the pages/api folder, create a new file named upload.js. Start by requiring the Cloudinary library and configuring it with your environment keys.

var cloudinary = require("cloudinary").v2;

cloudinary.config({
  cloud_name: process.env.CLOUDINARY_CLOUD_NAME,
  api_key: process.env.CLOUDINARY_API_KEY,
  api_secret: process.env.CLOUDINARY_API_SECRET,
});

Create a handler function to handle the POST request. The function receives the media file data, uploads it to Cloudinary, captures the uploaded file's Cloudinary URL, and sends it back as the response.

export default async function handler(req, res) {
  if (req.method === "POST") {
    let url = "";
    try {
      // Base64-encoded video string sent from the front end
      let fileStr = req.body.data;
      const uploadedResponse = await cloudinary.uploader.upload_large(fileStr, {
        resource_type: "video",
        chunk_size: 6000000,
      });
      url = uploadedResponse.url;
    } catch (error) {
      res.status(500).json({ error: "Something went wrong" });
      return;
    }

    res.status(200).json({ data: url });
  }
}

The code above concludes our backend.
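One caveat worth noting: the handler receives the whole video as a base64 string in the request body, which can exceed Next.js's default 1 MB limit for API route bodies. As a minimal sketch (the 100 MB cap is an assumption; pick a value that fits your clips), you can raise the limit by exporting a config object from the same upload.js file:

// Raise the default 1 MB body size limit for this API route
// (the exact value below is an assumption, not part of the original project)
export const config = {
  api: {
    bodyParser: {
      sizeLimit: "100mb",
    },
  },
};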

Before we proceed to the front end, we need to install the necessary dependencies: npm install @tensorflow-models/body-pix @tensorflow/tfjs.

In pages/index.js, start by including the necessary imports.

import React, { useRef, useEffect, useState } from "react";
import * as bodyPix from "@tensorflow-models/body-pix";
import * as tf from "@tensorflow/tfjs";

Our bodyPix model configuration loads a MobileNetV1 architecture. A smaller multiplier, such as 0.75, is preferable for lower-end GPUs; here we use 1 for better accuracy. In general, higher multiplier and quantBytes values, and a lower outputStride, improve segmentation accuracy at the cost of processing speed.

const modelConfig = {
  architecture: "MobileNetV1",
  outputStride: 16,
  multiplier: 1,
  quantBytes: 4,
};

Declare the following variables. We will use them as we move on:

let outputContext, inputVideo, temporaryContext, temporaryCanvas, outputCanvas;

const processedVid = useRef();
const rawVideo = useRef();
const startBtn = useRef();
const closeBtn = useRef();
const videoDownloadRef = useRef();
const [model, setModel] = useState(null);
const [link, setLink] = useState("");
const [blob, setBlob] = useState();

Next, let's configure the segmentation model.

const segmentationConfig = {
  internalResolution: "full",
  segmentationThreshold: 0.1,
  scoreThreshold: 0.4,
  flipHorizontal: true,
  maxDetections: 1,
};

In the code above, a higher internalResolution makes the model more accurate at the cost of slower prediction times. The segmentationThreshold is the minimum confidence required before a pixel is considered part of a human body, while the scoreThreshold is the minimum confidence required to recognize a person at all.

Next, we load the bodyPix model with our configuration inside a useEffect hook:

useEffect(() => {
  if (model) return;
  bodyPix.load(modelConfig).then((m) => {
    setModel(m);
  });
}, []);

Create a startVideo function that triggers the video element to play:

const startVideo = async () => {
  console.log("playing video...");
  inputVideo = rawVideo.current;
  // Start playback, then begin processing frames
  await inputVideo.play().then(() => {
    transform();
  });
};

The function above also triggers the transform function.

let transform = () => {
  outputCanvas = processedVid.current;
  outputContext = outputCanvas.getContext("2d");

  // Temporary canvas used to read pixel data from the raw video frames
  temporaryCanvas = document.createElement("canvas");
  temporaryCanvas.setAttribute("width", 800);
  temporaryCanvas.setAttribute("height", 450);

  temporaryContext = temporaryCanvas.getContext("2d");

  computeFrame();
};

Here we assign one variable to the DOM canvas element, create a temporary canvas for another, and then trigger the computeFrame function.

let recordingStarted = false;

let computeFrame = () => {
  // Draw the current video frame onto the temporary canvas
  temporaryContext.drawImage(
    inputVideo,
    0,
    0,
    inputVideo.videoWidth,
    inputVideo.videoHeight
  );

  // Read the raw pixel data for this frame
  let frame = temporaryContext.getImageData(
    0,
    0,
    inputVideo.videoWidth,
    inputVideo.videoHeight
  );

  model.segmentPerson(frame, segmentationConfig).then((segmentation) => {
    let output_img = outputContext.getImageData(
      0,
      0,
      inputVideo.videoWidth,
      inputVideo.videoHeight
    );

    for (let x = 0; x < inputVideo.videoWidth; x++) {
      for (let y = 0; y < inputVideo.videoHeight; y++) {
        let n = x + y * inputVideo.videoWidth;
        // Copy only the pixels that are NOT part of a person
        if (segmentation.data[n] == 0) {
          output_img.data[n * 4] = frame.data[n * 4]; // R
          output_img.data[n * 4 + 1] = frame.data[n * 4 + 1]; // G
          output_img.data[n * 4 + 2] = frame.data[n * 4 + 2]; // B
          output_img.data[n * 4 + 3] = frame.data[n * 4 + 3]; // A
        }
      }
    }
    outputContext.putImageData(output_img, 0, 0);
    // Schedule the next frame
    setTimeout(computeFrame, 0);
  });

  // Start recording the output canvas once, for a maximum of 10 seconds
  if (!recordingStarted) {
    recordingStarted = true;
    const chunks = [];
    const stream = processedVid.current.captureStream();
    const rec = new MediaRecorder(stream);
    rec.ondataavailable = (e) => chunks.push(e.data);
    rec.onstop = (e) => setBlob(new Blob(chunks, { type: "video/webm" }));
    rec.start();
    setTimeout(() => rec.stop(), 10000);
  }
};

The computeFrame function first draws the current video frame on the temporary canvas using the drawImage method, then uses the getImageData method to read the frame's pixel data. We call the model's segmentPerson method with that image data and our segmentation configuration, and iterate through every pixel using a nested loop, with the index variable n identifying the current pixel. If the pixel is not part of a human, we copy its data from the video frame to the output image; pixels that belong to a human are skipped, which is what removes the character. The index is multiplied by 4 because each pixel occupies four array slots: red, green, blue, and alpha. Finally, we put the result onto the output canvas and schedule the next frame.
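As a purely illustrative example of that indexing (the numbers are arbitrary), the pixel at x = 2, y = 1 in an 800-pixel-wide frame would be addressed like this:

const width = 800;                // inputVideo.videoWidth in our case
const n = 2 + 1 * width;          // 802: the pixel's position in row-major order
const r = frame.data[n * 4];      // index 3208: red channel
const g = frame.data[n * 4 + 1];  // index 3209: green channel
const b = frame.data[n * 4 + 2];  // index 3210: blue channel
const a = frame.data[n * 4 + 3];  // index 3211: alpha channel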

To record our final video, we create an array of chunks and populate it with the processed canvas frames by capturing a media stream from the output canvas with a MediaRecorder; when the recorder stops, the chunks are combined into a blob. The recorder is started only once, guarded by the recordingStarted flag, and stopped after 10 seconds.

The final function required is the uploadVideo function, which converts the blob to base64 format using a FileReader and posts it to the backend for the Cloudinary upload.

1"pages/index"
2
3function readFile(file) {
4 console.log("readFile()=>", file);
5 return new Promise(function (resolve, reject) {
6 let fr = new FileReader();
7
8 fr.onload = function () {
9 resolve(fr.result);
10 };
11
12 fr.onerror = function () {
13 reject(fr);
14 };
15
16 fr.readAsDataURL(file);
17 });
18 }
19
20 const uploadVideo = async (base64) => {
21 console.log("uploading to backend...");
22 await readFile(blob).then((encoded_file) => {
23 try {
24 fetch('/api/upload', {
25 method: 'POST',
26 body: JSON.stringify({ data: encoded_file }),
27 headers: { 'Content-Type': 'application/json' },
28 })
29 .then((response) => response.json())
30 .then((data) => {
31 setLink(data.data);
32 });
33 } catch (error) {
34 console.error(error);
35 }
36 });
37};

Finally, use the code below in your return statement to design your UI.

1"pages/index"
2
3return (
4 <>
5 <div className="container">
6 <div className="header">
7 <h1 className="heading">
8 Remove character from video
9 </h1>
10 </div>
11 <div className="row">
12 <div className="column">
13 <video
14 id="video"
15 width="800px"
16 src="sample.mp4"
17 autoPlay
18 ref={rawVideo}
19 loop
20 muted
21 />
22 </div>
23 <div className="column">
24 {link ?
25 <h4><a href={link}>Get Copy</a></h4>
26 :
27 <img id="loading" width="50" height="30" src="https://mir-s3-cdn-cf.behance.net/project_modules/disp/f1055231234507.564a1d234bfb6.gif" />
28 }
29 <br />
30 <canvas className="display" width={800} height={450} ref={processedVid}></canvas>
31 </div>
32 </div>
33 <div className="buttons">
34 <button className="button" ref={startBtn} onClick={startVideo}>
35 Process Video
36 </button>
37
38 <button className="button" onClick={uploadVideo}>
39 <a ref={videoDownloadRef}>
40 Stop and upload
41 </a>
42 </button>
43 </div>
44 </div>
45 </>
46)

The code above completes the UI. Check the CSS in the GitHub repo.

Our project is complete. Go through the article again and try it out to enjoy the experience.

Eugene Musebe

Software Developer

I’m a full-stack software developer, content creator, and tech community builder based in Nairobi, Kenya. I am addicted to learning new technologies and love working with like-minded people.