Skip to content

Commit

Permalink
EEDI3: Add parameter mclip
Browse files Browse the repository at this point in the history
  • Loading branch information
HolyWu committed Dec 11, 2018
1 parent 1eafcfb commit af9a442
Show file tree
Hide file tree
Showing 11 changed files with 1,006 additions and 633 deletions.
351 changes: 237 additions & 114 deletions EEDI3/EEDI3.cpp

Large diffs are not rendered by default.

43 changes: 35 additions & 8 deletions EEDI3/EEDI3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,17 @@
#define MAX_VECTOR_SIZE 512
#include "vectorclass/vectorclass.h"

static void copyMask(const VSFrameRef * src, VSFrameRef * dst, const int plane, const int field_n, const bool dh, const VSAPI * vsapi) noexcept {
const int off = dh ? 0 : field_n;
const int mul = dh ? 1 : 2;

vs_bitblt(vsapi->getWritePtr(dst, plane), vsapi->getStride(dst, plane),
vsapi->getReadPtr(src, plane) + vsapi->getStride(src, plane) * off, vsapi->getStride(src, plane) * mul,
vsapi->getFrameWidth(src, plane), vsapi->getFrameHeight(dst, plane));
}

template<typename T1, typename T2>
static inline void reorder(const T1 * srcp, T2 * _dstp, const int width, const int height, const int srcStride, const int dstStride, const int srcY, const int vectorSize) noexcept {
static inline void prepareLines(const T1 * srcp, T2 * _dstp, const int width, const int height, const int srcStride, const int dstStride, const int srcY, const int vectorSize) noexcept {
for (int y = srcY - 2; y < srcY + 2; y++) {
T2 * VS_RESTRICT dstp = _dstp;

Expand All @@ -25,11 +34,11 @@ static inline void reorder(const T1 * srcp, T2 * _dstp, const int width, const i
}

for (int x = 0; x < width; x++)
dstp[(x + 12) * vectorSize] = line[x];
dstp[(12 + x) * vectorSize] = line[x];

for (int x = 0; x < 12; x++) {
const int srcX = std::max(width - 1 - x, 0);
dstp[(width + x + 12) * vectorSize] = line[srcX];
dstp[(width + 12 + x) * vectorSize] = line[srcX];
}

dstp++;
Expand All @@ -38,17 +47,35 @@ static inline void reorder(const T1 * srcp, T2 * _dstp, const int width, const i
_dstp += dstStride * vectorSize;
}
}

/// Interleaves `vectorSize` consecutive mask rows, starting at row `srcY`, into `dstp`.
///
/// Output layout: sample `x` of the k-th row (k in [0, vectorSize)) is stored at
/// `dstp[x * vectorSize + k]`. Rows at or beyond `height` are mirrored back into the
/// image (`2 * height - 1 - y`), and any resulting negative row index is clamped to 0.
///
/// @param srcp        first byte of the mask plane
/// @param dstp        destination buffer; written at indices [0, width * vectorSize)
/// @param width       number of samples copied per row
/// @param height      number of rows available in `srcp`
/// @param stride      distance in bytes between consecutive rows of `srcp`
/// @param srcY        first row to gather
/// @param vectorSize  number of rows gathered, and the interleave factor of the output
static inline void prepareMask(const uint8_t * srcp, uint8_t * VS_RESTRICT dstp, const int width, const int height, const int stride, const int srcY, const int vectorSize) noexcept {
    for (int lane = 0; lane < vectorSize; lane++) {
        const int y = srcY + lane;

        // Reflect rows past the bottom edge, then guard against a negative index.
        const int mirrored = (y >= height) ? (height * 2 - 1 - y) : y;
        const int row = std::max(mirrored, 0);

        const uint8_t * rowp = srcp + stride * row;
        uint8_t * outp = dstp + lane;

        for (int x = 0; x < width; x++)
            outp[x * vectorSize] = rowp[x];
    }
}
#endif

// State for one EEDI3 filter instance: clip node pointers, scalar parameters,
// values derived from them, tables keyed by std::thread::id that map to raw
// buffers, and the `filter` function pointer.
// NOTE(review): several members are declared twice below (e.g. node/sclip,
// vectorSize, tline, filter). This looks like the removed and added lines of a
// diff rendered together — confirm against the actual header before relying on it.
struct EEDI3Data {
VSNodeRef * node, * sclip;
VSNodeRef * node, * sclip, * mclip;
VSVideoInfo vi;
int field, nrad, mdis, vcheck;
bool dh, process[3], ucubic, cost3;
float alpha, beta, gamma, vthresh2;
int peak, vectorSize, tpitch, mdisVector, tpitchVector, alignment;
int vectorSize, alignment, tpitch, tpitchVector, mdisVector, peak;
float remainingWeight, rcpVthresh0, rcpVthresh1, rcpVthresh2;
// Tables keyed by thread id; each maps a thread to a raw buffer pointer.
std::unordered_map<std::thread::id, float *> ccosts, pcosts, tline;
std::unordered_map<std::thread::id, int *> srcVector, pbackt, fpath, dmap;
void (*filter)(const VSFrameRef *, const VSFrameRef *, VSFrameRef *, VSFrameRef **, const int, const EEDI3Data *, const VSAPI *);
std::unordered_map<std::thread::id, int *> srcVector, pbackt, fpath, dmap, tline;
std::unordered_map<std::thread::id, uint8_t *> mskVector;
std::unordered_map<std::thread::id, bool *> bmask;
std::unordered_map<std::thread::id, float *> ccosts, pcosts;
// Function pointer invoked with frame pointers, an int, this data struct and the API table.
void (*filter)(const VSFrameRef *, const VSFrameRef *, const VSFrameRef *, VSFrameRef *, VSFrameRef *, VSFrameRef **, const int, const EEDI3Data * const VS_RESTRICT, const VSAPI *);
};
128 changes: 64 additions & 64 deletions EEDI3/EEDI3CL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@
#include "EEDI3CL.hpp"
#include "EEDI3CL.cl"

template<typename T> extern void filterCL_sse2(const VSFrameRef *, const VSFrameRef *, VSFrameRef *, VSFrameRef **, const int, const EEDI3CLData *, const VSAPI *);
template<typename T> extern void filterCL_sse2(const VSFrameRef *, const VSFrameRef *, VSFrameRef *, VSFrameRef **, const int, const EEDI3CLData * const VS_RESTRICT, const VSAPI *);

template<typename T>
static void filterCL_c(const VSFrameRef * src, const VSFrameRef * scp, VSFrameRef * dst, VSFrameRef ** pad, const int field_n, const EEDI3CLData * d, const VSAPI * vsapi) {
static void filterCL_c(const VSFrameRef * src, const VSFrameRef * scp, VSFrameRef * dst, VSFrameRef ** pad,
const int field_n, const EEDI3CLData * const VS_RESTRICT d, const VSAPI * vsapi) {
for (int plane = 0; plane < d->vi.format->numPlanes; plane++) {
if (d->process[plane]) {
copyPad<T>(src, pad[plane], plane, 1 - field_n, d->dh, vsapi);
Expand All @@ -58,7 +59,7 @@ static void filterCL_c(const VSFrameRef * src, const VSFrameRef * scp, VSFrameRe
int * pbackt = d->pbackt.at(threadId) + d->mdis;
int * fpath = d->fpath.at(threadId);
int * _dmap = d->dmap.at(threadId);
float * tline = d->tline.at(threadId);
int * tline = d->tline.at(threadId);

const size_t globalWorkSize[] = { static_cast<size_t>((dstWidth + 63) & -64), 1 };
constexpr size_t localWorkSize[] = { 64, 1 };
Expand Down Expand Up @@ -96,7 +97,6 @@ static void filterCL_c(const VSFrameRef * src, const VSFrameRef * scp, VSFrameRe

const int umax = std::min({ x, dstWidth - 1 - x, d->mdis });
const int umax2 = std::min({ x - 1, dstWidth - x, d->mdis });

for (int u = -umax; u <= umax; u++) {
int idx = 0;
float bval = FLT_MAX;
Expand All @@ -121,7 +121,7 @@ static void filterCL_c(const VSFrameRef * src, const VSFrameRef * scp, VSFrameRe
for (int x = dstWidth - 2; x >= 0; x--)
fpath[x] = pbackt[d->tpitch * x + fpath[x + 1]];

interpolate<T>(src3p, src1p, src1n, src3n, fpath, dmap, dstp, dstWidth, d->ucubic, d->peak);
interpolate<T>(src3p, src1p, src1n, src3n, nullptr, fpath, dmap, dstp, dstWidth, d->ucubic, d->peak);

queue.enqueue_unmap_buffer(_ccosts, ccosts - d->mdis);
}
Expand All @@ -131,7 +131,7 @@ static void filterCL_c(const VSFrameRef * src, const VSFrameRef * scp, VSFrameRe
const T * scpp = nullptr;
if (d->sclip)
scpp = reinterpret_cast<const T *>(vsapi->getReadPtr(scp, plane)) + dstStride * field_n;
T * dstp = _dstp + dstStride * field_n;;
T * dstp = _dstp + dstStride * field_n;

vCheck<T>(srcp, scpp, dstp, _dmap, tline, field_n, dstWidth, srcHeight, srcStride, dstStride, d->vcheck, d->vthresh2, d->rcpVthresh0, d->rcpVthresh1, d->rcpVthresh2, d->peak);
}
Expand Down Expand Up @@ -217,7 +217,7 @@ static const VSFrameRef *VS_CC eedi3clGetFrame(int n, int activationReason, void
d->dmap.emplace(threadId, dmap);

if (d->vcheck) {
float * tline = new (std::nothrow) float[d->vi.width];
int * tline = new (std::nothrow) int[d->vi.width];
if (!tline)
throw std::string{ "malloc failure (tline)" };
d->tline.emplace(threadId, tline);
Expand Down Expand Up @@ -486,6 +486,53 @@ void VS_CC eedi3clCreate(const VSMap *in, VSMap *out, void *userData, VSCore *co
return;
}

d->gpu = compute::system::default_device();
if (device > -1)
d->gpu = compute::system::devices().at(device);
d->ctx = compute::context{ d->gpu };

if (!!vsapi->propGetInt(in, "info", 0, &err)) {
vsapi->freeNode(d->sclip);

std::string text{ "=== Device Info ===\n" };
text += "Name: " + d->gpu.get_info<CL_DEVICE_NAME>() + "\n";
text += "Vendor: " + d->gpu.get_info<CL_DEVICE_VENDOR>() + "\n";
text += "Profile: " + d->gpu.get_info<CL_DEVICE_PROFILE>() + "\n";
text += "Version: " + d->gpu.get_info<CL_DEVICE_VERSION>() + "\n";
text += "Global Memory Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_GLOBAL_MEM_SIZE>() / 1024 / 1024) + " MB\n";
text += "Local Memory Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_LOCAL_MEM_SIZE>() / 1024) + " KB\n";
text += "Local Memory Type: " + std::string{ d->gpu.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() == CL_LOCAL ? "CL_LOCAL" : "CL_GLOBAL" } +"\n";
text += "Image Support: " + std::string{ d->gpu.get_info<CL_DEVICE_IMAGE_SUPPORT>() ? "CL_TRUE" : "CL_FALSE" } +"\n";
text += "1D Image Max Buffer Size: " + std::to_string(d->gpu.get_info<size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE)) + "\n";
text += "2D Image Max Width: " + std::to_string(d->gpu.get_info<CL_DEVICE_IMAGE2D_MAX_WIDTH>()) + "\n";
text += "2D Image Max Height: " + std::to_string(d->gpu.get_info<CL_DEVICE_IMAGE2D_MAX_HEIGHT>()) + "\n";
text += "Max Constant Arguments: " + std::to_string(d->gpu.get_info<CL_DEVICE_MAX_CONSTANT_ARGS>()) + "\n";
text += "Max Constant Buffer Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE>() / 1024) + " KB\n";
text += "Max Work-group Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>()) + "\n";
const auto MAX_WORK_ITEM_SIZES = d->gpu.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
text += "Max Work-item Sizes: (" + std::to_string(MAX_WORK_ITEM_SIZES[0]) + ", " + std::to_string(MAX_WORK_ITEM_SIZES[1]) + ", " + std::to_string(MAX_WORK_ITEM_SIZES[2]) + ")";

VSMap * args = vsapi->createMap();
vsapi->propSetNode(args, "clip", d->node, paReplace);
vsapi->freeNode(d->node);
vsapi->propSetData(args, "text", text.c_str(), -1, paReplace);

VSMap * ret = vsapi->invoke(vsapi->getPluginById("com.vapoursynth.text", core), "Text", args);
if (vsapi->getError(ret)) {
vsapi->setError(out, vsapi->getError(ret));
vsapi->freeMap(args);
vsapi->freeMap(ret);
return;
}

d->node = vsapi->propGetNode(ret, "clip", 0, nullptr);
vsapi->freeMap(args);
vsapi->freeMap(ret);
vsapi->propSetNode(out, "clip", d->node, paReplace);
vsapi->freeNode(d->node);
return;
}

if (d->field > 1) {
if (d->vi.numFrames > INT_MAX / 2)
throw std::string{ "resulting clip is too long" };
Expand All @@ -504,7 +551,7 @@ void VS_CC eedi3clCreate(const VSMap *in, VSMap *out, void *userData, VSCore *co

if (d->vcheck && d->sclip) {
if (!isSameFormat(vsapi->getVideoInfo(d->sclip), &d->vi))
throw std::string{ "sclip must have the same dimensions as main clip and be the same format" };
throw std::string{ "sclip's format doesn't match" };

if (vsapi->getVideoInfo(d->sclip)->numFrames != d->vi.numFrames)
throw std::string{ "sclip's number of frames doesn't match" };
Expand All @@ -521,6 +568,8 @@ void VS_CC eedi3clCreate(const VSMap *in, VSMap *out, void *userData, VSCore *co
d->dmap.reserve(numThreads);
d->tline.reserve(numThreads);

selectFunctions(opt, d.get());

if (d->vi.format->sampleType == stInteger) {
d->peak = (1 << d->vi.format->bitsPerSample) - 1;
const float scale = d->peak / 255.f;
Expand All @@ -535,62 +584,20 @@ void VS_CC eedi3clCreate(const VSMap *in, VSMap *out, void *userData, VSCore *co
vthresh1 /= 255.f;
}

selectFunctions(opt, d.get());

d->tpitch = d->mdis * 2 + 1;
d->mdisVector = d->mdis * d->vectorSize;
d->tpitchVector = d->tpitch * d->vectorSize;
d->mdisVector = d->mdis * d->vectorSize;

d->rcpVthresh0 = 1.f / vthresh0;
d->rcpVthresh1 = 1.f / vthresh1;
d->rcpVthresh2 = 1.f / d->vthresh2;

d->gpu = compute::system::default_device();
if (device > -1)
d->gpu = compute::system::devices().at(device);
d->ctx = compute::context{ d->gpu };

if (!!vsapi->propGetInt(in, "info", 0, &err)) {
vsapi->freeNode(d->sclip);

std::string text{ "=== Device Info ===\n" };
text += "Name: " + d->gpu.get_info<CL_DEVICE_NAME>() + "\n";
text += "Vendor: " + d->gpu.get_info<CL_DEVICE_VENDOR>() + "\n";
text += "Profile: " + d->gpu.get_info<CL_DEVICE_PROFILE>() + "\n";
text += "Version: " + d->gpu.get_info<CL_DEVICE_VERSION>() + "\n";
text += "Global Memory Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_GLOBAL_MEM_SIZE>() / 1024 / 1024) + " MB\n";
text += "Local Memory Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_LOCAL_MEM_SIZE>() / 1024) + " KB\n";
text += "Local Memory Type: " + std::string{ d->gpu.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() == CL_LOCAL ? "CL_LOCAL" : "CL_GLOBAL" } +"\n";
text += "Image Support: " + std::string{ d->gpu.get_info<CL_DEVICE_IMAGE_SUPPORT>() ? "CL_TRUE" : "CL_FALSE" } +"\n";
text += "1D Image Max Buffer Size: " + std::to_string(d->gpu.get_info<size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE)) + "\n";
text += "2D Image Max Width: " + std::to_string(d->gpu.get_info<CL_DEVICE_IMAGE2D_MAX_WIDTH>()) + "\n";
text += "2D Image Max Height: " + std::to_string(d->gpu.get_info<CL_DEVICE_IMAGE2D_MAX_HEIGHT>()) + "\n";
text += "Max Constant Arguments: " + std::to_string(d->gpu.get_info<CL_DEVICE_MAX_CONSTANT_ARGS>()) + "\n";
text += "Max Constant Buffer Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE>() / 1024) + " KB\n";
text += "Max Work-group Size: " + std::to_string(d->gpu.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>()) + "\n";
const auto MAX_WORK_ITEM_SIZES = d->gpu.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
text += "Max Work-item Sizes: (" + std::to_string(MAX_WORK_ITEM_SIZES[0]) + ", " + std::to_string(MAX_WORK_ITEM_SIZES[1]) + ", " + std::to_string(MAX_WORK_ITEM_SIZES[2]) + ")";

VSMap * args = vsapi->createMap();
vsapi->propSetNode(args, "clip", d->node, paReplace);
vsapi->freeNode(d->node);
vsapi->propSetData(args, "text", text.c_str(), -1, paReplace);

VSMap * ret = vsapi->invoke(vsapi->getPluginById("com.vapoursynth.text", core), "Text", args);
if (vsapi->getError(ret)) {
vsapi->setError(out, vsapi->getError(ret));
vsapi->freeMap(args);
vsapi->freeMap(ret);
return;
}

d->node = vsapi->propGetNode(ret, "clip", 0, nullptr);
vsapi->freeMap(args);
vsapi->freeMap(ret);
vsapi->propSetNode(out, "clip", d->node, paReplace);
vsapi->freeNode(d->node);
return;
}
if (d->vi.format->bytesPerSample == 1)
d->clImageFormat = { CL_R, CL_UNSIGNED_INT8 };
else if (d->vi.format->bytesPerSample == 2)
d->clImageFormat = { CL_R, CL_UNSIGNED_INT16 };
else
d->clImageFormat = { CL_R, CL_FLOAT };

try {
std::setlocale(LC_ALL, "C");
Expand All @@ -616,13 +623,6 @@ void VS_CC eedi3clCreate(const VSMap *in, VSMap *out, void *userData, VSCore *co
} catch (const compute::opencl_error & error) {
throw error.error_string() + "\n" + d->program.build_log();
}

if (d->vi.format->bytesPerSample == 1)
d->clImageFormat = { CL_R, CL_UNSIGNED_INT8 };
else if (d->vi.format->bytesPerSample == 2)
d->clImageFormat = { CL_R, CL_UNSIGNED_INT16 };
else
d->clImageFormat = { CL_R, CL_FLOAT };
} catch (const std::string & error) {
vsapi->setError(out, ("EEDI3CL: " + error).c_str());
vsapi->freeNode(d->node);
Expand Down
6 changes: 3 additions & 3 deletions EEDI3/EEDI3CL.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ struct EEDI3CLData {
std::unordered_map<std::thread::id, compute::kernel> calculateConnectionCosts;
std::unordered_map<std::thread::id, compute::image2d> src;
std::unordered_map<std::thread::id, compute::buffer> ccosts;
std::unordered_map<std::thread::id, float *> pcosts, tline;
std::unordered_map<std::thread::id, int *> pbackt, fpath, dmap;
void (*filter)(const VSFrameRef *, const VSFrameRef *, VSFrameRef *, VSFrameRef **, const int, const EEDI3CLData *, const VSAPI *);
std::unordered_map<std::thread::id, float *> pcosts;
std::unordered_map<std::thread::id, int *> pbackt, fpath, dmap, tline;
void (*filter)(const VSFrameRef *, const VSFrameRef *, VSFrameRef *, VSFrameRef **, const int, const EEDI3CLData * const VS_RESTRICT, const VSAPI *);
};
Loading

0 comments on commit af9a442

Please sign in to comment.