Files
unity/crnlib/crn_threaded_resampler.cpp
T
Alexander Suvorov 3e12aff909 Fix miscellaneous compiler warnings
DXT Testing:

The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). All the decompressed test images are identical to the images compressed and decompressed using the original version of Crunch (revision ea9b8d8).

[Compressing Kodak set without mipmaps using DXT1 encoding]
Original: 1582222 bytes / 28.866 sec
Modified: 1468204 bytes / 11.858 sec
Improvement: 7.21% (compression ratio) / 58.92% (compression time)

[Compressing Kodak set with mipmaps using DXT1 encoding]
Original: 2065243 bytes / 36.878 sec
Modified: 1914805 bytes / 15.625 sec
Improvement: 7.28% (compression ratio) / 57.63% (compression time)

ETC Testing:

The modified algorithm has been tested on the Kodak test set using a 64-bit build with default settings (running on Windows 10, i7-4790, 3.6GHz). The ETC1 quantization parameters have been selected so that ETC1 compression gives approximately the same average Luma PSNR as the corresponding DXT1 compression (which is equal to 34.044 dB for the Kodak test set compressed without mipmaps using DXT1 encoding and default quality settings).

[Compressing Kodak set without mipmaps using ETC1 encoding]
Total size: 1607858 bytes
Total time: 17.181 sec
Average bitrate: 1.363 bpp
Average Luma PSNR: 34.050 dB
2017-09-11 13:52:21 +02:00

283 lines
8.1 KiB
C++

// File: crn_threaded_resampler.cpp
// See Copyright Notice and license at the end of inc/crnlib.h
#include "crn_core.h"
#include "crn_threaded_resampler.h"
#include "crn_resample_filters.h"
#include "crn_threading.h"
namespace crnlib {
// Creates an idle resampler bound to the task pool `tp`.
// Only the address of `tp` is kept. The parameter pointer and both
// contributor lists start out NULL and are attached by resample().
threaded_resampler::threaded_resampler(task_pool& tp)
    : m_pTask_pool(&tp),
      m_pParams(NULL),
      m_pX_contribs(NULL),
      m_pY_contribs(NULL),
      m_bytes_per_pixel(0) {
}
// Releases whatever contributor lists are still attached to this object.
threaded_resampler::~threaded_resampler() {
  free_contrib_lists();
}
void threaded_resampler::free_contrib_lists() {
if (m_pX_contribs) {
crnlib_free(m_pX_contribs->p);
m_pX_contribs->p = NULL;
crnlib_free(m_pX_contribs);
m_pX_contribs = NULL;
}
if (m_pY_contribs) {
crnlib_free(m_pY_contribs->p);
m_pY_contribs->p = NULL;
crnlib_free(m_pY_contribs);
m_pY_contribs = NULL;
}
}
// Horizontal pass worker: filters source rows along X into m_tmp_img.
//
// data - index of this task (the value passed when the task was queued).
//        When the pool reports N > 0 threads, this task handles only the
//        source rows whose index modulo (N + 1) equals `data`; when the pool
//        reports 0 threads every row is handled.
// The second (pointer) argument is unused.
//
// For each handled source row, m_dst_width filtered pixels are written to
// row `src_y` of m_tmp_img (row stride m_dst_width). Each output pixel is the
// weighted sum of the source pixels named by the matching X contributor list.
// Pixel formats other than the three handled below are silently skipped.
void threaded_resampler::resample_x_task(uint64 data, void*) {
  const uint thread_index = (uint)data;

  // Loop-invariant values, read once instead of once per row.
  const uint num_threads = m_pTask_pool->get_num_threads();
  const uint dst_width = m_pParams->m_dst_width;
  const uint8* const pSrc_base = static_cast<const uint8*>(m_pParams->m_pSrc_pixels);
  const Resampler::Contrib_List* const pContribs_end = m_pX_contribs + dst_width;

  for (uint src_y = 0; src_y < m_pParams->m_src_height; src_y++) {
    if (num_threads && (src_y % (num_threads + 1)) != thread_index)
      continue;

    // Row offsets are computed in size_t so that pitch * row cannot wrap
    // around in 32-bit unsigned arithmetic on large images.
    const uint8* const pSrc_row = pSrc_base + static_cast<size_t>(m_pParams->m_src_pitch) * src_y;
    vec4F* pDst = m_tmp_img.get_ptr() + static_cast<size_t>(dst_width) * src_y;

    // The loops below are for-loops (not do/while) so that an empty
    // contributor range results in no work rather than an out-of-range read.
    switch (m_pParams->m_fmt) {
      case cPF_Y_F32: {
        // One float per source pixel; only component 0 of the result is accumulated.
        const float* pSrc = reinterpret_cast<const float*>(pSrc_row);
        for (const Resampler::Contrib_List* pContribs = m_pX_contribs; pContribs != pContribs_end; ++pContribs) {
          const Resampler::Contrib* const p_end = pContribs->p + pContribs->n;
          vec4F s(0.0f);
          for (const Resampler::Contrib* p = pContribs->p; p != p_end; ++p) {
            const uint src_pixel = p->pixel;
            const float src_weight = p->weight;
            s[0] += pSrc[src_pixel] * src_weight;
          }
          *pDst++ = s;
        }
        break;
      }
      case cPF_RGBX_F32: {
        // Four floats per source pixel; the fourth component is ignored here
        // and keeps the value given by vec4F(0.0f).
        const vec4F* pSrc = reinterpret_cast<const vec4F*>(pSrc_row);
        for (const Resampler::Contrib_List* pContribs = m_pX_contribs; pContribs != pContribs_end; ++pContribs) {
          const Resampler::Contrib* const p_end = pContribs->p + pContribs->n;
          vec4F s(0.0f);
          for (const Resampler::Contrib* p = pContribs->p; p != p_end; ++p) {
            const float src_weight = p->weight;
            const vec4F& src_pixel = pSrc[p->pixel];
            s[0] += src_pixel[0] * src_weight;
            s[1] += src_pixel[1] * src_weight;
            s[2] += src_pixel[2] * src_weight;
          }
          *pDst++ = s;
        }
        break;
      }
      case cPF_RGBA_F32: {
        // Four floats per source pixel; all four components are filtered.
        const vec4F* pSrc = reinterpret_cast<const vec4F*>(pSrc_row);
        for (const Resampler::Contrib_List* pContribs = m_pX_contribs; pContribs != pContribs_end; ++pContribs) {
          const Resampler::Contrib* const p_end = pContribs->p + pContribs->n;
          vec4F s(0.0f);
          for (const Resampler::Contrib* p = pContribs->p; p != p_end; ++p) {
            const float src_weight = p->weight;
            const vec4F& src_pixel = pSrc[p->pixel];
            s[0] += src_pixel[0] * src_weight;
            s[1] += src_pixel[1] * src_weight;
            s[2] += src_pixel[2] * src_weight;
            s[3] += src_pixel[3] * src_weight;
          }
          *pDst++ = s;
        }
        break;
      }
      default:
        break;
    }
  }
}
// Vertical pass worker: filters rows of m_tmp_img along Y, clamps the result
// and writes it to the destination image.
//
// data - index of this task (the value passed when the task was queued).
//        When the pool reports N > 0 threads, this task handles only the
//        destination rows whose index modulo (N + 1) equals `data`; when the
//        pool reports 0 threads every row is handled.
// The second (pointer) argument is unused.
//
// Every written component is clamped to [m_sample_low, m_sample_high]. For
// cPF_RGBX_F32 the fourth component is set to m_sample_high instead.
// Pixel formats other than the three handled below are silently skipped.
void threaded_resampler::resample_y_task(uint64 data, void*) {
  const uint thread_index = (uint)data;

  // Loop-invariant values, read once instead of once per row.
  const uint num_threads = m_pTask_pool->get_num_threads();
  const uint dst_width = m_pParams->m_dst_width;
  const float l = m_pParams->m_sample_low;
  const float h = m_pParams->m_sample_high;
  uint8* const pDst_base = static_cast<uint8*>(m_pParams->m_pDst_pixels);

  // Per-task scratch row used when more than one source row contributes.
  crnlib::vector<vec4F> tmp(dst_width);

  for (uint dst_y = 0; dst_y < m_pParams->m_dst_height; dst_y++) {
    if (num_threads && (dst_y % (num_threads + 1)) != thread_index)
      continue;

    const Resampler::Contrib_List& contribs = m_pY_contribs[dst_y];
    const vec4F* pSrc;
    if (contribs.n == 1) {
      // Single contributor: the intermediate row is read in place and its
      // weight is not applied.
      pSrc = m_tmp_img.get_ptr() + static_cast<size_t>(dst_width) * contribs.p[0].pixel;
    } else {
      for (uint src_y_iter = 0; src_y_iter < contribs.n; src_y_iter++) {
        const vec4F* p = m_tmp_img.get_ptr() + static_cast<size_t>(dst_width) * contribs.p[src_y_iter].pixel;
        const float weight = contribs.p[src_y_iter].weight;
        if (!src_y_iter) {
          // The first contributor overwrites the scratch row left by the previous dst row.
          for (uint i = 0; i < dst_width; i++)
            tmp[i] = p[i] * weight;
        } else {
          for (uint i = 0; i < dst_width; i++)
            tmp[i] += p[i] * weight;
        }
      }
      pSrc = tmp.get_ptr();
    }
    const vec4F* const pSrc_end = pSrc + dst_width;

    // Byte offset computed in size_t so that pitch * row cannot wrap around
    // in 32-bit unsigned arithmetic on large images.
    uint8* const pDst_row = pDst_base + static_cast<size_t>(m_pParams->m_dst_pitch) * dst_y;

    // The loops below are for-loops (not do/while) so that a zero-width row
    // results in no work rather than an out-of-range access.
    switch (m_pParams->m_fmt) {
      case cPF_Y_F32: {
        float* pDst = reinterpret_cast<float*>(pDst_row);
        for (; pSrc != pSrc_end; ++pSrc)
          *pDst++ = math::clamp((*pSrc)[0], l, h);
        break;
      }
      case cPF_RGBX_F32: {
        vec4F* pDst = reinterpret_cast<vec4F*>(pDst_row);
        for (; pSrc != pSrc_end; ++pSrc, ++pDst) {
          (*pDst)[0] = math::clamp((*pSrc)[0], l, h);
          (*pDst)[1] = math::clamp((*pSrc)[1], l, h);
          (*pDst)[2] = math::clamp((*pSrc)[2], l, h);
          (*pDst)[3] = h;
        }
        break;
      }
      case cPF_RGBA_F32: {
        vec4F* pDst = reinterpret_cast<vec4F*>(pDst_row);
        for (; pSrc != pSrc_end; ++pSrc, ++pDst) {
          (*pDst)[0] = math::clamp((*pSrc)[0], l, h);
          (*pDst)[1] = math::clamp((*pSrc)[1], l, h);
          (*pDst)[2] = math::clamp((*pSrc)[2], l, h);
          (*pDst)[3] = math::clamp((*pSrc)[3], l, h);
        }
        break;
      }
      default:
        break;
    }
  }
}
bool threaded_resampler::resample(const params& p) {
free_contrib_lists();
m_pParams = &p;
CRNLIB_ASSERT(m_pParams->m_src_width && m_pParams->m_src_height);
CRNLIB_ASSERT(m_pParams->m_dst_width && m_pParams->m_dst_height);
switch (p.m_fmt) {
case cPF_Y_F32:
m_bytes_per_pixel = 4;
break;
case cPF_RGBX_F32:
case cPF_RGBA_F32:
m_bytes_per_pixel = 16;
break;
default:
CRNLIB_ASSERT(false);
return false;
}
int filter_index = find_resample_filter(p.m_Pfilter_name);
if (filter_index < 0)
return false;
const resample_filter& filter = g_resample_filters[filter_index];
m_pX_contribs = Resampler::make_clist(m_pParams->m_src_width, m_pParams->m_dst_width, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_x_scale, 0.0f);
if (!m_pX_contribs)
return false;
m_pY_contribs = Resampler::make_clist(m_pParams->m_src_height, m_pParams->m_dst_height, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_y_scale, 0.0f);
if (!m_pY_contribs)
return false;
if (!m_tmp_img.try_resize(m_pParams->m_dst_width * m_pParams->m_src_height))
return false;
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, i, NULL);
m_pTask_pool->join();
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, NULL);
m_pTask_pool->join();
m_tmp_img.clear();
free_contrib_lists();
return true;
}
} // namespace crnlib