Home » Developing U++ » U++ Developers corner » SSE2 and SVO optimization (Painter, memcpy....)
Re: BufferPainter::Clear() optimization [message #53952 is a reply to message #53951] |
Sun, 17 May 2020 18:05 |
|
mirek
Messages: 13975 Registered: November 2005
|
Ultimate Member |
|
|
What about this:
#include <CtrlLib/CtrlLib.h>
#include <smmintrin.h>
using namespace Upp;
// Writes the pixel value `c` into `len` consecutive RGBA slots starting at `t`.
//
// Two phases:
//   1. bulk  - 16 explicit stores per iteration while at least 16 pixels remain;
//   2. tail  - a switch on the low four bits of the remaining count jumps into a
//              descending chain of stores, so exactly that many pixels are written.
//
// `len` is expected to be non-negative: the tail looks only at `len & 15`,
// which can be non-zero for negative values as well.
void Fill0(RGBA *t, RGBA c, int len)
{
	for(; len >= 16; len -= 16, t += 16) {
		t[0] = c;
		t[1] = c;
		t[2] = c;
		t[3] = c;
		t[4] = c;
		t[5] = c;
		t[6] = c;
		t[7] = c;
		t[8] = c;
		t[9] = c;
		t[10] = c;
		t[11] = c;
		t[12] = c;
		t[13] = c;
		t[14] = c;
		t[15] = c;
	}

	// Every case below deliberately falls through to the next one.
	const int tail = len & 15;
	switch(tail) {
	case 15:
		t[14] = c;
	case 14:
		t[13] = c;
	case 13:
		t[12] = c;
	case 12:
		t[11] = c;
	case 11:
		t[10] = c;
	case 10:
		t[9] = c;
	case 9:
		t[8] = c;
	case 8:
		t[7] = c;
	case 7:
		t[6] = c;
	case 6:
		t[5] = c;
	case 5:
		t[4] = c;
	case 4:
		t[3] = c;
	case 3:
		t[2] = c;
	case 2:
		t[1] = c;
	case 1:
		t[0] = c;
	default:
		break;
	}
}
// Writes the pixel value `c` into `len` consecutive RGBA slots starting at `t`.
//
// Blocks of 16 pixels are written with explicit stores; the remainder is then
// handled by testing bits 8, 4, 2 and 1 of the leftover count and writing a
// block of that size for every bit that is set.
//
// `len` is expected to be non-negative: the remainder tests look only at
// individual low bits of `len`, which can be set for negative values as well.
void Fill2(RGBA *t, RGBA c, int len)
{
	for(; len >= 16; len -= 16, t += 16) {
		t[0] = c;
		t[1] = c;
		t[2] = c;
		t[3] = c;
		t[4] = c;
		t[5] = c;
		t[6] = c;
		t[7] = c;
		t[8] = c;
		t[9] = c;
		t[10] = c;
		t[11] = c;
		t[12] = c;
		t[13] = c;
		t[14] = c;
		t[15] = c;
	}

	if(len & 8) { // eight more pixels
		t[0] = c;
		t[1] = c;
		t[2] = c;
		t[3] = c;
		t[4] = c;
		t[5] = c;
		t[6] = c;
		t[7] = c;
		t += 8;
	}
	if(len & 4) { // four more pixels
		t[0] = c;
		t[1] = c;
		t[2] = c;
		t[3] = c;
		t += 4;
	}
	if(len & 2) { // two more pixels
		t[0] = c;
		t[1] = c;
		t += 2;
	}
	if(len & 1) // final odd pixel
		*t = c;
}
// Writes the pixel value `c` into `len` consecutive RGBA slots starting at `t`,
// using 128-bit SSE2 stores (four pixels per store).
//
// Strategy:
//   - len >= 16: 16 pixels (four 128-bit stores) per iteration.
//       * If the fill is longer than 1024*1024/16 pixels and `t` is 4-byte
//         aligned, `t` is first advanced pixel by pixel to a 16-byte boundary and
//         the bulk is written with streaming stores (_mm_stream_pd) followed by
//         _mm_sfence(), so a huge fill bypasses the cache.
//       * Otherwise unaligned stores (_mm_storeu_pd) are used.
//   - Remainder: bits 8, 4, 2 and 1 of the leftover count are tested in turn;
//     8 and 4 pixels are written with unaligned 128-bit stores, 2 and 1 with
//     plain pixel assignments.
//
// `len` is expected to be non-negative: the remainder tests look only at
// individual low bits of `len`, which can be set for negative values as well.
void Fill3(RGBA *t, RGBA c, int len)
{
	// Build the 16-byte pattern from four copies of the pixel itself. The
	// previous version read `c` through a dword pointer (*(dword*)&c), which
	// accesses an RGBA object through an unrelated type.
	RGBA m[4];
	m[0] = m[1] = m[2] = m[3] = c;
	static_assert(sizeof(m) == sizeof(__m128d), "Fill3 requires RGBA to be exactly 4 bytes");
	__m128d val = _mm_loadu_pd((double *)m);
	if(len >= 16) {
		if(len > 1024*1024 / 16 && ((uintptr_t)t & 3) == 0) { // for really huge data, bypass the cache
			while((uintptr_t)t & 15) { // align to 16 bytes for SSE
				*t++ = c;
				len--;
			}
			// At most 3 pixels were consumed above, so len is still >= 16 here.
			do {
				_mm_stream_pd((double *)t, val);
				_mm_stream_pd((double *)(t + 4), val);
				_mm_stream_pd((double *)(t + 8), val);
				_mm_stream_pd((double *)(t + 12), val);
				t += 16;
				len -= 16;
			}
			while(len >= 16);
			_mm_sfence(); // store fence after the streaming stores
		}
		else {
			do {
				_mm_storeu_pd((double *)t, val);
				_mm_storeu_pd((double *)(t + 4), val);
				_mm_storeu_pd((double *)(t + 8), val);
				_mm_storeu_pd((double *)(t + 12), val);
				t += 16;
				len -= 16;
			}
			while(len >= 16);
		}
	}
	if(len & 8) { // eight more pixels: two 128-bit stores
		_mm_storeu_pd((double *)t, val);
		_mm_storeu_pd((double *)(t + 4), val);
		t += 8;
	}
	if(len & 4) { // four more pixels: one 128-bit store
		_mm_storeu_pd((double *)t, val);
		t += 4;
	}
	if(len & 2) {
		t[0] = t[1] = c;
		t += 2;
	}
	if(len & 1)
		t[0] = c;
}
// Pixel count used for the large-buffer ("HUGE") part of the benchmark.
int len = 2000 * 4000;

// Benchmark driver comparing memsetd, Fill, Fill0, Fill2, Fill3 and memset.
//
// Part 1 (small fills): 100 (offset, length) pairs are drawn with Random(100)
// and every variant is run over all pairs, one million times, inside its own
// RTIMING-labelled scope. The buffer holds 2000 pixels.
//
// Part 2 (huge fills): the buffer is reallocated to `len` pixels and memsetd,
// Fill, Fill3 and memset each fill the whole buffer, 20 times.
GUI_APP_MAIN
{
	Color c = Red();
	Buffer<RGBA> b(2000);
	Vector<int> rnd; // flat list: rnd[2k] is an offset, rnd[2k + 1] a length
	for(int i = 0; i < 200; i++)
		rnd << Random(100);
	for(int pass = 0; pass < 1000000; pass++) {
		{
			RTIMING("memsetd");
			for(int i = 0; i < rnd.GetCount(); i += 2)
				memsetd(b + rnd[i], *(dword*)&(c), rnd[i + 1]);
		}
		{
			RTIMING("Fill");
			for(int i = 0; i < rnd.GetCount(); i += 2)
				Fill(b + rnd[i], c, rnd[i + 1]);
		}
		{
			RTIMING("Fill0");
			for(int i = 0; i < rnd.GetCount(); i += 2)
				Fill0(b + rnd[i], c, rnd[i + 1]);
		}
		{
			RTIMING("Fill2");
			for(int i = 0; i < rnd.GetCount(); i += 2)
				Fill2(b + rnd[i], c, rnd[i + 1]);
		}
		{
			RTIMING("Fill3");
			for(int i = 0; i < rnd.GetCount(); i += 2)
				Fill3(b + rnd[i], c, rnd[i + 1]);
		}
		{
			RTIMING("memset");
			// `b + rnd[i]` is already pixel-indexed pointer arithmetic; only the
			// byte count needs the factor of 4. The earlier `b + 4 * rnd[i]`
			// scaled the offset twice, so memset was timed on a different
			// region than the other variants.
			for(int i = 0; i < rnd.GetCount(); i += 2)
				memset(b + rnd[i], 255, 4 * rnd[i + 1]);
		}
	}
	b.Alloc(len);
	for(int round = 0; round < 20; round++) {
		// Untimed fill before the timed ones.
		memsetd(b, *(dword*)&(c), len);
		{
			RTIMING("HUGE memsetd");
			memsetd(b, *(dword*)&(c), len);
		}
		{
			RTIMING("HUGE Fill");
			Fill(b, c, len);
		}
		{
			RTIMING("HUGE Fill3");
			Fill3(b, c, len);
		}
		{
			RTIMING("HUGE memset");
			// NOTE(review): memset takes an int and stores only its low byte;
			// `c` is a Color converted implicitly here, whereas the small-fill
			// case above passes 255 - confirm this is the intended fill value.
			memset(b, c, len * 4);
		}
	}
	BeepExclamation();
}
I believe Fill3 does not have any weakness here... Actually, CLANG produced almost exactly the same code for Fill2 and memsetd for small fills, but I guess providing an SSE2 implementation directly does not hurt anything. Plus, we would still like to have that cache bypass...
So I would go for Fill3 on x86 and Fill2 on non-x86 (in the hope that it gets optimized for NEON on ARM...)
Mirek
[Updated on: Sun, 17 May 2020 18:09] Report message to a moderator
|
|
|
|
|
SSE2 and SVO optimization (Painter, memcpy....)
By: Tom1 on Mon, 27 April 2020 19:19
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 28 April 2020 10:12
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 28 April 2020 10:20
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 28 April 2020 10:27
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 28 April 2020 10:33
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 28 April 2020 11:10
|
|
|
Re: BufferPainter::Clear() optimization
By: Oblivion on Tue, 28 April 2020 11:27
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 28 April 2020 11:17
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 15 May 2020 09:04
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 15 May 2020 10:18
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 15 May 2020 11:33
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 15 May 2020 11:41
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 15 May 2020 11:47
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 15 May 2020 12:08
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 15 May 2020 13:15
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 15 May 2020 13:36
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 15 May 2020 23:13
|
|
|
Re: BufferPainter::Clear() optimization
By: Didier on Fri, 15 May 2020 23:45
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Sat, 16 May 2020 01:59
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Sun, 17 May 2020 08:47
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Sun, 17 May 2020 10:01
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Sun, 17 May 2020 15:49
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Sun, 17 May 2020 18:05
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Sun, 17 May 2020 20:56
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Sun, 17 May 2020 21:46
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Sun, 17 May 2020 23:25
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 18 May 2020 10:16
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Mon, 18 May 2020 11:13
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Mon, 18 May 2020 13:31
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 18 May 2020 13:53
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Mon, 18 May 2020 16:06
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Mon, 18 May 2020 17:08
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 18 May 2020 18:12
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 18 May 2020 18:28
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Mon, 18 May 2020 20:57
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Mon, 18 May 2020 21:20
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Mon, 18 May 2020 21:40
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 18 May 2020 21:56
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 18 May 2020 13:33
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Sun, 17 May 2020 00:10
|
|
|
Re: BufferPainter::Clear() optimization
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 15 May 2020 12:28
|
|
|
Re: BufferPainter::Clear() optimization
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 18 May 2020 13:43
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 19 May 2020 00:02
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 19 May 2020 08:59
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 19 May 2020 09:14
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 19 May 2020 09:49
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 19 May 2020 12:35
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 19 May 2020 12:45
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 19 May 2020 13:18
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 19 May 2020 16:22
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 01:34
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 01:52
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 08:22
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 10:04
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 10:20
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 10:55
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 11:56
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 12:23
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 12:41
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 12:53
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 13:01
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 15:18
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Thu, 21 May 2020 16:21
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 15:58
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 16:15
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 17:16
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 17:31
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Wed, 20 May 2020 17:37
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 19:51
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Thu, 21 May 2020 09:04
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Thu, 21 May 2020 13:28
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Thu, 21 May 2020 16:38
|
|
|
Re: BufferPainter::Clear() optimization
By: koldo on Thu, 21 May 2020 17:51
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Thu, 21 May 2020 19:22
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Thu, 21 May 2020 19:25
|
|
|
Re: BufferPainter::Clear() optimization
By: Didier on Fri, 22 May 2020 09:32
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 22 May 2020 10:04
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 22 May 2020 10:05
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 22 May 2020 10:28
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 22 May 2020 11:13
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 22 May 2020 11:39
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 22 May 2020 11:46
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 22 May 2020 11:59
|
|
|
Re: BufferPainter::Clear() optimization
By: koldo on Fri, 22 May 2020 12:47
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 22 May 2020 13:01
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 22 May 2020 13:06
|
|
|
Re: BufferPainter::Clear() optimization
By: koldo on Fri, 22 May 2020 16:58
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 22 May 2020 19:03
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Sun, 24 May 2020 10:20
|
|
|
Re: BufferPainter::Clear() optimization
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 26 May 2020 13:14
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 26 May 2020 14:15
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 01 June 2020 00:39
|
|
|
Re: BufferPainter::Clear() optimization
By: omari on Mon, 01 June 2020 11:24
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Mon, 01 June 2020 15:47
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 02 June 2020 13:59
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 02 June 2020 17:43
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Tue, 02 June 2020 18:31
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Thu, 04 June 2020 17:23
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Thu, 04 June 2020 17:45
|
|
|
Re: BufferPainter::Clear() optimization
By: Novo on Thu, 04 June 2020 18:07
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Thu, 04 June 2020 20:20
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 12 June 2020 12:23
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 12 June 2020 12:55
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 12 June 2020 16:28
|
|
|
Re: BufferPainter::Clear() optimization
By: Novo on Fri, 12 June 2020 18:45
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Sat, 13 June 2020 10:15
|
|
|
Re: BufferPainter::Clear() optimization
By: coolman on Sat, 13 June 2020 10:33
|
|
|
Re: BufferPainter::Clear() optimization
By: Novo on Sat, 13 June 2020 13:07
|
|
|
Re: BufferPainter::Clear() optimization
By: coolman on Sat, 13 June 2020 14:45
|
|
|
Re: BufferPainter::Clear() optimization
By: Didier on Sun, 14 June 2020 12:45
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Sun, 14 June 2020 14:09
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Thu, 04 June 2020 18:48
|
|
|
Re: BufferPainter::Clear() optimization
By: koldo on Fri, 22 May 2020 10:29
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Fri, 22 May 2020 11:32
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Fri, 22 May 2020 11:32
|
|
|
Re: BufferPainter::Clear() optimization
By: Tom1 on Wed, 20 May 2020 12:52
|
|
|
Re: BufferPainter::Clear() optimization
By: mirek on Tue, 19 May 2020 11:32
|
Goto Forum:
Current Time: Mon May 06 01:16:54 CEST 2024
Total time taken to generate the page: 0.02431 seconds
|