DIE Engine
Toggle main menu visibility
Loading...
Searching...
No Matches
colors.h
Go to the documentation of this file.
1
11
12
#ifndef COLORS_H
13
#define COLORS_H
14
15
#include <smmintrin.h>
16
17
#include <stdint.h>
18
19
/*****************************************************************************/
26
/**
 * Scale a packed ARGB color by a fixed-point factor.
 *
 * Each 8-bit channel is replaced by (channel * scale) >> 16.
 *
 * @param color  Packed color: four 8-bit channels in one 32-bit word.
 * @param scale  Unsigned 16-bit multiplier applied to all four channels.
 * @return The packed color with every channel scaled.
 */
inline uint32_t colorsScaleSSE4(uint32_t color, uint16_t scale)
{
    // Widen the four 8-bit channels to one 32-bit lane each
    const __m128i channels = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color));

    // channel * scale stays below 2^24, so the 32-bit product is exact;
    // the logical shift keeps bits 16 and up
    const __m128i factor = _mm_set1_epi32(scale);
    const __m128i scaled = _mm_srli_epi32(_mm_mullo_epi32(channels, factor), 16);

    // Narrow 32 -> 16 -> 8 bits; the four channels end up in the low dword
    const __m128i words = _mm_packus_epi32(scaled, scaled);
    const __m128i bytes = _mm_packus_epi16(words, words);

    return (uint32_t) _mm_cvtsi128_si32(bytes);
}
43
50
/**
 * Multiply two packed ARGB colors component-wise.
 *
 * Each output channel is (channel1 * channel2) >> 8, so multiplying by a
 * channel value of 255 yields slightly less than the other operand.
 *
 * @param color1  First packed color (four 8-bit channels).
 * @param color2  Second packed color (four 8-bit channels).
 * @return The packed per-channel product.
 */
inline uint32_t colorsMultiplySSE4(uint32_t color1, uint32_t color2)
{
    // Widen both colors to four 32-bit lanes
    const __m128i lhs = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color1));
    const __m128i rhs = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color2));

    // Per-channel product, brought back into the 8-bit range
    const __m128i product = _mm_srli_epi32(_mm_mullo_epi32(lhs, rhs), 8);

    // Narrow 32 -> 16 -> 8 bits; the four channels end up in the low dword
    const __m128i words = _mm_packus_epi32(product, product);
    const __m128i bytes = _mm_packus_epi16(words, words);

    return (uint32_t) _mm_cvtsi128_si32(bytes);
}
67
75
/**
 * Linearly interpolate between two packed ARGB colors.
 *
 * Each channel is computed as c1 + (((c2 - c1) * t) >> 8) and then clamped
 * to 0..255. With t == 0 the result is color1, with t == 256 it is color2;
 * larger t extrapolates past color2 and saturates.
 *
 * @param color1  Packed start color (four 8-bit channels).
 * @param color2  Packed end color (four 8-bit channels).
 * @param t       Interpolation weight in 1/256 units.
 * @return The packed interpolated color.
 */
inline uint32_t colorsLinearSSE4(uint32_t color1, uint32_t color2, uint16_t t)
{
    // Load colors and widen each 8-bit channel to a 32-bit lane
    const __m128i c1 = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color1));
    const __m128i c2 = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color2));

    // Signed per-channel difference (color2 - color1)
    const __m128i diff = _mm_sub_epi32(c2, c1);

    // color1 + (color2 - color1) * t; arithmetic shift keeps the sign of diff
    const __m128i scaled_diff = _mm_mullo_epi32(diff, _mm_set1_epi32(t));
    __m128i result = _mm_add_epi32(c1, _mm_srai_epi32(scaled_diff, 8));

    // Pack back to bytes. The first step must use SIGNED saturation:
    // _mm_packus_epi16 reads its input as signed 16-bit, so a lane of
    // 32768..65535 left behind by _mm_packus_epi32 would be seen as negative
    // and collapse to 0 instead of saturating to 255.
    result = _mm_packs_epi32(result, result);
    result = _mm_packus_epi16(result, result);
    return (uint32_t) _mm_cvtsi128_si32(result);
}
95
103
/**
 * Scale a packed ARGB color and accumulate it on another one.
 *
 * Each channel is computed as c1 + ((c2 * scale) >> 8) and saturated to 255.
 * A scale of 256 adds color2 unchanged.
 *
 * @param color1  Packed accumulator color (four 8-bit channels).
 * @param color2  Packed color that is scaled before being added.
 * @param scale   Multiplier for color2 in 1/256 units.
 * @return The packed, saturated sum.
 */
inline uint32_t colorsScaleAccumulateSSE4(uint32_t color1, uint32_t color2, uint16_t scale)
{
    // Load colors and widen each 8-bit channel to a 32-bit lane
    const __m128i c1 = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color1));
    const __m128i c2 = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color2));

    // Compute scaling color2 * scale (product < 2^24, exact in 32 bits)
    __m128i scaled_c2 = _mm_mullo_epi32(c2, _mm_set1_epi32(scale));
    scaled_c2 = _mm_srli_epi32(scaled_c2, 8);

    // Add to accumulator; the sum can be as large as 255 + 65278
    __m128i result = _mm_add_epi32(c1, scaled_c2);

    // Pack back to bytes. The first step must use SIGNED saturation:
    // _mm_packus_epi16 reads its input as signed 16-bit, so a lane of
    // 32768..65535 left behind by _mm_packus_epi32 would be seen as negative
    // and collapse to 0 instead of saturating to 255.
    result = _mm_packs_epi32(result, result);
    result = _mm_packus_epi16(result, result);
    return (uint32_t) _mm_cvtsi128_si32(result);
}
123
124
/*****************************************************************************/
132
/**
 * Linearly interpolate between two color vectors.
 *
 * Computes color1 + (color2 - color1) * t independently in each of the
 * four float lanes. t is not clamped.
 *
 * @param color1  Start vector.
 * @param color2  End vector.
 * @param t       Interpolation weight, broadcast to all lanes.
 * @return The interpolated vector.
 */
inline __m128 vectorLinearSSE4(__m128 color1, __m128 color2, float t)
{
    const __m128 weight = _mm_set1_ps(t);
    const __m128 step = _mm_mul_ps(_mm_sub_ps(color2, color1), weight);
    return _mm_add_ps(color1, step);
}
137
138
/*****************************************************************************/
144
/**
 * Unpack a packed ARGB color into a normalised 4-float vector.
 *
 * Lane i receives byte i of @p color (lane 0 = least significant byte)
 * divided by 256, so a channel value of 255 maps to 255/256.
 *
 * @param color  Packed color (four 8-bit channels).
 * @return Four floats, one per channel.
 */
inline __m128 unpackColorToVectorSSE4(uint32_t color)
{
    // Zero-extend each byte to a 32-bit lane, then convert to float
    const __m128i lanes = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color));
    const __m128 channels = _mm_cvtepi32_ps(lanes);

    // Multiply by the reciprocal of 256 instead of dividing
    const __m128 inv256 = _mm_set1_ps(1.0f / 256.0f);
    return _mm_mul_ps(channels, inv256);
}
152
159
/**
 * Unpack a packed ARGB color into a scaled 4-float vector.
 *
 * Lane i receives byte i of @p color (lane 0 = least significant byte)
 * multiplied by (scale / 256).
 *
 * @param color  Packed color (four 8-bit channels).
 * @param scale  Factor applied to every channel.
 * @return Four floats, one per channel.
 */
inline __m128 unpackColorToVectorScaledSSE4(uint32_t color, float scale)
{
    // Zero-extend each byte to a 32-bit lane, then convert to float
    const __m128i lanes = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color));
    const __m128 channels = _mm_cvtepi32_ps(lanes);

    // Fold the 1/256 normalisation into the caller's scale: one multiply per lane
    const float inv256 = 1.0f / 256.0f;
    const __m128 factor = _mm_set1_ps(scale * inv256);
    return _mm_mul_ps(channels, factor);
}
167
173
/**
 * Pack a normalised 4-float color vector into a packed ARGB color.
 *
 * Each lane is multiplied by 256, clamped to 0..255, truncated toward zero
 * and stored as one byte (lane 0 becomes the least significant byte).
 * Negative and NaN lanes produce 0; lanes of 1.0 or more produce 255.
 *
 * @param color  Four float channels.
 * @return The packed color (four 8-bit channels).
 */
inline uint32_t packVectorToColorSSE4(__m128 color)
{
    __m128 scaled = _mm_mul_ps(color, _mm_set1_ps(256.0f));

    // Clamp in the float domain before converting. Relying on the integer
    // packs alone breaks for bright inputs: a scaled value of 32768..65535
    // survives _mm_packus_epi32 but is then read as a negative 16-bit number
    // by _mm_packus_epi16 and becomes 0, and values beyond the int32 range
    // make _mm_cvttps_epi32 return 0x80000000, which also packs to 0.
    // _mm_max_ps returns its second operand when the first is NaN, so NaN
    // lanes become 0 here.
    scaled = _mm_max_ps(scaled, _mm_setzero_ps());
    scaled = _mm_min_ps(scaled, _mm_set1_ps(255.0f));

    // Truncate and narrow 32 -> 16 -> 8 bits (all lanes are 0..255 now)
    __m128i vec4i = _mm_cvttps_epi32(scaled);
    vec4i = _mm_packus_epi32(vec4i, vec4i);
    vec4i = _mm_packus_epi16(vec4i, vec4i);
    return (uint32_t) _mm_cvtsi128_si32(vec4i);
}
181
182
#endif
// COLORS_H
colorsScaleAccumulateSSE4
uint32_t colorsScaleAccumulateSSE4(uint32_t color1, uint32_t color2, uint16_t scale)
Scale a packed ARGB color and accumulate it on another one.
Definition
colors.h:103
vectorLinearSSE4
__m128 vectorLinearSSE4(__m128 color1, __m128 color2, float t)
Linearly interpolate between two color vectors.
Definition
colors.h:132
unpackColorToVectorScaledSSE4
__m128 unpackColorToVectorScaledSSE4(uint32_t color, float scale)
Unpack a packed ARGB color into a scaled 4-float vector.
Definition
colors.h:159
colorsLinearSSE4
uint32_t colorsLinearSSE4(uint32_t color1, uint32_t color2, uint16_t t)
Linearly interpolate between two packed ARGB colors.
Definition
colors.h:75
colorsMultiplySSE4
uint32_t colorsMultiplySSE4(uint32_t color1, uint32_t color2)
Multiply two packed ARGB colors component-wise.
Definition
colors.h:50
packVectorToColorSSE4
uint32_t packVectorToColorSSE4(__m128 color)
Pack a normalised 4-float color vector into a packed ARGB color.
Definition
colors.h:173
colorsScaleSSE4
uint32_t colorsScaleSSE4(uint32_t color, uint16_t scale)
Scale a packed ARGB color by a fixed-point factor.
Definition
colors.h:26
unpackColorToVectorSSE4
__m128 unpackColorToVectorSSE4(uint32_t color)
Unpack a packed ARGB color into a normalised 4-float vector.
Definition
colors.h:144
common
engine
colors.h
Generated by
1.17.0