DIE Engine
Loading...
Searching...
No Matches
colors.h
Go to the documentation of this file.
1
11
12#ifndef COLORS_H
13#define COLORS_H
14
15#include <smmintrin.h>
16
17#include <stdint.h>
18
19/*****************************************************************************/
/**
 * Scale a packed ARGB color by a fixed-point factor.
 *
 * Each 8-bit channel is computed as (channel * scale) >> 16, i.e. the factor
 * carries 16 fractional bits. A scale of 0 yields 0; a scale of 65535 maps a
 * channel value of 255 to 254.
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color Packed 32-bit color, one byte per channel.
 * @param scale Fixed-point factor with 16 fractional bits.
 * @return Packed color with all four channels scaled.
 */
static inline uint32_t colorsScaleSSE4(uint32_t color, uint16_t scale)
{
    // Zero-extend the four bytes into four 32-bit lanes.
    const __m128i channels = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color));

    // 255 * 65535 fits in 32 bits, so the low half of the product is exact.
    const __m128i product = _mm_mullo_epi32(channels, _mm_set1_epi32(scale));
    const __m128i scaled = _mm_srli_epi32(product, 16);

    // Narrow 32 -> 16 -> 8 bits; every lane is <= 254, so nothing saturates.
    const __m128i packed16 = _mm_packus_epi32(scaled, scaled);
    const __m128i packed8 = _mm_packus_epi16(packed16, packed16);

    return (uint32_t) _mm_cvtsi128_si32(packed8);
}
43
/**
 * Multiply two packed ARGB colors component-wise.
 *
 * Each 8-bit channel is computed as (channel1 * channel2) >> 8, so a channel
 * value of 255 acts as 255/256 rather than exactly 1 (255 * 255 gives 254).
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color1 First packed 32-bit color, one byte per channel.
 * @param color2 Second packed 32-bit color, one byte per channel.
 * @return Packed color holding the per-channel products.
 */
static inline uint32_t colorsMultiplySSE4(uint32_t color1, uint32_t color2)
{
    // Zero-extend the four bytes of each color into four 32-bit lanes.
    const __m128i lhs = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color1));
    const __m128i rhs = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color2));

    // Per-channel product, then drop the 8 fractional bits.
    const __m128i product = _mm_srli_epi32(_mm_mullo_epi32(lhs, rhs), 8);

    // Narrow 32 -> 16 -> 8 bits; every lane is <= 254, so nothing saturates.
    const __m128i packed16 = _mm_packus_epi32(product, product);
    const __m128i packed8 = _mm_packus_epi16(packed16, packed16);

    return (uint32_t) _mm_cvtsi128_si32(packed8);
}
67
/**
 * Linearly interpolate between two packed ARGB colors.
 *
 * Each channel is computed as c1 + (((c2 - c1) * t) >> 8) using an arithmetic
 * shift, so t carries 8 fractional bits: t = 0 returns color1 and t = 256
 * returns color2. Results outside 0..255 are clamped to that range.
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color1 Packed 32-bit start color, one byte per channel.
 * @param color2 Packed 32-bit end color, one byte per channel.
 * @param t      Interpolation factor with 8 fractional bits.
 * @return Packed interpolated color.
 */
static inline uint32_t colorsLinearSSE4(uint32_t color1, uint32_t color2, uint16_t t)
{
    // Zero-extend the four bytes of each color into four 32-bit lanes.
    const __m128i from = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color1));
    const __m128i to = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color2));

    // Signed per-channel difference (color2 - color1), weighted by t.
    const __m128i delta = _mm_sub_epi32(to, from);
    const __m128i weighted = _mm_mullo_epi32(delta, _mm_set1_epi32(t));

    // The arithmetic shift keeps the sign of negative differences.
    const __m128i mixed = _mm_add_epi32(from, _mm_srai_epi32(weighted, 8));

    // _mm_packus_epi16 reads its input as signed 16-bit, so the first
    // narrowing step must clamp to the signed range; an unsigned pack would
    // let values >= 32768 through and they would then be clamped to 0.
    const __m128i packed16 = _mm_packs_epi32(mixed, mixed);
    const __m128i packed8 = _mm_packus_epi16(packed16, packed16);

    return (uint32_t) _mm_cvtsi128_si32(packed8);
}
95
/**
 * Scale a packed ARGB color and accumulate it on another one.
 *
 * Each channel is computed as c1 + ((c2 * scale) >> 8), so scale carries
 * 8 fractional bits (256 adds color2 unchanged). Sums above 255 saturate
 * to 255 for every possible scale value.
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color1 Packed 32-bit accumulator color, one byte per channel.
 * @param color2 Packed 32-bit color to scale and add, one byte per channel.
 * @param scale  Factor applied to color2, with 8 fractional bits.
 * @return Packed color holding the saturated per-channel sums.
 */
static inline uint32_t colorsScaleAccumulateSSE4(uint32_t color1, uint32_t color2, uint16_t scale)
{
    // Zero-extend the four bytes of each color into four 32-bit lanes.
    const __m128i accum = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color1));
    const __m128i addend = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color2));

    // color2 * scale, then drop the 8 fractional bits.
    const __m128i product = _mm_mullo_epi32(addend, _mm_set1_epi32(scale));
    const __m128i scaled = _mm_srli_epi32(product, 8);

    const __m128i sum = _mm_add_epi32(accum, scaled);

    // _mm_packus_epi16 reads its input as signed 16-bit, so the first
    // narrowing step must clamp to the signed range; an unsigned pack would
    // let sums >= 32768 through and they would then be clamped to 0.
    const __m128i packed16 = _mm_packs_epi32(sum, sum);
    const __m128i packed8 = _mm_packus_epi16(packed16, packed16);

    return (uint32_t) _mm_cvtsi128_si32(packed8);
}
123
124/*****************************************************************************/
/**
 * Linearly interpolate between two color vectors.
 *
 * Computes color1 + (color2 - color1) * t independently in each of the four
 * float lanes. t is not clamped, so values outside [0, 1] extrapolate.
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color1 Start vector (returned when t is 0).
 * @param color2 End vector.
 * @param t      Interpolation weight applied to all four lanes.
 * @return Interpolated vector.
 */
static inline __m128 vectorLinearSSE4(__m128 color1, __m128 color2, float t)
{
    const __m128 weight = _mm_set1_ps(t);
    const __m128 step = _mm_mul_ps(_mm_sub_ps(color2, color1), weight);
    return _mm_add_ps(color1, step);
}
137
138/*****************************************************************************/
/**
 * Unpack a packed ARGB color into a normalised 4-float vector.
 *
 * Byte i of the color (counting from the least significant byte) becomes
 * lane i of the result, converted to float and divided by 256. A channel
 * value of 255 therefore maps to 255/256, not to 1.0.
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color Packed 32-bit color, one byte per channel.
 * @return Four floats, one per channel, each in [0, 255/256].
 */
static inline __m128 unpackColorToVectorSSE4(uint32_t color)
{
    // Zero-extend the four bytes into 32-bit lanes, then convert to float.
    const __m128i lanes = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color));
    const __m128 channels = _mm_cvtepi32_ps(lanes);

    // 1/256 is a power of two, so the scaling is exact.
    return _mm_mul_ps(channels, _mm_set1_ps(1.0f / 256.0f));
}
152
/**
 * Unpack a packed ARGB color into a scaled 4-float vector.
 *
 * Byte i of the color (counting from the least significant byte) becomes
 * lane i of the result, converted to float and multiplied by scale / 256.
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color Packed 32-bit color, one byte per channel.
 * @param scale Factor applied to every channel after normalisation.
 * @return Four floats, one per channel, each equal to channel * (scale / 256).
 */
static inline __m128 unpackColorToVectorScaledSSE4(uint32_t color, float scale)
{
    // Zero-extend the four bytes into 32-bit lanes, then convert to float.
    const __m128i lanes = _mm_cvtepu8_epi32(_mm_cvtsi32_si128((int) color));
    const __m128 channels = _mm_cvtepi32_ps(lanes);

    // Fold the normalisation into the scale so only one vector multiply runs.
    const float factor = scale * (1.0f / 256.0f);
    return _mm_mul_ps(channels, _mm_set1_ps(factor));
}
167
/**
 * Pack a normalised 4-float color vector into a packed ARGB color.
 *
 * Each lane is multiplied by 256, truncated toward zero and clamped to
 * 0..255; lane i becomes byte i of the result (counting from the least
 * significant byte). Negative components become 0 and components of 1.0 or
 * more, however bright, become 255.
 *
 * Declared static inline so that every translation unit including this header
 * owns a usable definition even when the compiler decides not to inline.
 *
 * @param color Four float components, nominally in [0, 1).
 * @return Packed 32-bit color, one byte per channel.
 */
static inline uint32_t packVectorToColorSSE4(__m128 color)
{
    const __m128 scaled = _mm_mul_ps(color, _mm_set1_ps(256.0f));
    const __m128i lanes = _mm_cvttps_epi32(scaled);

    // _mm_packus_epi16 reads its input as signed 16-bit, so the first
    // narrowing step must clamp to the signed range; an unsigned pack would
    // let values >= 32768 (components >= 128.0) through and they would then
    // be clamped to 0 instead of 255.
    const __m128i packed16 = _mm_packs_epi32(lanes, lanes);
    const __m128i packed8 = _mm_packus_epi16(packed16, packed16);

    return (uint32_t) _mm_cvtsi128_si32(packed8);
}
181
182#endif // COLORS_H
uint32_t colorsScaleAccumulateSSE4(uint32_t color1, uint32_t color2, uint16_t scale)
Scale a packed ARGB color and accumulate it on another one.
Definition colors.h:103
__m128 vectorLinearSSE4(__m128 color1, __m128 color2, float t)
Linearly interpolate between two color vectors.
Definition colors.h:132
__m128 unpackColorToVectorScaledSSE4(uint32_t color, float scale)
Unpack a packed ARGB color into a scaled 4-float vector.
Definition colors.h:159
uint32_t colorsLinearSSE4(uint32_t color1, uint32_t color2, uint16_t t)
Linearly interpolate between two packed ARGB colors.
Definition colors.h:75
uint32_t colorsMultiplySSE4(uint32_t color1, uint32_t color2)
Multiply two packed ARGB colors component-wise.
Definition colors.h:50
uint32_t packVectorToColorSSE4(__m128 color)
Pack a normalised 4-float color vector into a packed ARGB color.
Definition colors.h:173
uint32_t colorsScaleSSE4(uint32_t color, uint16_t scale)
Scale a packed ARGB color by a fixed-point factor.
Definition colors.h:26
__m128 unpackColorToVectorSSE4(uint32_t color)
Unpack a packed ARGB color into a normalised 4-float vector.
Definition colors.h:144