// Fills one horizontal span with texels fetched from texDiffusePixels and
// modulated by color_4, processing four pixels per iteration with SSE2.
//
// NOTE(review): this listing is incomplete - braces and the declarations of
// d, id, xb, p, mu_4, mv_4 and tp are on lines that are not shown here. Comments
// that depend on those lines are marked as assumptions.
//
// Parameters:
//   y        - not referenced by any visible line; presumably the scanline
//              used to position the output pointer p - confirm.
//   x1, x2   - span extent along x; only x2 is used in the visible lines
//              (to derive xe).
//   w1, w2   - values interpolated across the span and then reciprocated
//              per pixel (looks like 1/z at both ends - confirm).
//   u1, u2   - first texture coordinate at both ends; multiplied by the
//   v1, v2   - reciprocal of w before the fetch, likewise for the second.
33 inline void LeRasterizer::fillFlatTexZC(
int y,
float x1,
float x2,
float w1,
float w2,
float u1,
float u2,
float v1,
float v2)
// Nothing to draw when d is exactly zero. d is declared on a line not shown;
// presumably the span width (x2 - x1) - confirm.
36 if (d == 0.0f)
return;
// Increments of u, v and w: end-to-end delta scaled by id. id is declared on
// a line not shown; presumably 1 / d, making these per-pixel steps - confirm.
39 float au = (u2 - u1) *
id;
40 float av = (v2 - v1) *
id;
41 float aw = (w2 - w1) *
id;
// Pack the values for four consecutive steps into one vector each.
// _mm_set_ps takes its arguments from the highest lane to the lowest, so
// lane 0 holds the start value and lane 3 holds start + 3 * step.
43 __m128 u_4 = _mm_set_ps(u1 + 3.0f * au, u1 + 2.0f * au, u1 + au, u1);
44 __m128 v_4 = _mm_set_ps(v1 + 3.0f * av, v1 + 2.0f * av, v1 + av, v1);
45 __m128 w_4 = _mm_set_ps(w1 + 3.0f * aw, w1 + 2.0f * aw, w1 + aw, w1);
// Broadcast four times each step: the amount to advance per group of four.
47 __m128 au_4 = _mm_set1_ps(au * 4.0f);
48 __m128 av_4 = _mm_set1_ps(av * 4.0f);
49 __m128 aw_4 = _mm_set1_ps(aw * 4.0f);
// Integer end of the span. xb (the integer start) is computed on a line not
// shown.
52 int xe = (int)(x2 + 1.9999f);
// b = number of complete groups of four in (xe - xb); r = leftover count (0..3).
56 int b = (xe - xb) >> 2;
57 int r = (xe - xb) & 0x3;
// Main loop: one iteration per complete group of four.
59 for (
int x = 0; x < b; x ++) {
// Approximate per-lane reciprocal of the interpolated w.
60 __m128 z_4 = _mm_rcp_ps(w_4);
// Multiply u and v by that reciprocal; v is additionally scaled by
// texScale_4 (defined elsewhere). mu_4 / mv_4 are declared on a line not shown.
63 mu_4 = _mm_mul_ps(u_4, z_4);
64 mv_4 = _mm_mul_ps(v_4, z_4);
65 mv_4 = _mm_mul_ps(mv_4, texScale_4);
// Convert both coordinates to 32-bit integers, mask each with its texture
// mask, and add them to obtain one index into texDiffusePixels per lane.
68 mui_4 = _mm_cvtps_epi32(mu_4);
69 mvi_4 = _mm_cvtps_epi32(mv_4);
70 mui_4 = _mm_and_si128(mui_4, texMaskU_4);
71 mvi_4 = _mm_and_si128(mvi_4, texMaskV_4);
72 mui_4 = _mm_add_epi32(mui_4, mvi_4);
// Zero vector used to widen bytes to 16-bit lanes.
74 __m128i zv = _mm_set1_epi32(0);
75 __m128i tp, tq, t1, t2;
// Lanes 0 and 1: the index is read back by viewing mui_4 as four uint32_t.
// Each load reads 64 bits starting at the indexed element; only the low
// 32 bits of each survive the interleave + byte unpack below.
76 tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *) &mui_4)[0]]);
77 tq = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *) &mui_4)[1]]);
// Put the two fetched values side by side, widen their bytes to 16 bits,
// multiply by color_4 per 16-bit lane and keep the high byte of each product.
78 t1 = _mm_unpacklo_epi32(tp, tq);
79 t1 = _mm_unpacklo_epi8(t1, zv);
80 t1 = _mm_mullo_epi16(t1, color_4);
81 t1 = _mm_srli_epi16(t1, 8);
// Lanes 2 and 3: same sequence.
83 tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[2]]);
84 tq = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[3]]);
85 t2 = _mm_unpacklo_epi32(tp, tq);
86 t2 = _mm_unpacklo_epi8(t2, zv);
87 t2 = _mm_mullo_epi16(t2, color_4);
88 t2 = _mm_srli_epi16(t2, 8);
// Narrow both halves back to bytes (unsigned saturation) and write all
// 16 bytes with an unaligned store. p is declared on a line not shown;
// presumably it is advanced by four pixels on a line missing here - confirm.
90 tp = _mm_packus_epi16(t1, t2);
91 _mm_storeu_si128((__m128i *) p, tp);
// Advance the three interpolants by four steps.
94 w_4 = _mm_add_ps(w_4, aw_4);
95 u_4 = _mm_add_ps(u_4, au_4);
96 v_4 = _mm_add_ps(v_4, av_4);
// Tail: the loop's closing brace is on a line not shown. The same index
// computation is done once more for the remaining pixels.
// NOTE(review): r is not tested on any visible line; the checks that stop
// after r pixels are presumably on the missing lines - confirm.
100 __m128 z_4 = _mm_rcp_ps(w_4);
103 mu_4 = _mm_mul_ps(u_4, z_4);
104 mv_4 = _mm_mul_ps(v_4, z_4);
105 mv_4 = _mm_mul_ps(mv_4, texScale_4);
107 __m128i mui_4, mvi_4;
108 mui_4 = _mm_cvtps_epi32(mu_4);
109 mvi_4 = _mm_cvtps_epi32(mv_4);
110 mui_4 = _mm_and_si128(mui_4, texMaskU_4);
111 mvi_4 = _mm_and_si128(mvi_4, texMaskV_4);
112 mui_4 = _mm_add_epi32(mui_4, mvi_4);
114 __m128i zv = _mm_set1_epi32(0);
// First leftover pixel: fetch, widen to 16-bit lanes, multiply by color_4,
// shift right by 8, narrow back to bytes, and store the low 32 bits
// through p, which is then advanced by one element.
116 tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[0]]);
117 tp = _mm_unpacklo_epi8(tp, zv);
118 tp = _mm_mullo_epi16(tp, color_4);
119 tp = _mm_srli_epi16(tp, 8);
120 tp = _mm_packus_epi16(tp, zv);
121 *p++ = _mm_cvtsi128_si32(tp);
// Second leftover pixel (index from lane 1).
124 tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[1]]);
125 tp = _mm_unpacklo_epi8(tp, zv);
126 tp = _mm_mullo_epi16(tp, color_4);
127 tp = _mm_srli_epi16(tp, 8);
128 tp = _mm_packus_epi16(tp, zv);
129 *p++ = _mm_cvtsi128_si32(tp);
// Third leftover pixel (index from lane 2).
132 tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[2]]);
133 tp = _mm_unpacklo_epi8(tp, zv);
134 tp = _mm_mullo_epi16(tp, color_4);
135 tp = _mm_srli_epi16(tp, 8);
136 tp = _mm_packus_epi16(tp, zv);
137 *p++ = _mm_cvtsi128_si32(tp);
int tx
Definition: bitmap.h:81
LeBitmap frame
Definition: rasterizer_float.h:61
Represent an RGBA color.
Definition: color.h:42