le3d - LightEngine 3D
A straightforward C++ 3D software engine for real-time graphics
flattexzc.h
Go to the documentation of this file.
1 
33 inline void LeRasterizer::fillFlatTexZC(int y, float x1, float x2, float w1, float w2, float u1, float u2, float v1, float v2)
34 {
35  float d = x2 - x1;
36  if (d == 0.0f) return;
37 
38  float id = 1.0f / d;
39  float au = (u2 - u1) * id;
40  float av = (v2 - v1) * id;
41  float aw = (w2 - w1) * id;
42 
43  __m128 u_4 = _mm_set_ps(u1 + 3.0f * au, u1 + 2.0f * au, u1 + au, u1);
44  __m128 v_4 = _mm_set_ps(v1 + 3.0f * av, v1 + 2.0f * av, v1 + av, v1);
45  __m128 w_4 = _mm_set_ps(w1 + 3.0f * aw, w1 + 2.0f * aw, w1 + aw, w1);
46 
47  __m128 au_4 = _mm_set1_ps(au * 4.0f);
48  __m128 av_4 = _mm_set1_ps(av * 4.0f);
49  __m128 aw_4 = _mm_set1_ps(aw * 4.0f);
50 
51  int xb = (int)(x1);
52  int xe = (int)(x2 + 1.9999f);
53  if (xe > frame.tx) xe = frame.tx;
54 
55  LeColor * p = xb + ((int) y) * frame.tx + pixels;
56  int b = (xe - xb) >> 2;
57  int r = (xe - xb) & 0x3;
58 
59  for (int x = 0; x < b; x ++) {
60  __m128 z_4 = _mm_rcp_ps(w_4);
61 
62  __m128 mu_4, mv_4;
63  mu_4 = _mm_mul_ps(u_4, z_4);
64  mv_4 = _mm_mul_ps(v_4, z_4);
65  mv_4 = _mm_mul_ps(mv_4, texScale_4);
66 
67  __m128i mui_4, mvi_4;
68  mui_4 = _mm_cvtps_epi32(mu_4);
69  mvi_4 = _mm_cvtps_epi32(mv_4);
70  mui_4 = _mm_and_si128(mui_4, texMaskU_4);
71  mvi_4 = _mm_and_si128(mvi_4, texMaskV_4);
72  mui_4 = _mm_add_epi32(mui_4, mvi_4);
73 
74  __m128i zv = _mm_set1_epi32(0);
75  __m128i tp, tq, t1, t2;
76  tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *) &mui_4)[0]]);
77  tq = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *) &mui_4)[1]]);
78  t1 = _mm_unpacklo_epi32(tp, tq);
79  t1 = _mm_unpacklo_epi8(t1, zv);
80  t1 = _mm_mullo_epi16(t1, color_4);
81  t1 = _mm_srli_epi16(t1, 8);
82 
83  tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[2]]);
84  tq = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[3]]);
85  t2 = _mm_unpacklo_epi32(tp, tq);
86  t2 = _mm_unpacklo_epi8(t2, zv);
87  t2 = _mm_mullo_epi16(t2, color_4);
88  t2 = _mm_srli_epi16(t2, 8);
89 
90  tp = _mm_packus_epi16(t1, t2);
91  _mm_storeu_si128((__m128i *) p, tp);
92  p += 4;
93 
94  w_4 = _mm_add_ps(w_4, aw_4);
95  u_4 = _mm_add_ps(u_4, au_4);
96  v_4 = _mm_add_ps(v_4, av_4);
97  }
98 
99  if (r == 0) return;
100  __m128 z_4 = _mm_rcp_ps(w_4);
101 
102  __m128 mu_4, mv_4;
103  mu_4 = _mm_mul_ps(u_4, z_4);
104  mv_4 = _mm_mul_ps(v_4, z_4);
105  mv_4 = _mm_mul_ps(mv_4, texScale_4);
106 
107  __m128i mui_4, mvi_4;
108  mui_4 = _mm_cvtps_epi32(mu_4);
109  mvi_4 = _mm_cvtps_epi32(mv_4);
110  mui_4 = _mm_and_si128(mui_4, texMaskU_4);
111  mvi_4 = _mm_and_si128(mvi_4, texMaskV_4);
112  mui_4 = _mm_add_epi32(mui_4, mvi_4);
113 
114  __m128i zv = _mm_set1_epi32(0);
115  __m128i tp;
116  tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[0]]);
117  tp = _mm_unpacklo_epi8(tp, zv);
118  tp = _mm_mullo_epi16(tp, color_4);
119  tp = _mm_srli_epi16(tp, 8);
120  tp = _mm_packus_epi16(tp, zv);
121  *p++ = _mm_cvtsi128_si32(tp);
122 
123  if (r == 1) return;
124  tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[1]]);
125  tp = _mm_unpacklo_epi8(tp, zv);
126  tp = _mm_mullo_epi16(tp, color_4);
127  tp = _mm_srli_epi16(tp, 8);
128  tp = _mm_packus_epi16(tp, zv);
129  *p++ = _mm_cvtsi128_si32(tp);
130 
131  if (r == 2) return;
132  tp = _mm_loadl_epi64((__m128i *) &texDiffusePixels[((uint32_t *)&mui_4)[2]]);
133  tp = _mm_unpacklo_epi8(tp, zv);
134  tp = _mm_mullo_epi16(tp, color_4);
135  tp = _mm_srli_epi16(tp, 8);
136  tp = _mm_packus_epi16(tp, zv);
137  *p++ = _mm_cvtsi128_si32(tp);
138 
139 }
int tx
Definition: bitmap.h:81
LeBitmap frame
Definition: rasterizer_float.h:61
Represent an RGBA color.
Definition: color.h:42