Mill Computing, Inc. Forums The Mill Architecture Speculative execution Reply To: Speculative execution

Laurent_Birtz
Member
Post count: 10

For the intra case.

The goal is to compute the content of this 4x4 block:

``````
[XXXX]
[XXXX]
[XXXX]
[XXXX]
``````

The block is computed from a vector of pixels:
`[XXXXXXXX]`

``````
// Fill the bs x bs destination block one row at a time. The angle is
// accumulated per row in 1/32 pixel units: the upper bits select where the
// row starts in the reference vector, the low 5 bits are the fractional part.
int angle_sum = 0;
for (int row = 0; row < bs; row++)
{
    angle_sum += angle;

    const int base = angle_sum >> 5;    // Whole-pixel offset into ref.
    const int weight = angle_sum & 31;  // Fractional position, 0..31.
    const int row_start = row*bs;       // First destination index of this row.

    if (!weight)
    {
        // The row falls exactly on reference pixels: plain copy.
        for (int col = 0; col < bs; col++)
            dst[row_start + col] = ref[base + col];
    }
    else
    {
        // Blend the two neighbouring reference pixels, rounding to nearest
        // (+16 before dividing by 32).
        for (int col = 0; col < bs; col++)
            dst[row_start + col] = ((32 - weight)*ref[base + col] + weight*ref[base + col + 1] + 16) >> 5;
    }
}
``````

``````
for (int y = 0; y < height; y++, src0 += stride0, src1 += stride1)
for (int x = 0; x < width; x++)
``````

For the DCT case.

``````
// One 1-D pass over an 8x8 block of 16-bit values.
//
// Each group of 8 consecutive input values (one input row) produces four
// outputs, written to output rows 1, 3, 5 and 7 of the column with the same
// index, so the result is stored transposed. Only those four odd rows are
// written by this function; the other output positions are left untouched.
//
// dst:   output block, 64 entries, row stride 8.
// src:   input block, 64 entries, row stride 8.
// shift: right shift applied to each rounded sum; must be at least 1 because
//        the rounding term is 1 << (shift - 1).
void fenc_dct_8_1d(int16_t *dst, int16_t *src, int shift)
{
    // Weights applied to the four mirrored differences, one line per output row.
    static const int weights[4][4] = {
        { 89,  75,  50,  18 },
        { 75, -18, -89, -50 },
        { 50, -89,  18,  75 },
        { 18, -50,  75, -89 },
    };
    const int rounding = 1 << (shift - 1);

    for (int line = 0; line < 8; line++)
    {
        const int16_t *in = src + 8*line;

        // Differences between mirrored samples: in[0]-in[7], in[1]-in[6], ...
        // All inputs are read before any output is stored.
        int mirrored[4];
        for (int k = 0; k < 4; k++)
            mirrored[k] = in[k] - in[7 - k];

        for (int out = 0; out < 4; out++)
        {
            int acc = rounding;
            for (int k = 0; k < 4; k++)
                acc += weights[out][k]*mirrored[k];

            // Output rows 1, 3, 5, 7 (offsets 8, 24, 40, 56) of column 'line'.
            dst[line + 8*(2*out + 1)] = acc >> shift;
        }
    }
}

// Per the original note, this is the function that exists as an assembly
// routine. It transforms one 8x8 block in three steps:
//   1. fenc_get_block_residual() fills a 64-entry buffer from src and pred
//      (presumably the difference src - pred; confirm against its definition).
//   2. A first 1-D pass writes into a temporary buffer.
//   3. A second 1-D pass writes into dst.
// Block size is 8 (log2 = 3) and the bit depth is fixed at 8, which gives
// shifts of 2 for the first pass and 9 for the second.
void fenc_dct_8_c(int16_t *dst, f265_pix *src, int src_stride, f265_pix *pred, int pred_stride)
{
    enum
    {
        LG_BLOCK_SIZE = 3,
        BIT_DEPTH = 8,
        BLOCK_SIZE = 1 << LG_BLOCK_SIZE,
        NB_SAMPLES = 1 << (LG_BLOCK_SIZE << 1),
        FIRST_PASS_SHIFT = LG_BLOCK_SIZE + BIT_DEPTH - 9,
        SECOND_PASS_SHIFT = LG_BLOCK_SIZE + 6
    };

    int16_t residual[NB_SAMPLES];
    int16_t first_pass[NB_SAMPLES];

    fenc_get_block_residual(residual, src, src_stride, pred, pred_stride, BLOCK_SIZE);
    fenc_dct_8_1d(first_pass, residual, FIRST_PASS_SHIFT);
    fenc_dct_8_1d(dst, first_pass, SECOND_PASS_SHIFT);
}
``````