x264-dsp
bitstream.h
1 /*****************************************************************************
2  * bitstream.h: bitstream writing
3  *****************************************************************************/
4 
5 #ifndef X264_BS_H
6 #define X264_BS_H
7 
/* One entry of a small variable-length-code table.
 * NOTE(review): field roles are inferred from their names (code word and
 * code length in bits) - confirm against the table definitions. */
typedef struct {
    uint8_t i_bits; /* presumably the code word */
    uint8_t i_size; /* presumably the code length in bits */
} vlc_t;
13 
/* Table entry like vlc_t, but with a 16-bit code field and an extra
 * index selecting the level table to use next. */
typedef struct {
    uint16_t i_bits;
    uint8_t i_size;
    uint8_t i_next; /* next level table to use */
} vlc_large_t;
21 
/* State of the bitstream writer. */
typedef struct bs_s {
    uint8_t *p_start; /* first byte of the (word-aligned) output area */
    uint8_t *p;       /* where the next word will be stored */
    uint8_t *p_end;   /* one past the last byte of the output area */

    uintptr_t cur_bits; /* bits accumulated but not yet stored */
    int i_left;         /* number of bits still available in cur_bits */
    int i_bits_encoded; /* RD only */
} bs_t;
31 
32 typedef struct
33 {
34  int last;
35  int mask;
36  dctcoef level[16];
38 
39 extern const vlc_t x264_coeff0_token[6];
40 extern const vlc_t x264_coeff_token[6][16][4];
41 extern const vlc_t x264_total_zeros[15][16];
42 extern const vlc_t x264_total_zeros_2x2_dc[3][4];
43 extern const vlc_t x264_total_zeros_2x4_dc[7][8];
44 
/* Table of bitstream routines selected at run time.
 * NOTE(review): the closing line of this typedef was missing from the
 * listing; the name is taken from the x264_bitstream_init() prototype below. */
typedef struct {
    /* NOTE(review): presumably copies [src, end) to dst with NAL escaping
     * applied and returns the new end of dst - confirm with the implementation. */
    uint8_t *(*nal_escape)(uint8_t *dst, uint8_t *src, uint8_t *end);
} x264_bitstream_function_t;

/* NOTE(review): presumably fills *pf with the routines suited to the given
 * cpu flags - the definition is outside this header. */
void x264_bitstream_init(int cpu, x264_bitstream_function_t *pf);
51 
52 /* A larger level table size theoretically could help a bit at extremely
53  * high bitrates, but the cost in cache is usually too high for it to be
54  * useful.
55  * This size appears to be optimal for QP18 encoding on a Nehalem CPU.
56  * FIXME: Do further testing? */
57 #define LEVEL_TABLE_SIZE 128
58 extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
59 
/* Each entry packs a code and its size into one uint32_t: the longest
 * possible set of zero run codes sums to 25 bits, so 25 bits of code plus
 * 5 bits of size fit with room to spare. */

extern uint32_t x264_run_before[1 << 16];
64 
65 static inline void bs_init(bs_t *s, void *p_data, int i_data) {
66  int offset = ((intptr_t)p_data & 3);
67  s->p = s->p_start = (uint8_t *)p_data - offset;
68  s->p_end = (uint8_t *)p_data + i_data;
69  s->i_left = (WORD_SIZE - offset) * 8;
70  s->cur_bits = endian_fix32(M32(s->p));
71  s->cur_bits >>= (4 - offset) * 8;
72 }
73 static inline int bs_pos(bs_t *s) {
74  return (8 * (s->p - s->p_start) + (WORD_SIZE * 8) - s->i_left);
75 }
76 
77 /* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32-bit aligned. */
78 static inline void bs_flush(bs_t *s) {
79  M32(s->p) = endian_fix32(s->cur_bits << (s->i_left & 31));
80  s->p += WORD_SIZE - (s->i_left >> 3);
81  s->i_left = WORD_SIZE * 8;
82 }
83 /* The inverse of bs_flush: prepare the bitstream to be written to again. */
84 static inline void bs_realign(bs_t *s) {
85  int offset = ((intptr_t)s->p & 3);
86  if (offset) {
87  s->p = (uint8_t *)s->p - offset;
88  s->i_left = (WORD_SIZE - offset) * 8;
89  s->cur_bits = endian_fix32(M32(s->p));
90  s->cur_bits >>= (4 - offset) * 8;
91  }
92 }
93 
94 static inline void bs_write(bs_t *s, int i_count, uint32_t i_bits) {
95  if (WORD_SIZE == 8) {
96  s->cur_bits = (s->cur_bits << i_count) | i_bits;
97  s->i_left -= i_count;
98  if (s->i_left <= 32) {
99 #if WORDS_BIGENDIAN
100  M32(s->p) = s->cur_bits >> (32 - s->i_left);
101 #else
102  M32(s->p) = endian_fix(s->cur_bits << s->i_left);
103 #endif
104  s->i_left += 32;
105  s->p += 4;
106  }
107  } else {
108  if (i_count < s->i_left) {
109  s->cur_bits = (s->cur_bits << i_count) | i_bits;
110  s->i_left -= i_count;
111  } else {
112  i_count -= s->i_left;
113  s->cur_bits = (s->cur_bits << s->i_left) | (i_bits >> i_count);
114  M32(s->p) = endian_fix(s->cur_bits);
115  s->p += 4;
116  s->cur_bits = i_bits;
117  s->i_left = 32 - i_count;
118  }
119  }
120 }
121 
122 /* Special case to eliminate branch in normal bs_write. */
123 /* Golomb never writes an even-size code, so this is only used in slice headers. */
124 static inline void bs_write32(bs_t *s, uint32_t i_bits) {
125  bs_write(s, 16, i_bits >> 16);
126  bs_write(s, 16, i_bits);
127 }
128 
129 static inline void bs_write1(bs_t *s, uint32_t i_bit) {
130  s->cur_bits <<= 1;
131  s->cur_bits |= i_bit;
132  s->i_left--;
133  if (s->i_left == WORD_SIZE * 8 - 32) {
134  M32(s->p) = endian_fix32(s->cur_bits);
135  s->p += 4;
136  s->i_left = WORD_SIZE * 8;
137  }
138 }
139 
140 static inline void bs_align_0(bs_t *s) {
141  bs_write(s, s->i_left & 7, 0);
142  bs_flush(s);
143 }
144 static inline void bs_align_1(bs_t *s) {
145  bs_write(s, s->i_left & 7, (1 << (s->i_left & 7)) - 1);
146  bs_flush(s);
147 }
148 static inline void bs_align_10(bs_t *s) {
149  if (s->i_left & 7)
150  bs_write(s, s->i_left & 7, 1 << ((s->i_left & 7) - 1));
151 }
152 
153 /* golomb functions */
154 
/* Exp-Golomb code lengths: for v >= 1, x264_ue_size_tab[v] is
 * 2*floor(log2(v)) + 1, i.e. the size in bits of the code whose
 * (code number + 1) equals v. Entry 0 holds 1. */
static const uint8_t x264_ue_size_tab[256] = {
    /*   0.. 15 */  1,  1,  3,  3,  5,  5,  5,  5,  7,  7,  7,  7,  7,  7,  7,  7,
    /*  16.. 31 */  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
    /*  32.. 47 */ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    /*  48.. 63 */ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    /*  64.. 79 */ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    /*  80.. 95 */ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    /*  96..111 */ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    /* 112..127 */ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    /* 128..143 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    /* 144..159 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    /* 160..175 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    /* 176..191 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    /* 192..207 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    /* 208..223 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    /* 224..239 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    /* 240..255 */ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
};
414 
415 /* ue(v): unsigned integer Exp-Golomb-coded syntax element with the left bit first. */
416 /* Works on values in range: [0 - 0xffffffff] */
417 static inline void bs_write_ue_big(bs_t *s, unsigned int val) {
418  int size = 0;
419  int tmp = ++val;
420  if (tmp >= 0x10000) {
421  size = 32;
422  tmp >>= 16;
423  }
424  if (tmp >= 0x100) {
425  size += 16;
426  tmp >>= 8;
427  }
428  size += x264_ue_size_tab[tmp];
429  bs_write(s, size >> 1, 0);
430  bs_write(s, (size >> 1) + 1, val);
431 }
432 
433 /* ue(v): unsigned integer Exp-Golomb-coded syntax element with the left bit first. */
434 /* Only works on values under 255. */
435 static inline void bs_write_ue(bs_t *s, int val) {
436  bs_write(s, x264_ue_size_tab[val + 1], val + 1);
437 }
438 
439 /* se(v): signed integer Exp-Golomb-coded syntax element with the left bit first. */
440 static inline void bs_write_se(bs_t *s, int val) {
441  int size = 0;
442  /* Faster than (val <= 0 ? -val*2+1 : val*2) */
443  /* 4 instructions on x86, 3 on ARM */
444  int tmp = 1 - val * 2;
445  if (tmp < 0)
446  tmp = val * 2;
447  val = tmp;
448 
449  if (tmp >= 0x100) {
450  size = 16;
451  tmp >>= 8;
452  }
453  size += x264_ue_size_tab[tmp];
454  bs_write(s, size, val);
455 }
456 
457 /* te(v): truncated Exp-Golomb-coded syntax element with left bit first. */
458 static inline void bs_write_te(bs_t *s, int x, int val) {
459  if (x == 1)
460  bs_write1(s, 1 ^ val);
461  else // if( x > 1 )
462  bs_write_ue(s, val);
463 }
464 
465 static inline void bs_rbsp_trailing(bs_t *s) {
466  bs_write1(s, 1);
467  bs_write(s, s->i_left & 7, 0);
468 }
469 
470 static ALWAYS_INLINE int bs_size_ue(unsigned int val) {
471  return x264_ue_size_tab[val + 1];
472 }
473 
474 static ALWAYS_INLINE int bs_size_ue_big(unsigned int val) {
475  if (val < 255)
476  return x264_ue_size_tab[val + 1];
477  else
478  return x264_ue_size_tab[(val + 1) >> 8] + 16;
479 }
480 
481 static ALWAYS_INLINE int bs_size_se(int val) {
482  int tmp = 1 - val * 2;
483  if (tmp < 0)
484  tmp = val * 2;
485  if (tmp < 256)
486  return x264_ue_size_tab[tmp];
487  else
488  return x264_ue_size_tab[tmp >> 8] + 16;
489 }
490 
491 static ALWAYS_INLINE int bs_size_te(int x, int val) {
492  if (x == 1)
493  return 1;
494  else // if( x > 1 )
495  return x264_ue_size_tab[val + 1];
496 }
497 
498 #endif
Definition: bitstream.h:22
Definition: bitstream.h:15
Definition: bitstream.h:9
Definition: bitstream.h:46
Definition: bitstream.h:33