79
82
/* some static function declarations */
80
83
static void srl_dump_sv(pTHX_ srl_encoder_t *enc, SV *src);
81
static void srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8);
84
SRL_STATIC_INLINE void srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src);
85
SRL_STATIC_INLINE void srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8);
82
86
SRL_STATIC_INLINE void srl_fixup_weakrefs(pTHX_ srl_encoder_t *enc);
83
87
SRL_STATIC_INLINE void srl_dump_av(pTHX_ srl_encoder_t *enc, AV *src, U32 refcnt);
84
88
SRL_STATIC_INLINE void srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcnt);
89
93
SRL_STATIC_INLINE PTABLE_t *srl_init_string_hash(srl_encoder_t *enc);
90
94
SRL_STATIC_INLINE PTABLE_t *srl_init_ref_hash(srl_encoder_t *enc);
91
95
SRL_STATIC_INLINE PTABLE_t *srl_init_weak_hash(srl_encoder_t *enc);
93
#define SRL_GET_STR_SEENHASH(enc) ( (enc)->str_seenhash == NULL \
96
SRL_STATIC_INLINE HV *srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc);
98
#define SRL_GET_STR_DEDUPER_HV(enc) ( (enc)->string_deduper_hv == NULL \
99
? srl_init_string_deduper_hv(aTHX_ enc) \
100
: (enc)->string_deduper_hv )
102
#define SRL_GET_STR_PTR_SEENHASH(enc) ( (enc)->str_seenhash == NULL \
94
103
? srl_init_string_hash(enc) \
95
104
: (enc)->str_seenhash )
232
244
/* load options */
233
245
if (opt != NULL) {
234
246
int undef_unknown = 0;
235
248
/* SRL_F_SHARED_HASHKEYS on by default */
236
249
svp = hv_fetchs(opt, "no_shared_hashkeys", 0);
237
250
if ( !svp || !SvTRUE(*svp) )
238
enc->flags |= SRL_F_SHARED_HASHKEYS;
251
SRL_ENC_SET_OPTION(enc, SRL_F_SHARED_HASHKEYS);
240
253
svp = hv_fetchs(opt, "croak_on_bless", 0);
241
254
if ( svp && SvTRUE(*svp) )
242
enc->flags |= SRL_F_CROAK_ON_BLESS;
255
SRL_ENC_SET_OPTION(enc, SRL_F_CROAK_ON_BLESS);
257
svp = hv_fetchs(opt, "no_bless_objects", 0);
258
if ( svp && SvTRUE(*svp) )
259
SRL_ENC_SET_OPTION(enc, SRL_F_NO_BLESS_OBJECTS);
244
261
svp = hv_fetchs(opt, "snappy", 0);
245
if ( svp && SvTRUE(*svp) )
246
enc->flags |= SRL_F_COMPRESS_SNAPPY;
262
if ( svp && SvTRUE(*svp) ) {
264
SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY);
248
267
svp = hv_fetchs(opt, "snappy_incr", 0);
249
if ( svp && SvTRUE(*svp) )
250
enc->flags |= SRL_F_COMPRESS_SNAPPY_INCREMENTAL;
268
if ( svp && SvTRUE(*svp) ) {
270
croak("'snappy' and 'snappy_incr' options are mutually exclusive");
271
SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL);
252
274
svp = hv_fetchs(opt, "undef_unknown", 0);
253
275
if ( svp && SvTRUE(*svp) ) {
254
276
undef_unknown = 1;
255
enc->flags |= SRL_F_UNDEF_UNKNOWN;
277
SRL_ENC_SET_OPTION(enc, SRL_F_UNDEF_UNKNOWN);
258
280
svp = hv_fetchs(opt, "sort_keys", 0);
259
if ( svp && SvTRUE(*svp) ) {
261
enc->flags |= SRL_F_SORT_KEYS;
281
if ( svp && SvTRUE(*svp) )
282
SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS);
284
svp = hv_fetchs(opt, "aliased_dedupe_strings", 0);
285
if ( svp && SvTRUE(*svp) )
286
SRL_ENC_SET_OPTION(enc, SRL_F_ALIASED_DEDUPE_STRINGS | SRL_F_DEDUPE_STRINGS);
288
svp = hv_fetchs(opt, "dedupe_strings", 0);
289
if ( svp && SvTRUE(*svp) )
290
SRL_ENC_SET_OPTION(enc, SRL_F_DEDUPE_STRINGS);
264
293
svp = hv_fetchs(opt, "stringify_unknown", 0);
265
294
if ( svp && SvTRUE(*svp) ) {
266
if (expect_false( undef_unknown )) {
295
if (expect_false( undef_unknown ))
267
296
croak("'undef_unknown' and 'stringify_unknown' "
268
297
"options are mutually exclusive");
270
enc->flags |= SRL_F_STRINGIFY_UNKNOWN;
298
SRL_ENC_SET_OPTION(enc, SRL_F_STRINGIFY_UNKNOWN);
273
301
svp = hv_fetchs(opt, "warn_unknown", 0);
274
302
if ( svp && SvTRUE(*svp) ) {
275
enc->flags |= SRL_F_WARN_UNKNOWN;
303
SRL_ENC_SET_OPTION(enc, SRL_F_WARN_UNKNOWN);
276
304
if (SvIV(*svp) < 0)
277
enc->flags |= SRL_F_NOWARN_UNKNOWN_OVERLOAD;
305
SRL_ENC_SET_OPTION(enc, SRL_F_NOWARN_UNKNOWN_OVERLOAD);
280
308
svp = hv_fetchs(opt, "snappy_threshold", 0);
328
356
return enc->weak_seenhash;
359
SRL_STATIC_INLINE HV *
360
srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc)
362
enc->string_deduper_hv = newHV();
363
return enc->string_deduper_hv;
366
/* Lazy working buffer alloc */
367
SRL_STATIC_INLINE void
368
srl_init_snappy_workmem(pTHX_ srl_encoder_t *enc)
370
/* Lazy working buffer alloc */
371
if (expect_false( enc->snappy_workmem == NULL )) {
372
/* Cleaned up automatically by the cleanup handler */
373
Newx(enc->snappy_workmem, CSNAPPY_WORKMEM_BYTES, char);
374
if (enc->snappy_workmem == NULL)
375
croak("Out of memory!");
333
381
srl_write_header(pTHX_ srl_encoder_t *enc)
484
srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src)
531
/* Prepare encoder for encoding: Clone if already in use since
532
* encoders aren't "reentrant". Set as in use and register cleanup
533
* routine with Perl. */
534
SRL_STATIC_INLINE srl_encoder_t *
535
srl_prepare_encoder(pTHX_ srl_encoder_t *enc)
486
if (DEBUGHACK) warn("== start dump");
488
537
/* Check whether encoder is in use and create a new one on the
489
538
* fly if necessary. Should only happen in bizarre edge cases... hopefully. */
490
539
if (SRL_ENC_HAVE_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY)) {
498
547
/* Register our structure for destruction on scope exit */
499
548
SAVEDESTRUCTOR_X(&srl_destructor_hook, (void *)enc);
554
/* Update a varint anywhere in the output stream with defined start and end
555
* positions. This can produce non-canonical varints and is useful for filling
556
* pre-allocated varints. */
557
SRL_STATIC_INLINE void
558
srl_update_varint_from_to(pTHX_ char *varint_start, char *varint_end, UV number)
560
while (number >= 0x80) { /* while we are larger than 7 bits long */
561
*varint_start++ = (number & 0x7f) | 0x80; /* write out the least significant 7 bits, set the high bit */
562
number = number >> 7; /* shift off the 7 least significant bits */
564
/* if it is the same size we can use a canonical varint */
565
if ( varint_start == varint_end ) {
566
*varint_start = number; /* encode the last 7 bits without the high bit being set */
568
/* if not we produce a non-canonical varint, basically we stuff
569
* 0 bits (via 0x80) into the "tail" of the varint, until we can
570
* stick in a null to terminate the sequence. This means that the
571
* varint is effectively "self-padding", and we only need special
572
* logic in the encoder - a decoder will happily process a non-canonical
573
* varint with no problem */
574
*varint_start++ = (number & 0x7f) | 0x80;
575
while ( varint_start < varint_end )
576
*varint_start++ = 0x80;
582
/* Resets the Snappy-compression header flag to OFF.
583
* Obviously requires that a Sereal header was already written to the
584
* encoder's output buffer. */
585
SRL_STATIC_INLINE void
586
srl_reset_snappy_header_flag(srl_encoder_t *enc)
588
/* sizeof(SRL_MAGIC_STRING) counts the string's trailing \0, hence the "- 1" below */
589
char *flags_and_version_byte = enc->buf_start + sizeof(SRL_MAGIC_STRING) - 1;
590
/* disable snappy flag in header */
591
*flags_and_version_byte = SRL_PROTOCOL_ENCODING_RAW |
592
(*flags_and_version_byte & SRL_PROTOCOL_VERSION_MASK);
596
srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src)
598
enc = srl_prepare_encoder(aTHX_ enc);
501
600
if (!SRL_ENC_HAVE_OPTION(enc, (SRL_F_COMPRESS_SNAPPY | SRL_F_COMPRESS_SNAPPY_INCREMENTAL))) {
502
601
srl_write_header(aTHX_ enc);
503
602
srl_dump_sv(aTHX_ enc, src);
510
609
/* Alas, have to write entire packet first since the header length
511
610
* will determine offsets. */
512
611
srl_write_header(aTHX_ enc);
513
sereal_header_len = enc->pos - enc->buf_start;
612
sereal_header_len = BUF_POS_OFS(enc);
514
613
srl_dump_sv(aTHX_ enc, src);
515
614
srl_fixup_weakrefs(aTHX_ enc);
516
615
assert(BUF_POS_OFS(enc) > sereal_header_len);
517
616
uncompressed_body_length = BUF_POS_OFS(enc) - sereal_header_len;
519
/* Don't bother with snappy compression at all if we have less than $threshold bytes of payload */
520
618
if (enc->snappy_threshold > 0
521
619
&& uncompressed_body_length < (STRLEN)enc->snappy_threshold)
523
/* sizeof(SRL_MAGIC_STRING) counts the string's trailing \0, hence the "- 1" below */
524
char *flags_and_version_byte = enc->buf_start + sizeof(SRL_MAGIC_STRING) - 1;
525
/* disable snappy flag in header */
526
*flags_and_version_byte = SRL_PROTOCOL_ENCODING_RAW |
527
(*flags_and_version_byte & SRL_PROTOCOL_VERSION_MASK);
621
/* Don't bother with snappy compression at all if we have less than $threshold bytes of payload */
622
srl_reset_snappy_header_flag(enc);
624
else { /* do snappy compression of body */
531
626
char *varint_start= NULL;
532
627
char *varint_end;
535
630
/* Get uncompressed payload and total packet output (after compression) lengths */
536
631
dest_len = csnappy_max_compressed_length(uncompressed_body_length) + sereal_header_len + 1;
538
if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL ) ) {
633
/* Will have to embed compressed packet length as varint if in incremental mode */
634
if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL ) )
539
635
dest_len += SRL_MAX_VARINT_LENGTH;
542
/* Lazy working buffer alloc */
543
if (expect_false( enc->snappy_workmem == NULL )) {
544
/* Cleaned up automatically by the cleanup handler */
545
Newx(enc->snappy_workmem, CSNAPPY_WORKMEM_BYTES, char);
546
if (enc->snappy_workmem == NULL)
547
croak("Out of memory!");
637
srl_init_snappy_workmem(aTHX_ enc);
550
639
/* Back up old buffer and allocate new one with correct size */
551
640
old_buf = enc->buf_start;
561
650
Copy(old_buf, enc->pos, sereal_header_len, char);
562
651
enc->pos += sereal_header_len;
653
/* Embed compressed packet length */
564
654
if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL ) ) {
565
655
varint_start= enc->pos;
566
656
srl_buf_cat_varint_nocheck(aTHX_ enc, 0, dest_len);
567
657
varint_end= enc->pos - 1;
571
* fprintf(stderr, "'%u' %u %u\n", enc->pos - enc->buf_start, uncompressed_body_length, (uncompressed_body_length+sereal_header_len));
572
* fprintf(stdout, "%7s!%1s\n", old_buf, old_buf+6);
574
660
csnappy_compress(old_buf+sereal_header_len, (uint32_t)uncompressed_body_length, enc->pos, &dest_len,
575
661
enc->snappy_workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO);
577
if ( varint_start ) {
578
/* overwrite the max size varint with the real size of the compressed data */
580
while (n >= 0x80) { /* while we are larger than 7 bits long */
581
*varint_start++ = (n & 0x7f) | 0x80; /* write out the least significant 7 bits, set the high bit */
582
n = n >> 7; /* shift off the 7 least significant bits */
584
/* if it is the same size we can use a canonical varint */
585
if ( varint_start == varint_end ) {
586
*varint_start = n; /* encode the last 7 bits without the high bit being set */
588
/* if not we produce a non-canonical varint, basically we stuff
589
* 0 bits (via 0x80) into the "tail" of the varint, until we can
590
* stick in a null to terminate the sequence. This means that the
591
* varint is effectively "self-padding", and we only need special
592
* logic in the encoder - a decoder will happily process a non-canonical
593
* varint with no problem */
594
*varint_start++ = (n & 0x7f) | 0x80;
595
while ( varint_start < varint_end )
596
*varint_start++ = 0x80;
601
/* fprintf(stderr, "%u, %u %u %u\n", dest_len, enc->pos[0], enc->pos[1], enc->pos[2]); */
602
662
assert(dest_len != 0);
664
/* overwrite the max size varint with the real size of the compressed data */
666
srl_update_varint_from_to(aTHX_ varint_start, varint_end, dest_len);
603
668
Safefree(old_buf);
604
669
enc->pos += dest_len;
605
670
assert(enc->pos <= enc->buf_end);
608
if (expect_false( dest_len >= uncompressed_length )) {
609
/* FAIL. Swap old buffer back. Unset Snappy option */
610
char *compressed_buf = enc->buf_start;
611
char *flags_and_version_byte;
612
enc->buf_start = old_buf;
613
enc->pos = old_buf + sereal_header_len + uncompressed_length;
614
/* disable snappy flag in header */
615
flags_and_version_byte = enc->buf_start + sizeof(SRL_MAGIC_STRING) - 1;
616
flags_and_version_byte = SRL_PROTOCOL_ENCODING_RAW |
617
(flags_and_version_byte & SRL_PROTOCOL_VERSION_MASK);
621
enc->pos += dest_len;
672
/* TODO If compression didn't help, swap back to old, uncompressed buffer */
673
} /* end of "actually do snappy compression" */
674
} /* end of "want snappy compression?" */
627
677
* SRL_ENC_RESET_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY);
628
678
* here because we're relying on the SAVEDESTRUCTOR_X call. */
629
if (DEBUGHACK) warn("== end dump");
632
681
SRL_STATIC_INLINE void
1023
SRL_STATIC_INLINE void
1024
srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src)
1027
const char const *str= SvPV(src, len);
1028
if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_DEDUPE_STRINGS) && len > 3 ) {
1029
HV *string_deduper_hv= SRL_GET_STR_DEDUPER_HV(enc);
1030
HE *dupe_offset_he= hv_fetch_ent(string_deduper_hv, src, 1, 0);
1031
if (!dupe_offset_he) {
1032
croak("out of memory (hv_fetch_ent returned NULL)");
1034
const char out_tag= SRL_ENC_HAVE_OPTION(enc, SRL_F_ALIASED_DEDUPE_STRINGS)
1037
SV *ofs_sv= HeVAL(dupe_offset_he);
1038
if (SvIOK(ofs_sv)) {
1039
/* emit copy or alias */
1040
srl_buf_cat_varint(aTHX_ enc, out_tag, SvIV(ofs_sv));
1042
} else if (SvUOK(ofs_sv)) {
1043
srl_buf_cat_varint(aTHX_ enc, out_tag, SvUV(ofs_sv));
1046
/* start tracking this string */
1047
sv_setuv(ofs_sv, (UV)BUF_POS_OFS(enc));
1051
srl_dump_pv(aTHX_ enc, str, len, SvUTF8(src));
1054
SRL_STATIC_INLINE void
976
1055
srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8)
978
1057
BUF_SIZE_ASSERT(enc, 1 + SRL_MAX_VARINT_LENGTH + src_len); /* overallocate a bit sometimes */