Skip to content

Commit

Permalink
add two new types, unicode strings, and other encoded strings
Browse files Browse the repository at this point in the history
This commit adds two new types, one for unicode strings and one for
other encoded strings.  Unicode strings have no extra wire protocol
overhead, where "other" strings send the encoding name along with the
string.
  • Loading branch information
tenderlove committed Apr 14, 2016
1 parent 4e78dc4 commit aa084e7
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 34 deletions.
44 changes: 21 additions & 23 deletions ext/bert/c/decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#define ERL_BIN 109
#define ERL_SMALL_BIGNUM 110
#define ERL_LARGE_BIGNUM 111
#define ERL_ENC_STRING 112
#define ERL_UNICODE_STRING 113
#define ERL_VERSION 131
#define ERL_VERSION2 132

#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM)
#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_UNICODE_STRING)
#define BERT_TYPE_OFFSET (ERL_SMALL_INT)

static VALUE rb_mBERT;
Expand Down Expand Up @@ -47,7 +49,9 @@ static VALUE bert_read_nil(struct bert_buf *buf);
static VALUE bert_read_string(struct bert_buf *buf);
static VALUE bert_read_list(struct bert_buf *buf);
static VALUE bert_read_bin(struct bert_buf *buf);
static VALUE bert_read_bin_v2(struct bert_buf *buf);
static VALUE bert_read_enc_string(struct bert_buf *buf);
static VALUE bert_read_unicode_string(struct bert_buf *buf);
static VALUE bert_read_unicode_string(struct bert_buf *buf);
static VALUE bert_read_sbignum(struct bert_buf *buf);
static VALUE bert_read_lbignum(struct bert_buf *buf);

Expand All @@ -66,25 +70,9 @@ static bert_ptr bert_callbacks[] = {
&bert_read_list,
&bert_read_bin,
&bert_read_sbignum,
&bert_read_lbignum
};

static bert_ptr bert_callbacks_v2[] = {
&bert_read_sint,
&bert_read_int,
&bert_read_float,
&bert_read_atom,
&bert_read_invalid,
&bert_read_invalid,
&bert_read_invalid,
&bert_read_stuple,
&bert_read_ltuple,
&bert_read_nil,
&bert_read_string,
&bert_read_list,
&bert_read_bin_v2,
&bert_read_sbignum,
&bert_read_lbignum
&bert_read_lbignum,
&bert_read_enc_string,
&bert_read_unicode_string
};

static inline uint8_t bert_buf_read8(struct bert_buf *buf)
Expand Down Expand Up @@ -318,7 +306,17 @@ static VALUE bert_read_bin(struct bert_buf *buf)
return rb_bin;
}

static VALUE bert_read_bin_v2(struct bert_buf *buf)
/*
 * Decode callback for ERL_UNICODE_STRING terms.
 *
 * Delegates the actual read to bert_read_bin() and then associates the
 * UTF-8 encoding with the resulting Ruby value before returning it.
 */
static VALUE bert_read_unicode_string(struct bert_buf *buf)
{
	VALUE rb_utf8_str = bert_read_bin(buf);

	rb_enc_associate(rb_utf8_str, rb_utf8_encoding());
	return rb_utf8_str;
}

static VALUE bert_read_enc_string(struct bert_buf *buf)
{
uint8_t type;
VALUE rb_bin, enc;
Expand Down Expand Up @@ -524,7 +522,7 @@ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string)
buf.callbacks = bert_callbacks;
break;
case ERL_VERSION2:
buf.callbacks = bert_callbacks_v2;
buf.callbacks = bert_callbacks;
break;
default:
rb_raise(rb_eTypeError, "Invalid magic value for BERT string");
Expand Down
32 changes: 28 additions & 4 deletions lib/bert/decode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ def read_bin
length = read_4
read_string(length)
end

# Reads an Erlang STRING term and returns it as an array of byte values.
#
# Consumes the tag via read_1, the length via read_2, then the payload via
# read_string. Raises (through fail) when the tag is not STRING.
def read_erl_string
  tag = read_1
  fail("Invalid Type, not an erlang string") unless tag == STRING

  size = read_2
  raw = read_string(size)
  # One 'C' directive per expected byte, so the result always has `size` slots.
  raw.unpack('C' * size)
end
end

def self.impl
Expand Down Expand Up @@ -52,6 +58,8 @@ def read_any_raw
when STRING then read_erl_string
when LIST then read_list
when BIN then read_bin
when ENC_STRING then read_enc_string
when UNICODE_STRING then read_unicode_string
else
fail("Unknown term tag: #{peek_1}")
end
Expand Down Expand Up @@ -238,6 +246,14 @@ def read_nil
[]
end

# Reads a UNICODE_STRING term and returns its payload tagged as UTF-8.
#
# Consumes the tag via read_1 and the length via read_4; the payload comes
# from read_string and is re-labelled (not transcoded) with force_encoding.
# Raises (through fail) when the tag is not UNICODE_STRING.
def read_unicode_string
  tag = read_1
  fail("Invalid Type, not a unicode string") unless tag == UNICODE_STRING

  byte_count = read_4
  # force_encoding returns its receiver, so this is also the return value.
  read_string(byte_count).force_encoding("UTF-8")
end

def read_erl_string
fail("Invalid Type, not an erlang string") unless read_1 == STRING
length = read_2
Expand All @@ -255,16 +271,24 @@ def read_list
# Reads a BIN term and returns its payload as produced by read_string.
#
# Consumes the tag via read_1 and the length via read_4. Raises (through
# fail) when the tag is not BIN.
def read_bin
  tag = read_1
  fail("Invalid Type, not an erlang binary") unless tag == BIN

  byte_count = read_4
  read_string(byte_count)
end

# Single failure hook for the decoder: raises with the given message.
def fail(str)
  raise(str)
end

private

# Reads an ENC_STRING term: a length-prefixed payload followed by a BIN term
# holding the name of the payload's encoding.
#
# Returns the payload string with that encoding applied via force_encoding
# (bytes are re-labelled, not transcoded). Raises (through fail) when the
# leading tag is not ENC_STRING or when the encoding name is not wrapped in a
# BIN term. String#force_encoding itself raises ArgumentError for an encoding
# name Ruby does not know; that error is not intercepted here.
def read_enc_string
  # Distinct message so a bad outer tag is not confused with a bad
  # encoding-name tag below.
  fail("Invalid Type, not an encoded string") unless read_1 == ENC_STRING
  length = read_4
  payload = read_string(length)

  # The encoding name travels as an ordinary binary term.
  fail("Invalid Type, not an erlang binary") unless read_1 == BIN
  length = read_4
  payload.force_encoding(read_string(length))
end

def fail(str)
raise str
end
end
end
26 changes: 23 additions & 3 deletions lib/bert/encode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,35 @@ class Encode

class V2 < Encode
def write_binary(data)
super
enc = data.encoding
case enc
when ::Encoding::UTF_8, ::Encoding::US_ASCII
write_unicode_string data
when ::Encoding::ASCII_8BIT
super
else
write_enc_string data
end
end

private

# Emits +data+ as a UNICODE_STRING term: the tag through write_1, the byte
# size through write_4, then the string itself through write_string.
# No encoding name is written for this term type.
def write_unicode_string(data)
  write_1(UNICODE_STRING)
  byte_count = data.bytesize
  write_4(byte_count)
  write_string(data)
end

# Emits +data+ as an ENC_STRING term followed by its encoding name.
#
# First the ENC_STRING tag, byte size and payload are written; then the name
# of data's encoding is written as a BIN term (tag, byte size, name).
def write_enc_string(data)
  write_1(ENC_STRING)
  write_4(data.bytesize)
  write_string(data)

  encoding_name = data.encoding.name
  write_1(BIN)
  write_4(encoding_name.bytesize)
  write_string(encoding_name)
end

private

def version_header
VERSION_2
end
Expand Down
3 changes: 2 additions & 1 deletion lib/bert/types.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ module Types
STRING = 107
LIST = 108
BIN = 109
ENC_STRING = 112
UNICODE_STRING = 113
FUN = 117
NEW_FUN = 112
MAGIC = 131
VERSION_2 = 132
MAX_INT = (1 << 27) -1
Expand Down
4 changes: 2 additions & 2 deletions test/bert_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class BertTest < Test::Unit::TestCase
setup do
@old_version = BERT::Encode.version
BERT::Encode.version = :v2
@bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04namem\x00\x00\x00\x03TPWm\x00\x00\x00\x05UTF-8jl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexm\x00\x00\x00\x03catm\x00\x00\x00\bUS-ASCIIl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b
@ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,109,0,0,0,5,85,84,70,45,56,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,109,0,0,0,8,85,83,45,65,83,67,73,73,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
@bert = "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04nameq\x00\x00\x00\x03TPWjl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexq\x00\x00\x00\x03catl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b
@ebin = "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,113,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,113,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>"
end

teardown do
Expand Down
2 changes: 1 addition & 1 deletion test/encoder_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class EncoderTest < Test::Unit::TestCase

should 'handle utf8 strings' do
str = "été".encode 'UTF-8'
bert = [132, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169, 109, 0, 0, 0, 5, 85, 84, 70, 45, 56].pack('C*')
bert = [132, 113, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
assert_equal bert, BERT::Encoder.encode("été")
end

Expand Down

0 comments on commit aa084e7

Please sign in to comment.