package Net::DNS::Text;

use strict;
use warnings;

our $VERSION = (qw$Id: Text.pm 1855 2021-11-26 11:33:48Z willem $)[2];




use integer;
use Carp;


use constant ASCII => ref eval {
	require Encode;
	Encode::find_encoding('ascii');
};

use constant UTF8 => scalar eval {	## not UTF-EBCDIC  [see Unicode TR#16 3.6]
	Encode::encode_utf8( chr(182) ) eq pack( 'H*', 'C2B6' );
};



my ( %escape, %escapeUTF8, %unescape );	## precalculated escape tables

sub new {
	my $self = bless [], shift;
	croak 'argument undefined' unless defined $_[0];

	local $_ = &_encode_utf8;

	s/^\042(.*)\042$/$1/s;					# strip paired quotes

	s/\134\134/\134\060\071\062/g;				# disguise escaped escape
	s/\134([\060-\071]{3})/$unescape{$1}/eg;		# numeric escape
	s/\134(.)/$1/g;						# character escape

	while ( length $_ > 255 ) {
		my $chunk = substr( $_, 0, 255 );		# carve into chunks
		$chunk =~ s/[\300-\377][\200-\277]*$//;
		push @$self, $chunk;
		substr( $_, 0, length $chunk ) = '';
	}
	push @$self, $_;

	return $self;
}



sub decode {
	my $class  = shift;
	my $buffer = shift;					# reference to data buffer
	my $offset = shift || 0;				# offset within buffer
	my $size   = shift;					# specify size of unbounded text

	unless ( defined $size ) {
		$size = unpack "\@$offset C", $$buffer;
		$offset++;
	}

	my $next = $offset + $size;
	croak 'corrupt wire-format data' if $next > length $$buffer;

	my $self = bless [unpack( "\@$offset a$size", $$buffer )], $class;

	return wantarray ? ( $self, $next ) : $self;
}



sub encode {
	my $self = shift;
	return join '', map { pack( 'C a*', length $_, $_ ) } @$self;
}



sub raw {
	my $self = shift;
	return join '', map { pack( 'a*', $_ ) } @$self;
}



sub value {
	return unless defined wantarray;
	my $self = shift;
	return _decode_utf8( join '', @$self );
}



sub string {
	my $self = shift;

	my @s = map { split '', $_ } @$self;			# escape special and ASCII non-printable
	my $s = _decode_utf8( join '', map { $escape{$_} } @s );
	return $s =~ /[ \t\n\r\f(),;]|^$/ ? qq("$s") : $s;	# quote special characters and empty string
}



sub unicode {
	my $self = shift;

	my @s = map { split '', $_ } @$self;			# escape special and non-printable
	my $s = _decode_utf8( join '', map { $escapeUTF8{$_} } @s );
	return $s =~ /[ \t\n\r\f();]|^$/ ? qq("$s") : $s;	# quote special characters and empty string
}



my $ascii = ASCII ? Encode::find_encoding('ascii') : undef;	# Osborn's Law:
my $utf8  = UTF8  ? Encode::find_encoding('utf8')  : undef;	# Variables won't; constants aren't.


sub _decode_utf8 {			## UTF-8 to perl internal encoding
	local $_ = shift;

	# partial transliteration for non-ASCII character encodings
	tr
	[\040-\176\000-\377]
	[ !"#$%&'()*+,\-./0-9:;<=>?@A-Z\[\\\]^_`a-z{|}~?] unless ASCII;

	my $z = length($_) - length($_);			# pre-5.18 taint workaround
	return ASCII ? substr( ( UTF8 ? $utf8 : $ascii )->decode($_), $z ) : $_;
}


sub _encode_utf8 {			## perl internal encoding to UTF-8
	local $_ = shift;

	# partial transliteration for non-ASCII character encodings
	tr
	[ !"#$%&'()*+,\-./0-9:;<=>?@A-Z\[\\\]^_`a-z{|}~]
	[\040-\176] unless ASCII;

	my $z = length($_) - length($_);			# pre-5.18 taint workaround
	return ASCII ? substr( ( UTF8 ? $utf8 : $ascii )->encode($_), $z ) : $_;
}


%escape = eval {			## precalculated ASCII escape table
	my %table = map { ( chr($_) => chr($_) ) } ( 0 .. 127 );

	foreach my $n ( 0 .. 31, 34, 92, 127 .. 255 ) {		# numerical escape
		my $codepoint = sprintf( '%03u', $n );

		# transliteration for non-ASCII character encodings
		$codepoint =~ tr [0-9] [\060-\071];

		$table{chr($n)} = pack 'C a3', 92, $codepoint;
	}

	return %table;
};

%escapeUTF8 = eval {			## precalculated UTF-8 escape table
	my @octet = UTF8 ? ( 128 .. 191, 194 .. 254 ) : ();
	return ( %escape, map { ( chr($_) => chr($_) ) } @octet );
};


%unescape = eval {			## precalculated numeric escape table
	my %table;

	foreach my $n ( 0 .. 255 ) {
		my $key = sprintf( '%03u', $n );

		# transliteration for non-ASCII character encodings
		$key =~ tr [0-9] [\060-\071];

		$table{$key} = pack 'C', $n;
	}
	$table{"\060\071\062"} = pack 'C2', 92, 92;		# escaped escape

	return %table;
};


1;
__END__




