package Unicode::Normalize;

BEGIN {
    # Compile-time sanity check: this module relies on the 'U' template of
    # pack/unpack to convert between code points and characters.
    die "Unicode::Normalize cannot stringify a Unicode code point\n"
        if pack('U', 0x41) ne 'A';
    die "Unicode::Normalize cannot get Unicode code point\n"
        if unpack('U', 'A') != 0x41;
}

use 5.006;
use strict;
use warnings;
use Carp;

no warnings 'utf8';

# Module version, and the package name used as a prefix in croak messages.
our $VERSION = '1.18';
our $PACKAGE = __PACKAGE__;

# Exported by default: the four standard normalization forms.
our @EXPORT = qw( NFC NFD NFKC NFKD );
# Exported on request: the building blocks (decompose/reorder/compose),
# quick checks, per-code-point property accessors, the FCD/FCC forms and
# the *_partial helpers.
our @EXPORT_OK = qw(
    normalize decompose reorder compose
    checkNFD checkNFKD checkNFC checkNFKC check
    getCanon getCompat getComposite getCombinClass
    isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
    isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
    FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter
    normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial
);
# Export tags: ":all" is everything above; the other tags are subsets.
our %EXPORT_TAGS = (
    all       => [ @EXPORT, @EXPORT_OK ],
    normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
    check     => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
    fast      => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ],
);


# Build a character string from a list of Unicode code points.
sub pack_U {
    my @code_points = @_;
    return pack 'U*', @code_points;
}

# Return the list of code points of the given string.
# The argument is concatenated with an empty string produced by
# pack('U*') before being unpacked.
sub unpack_U {
    my $upgraded = shift(@_) . pack('U*');
    return unpack 'U*', $upgraded;
}

require Exporter;

our @ISA = qw(Exporter);
use File::Spec;

# Lookup tables, filled in below from the Unicode data files.
# All keys are numeric code points.
our %Combin;	# $codepoint => $number    : canonical combining class
our %Canon;	# $codepoint => \@codepoints : canonical decomposition
our %Compat;	# $codepoint => \@codepoints : compatibility decomposition
our %Compos;	# $1st => { $2nd => $codepoint } : composite of the pair
our %Exclus;	# $codepoint => 1          : composition exclusions
our %Single;	# $codepoint => 1          : singletons
our %NonStD;	# $codepoint => 1          : non-starter decompositions
our %Comp2nd;	# $codepoint => 1          : may be composed with a prev char.

# Load the raw property tables.  Each "do FILE" yields the last value
# evaluated in that file; the "unicore/" location is tried first, then
# "unicode/", and loading croaks if neither yields a true value.
# The results are parsed line by line (tab-separated fields) further below.
our $Combin = do "unicore/CombiningClass.pl"
    || do "unicode/CombiningClass.pl"
    || croak "$PACKAGE: CombiningClass.pl not found";
our $Decomp = do "unicore/Decomposition.pl"
    || do "unicode/Decomposition.pl"
    || croak "$PACKAGE: Decomposition.pl not found";

# Hexadecimal code points of the composition exclusions.  Entries are only
# entered into %Exclus (below) when the loaded data gives them a canonical
# decomposition.
our @CompEx = qw(
    0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36
    0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76
    0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D
    FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B
    FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C
    FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB
    1D1BC 1D1BD 1D1BE 1D1BF 1D1C0
);

# Constants for the arithmetic Hangul (de)composition used by
# decomposeHangul() and getHangulComposite().
# S = precomposed syllables, L = leading consonants, V = vowels,
# T = trailing consonants.
use constant SBase  => 0xAC00;
use constant SFinal => 0xD7A3; # SBase -1 + SCount
use constant SCount =>  11172; # LCount * NCount
use constant NCount =>    588; # VCount * TCount
use constant LBase  => 0x1100;
use constant LFinal => 0x1112;
use constant LCount =>     19;
use constant VBase  => 0x1161;
use constant VFinal => 0x1175;
use constant VCount =>     21;
use constant TBase  => 0x11A7; # one below the first trailing consonant
use constant TFinal => 0x11C2;
use constant TCount =>     28; # includes the "no trailing consonant" slot

# Decompose a precomposed Hangul syllable (given as a code point) into its
# conjoining jamo: leading consonant, vowel and, if present, trailing
# consonant.  Returns the jamo code points in list context, or the
# corresponding string otherwise.
sub decomposeHangul {
    my $offset = $_[0] - SBase;
    my $trail  = $offset % TCount;

    my @jamo = (
        LBase + int($offset / NCount),
        VBase + int(($offset % NCount) / TCount),
    );
    # A trailing index of zero means "no trailing consonant".
    push @jamo, TBase + $trail if $trail;

    return pack_U(@jamo) unless wantarray;
    return @jamo;
}


# Parse a run of space-separated hexadecimal numbers at the start of the
# given string and return their numeric values.
sub _getHexArray {
    my @hex_words = $_[0] =~ /\G *([0-9A-Fa-f]+)/g;
    return map { hex $_ } @hex_words;
}

# Fill %Combin from the raw table: every non-empty line holds
# "first<TAB>last<TAB>class", where an empty "last" denotes a single
# code point rather than a range.
while ($Combin =~ /(.+)/g) {
    my @field = split /\t/, $1;
    my $first = hex $field[0];
    my $last  = $field[1] eq '' ? $first : hex($field[1]);
    $Combin{$_} = $field[2] for $first .. $last;
}

# Fill %Compat and %Canon from the raw decomposition table.  Each line is
# "first<TAB>last<TAB>mapping"; a mapping carrying a <tag> is a
# compatibility mapping and is therefore kept out of %Canon.
while ($Decomp =~ /(.+)/g) {
    my @field = split /\t/, $1;

    # Strip the formatting tag, remembering whether there was one.
    my $has_tag = $field[2] =~ s/<[^>]+>//;
    my $mapping = [ _getHexArray($field[2]) ];

    my $first = hex($field[0]);
    my $last  = $field[1] eq '' ? $first : hex($field[1]);
    my @range = ($first .. $last);

    # Every code point of the range shares the same mapping array.
    @Compat{@range} = ($mapping) x @range;
    @Canon{@range}  = ($mapping) x @range unless $has_tag;
}

# Record the composition exclusions that are relevant for the loaded data.
foreach my $cp (map { hex } @CompEx) {
    # Skip code points without a canonical decomposition (not assigned).
    next unless $Canon{$cp};

    # U+FB1D is only excluded once U+1D15E has a decomposition
    # (3.0.1 before Corrigendum #2 lacks it).
    my $pre_corrigendum = ($cp == 0xFB1D && !$Canon{0x1D15E});
    $Exclus{$cp} = 1 unless $pre_corrigendum;
}

# Classify every (not yet fully expanded) canonical mapping:
#   one code point            -> singleton
#   pair starting with a mark -> non-starter decomposition
#   any other pair            -> primary composite, recorded in %Compos
foreach my $cp (keys %Canon) {
    my @part = @{ $Canon{$cp} };

    if (@part == 1) {
        $Single{$cp} = 1;
        next;
    }
    if (@part != 2) {
        my $h = sprintf '%04X', $cp;
        croak("Weird Canonical Decomposition of U+$h");
    }

    my ($first, $second) = @part;
    if ($Combin{$first}) {
        $NonStD{$cp} = 1;
        next;
    }
    $Compos{$first}{$second} = $cp;
    # Excluded composites do not make their second character "composable".
    $Comp2nd{$second} = 1 unless $Exclus{$cp};
}

# Conjoining vowels and trailing consonants may compose with a preceding
# Hangul character, so mark them as possible second characters as well.
$Comp2nd{$_} = 1 for 0x1161 .. 0x1175, 0x11A8 .. 0x11C2;

# Fully expand a list of code points using canonical mappings (and the
# arithmetic Hangul decomposition), repeating until nothing changes.
sub getCanonList {
    my @current = @_;
    while (1) {
        my @expanded = map {
            (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
                : $Canon{$_} ? @{ $Canon{$_} } : $_
        } @current;

        # Compare the contents: equal lengths alone do not prove that
        # the expansion has reached a fixed point.
        return @expanded if join(" ", @current) eq join(" ", @expanded);
        @current = @expanded;
    }
}

# Fully expand a list of code points using compatibility mappings (and the
# arithmetic Hangul decomposition), repeating until nothing changes.
sub getCompatList {
    my @pending = @_;
    for (;;) {
        my @result = map {
            (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
                : $Compat{$_} ? @{ $Compat{$_} } : $_
        } @pending;

        # A pass that leaves the sequence unchanged ends the expansion;
        # the contents are compared, not just the lengths.
        if (join(" ", @pending) eq join(" ", @result)) {
            return @result;
        }
        @pending = @result;
    }
}

# Replace each single-step mapping by its full (recursive) expansion.
# This must run after the classification above, which inspects the
# single-step canonical mappings.
$Canon{$_}  = [ getCanonList($_)  ] for keys %Canon;

$Compat{$_} = [ getCompatList($_) ] for keys %Compat;

# Arithmetic Hangul composition of two code points.
# Returns the syllable for L + V or for LV + T, and undef for any other pair.
sub getHangulComposite ($$) {
    my ($first, $second) = @_;

    # leading consonant + vowel => LV syllable
    if (LBase <= $first  && $first  <= LFinal
     && VBase <= $second && $second <= VFinal) {
        my $lv_index = ($first - LBase) * VCount + ($second - VBase);
        return (SBase + $lv_index * TCount);
    }

    # LV syllable (one without a trailing consonant) + trailing consonant
    my $is_lv = SBase <= $first && $first <= SFinal
        && (($first - SBase) % TCount) == 0;
    if ($is_lv && TBase < $second && $second <= TFinal) {
        return ($first + $second - TBase);
    }

    return undef;
}


# Canonical combining class of a code point; 0 when none is recorded.
sub getCombinClass ($) {
    my $class = $Combin{ 0 + $_[0] };
    return $class ? $class : 0;
}

# Full canonical decomposition of a code point, as a string.
# Returns undef when the code point has no canonical decomposition.
sub getCanon ($) {
    my $uv = 0 + shift;

    return pack_U(@{ $Canon{$uv} }) if exists $Canon{$uv};

    # Hangul syllables are not in the table; decompose them arithmetically.
    return scalar decomposeHangul($uv) if SBase <= $uv && $uv <= SFinal;

    return undef;
}

# Full compatibility decomposition of a code point, as a string.
# Returns undef when the code point has no decomposition at all.
sub getCompat ($) {
    my $uv = 0 + shift;

    if (exists $Compat{$uv}) {
        return pack_U(@{ $Compat{$uv} });
    }
    if (SBase <= $uv && $uv <= SFinal) {
        # Hangul syllable: computed, not looked up.
        return scalar decomposeHangul($uv);
    }
    return undef;
}

# Composite of two code points: a Hangul syllable when the arithmetic
# composition applies, otherwise the entry of %Compos.  The result is false
# when the pair has no composite.  Composition exclusions are NOT
# filtered out here.
sub getComposite ($$) {
    my $starter = 0 + $_[0];
    my $second  = 0 + $_[1];

    if (my $syllable = getHangulComposite($starter, $second)) {
        return $syllable;
    }

    my $partners = $Compos{$starter};
    return $partners && $partners->{$second};
}

# True if the code point is recorded as a composition exclusion.
sub isExclusion  ($) {
    return exists $Exclus{ 0 + $_[0] };
}

# True if the code point has a singleton (one-character) canonical mapping.
sub isSingleton  ($) {
    my ($arg) = @_;
    my $uv = 0 + $arg;
    return exists $Single{$uv};
}

# True if the code point has a non-starter decomposition, i.e. a two-character
# canonical mapping whose first character has a non-zero combining class.
sub isNonStDecomp($) {
    my $key = 0 + $_[0];
    return exists $NonStD{$key};
}

# True if the code point may be composed with a previous character.
sub isComp2nd ($) {
    return exists $Comp2nd{ 0 + shift };
}

# NFC quick-check "MAYBE": the code point may be composed with a
# previous character (same table as isComp2nd).
sub isNFC_MAYBE ($) {
    my $code_point = 0 + $_[0];
    return exists $Comp2nd{$code_point};
}

# NFKC quick-check "MAYBE": answered from the same table as isNFC_MAYBE.
sub isNFKC_MAYBE($) {
    my ($arg) = @_;
    return exists $Comp2nd{ 0 + $arg };
}

# NFD quick-check "NO": the code point has a canonical decomposition,
# either from the table or as a Hangul syllable.
sub isNFD_NO ($) {
    my $uv = 0 + shift;
    my $in_table = exists $Canon{$uv};
    return $in_table if $in_table;
    return SBase <= $uv && $uv <= SFinal;
}

# NFKD quick-check "NO": the code point has a compatibility decomposition,
# either from the table or as a Hangul syllable.
sub isNFKD_NO ($) {
    my $uv = 0 + $_[0];
    my $is_hangul = SBase <= $uv && $uv <= SFinal;
    return exists $Compat{$uv} || $is_hangul;
}

# True if the code point is excluded from composition in the wide sense:
# an explicit exclusion, a singleton, or a non-starter decomposition.
sub isComp_Ex ($) {
    my $uv = 0 + shift;
    return !! grep { exists $_->{$uv} } \%Exclus, \%Single, \%NonStD;
}

# NFC quick-check "NO": the code point can never appear in NFC text because
# it is a non-starter decomposition, a singleton or a composition exclusion.
sub isNFC_NO ($) {
    my $uv = 0 + $_[0];
    return(
           exists $NonStD{$uv}
        || exists $Single{$uv}
        || exists $Exclus{$uv}
    );
}

# NFKC quick-check "NO".  True for everything isNFC_NO rejects, and also
# for code points whose compatibility decomposition differs from their
# canonical one.  Hangul syllables and code points without any
# decomposition are never "NO".
sub isNFKC_NO ($) {
    my $uv = 0 + shift;

    if ($Exclus{$uv} || $Single{$uv} || $NonStD{$uv}) {
        return 1;
    }

    my $is_hangul = SBase <= $uv && $uv <= SFinal;
    return '' if $is_hangul or not exists $Compat{$uv};

    # A compatibility mapping without a canonical one.
    return 1 unless exists $Canon{$uv};

    # Both exist: "NO" only if they expand to different sequences.
    my ($canon, $compat) = map { pack('N*', @$_) } $Canon{$uv}, $Compat{$uv};
    return $canon ne $compat;
}

# Decompose every character of a string, without reordering.
#   $_[0] : the string
#   $_[1] : true for compatibility decomposition, false/omitted for canonical
# Returns the decomposed string.
sub decompose ($;$)
{
    my $table = $_[1] ? \%Compat : \%Canon;

    my @out;
    for my $cp (unpack_U($_[0])) {
        if ($table->{$cp}) {
            push @out, @{ $table->{$cp} };
        }
        elsif (SBase <= $cp && $cp <= SFinal) {
            # Hangul syllables are decomposed arithmetically.
            push @out, decomposeHangul($cp);
        }
        else {
            push @out, $cp;
        }
    }
    return pack_U(@out);
}

# Canonical reordering: within each run of consecutive characters with a
# non-zero combining class, sort by combining class while keeping the
# original order of characters of equal class.  Returns the new string.
sub reorder ($)
{
    my @cp = unpack_U($_[0]);

    my $pos = 0;
    while ($pos < @cp) {
        # Characters of class zero stay where they are.
        unless ($Combin{ $cp[$pos] }) {
            $pos++;
            next;
        }

        # Find the end of this run of combining characters.
        my $run_start = $pos;
        $pos++ while $pos < @cp && $Combin{ $cp[$pos] };
        my $run_end = $pos - 1;

        # Sort the indices; ties are broken by position, which makes the
        # result independent of the stability of sort().
        my @order = sort {
            $Combin{ $cp[$a] } <=> $Combin{ $cp[$b] } || $a <=> $b
        } $run_start .. $run_end;

        @cp[ $run_start .. $run_end ] = @cp[ @order ];
    }
    return pack_U(@cp);
}


# Canonical composition of a (decomposed and reordered) string.
# For every starter, the following characters are tried in turn as the
# second half of a composite; a successful pair replaces the starter and
# leaves an undef hole that is dropped when the result is packed.
# Returns the composed string.
sub compose ($)
{
    my @src = unpack_U($_[0]);

    for (my $s = 0; $s+1 < @src; $s++) {
	next unless defined $src[$s] && ! $Combin{ $src[$s] };
	 # S only; removed or combining are skipped as a starter.

	# $c             : candidate composite
	# $blocked       : set once a character of class zero has been seen;
	#                  this also terminates the inner loop
	# $uncomposed_cc : set when a combining character was seen
	my($c, $blocked, $uncomposed_cc);
	for (my $j = $s+1; $j < @src && !$blocked; $j++) {
	    # Set exactly one of the two flags, depending on whether
	    # $src[$j] has a non-zero combining class.
	    ($Combin{ $src[$j] } ? $uncomposed_cc : $blocked) = 1;

	    # S + C + S => S-S + C would be blocked.
	    next if $blocked && $uncomposed_cc;

	    # blocked by same CC (and higher CC: revised D2)
	    next if defined $src[$j-1]   && $Combin{ $src[$j-1] }
		&& $Combin{ $src[$j-1] } >= $Combin{ $src[$j] };

	    $c = getComposite($src[$s], $src[$j]);

	    # no composite or is exclusion
	    next if !$c || $Exclus{$c};

	    # replace by composite
	    $src[$s] = $c; $src[$j] = undef;
	    # A composed class-zero character must not end the scan, and a
	    # composed combining character no longer counts as uncomposed.
	    # NOTE(review): the decrement clears the flag even if an earlier
	    # combining character was left uncomposed -- confirm that no
	    # real data depends on that case.
	    if ($blocked) { $blocked = 0 } else { -- $uncomposed_cc }
	}
    }
    return pack_U(grep defined, @src);
}


# Contiguous composition: each starter absorbs the characters that
# immediately follow it for as long as each one forms a non-excluded
# composite with it; the first failure ends the scan for that starter.
# Returns the composed string.
sub composeContiguous ($)
{
    my @cp = unpack_U($_[0]);

    for my $s (0 .. $#cp - 1) {
        # Only starters qualify; consumed slots (undef) and combining
        # characters are skipped.
        next if !defined $cp[$s] || $Combin{ $cp[$s] };

        for my $j ($s + 1 .. $#cp) {
            my $composite = getComposite($cp[$s], $cp[$j]);

            # Stop at the first character that does not compose.
            last unless $composite && !$Exclus{$composite};

            @cp[$s, $j] = ($composite, undef);
        }
    }
    return pack_U(grep defined, @cp);
}



use constant COMPAT => 1;

# Normalization Form D: canonical decomposition followed by reordering.
sub NFD  ($) {
    my $decomposed = decompose($_[0]);
    return reorder($decomposed);
}
# Normalization Form KD: compatibility decomposition followed by reordering.
sub NFKD ($) {
    my $decomposed = decompose($_[0], COMPAT);
    return reorder($decomposed);
}
# Normalization Form C: canonical decomposition, reordering, composition.
sub NFC  ($) {
    my $reordered = reorder(decompose($_[0]));
    return compose($reordered);
}
# Normalization Form KC: compatibility decomposition, reordering, composition.
sub NFKC ($) {
    my $reordered = reorder(decompose($_[0], COMPAT));
    return compose($reordered);
}
# FCC: canonical decomposition, reordering, then contiguous composition.
sub FCC  ($) {
    my $reordered = reorder(decompose($_[0]));
    return composeContiguous($reordered);
}


# Quick check for NFD.  Returns 1 if the string is in NFD, '' if not.
sub checkNFD ($)
{
    my $prev_cc = 0;
    foreach my $uv (unpack_U($_[0])) {
        my $cc = $Combin{$uv} || 0;

        # Combining marks out of canonical order.
        if ($cc != 0 && $cc < $prev_cc) { return '' }

        # Anything that still decomposes canonically.
        if (exists $Canon{$uv})            { return '' }
        if (SBase <= $uv && $uv <= SFinal) { return '' }

        $prev_cc = $cc;
    }
    return 1;
}

# Quick check for NFKD.  Returns 1 if the string is in NFKD, '' if not.
sub checkNFKD ($)
{
    my $last_class = 0;
    for my $uv (unpack_U($_[0])) {
        my $class = $Combin{$uv} || 0;

        my $misordered = $class != 0 && $last_class > $class;
        return '' if $misordered;

        # A character with a decomposition (table entry or Hangul
        # syllable) cannot occur in NFKD.
        my $is_hangul = SBase <= $uv && $uv <= SFinal;
        return '' if exists $Compat{$uv} || $is_hangul;

        $last_class = $class;
    }
    return 1;
}

# Quick check for NFC.
# Returns 1 (yes), '' (no) or undef (maybe: full normalization is needed
# to decide).
sub checkNFC ($)
{
    my $prev_cc = 0;
    my $saw_maybe;
    foreach my $uv (unpack_U($_[0])) {
        my $cc = $Combin{$uv} || 0;
        return '' if $cc != 0 && $prev_cc > $cc;

        # MAYBE takes precedence over NO for a single character.
        if (isNFC_MAYBE($uv)) {
            $saw_maybe = 1;
        }
        elsif (isNFC_NO($uv)) {
            return '';
        }
        $prev_cc = $cc;
    }
    return undef if $saw_maybe;
    return 1;
}

# Quick check for NFKC.
# Returns 1 (yes), '' (no) or undef (maybe: full normalization is needed
# to decide).
sub checkNFKC ($)
{
    my ($last_class, $maybe) = (0, undef);
    for my $uv (unpack_U($_[0])) {
        my $class = $Combin{$uv} || 0;

        # Combining marks out of canonical order.
        if ($class != 0 && $last_class > $class) {
            return '';
        }

        if (isNFKC_MAYBE($uv)) {
            $maybe = 1;
        }
        elsif (isNFKC_NO($uv)) {
            return '';
        }
        $last_class = $class;
    }
    return 1 unless $maybe;
    return undef;
}

# Check for FCD: the combining classes at the boundaries of the canonical
# decompositions of adjacent characters must be in canonical order.
# Returns 1 if so, '' otherwise.
sub checkFCD ($)
{
    my $prev_cc = 0;
    foreach my $uv (unpack_U($_[0])) {
        # Hangul syllables need not be decomposed here, because every
        # jamo has combining class 0.
        my ($head, $tail) = exists $Canon{$uv}
            ? @{ $Canon{$uv} }[0, -1]
            : ($uv, $uv);

        my $lead_cc = $Combin{$head} || 0;
        return '' if $lead_cc != 0 && $lead_cc < $prev_cc;

        $prev_cc = $Combin{$tail} || 0;
    }
    return 1;
}

# Check for FCC: the FCD ordering condition plus the NFC quick-check
# properties of each character.
# Returns 1 (yes), '' (no) or undef (maybe).
sub checkFCC ($)
{
    my $prev_cc = 0;
    my $saw_maybe;
    foreach my $uv (unpack_U($_[0])) {
        # Hangul syllables need not be decomposed here, because every
        # jamo has combining class 0.
        my $canon = exists $Canon{$uv} ? $Canon{$uv} : [ $uv ];

        my $lead_cc = $Combin{ $canon->[0] } || 0;
        return '' if $lead_cc != 0 && $lead_cc < $prev_cc;

        if (isNFC_MAYBE($uv)) {
            $saw_maybe = 1;
        }
        elsif (isNFC_NO($uv)) {
            return '';
        }

        $prev_cc = $Combin{ $canon->[-1] } || 0;
    }
    return 1 unless $saw_maybe;
    return undef;
}


# Split a string in front of its last starter (character of combining
# class 0).  Returns (processed, unprocessed): the unprocessed part begins
# with the last starter, or is the whole string if there is no starter.
sub splitOnLastStarter
{
    my $head = pack_U(unpack_U(shift));
    return ('', '') if $head eq '';

    my $tail = "";
    while (1) {
        # Move one character from the end of $head to the front of $tail.
        my $ch = chop($head);
        $tail = $ch . $tail;

        # Stop as soon as a starter has been moved or $head is exhausted.
        last unless getCombinClass(unpack 'U', $ch) && $head ne "";
    }
    return ($head, $tail);
}


# FCD form: the string itself if it already satisfies checkFCD,
# otherwise its NFD.
sub FCD ($) {
    my ($str) = @_;
    return $str if checkFCD($str);
    return NFD($str);
}

# Dispatch table for normalize(): form name => normalizer.
# The four standard forms are also reachable without the "NF" prefix.
our %formNorm = (
    NFC  => \&NFC,	C  => \&NFC,
    NFD  => \&NFD,	D  => \&NFD,
    NFKC => \&NFKC,	KC => \&NFKC,
    NFKD => \&NFKD,	KD => \&NFKD,
    FCD  => \&FCD,	FCC => \&FCC,
);

# normalize($form, $string): normalize $string into the form named by
# $form (a key of %formNorm).  Croaks on an unknown form name.
sub normalize($$)
{
    my ($form, $str) = @_;

    croak($PACKAGE."::normalize: invalid form name: $form")
        unless exists $formNorm{$form};

    return $formNorm{$form}->($str);
}


# normalize_partial($form, $string): normalize $string, return the part in
# front of the last starter, and store the remaining (unprocessed) part
# back into the caller's second argument.  Croaks on an unknown form name.
sub normalize_partial ($$) {
    croak($PACKAGE."::normalize_partial: invalid form name: $_[0]")
        unless exists $formNorm{$_[0]};

    my $normalized = normalize($_[0], $_[1]);
    my ($processed, $unprocessed) = splitOnLastStarter($normalized);

    # $_[1] aliases the caller's variable, so this assignment is visible
    # to the caller.
    $_[1] = $unprocessed;
    return $processed;
}

# NFD variant of normalize_partial.  $_[0] is passed on as-is (not copied)
# so that the unprocessed remainder reaches the caller's variable.
sub NFD_partial ($) {
    my $processed = normalize_partial('NFD', $_[0]);
    return $processed;
}
# NFC variant of normalize_partial.  $_[0] is passed on as-is (not copied)
# so that the unprocessed remainder reaches the caller's variable.
sub NFC_partial ($) {
    my $processed = normalize_partial('NFC', $_[0]);
    return $processed;
}
# NFKD variant of normalize_partial.  $_[0] is passed on as-is (not copied)
# so that the unprocessed remainder reaches the caller's variable.
sub NFKD_partial($) {
    my $processed = normalize_partial('NFKD',$_[0]);
    return $processed;
}
# NFKC variant of normalize_partial.  $_[0] is passed on as-is (not copied)
# so that the unprocessed remainder reaches the caller's variable.
sub NFKC_partial($) {
    my $processed = normalize_partial('NFKC',$_[0]);
    return $processed;
}


# Dispatch table for check(): form name => checker.
# Accepts the same form names as %formNorm.
our %formCheck = (
    NFC  => \&checkNFC, 	C  => \&checkNFC,
    NFD  => \&checkNFD, 	D  => \&checkNFD,
    NFKC => \&checkNFKC,	KC => \&checkNFKC,
    NFKD => \&checkNFKD,	KD => \&checkNFKD,
    FCD  => \&checkFCD, 	FCC => \&checkFCC,
);

# check($form, $string): run the checker for the form named by $form
# (a key of %formCheck) on $string and return its result.
# Croaks on an unknown form name.
sub check($$)
{
    my ($form, $str) = @_;

    unless (exists $formCheck{$form}) {
        croak($PACKAGE."::check: invalid form name: $form");
    }
    my $checker = $formCheck{$form};
    return $checker->($str);
}

1;
__END__

