Files
adler32
aho_corasick
alga
approx
ascii
atty
backtrace
backtrace_sys
base64
bitflags
blas_src
block_buffer
block_padding
brotli2
brotli_sys
buf_redux
byte_tools
byteorder
cauchy
cblas_sys
cfg_if
chrono
chunked_transfer
colored
crc32fast
crossbeam
crossbeam_channel
crossbeam_deque
crossbeam_epoch
crossbeam_queue
crossbeam_utils
ctrlc
deflate
digest
dirs
error_chain
filetime
futures
generic_array
getrandom
gzip_header
hex
httparse
hyper
idna
itoa
language_tags
lapack_src
lapacke
lapacke_sys
lazy_static
libc
libm
linked_hash_map
log
matches
matrixmultiply
maybe_uninit
md5
memchr
memoffset
mime
mime_guess
multipart
nalgebra
base
geometry
linalg
ndarray
ndarray_linalg
net2
netlib_src
nix
num_complex
num_cpus
num_integer
num_rational
num_traits
opaque_debug
percent_encoding
phf
phf_shared
ppv_lite86
proc_macro2
quick_error
quote
rand
rand_chacha
rand_core
rand_distr
rawpointer
regex
regex_syntax
remove_dir_all
rosrust
rosrust_codegen
rosrust_msg
rouille
rustc_demangle
rustros_tf
ryu
safemem
scopeguard
serde
serde_bytes
serde_derive
serde_json
serde_xml_rs
sha1
siphasher
smallvec
syn
tempdir
term
thread_local
threadpool
time
tiny_http
traitobject
twoway
typeable
typenum
ucd_util
unicase
unicode_bidi
unicode_normalization
unicode_xid
url
utf8_ranges
void
xml
xml_rpc
yaml_rust
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
use crate::unicode_tables::jamo_short_name::JAMO_SHORT_NAME;

// This implementation should correspond to the algorithms described in
// Unicode 3.12.

/// A set of ranges that corresponds to the set of all Hangul syllable
/// codepoints.
///
/// These ranges are defined in Unicode 4.8 Table 4-13.
pub const RANGE_HANGUL_SYLLABLE: &'static [(u32, u32)] = &[(0xAC00, 0xD7A3)];

const S_BASE: u32 = 0xAC00;
const L_BASE: u32 = 0x1100;
const V_BASE: u32 = 0x1161;
const T_BASE: u32 = 0x11A7;
const T_COUNT: u32 = 28;
const N_COUNT: u32 = 588;

/// Return the character name of the given precomposed Hangul codepoint.
///
/// If the given codepoint does not correspond to a precomposed Hangul
/// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
///
/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
pub fn hangul_name(cp: u32) -> Option<String> {
    let mut name = "HANGUL SYLLABLE ".to_string();
    let (lpart, vpart, tpart) = match hangul_full_canonical_decomposition(cp) {
        None => return None,
        Some(triple) => triple,
    };

    name.push_str(jamo_short_name(lpart));
    name.push_str(jamo_short_name(vpart));
    name.push_str(tpart.map_or("", jamo_short_name));
    Some(name)
}

/// Return the full canonical decomposition of the given precomposed Hangul
/// codepoint.
///
/// If the decomposition does not have any trailing consonant, then the third
/// part of the tuple returned is `None`.
///
/// If the given codepoint does not correspond to a precomposed Hangul
/// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
///
/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
pub fn hangul_full_canonical_decomposition(
    cp: u32,
) -> Option<(u32, u32, Option<u32>)> {
    if !(0xAC00 <= cp && cp <= 0xD7A3) {
        return None;
    }

    let s_index = cp - S_BASE;
    let l_index = s_index / N_COUNT;
    let v_index = (s_index % N_COUNT) / T_COUNT;
    let t_index = s_index % T_COUNT;

    let l_part = L_BASE + l_index;
    let v_part = V_BASE + v_index;
    let t_part = if t_index == 0 { None } else { Some(T_BASE + t_index) };
    Some((l_part, v_part, t_part))
}

fn jamo_short_name(cp: u32) -> &'static str {
    let i = JAMO_SHORT_NAME.binary_search_by_key(&cp, |p| p.0).unwrap();
    JAMO_SHORT_NAME[i].1
}

#[cfg(test)]
mod tests {
    use super::{hangul_full_canonical_decomposition, hangul_name};

    #[test]
    fn canon_decomp() {
        assert_eq!(
            hangul_full_canonical_decomposition(0xD4DB),
            Some((0x1111, 0x1171, Some(0x11B6)))
        );
    }

    #[test]
    fn name() {
        assert_eq!(hangul_name(0xD4DB).unwrap(), "HANGUL SYLLABLE PWILH");
    }

    #[test]
    fn all() {
        for cp in 0xAC00..(0xD7A3 + 1) {
            hangul_name(cp).unwrap();
        }
    }

    #[test]
    fn invalid() {
        assert!(hangul_name(0).is_none());
    }
}