# NAME

Lingua::JA::Dakuon - Convert between dakuon/handakuon and seion for Japanese

# SYNOPSIS

    use utf8;
    use Lingua::JA::Dakuon ':all';

    # Convert char to dakuon/handakuon
    dakuon('か'); #=> 'が'(\x{304c})
    dakuon('ﾀ'); #=> 'ﾀﾞ'(\x{ff80}\x{ff9e})
    dakuon('あ'); #=> 'あ'(\x{3042})
    handakuon('は'); #=> 'ぱ'(\x{3071})
    {
        local $Lingua::JA::Dakuon::EnableCombining = 1;
        dakuon('あ'); #=> "\x{3042}\x{3099}"
    }
    {
        local $Lingua::JA::Dakuon::PreferCombining = 1;
        dakuon('か'); #=> "\x{304b}\x{3099}"
        handakuon('は'); #=> "\x{306f}\x{309a}"
    }

    # Convert char to seion
    seion('が'); #=> 'か'(\x{304b})
    seion('か゛'); #=> 'か'(\x{304b})
    seion('あ'); #=> 'あ'(\x{3042})
    seion("あ\x{3099}"); #=> 'あ'(\x{3042})
    seion('ﾀﾞ'); #=> 'ﾀ' (\x{ff80})
    seion('ぱ'); #=> 'は'(\x{306f})
    seion('は゜'); #=> 'は'(\x{306f})
    seion('ﾀﾟ'); #=> 'ﾀ' (\x{ff80})

    # Normalize dakuon/handakuon expression in string
    dakuon_normalize("あがさ\x{3099}た゛なぱま\x{3099}ゔﾊﾋﾞﾌ\x{3099}");
    #=> 'あがざだなぱまゔﾊﾋﾞﾌﾞ'
    handakuon_normalize("あぱひ\x{309a}ひ゜がま\x{309a}ﾊﾋﾟﾌ\x{309a}");
    #=> 'あぱぴぴがまﾊﾋﾟﾌﾟ'
    {
        local $Lingua::JA::Dakuon::PreferCombining = 1;
        dakuon_normalize("あがさ\x{3099}た゛なぱま\x{3099}ゔﾊﾋﾞﾌ\x{3099}");
        #=> "あか\x{3099}さ\x{3099}た\x{3099}なぱま\x{3099}う\x{3099}ﾊﾋﾞﾌﾞ"
        handakuon_normalize("あぱひ\x{309a}ひ゜がま\x{309a}ﾊﾋﾟﾌ\x{309a}");
        #=> "あは\x{309a}ひ\x{309a}ひ\x{309a}がま\x{309a}ﾊﾋﾟﾌﾟ"
    }

    all_dakuon_normalize($string);
    #=> equivalent to dakuon_normalize(handakuon_normalize($string));

# DESCRIPTION

This module provides routines to handle dakuon/handakuon in Japanese as expressed in Unicode.

# VARIABLES

## $Lingua::JA::Dakuon::EnableCombining (default: 0)

If this variable is set to true, a Unicode combining character is used if needed.
For example, there is no code corresponding to dakuon for 'あ'(\\x{3042}).
But it can be forcibly expressed with the combining character "\\x{3099}" as "\\x{3042}\\x{3099}" if this flag is enabled.

## $Lingua::JA::Dakuon::PreferCombining (default: 0)

If this variable is set to true, a combining character is used instead of the dakuon character code even if it is available.
For example, calling dakuon() with argument 'か' will return "か\\x{3099}" instead of returning "\\x{304c}".

## $Lingua::JA::Dakuon::AllDakuonRE

Regex \*STRING\* (not compiled) that matches all dakuon character(s) that can be passed to seion().

## $Lingua::JA::Dakuon::AllHandakuonRE

Regex \*STRING\* (not compiled) that matches all handakuon character(s) that can be passed to seion().

# FUNCTIONS

## dakuon($char)

Convert the passed character to a dakuon character if possible.
Return undef if the passed argument has more than 1 character.

    dakuon('か'); #=> 'が'(\x{304c})

## handakuon($char)

Convert the passed character to a handakuon character if possible.
Return undef if the passed argument has more than 1 character.

    handakuon('は'); #=> 'ぱ'(\x{3071})

## seion($char)

Convert the passed character to a seion character if possible.
Return undef if the passed argument has more than 2 characters or the second character isn't a mark character that expresses dakuon/handakuon.

    seion('が'); #=> 'か'(\x{304b})
    seion('ぱ'); #=> 'は'(\x{306f})

## dakuon\_normalize($string)

Normalize a string that may contain multiple expressions of dakuon.

    dakuon_normalize("あがさ\x{3099}た゛なぱま\x{3099}ゔﾊﾋﾞﾌ\x{3099}");
    #=> 'あがざだなぱまゔﾊﾋﾞﾌﾞ'

## handakuon\_normalize($string)

Normalize a string that may contain multiple expressions of handakuon.

    handakuon_normalize("あぱひ\x{309a}ひ゜がま\x{309a}ﾊﾋﾟﾌ\x{309a}");
    #=> 'あぱぴぴがまﾊﾋﾟﾌﾟ'

## all\_dakuon\_normalize($string)

Equivalent to calling dakuon\_normalize(handakuon\_normalize($string));

# SEE ALSO

- [濁点 - Wikipedia](http://ja.wikipedia.org/wiki/%E6%BF%81%E7%82%B9)
- [半濁点 - Wikipedia](http://ja.wikipedia.org/wiki/%E5%8D%8A%E6%BF%81%E7%82%B9)
- [清音 - Wikipedia](http://ja.wikipedia.org/wiki/%E6%B8%85%E9%9F%B3)

# LICENSE

Copyright (C) Yuto KAWAMURA(kawamuray).

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

# AUTHOR

Yuto KAWAMURA(kawamuray) <kawamuray.dadada@gmail.com>