Server IP : 103.119.228.120 / Your IP : 18.117.192.64 Web Server : Apache System : Linux v8.techscape8.com 3.10.0-1160.119.1.el7.tuxcare.els2.x86_64 #1 SMP Mon Jul 15 12:09:18 UTC 2024 x86_64 User : nobody ( 99) PHP Version : 5.6.40 Disable Function : shell_exec,symlink,system,exec,proc_get_status,proc_nice,proc_terminate,define_syslog_variables,syslog,openlog,closelog,escapeshellcmd,passthru,ocinum cols,ini_alter,leak,listen,chgrp,apache_note,apache_setenv,debugger_on,debugger_off,ftp_exec,dl,dll,myshellexec,proc_open,socket_bind,proc_close,escapeshellarg,parse_ini_filepopen,fpassthru,exec,passthru,escapeshellarg,escapeshellcmd,proc_close,proc_open,ini_alter,popen,show_source,proc_nice,proc_terminate,proc_get_status,proc_close,pfsockopen,leak,apache_child_terminate,posix_kill,posix_mkfifo,posix_setpgid,posix_setsid,posix_setuid,dl,symlink,shell_exec,system,dl,passthru,escapeshellarg,escapeshellcmd,myshellexec,c99_buff_prepare,c99_sess_put,fpassthru,getdisfunc,fx29exec,fx29exec2,is_windows,disp_freespace,fx29sh_getupdate,fx29_buff_prepare,fx29_sess_put,fx29shexit,fx29fsearch,fx29ftpbrutecheck,fx29sh_tools,fx29sh_about,milw0rm,imagez,sh_name,myshellexec,checkproxyhost,dosyayicek,c99_buff_prepare,c99_sess_put,c99getsource,c99sh_getupdate,c99fsearch,c99shexit,view_perms,posix_getpwuid,posix_getgrgid,posix_kill,parse_perms,parsesort,view_perms_color,set_encoder_input,ls_setcheckboxall,ls_reverse_all,rsg_read,rsg_glob,selfURL,dispsecinfo,unix2DosTime,addFile,system,get_users,view_size,DirFiles,DirFilesWide,DirPrintHTMLHeaders,GetFilesTotal,GetTitles,GetTimeTotal,GetMatchesCount,GetFileMatchesCount,GetResultFiles,fs_copy_dir,fs_copy_obj,fs_move_dir,fs_move_obj,fs_rmdir,SearchText,getmicrotime MySQL : ON | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /usr/local/ssl/local/ssl/local/ssl/local/ssl/local/ssl/local/share/man/man3/ |
Upload File : |
.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28) .\" .\" Standard preamble: .\" ======================================================================== .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. \*(C+ will .\" give a nicer C++. Capital omega is used to do unbreakable dashes and .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, .\" nothing in troff, for use with C<>. .tr \(*W- .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` "" . ds C' "" 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' . ds C` . ds C' 'br\} .\" .\" Escape single quotes in literal strings from groff's Unicode transform. .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" .\" If the F register is turned on, we'll generate index entries on stderr for .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index .\" entries marked with X<> in POD. Of course, you'll have to process the .\" output yourself in some meaningful fashion. .\" .\" Avoid warning from groff about undefined register 'F'. .de IX .. .nr rF 0 .if \n(.g .if rF .nr rF 1 .if (\n(rF:(\n(.g==0)) \{ . if \nF \{ . de IX . tm Index:\\$1\t\\n%\t"\\$2" .. . if !\nF==2 \{ . nr % 0 . nr F 2 . \} . \} .\} .rr rF .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. . \" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . 
ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . ds Ae AE .\} .rm #[ #] #H #V #F C .\" ======================================================================== .\" .IX Title "String::UnicodeUTF8 3" .TH String::UnicodeUTF8 3 "2016-08-24" "perl v5.16.3" "User Contributed Perl Documentation" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. 
.if n .ad l .nh .SH "NAME" String::UnicodeUTF8 \- non\-collation related unicode/utf\-8 bytes string\-type\-agnostic utils that work as far back as perl 5.6 .SH "VERSION" .IX Header "VERSION" This document describes String::UnicodeUTF8 version 0.21 .SH "SYNOPSIS" .IX Header "SYNOPSIS" .Vb 1 \& use String::UnicodeUTF8 qw(char_count bytes_size is_unicode); \& \& say \*(Aq$string type is: \*(Aq . is_unicode($string) ? \*(AqUnicode\*(Aq : \*(Aqbytes\*(Aq; \& \& say \*(Aq$string has this many characters: \*(Aq . char_count($string); \& \& say \*(Aq$string takes up this many bytes: \*(Aq . bytes_size($string); .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" Unicode is awesome. utf\-8 is also awesome. They are related but different. That difference and all the little twiggles in between make it appear to be too hard but it's really not, honest! .PP The unicode problem is a solved one. The easiest way to manage day to day is to have a couple of simple items in mind: .IP "XUnicodeX is a set of characters." 4 .IX Item "XUnicodeX is a set of characters." Example: X (the heart symbol) is Unicode character number 2665 (hexadecimal numbers those be) .IP "Xutf\-8X is an encoding of Unicode characters" 4 .IX Item "Xutf-8X is an encoding of Unicode characters" Example: X (i.e. Unicode character number 2665) is made up of 3 octets, or \*(L"characters\*(R" semantically, numbered: e2, 99, and a5 (hexadecimal numbers those be) .IP "You (almost) always want to input/output bytes in utf\-8" 4 .IX Item "You (almost) always want to input/output bytes in utf-8" By this I mean all of the files, database connections/schema, \s-1HTTP\s0 request/response, etc etc. You may very well need to encode to/from utf\-8 when dealing with 3rdparty/external stuff you have little control over. .Sp I say almost because it is possible to use any number of encodings and I suppose you might encounter a situation when you have no other choice.
But if you have a choice and an \s-1IQ\s0 in the double digits just do utf\-8, it's not that hard to do and you'll exponentially* make your life and others' easier. .Sp If you do have a situation (and it's not an ignorant boss/client forcing his moron\-induced\-FUD on you) please drop me a line w/ details. Who knows I may recant! .Sp * no actual math has been harmed in this statement, patches welcome! .IP "perl basically has 2 types of strings: XUnicodeX and XbytesX" 4 .IX Item "perl basically has 2 types of strings: XUnicodeX and XbytesX" The former has the \s-1UTF\-8 SV\s0 flag set which tells perl to treat a Unicode character as one item (i.e. as opposed to 3 in our heart example). .Sp The latter are just bytes that could be anything (hopefully explicitly utf\-8 in our case!). .SS "What this module is not meant for" .IX Subsection "What this module is not meant for" .IP "Collation related stuff." 4 .IX Item "Collation related stuff." Use something like Unicode::Collate for that. .IP "Unicode problem stuff." 4 .IX Item "Unicode problem stuff." See perlunicode for more info. .IP "Anything not explicitly stated in the \s-1POD.\s0" 4 .IX Item "Anything not explicitly stated in the POD." .SS "What this module is meant for" .IX Subsection "What this module is meant for" .PD 0 .IP "Consistent terminology." 4 .IX Item "Consistent terminology." .PD The terms \*(L"utf\-8\*(R" and \*(L"Unicode\*(R" (akin to \*(L"encoding\*(R" and \*(L"charset\*(R") are typically used ambiguously and perl docs are not immune. .Sp It could mean either a Unicode string or a bytes string depending on the \*(L"thing\*(R" in question. ick, just ick. That is where this module comes in. .Sp It defines those concepts strictly as \*(L"Unicode string\*(R" and \*(L"utf\-8 bytes string\*(R" (the latter is shortened by removing the first or second word because they are essentially synonymous conceptually). .Sp Based on that it gives functions that operate consistently regardless of the type (or regardful if you intend one or the other, your needs; your call).
.IP "Availability" 4 .IX Item "Availability" The functions necessary to do all of this are not available on older perls. .Sp e.g. utf8::is_utf8 is not available before 5.8.1. Encode is not available before 5.7.3. .IP "The steps to do the things this does are better wrapped up for sanity/reusability." 4 .IX Item "The steps to do the things this does are better wrapped up for sanity/reusability." Do I need to encode, decode, upgrade, or downgrade? .Sp Do I use the return value or does it modify the \s-1SV\s0 in place? .SS "Glossary" .IX Subsection "Glossary" This glossary holds true when doing the stuff this module does only with this module. If you fiddle with the guts then it's more likely you can end up in a wonky pseudo state. .PP \fI\s-1UTF\-8\s0 Bytes String\fR .IX Subsection "UTF-8 Bytes String" .PP A string of bytes whose Unicode characters are made up of utf\-8 byte sequences (e.g. \exe2\ex99\exa5 in our heart example). Each Unicode character is handled internally by perl as the bytes that make it up (and not as a single Unicode character). .PP \fIUnicode String\fR .IX Subsection "Unicode String" .PP A \*(L"\s-1UTF\-8\s0 Bytes String\*(R" that additionally has its \s-1UTF\-8\s0 flag set so that perl treats utf\-8 byte sequences as the individual Unicode character it makes up (e.g. \ex{2665} in our heart example). .SS "A word on unicode and utf\-8 representation in source code" .IX Subsection "A word on unicode and utf-8 representation in source code" Another point of confusion can be how unicode and utf\-8 are represented in source code and the default or pragma set treatment of utf\-8. .PP The character itself: .PP .Vb 3 \& perl \-e \*(Aqprint utf8::is_utf8("I X perl") . "\en";\*(Aq # could be a L<UTF\-8 Bytes String> or a L<Unicode String> depending on perl\*(Aqs "mode". \& perl \-e \*(Aquse utf8;print utf8::is_utf8("I X perl") . "\en";\*(Aq # a L<Unicode String> because of perl\*(Aqs "mode". \& perl \-e \*(Aqno utf8;print utf8::is_utf8("I X perl") .
"\en";\*(Aq # a L<UTF\-8 Bytes String> because of perl\*(Aqs "mode". .Ve .PP \&\ex octet notation: .PP .Vb 3 \& perl \-e \*(Aqprint utf8::is_utf8("I \exe2\ex99\exa5 perl") . "\en";\*(Aq # a L<UTF\-8 Bytes String> regardless of perl\*(Aqs "mode". \& perl \-e \*(Aquse utf8;print utf8::is_utf8("I \exe2\ex99\exa5 perl") . "\en";\*(Aq # a L<UTF\-8 Bytes String> regardless of perl\*(Aqs "mode". \& perl \-e \*(Aqno utf8;print utf8::is_utf8("I \exe2\ex99\exa5 perl") . "\en";\*(Aq # a L<UTF\-8 Bytes String> regardless of perl\*(Aqs "mode". .Ve .PP \&\ex unicode notation: .PP .Vb 3 \& perl \-e \*(Aqprint utf8::is_utf8("I \ex{2665} perl") . "\en";\*(Aq # a L<Unicode String> regardless of perl\*(Aqs "mode". \& perl \-e \*(Aquse utf8;print utf8::is_utf8("I \ex{2665} perl") . "\en";\*(Aq # a L<Unicode String> regardless of perl\*(Aqs "mode". \& perl \-e \*(Aqno utf8;print utf8::is_utf8("I \ex{2665} perl") . "\en";\*(Aq # a L<Unicode String> regardless of perl\*(Aqs "mode". .Ve .PP bracketed \ex octet: .PP This one I don't like. It is ambiguous (it is octets but it looks like unicode). I almost always only see it when data is in the process of being corrupted. .PP .Vb 1 \& perl \-e \*(Aqprint utf8::is_utf8("I \ex{e2}\ex{99}\ex{a5} perl") . "\en";\*(Aq .Ve .PP Good rule of thumb is to be explicit with your intent: use brackets form with 4+ digits (zero padded if necessary) and non-bracket form with 2 digits. .SS "Tips on troubleshooting Unicode/utf\-8 problems" .IX Subsection "Tips on troubleshooting Unicode/utf-8 problems" I'll maintain some more detailed Unicode resources at my Unicode page <http://drmuey.com/?do=page&id=57> but for this doc there are 3 things that will help you: .IP "1 check the bytes" 4 .IX Item "1 check the bytes" Don't look so much at seemingly corrupt display, examine the bytes at the source. Once you verify they are legit you can move on to finding out what it is that is mishandling them along the route.
.Sp For example, you might do a \s-1SELECT\s0 on a column and also include the column in \s-1HEX\s0 and the character and bytes lengths of the column in the query. If the bytes are correct but the character length is wrong then that is a great hint as to where to look next. .Sp For perl, make sure you do so on bytes strings: .Sp .Vb 10 \& multivac:~ dmuey$ perl \-le \*(Aqno utf8;print unpack("H*", "I X Perl");\*(Aq \& 4920e299a5205065726c \& multivac:~ dmuey$ perl \-le \*(Aquse utf8;print unpack("H*", "I X Perl");\*(Aq \& 492065205065726c \& multivac:~ dmuey$ perl \-le \*(Aqno utf8;print pack("H*", "4920e299a5205065726c");\*(Aq \& I X Perl \& multivac:~ dmuey$ perl \-le \*(Aquse utf8;print pack("H*", "4920e299a5205065726c");\*(Aq \& I X Perl \& multivac:~ dmuey$ perl \-le \*(Aquse utf8;print pack("H*", "492065205065726c");\*(Aq \& I e Perl \& multivac:~ dmuey$ perl \-le \*(Aqno utf8;print pack("H*", "492065205065726c");\*(Aq \& I e Perl \& multivac:~ dmuey$ .Ve .Sp Even better, use a tool that does what you mean regardless of the type of string: .Sp e.g. 
Devel::Kit does what you mean regardless of the type (via this module as it happens ;p): .Sp .Vb 10 \& [dmuey@multivac ~]$ perl \-MDevel::Kit \-e \*(Aqno utf8;xe("I X Perl",1);\*(Aq \& debug(): Hex: [ \& \*(AqI : 49\*(Aq, \& \*(Aq : 20\*(Aq, \& \*(AqX : e299a5\*(Aq, \& \*(Aq : 20\*(Aq, \& \*(AqP : 50\*(Aq, \& \*(Aqe : 65\*(Aq, \& \*(Aqr : 72\*(Aq, \& \*(Aql : 6c\*(Aq \& ] \& [dmuey@multivac ~]$ perl \-MDevel::Kit \-e \*(Aquse utf8;xe("I X Perl",1);\*(Aq \& debug(): Hex: [ \& \*(AqI : 49\*(Aq, \& \*(Aq : 20\*(Aq, \& \*(AqX : e299a5\*(Aq, \& \*(Aq : 20\*(Aq, \& \*(AqP : 50\*(Aq, \& \*(Aqe : 65\*(Aq, \& \*(Aqr : 72\*(Aq, \& \*(Aql : 6c\*(Aq \& ] \& [dmuey@multivac ~]$ .Ve .IP "2 use the simplest scenario" 4 .IX Item "2 use the simplest scenario" If you can rule out as many factors as possible (\s-1HTTP\s0 request/response, database settings, perl \-E enabling optional features that could affect Unicode/utf8\-bytes, etc) it will help you hone in on where your good bytes went bad. .IP "3 use the simplest string" 4 .IX Item "3 use the simplest string" I tend to use 'I X Unicode' so that there is one multi-byte Unicode character to examine. Also, it is a visible character that most fonts support, which helps. .SH "INTERFACE" .IX Header "INTERFACE" All of these functions are exportable. .SS "\fIis_unicode()\fP" .IX Subsection "is_unicode()" Like \fIutf8::is_utf8()\fR but is less ambiguously named* and works on perls before \fIutf8::is_utf8()\fR and \fIEncode::is_utf8()\fR as far back as, at least, 5.6.2. .PP There is one rare caveat: If you have an old perl, you have a string that contains no Unicode characters, you are in compiled perl w/ B optimized away, and you've upgraded a string outside of the functions in this module (or use the same text in different scalars). You *may* get erroneous results.
.PP * \fIis_utf8()\fR does not mean \*(L"are these bytes in utf\-8 encoding (as opposed to, say, utf\-16, latin1, etc etc)\*(R", it means \*(L"are these bytes in utf\-8 encoding and is the \s-1UTF\-8\s0 flag set on this string\*(R" (i.e. is this a Unicode string): .PP Don't take my word for it, try it yourself: .PP .Vb 1 \& perl \-e \*(Aqprint utf8::is_utf8("I \exe2\ex99\exa5 perl") . "\en";print utf8::is_utf8("I \ex{2665} perl") . "\en";\*(Aq # this is the same on 5.6.2 as 5.16.0 .Ve .SS "\fIchar_count()\fP" .IX Subsection "char_count()" Get the number of characters, conceptually, of the given string regardless of the argument's type. .PP e.g. \*(L"I \ex{2665} perl\*(R" and \*(L"I \exe2\ex99\exa5 perl\*(R" both have 8 characters. The latter just happens to be encoded in utf\-8 which uses a sequence of three smaller \*(L"characters\*(R" to represent the one conceptual unicode character \*(L"X\*(R" (the heart). .SS "\fIbytes_size()\fP" .IX Subsection "bytes_size()" Get the number of bytes of the given string regardless of the argument's type. .SS "\fIget_unicode()\fP" .IX Subsection "get_unicode()" Get a \*(L"Unicode String\*(R" version of the given string regardless of the argument's type. .SS "\fIget_utf8()\fP" .IX Subsection "get_utf8()" Get a \*(L"\s-1UTF\-8\s0 Bytes String\*(R" version of the given string regardless of the argument's type. .SS "\fIescape_utf8_or_unicode()\fP" .IX Subsection "escape_utf8_or_unicode()" Serialize unicode characters as slash-x notation: \ex{2665} style if the argument was a \*(L"Unicode String\*(R". \exe2\ex99\exa5 style if the argument was a \*(L"\s-1UTF\-8\s0 Bytes String\*(R". .PP Returns a \*(L"\s-1UTF\-8\s0 Bytes String\*(R" since it should contain no unicode characters at this point. .PP \fI\fIescape_utf8()\fI\fR .IX Subsection "escape_utf8()" .PP Like \fIescape_utf8_or_unicode()\fR but force it to be in \*(L"\s-1UTF\-8\s0 Bytes String\*(R" style \exe2\ex99\exa5 notation.
.PP \fI\fIescape_unicode()\fI\fR .IX Subsection "escape_unicode()" .PP Like \fIescape_utf8_or_unicode()\fR but force it to be in \*(L"Unicode String\*(R" style \ex{2665} notation. .SS "\fIunescape_utf8_or_unicode()\fP" .IX Subsection "unescape_utf8_or_unicode()" Turn slash-x notation back into the character. .PP If there was a \*(L"Unicode String\*(R" \ex{2665} style escape it returns a \*(L"Unicode String\*(R". .PP Otherwise it returns a \*(L"\s-1UTF\-8\s0 Bytes String\*(R". .PP \fI\fIunescape_utf8()\fI\fR .IX Subsection "unescape_utf8()" .PP Like \fIunescape_utf8_or_unicode()\fR but force it to return a \*(L"\s-1UTF\-8\s0 Bytes String\*(R" regardless of slash-x type. .PP \fI\fIunescape_unicode()\fI\fR .IX Subsection "unescape_unicode()" .PP Like \fIunescape_utf8_or_unicode()\fR but force it to return a \*(L"Unicode String\*(R" regardless of slash-x type. .SS "\fIquotemeta_bytes()\fP" .IX Subsection "quotemeta_bytes()" Unicode aware version of \fIquotemeta()\fR that returns a \*(L"\s-1UTF\-8\s0 Bytes String\*(R" that has unicode characters represented as their characters. .SS "\fIquotemeta_utf8()\fP" .IX Subsection "quotemeta_utf8()" Unicode aware version of \fIquotemeta()\fR that returns a \*(L"\s-1UTF\-8\s0 Bytes String\*(R" that has unicode characters represented in \exe2\ex99\exa5 notation. .SS "\fIquotemeta_unicode()\fP" .IX Subsection "quotemeta_unicode()" Unicode aware version of \fIquotemeta()\fR that returns a \*(L"Unicode String\*(R" that has unicode characters represented in \ex{2665} notation. .SS "\fIunquotemeta_bytes()\fP" .IX Subsection "unquotemeta_bytes()" Alias of \fIunquotemeta_utf8()\fR. Exists to semantically correspond to \fIquotemeta_bytes()\fR. .SS "\fIunquotemeta_utf8()\fP" .IX Subsection "unquotemeta_utf8()" Unicode aware version of \*(L"\fIunquotemeta()\fR\*(R" in String::Unquotemeta that returns a \*(L"\s-1UTF\-8\s0 Bytes String\*(R". 
.SS "\fIunquotemeta_unicode()\fP" .IX Subsection "unquotemeta_unicode()" Unicode aware version of \*(L"\fIunquotemeta()\fR\*(R" in String::Unquotemeta that returns a \*(L"Unicode String\*(R". .SH "DIAGNOSTICS" .IX Header "DIAGNOSTICS" Throws no warnings or errors of its own, except: .ie n .IP """pack() did not result in unicode string and there is no way to emulate utf8::upgrade""" 4 .el .IP "\f(CWpack() did not result in unicode string and there is no way to emulate utf8::upgrade\fR" 4 .IX Item "pack() did not result in unicode string and there is no way to emulate utf8::upgrade" This essentially should never happen and mainly exists for completeness. It is only possible on pre 5.8.1 perls. If you are ever able to get \fIget_unicode()\fR to \fIcarp()\fR this please send the details! .SH "CONFIGURATION AND ENVIRONMENT" .IX Header "CONFIGURATION AND ENVIRONMENT" String::UnicodeUTF8 requires no configuration files or environment variables. .SH "DEPENDENCIES" .IX Header "DEPENDENCIES" String::Unquotemeta .PP \&\fIis_unicode()\fR, when given a string with no unicode characters, lazy loads Encode for perl versions from 5.7.3 to 5.8.1, B::Flags for < 5.7.3 .PP Module::Want is used for the lazy loading since there are advantages over straight eval. .SH "INCOMPATIBILITIES" .IX Header "INCOMPATIBILITIES" None reported. .SH "BUGS AND LIMITATIONS" .IX Header "BUGS AND LIMITATIONS" No bugs have been reported. .PP Please report any bugs or feature requests to \&\f(CW\*(C`bug\-string\-unicodeutf8@rt.cpan.org\*(C'\fR, or through the web interface at <http://rt.cpan.org>. .SH "TODO" .IX Header "TODO" \&\eN notation escaping/unescaping: Seems like \s-1YAGNI\s0 but if there is enough demand we can add it (lazy/separate since it'd be heavy).
.SH "AUTHOR" .IX Header "AUTHOR" Daniel Muey \f(CW\*(C`<http://drmuey.com/cpan_contact.pl>\*(C'\fR .SH "LICENCE AND COPYRIGHT" .IX Header "LICENCE AND COPYRIGHT" Copyright (c) 2012, Daniel Muey \f(CW\*(C`<http://drmuey.com/cpan_contact.pl>\*(C'\fR. All rights reserved. .PP This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See perlartistic. .SH "DISCLAIMER OF WARRANTY" .IX Header "DISCLAIMER OF WARRANTY" \&\s-1BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE SOFTWARE \*(L"AS IS\*(R" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION.\s0 .PP \&\s-1IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE SOFTWARE \s0(\s-1INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE\s0), \s-1EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.\s0