The answer provided in "Handling wide char values returned by Win32::API" can parse UTF-8 command-line arguments on Windows.
However, once the script is packaged with PAR Packer (`pp`), the parsing fails.
If I save this code
use strict;
use warnings;
use feature qw( say state );

# Encode/decode STDIN, STDOUT and STDERR using the console's current output
# code page (e.g. "cp65001" after `chcp 65001`).
use open ':std', ':encoding('.do { require Win32; "cp".Win32::GetConsoleOutputCP() }.')';

use Config     qw( %Config );
use Encode     qw( decode encode );
use Win32::API qw( ReadMemory );

# Size in bytes of a native pointer for this build of perl.
use constant PTR_SIZE => $Config{ptrsize};

# pack/unpack template for one native pointer.
use constant PTR_PACK_FORMAT =>
      PTR_SIZE == 8 ? 'Q'
    : PTR_SIZE == 4 ? 'L'
    : die("Unrecognized ptrsize\n");

# Win32::API type letter used for pointer-sized parameters and return values.
use constant PTR_WIN32API_TYPE =>
      PTR_SIZE == 8 ? 'Q'
    : PTR_SIZE == 4 ? 'N'
    : die("Unrecognized ptrsize\n");

# Returns the value of kernel32's lstrlenW for the string at address $ptr.
# The Win32::API binding is created once and cached; dies with $^E if it
# cannot be created.
sub lstrlenW {
    my ($ptr) = @_;

    state $lstrlenW = Win32::API->new('kernel32', 'lstrlenW', PTR_WIN32API_TYPE, 'i')
        or die($^E);

    return $lstrlenW->Call($ptr);
}

# Decodes the NUL-terminated UTF-16le string at address $ptr into a Perl
# string. Returns undef for a null pointer and '' for an empty string.
sub decode_LPCWSTR {
    my ($ptr) = @_;
    return undef if !$ptr;

    my $num_chars = lstrlenW($ptr)
        or return '';

    # lstrlenW's count is multiplied by two to get the number of bytes to read.
    return decode('UTF-16le', ReadMemory($ptr, $num_chars * 2));
}

# Returns true on success. Returns false and sets $^E on error.
sub LocalFree {
    my ($ptr) = @_;

    state $LocalFree = Win32::API->new('kernel32', 'LocalFree', PTR_WIN32API_TYPE, PTR_WIN32API_TYPE)
        or die($^E);

    return $LocalFree->Call($ptr) == 0;
}

# Returns the string obtained from kernel32's GetCommandLineW, decoded from
# UTF-16le.
sub GetCommandLine {
    state $GetCommandLine = Win32::API->new('kernel32', 'GetCommandLineW', '', PTR_WIN32API_TYPE)
        or die($^E);

    return decode_LPCWSTR($GetCommandLine->Call());
}

# Returns a reference to an array on success. Returns undef and sets $^E on error.
sub CommandLineToArgv {
    my ($cmd_line) = @_;

    state $CommandLineToArgv = Win32::API->new('shell32', 'CommandLineToArgvW', 'PP', PTR_WIN32API_TYPE)
        or die($^E);

    # The API is handed a NUL-terminated UTF-16le buffer.
    my $cmd_line_encoded = encode('UTF-16le', $cmd_line."\0");
    my $num_args_buf = pack('i', 0);  # Allocate space for an "int".

    my $arg_ptrs_ptr = $CommandLineToArgv->Call($cmd_line_encoded, $num_args_buf)
        or return undef;

    my $num_args = unpack('i', $num_args_buf);

    # Read $num_args pointers from the returned address and decode the string
    # each one points to.
    my @args =
        map { decode_LPCWSTR($_) }
            unpack PTR_PACK_FORMAT.'*',
                ReadMemory($arg_ptrs_ptr, PTR_SIZE * $num_args);

    # The pointer array is released only after every string has been decoded.
    LocalFree($arg_ptrs_ptr);
    return \@args;
}

# Main: print the raw command line, then each parsed argument in angle brackets.
{
    my $cmd_line = GetCommandLine();
    say $cmd_line;

    my $args = CommandLineToArgv($cmd_line)
        or die("CommandLineToArgv: $^E\n");

    for my $arg (@$args) {
        say "<$arg>";
    }
}
in `CmdArgs.pl` and run `perl CmdArgs.pl äö.txt`, the cmd window displays
perl CmdArgs.pl äö.txt
<perl>
<CmdArgs.pl>
<äö.txt>
I then packaged `CmdArgs.pl` with the PAR Packer command `pp -o CmdArgs.exe CmdArgs.pl`. When I run `CmdArgs.exe äö.txt`, the cmd window shows
C:\Users\lenovo\AppData\Local\Temp\par-5875\cache-622ad9aefc40f2ce41f40707644eb592d3c73c6a/CmdArgs.exe ??.txt
<C:\Users\lenovo\AppData\Local\Temp\par-5875\cache-622ad9aefc40f2ce41f40707644eb592d3c73c6a/CmdArgs.exe>
<??.txt>
What I want it to show is
C:\Users\lenovo\AppData\Local\Temp\par-5875\cache-622ad9aefc40f2ce41f40707644eb592d3c73c6a/CmdArgs.exe äö.txt
<C:\Users\lenovo\AppData\Local\Temp\par-5875\cache-622ad9aefc40f2ce41f40707644eb592d3c73c6a/CmdArgs.exe>
<äö.txt>
Info:
- Win11
- The OS language is simplified Chinese
- The values of `ACP`, `OEMCP` and `MACCP` in the registry key `HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\Nls\CodePage` are 936, 936, and 10008.
- I use `chcp 65001` in `cmd` to change the console's code page.
Debugging:
- `Win32::GetConsoleOutputCP()` returns 65001 (UTF-8) as expected.
- `$args->[ $#$args ]` is `3F.3F.2E.74.78.74` (as returned by `sprintf "%vX"`).
- `$cmd_line` is `….2F.43.6D.64.41.72.67.73.2E.65.78.65.20.3F.3F.2E.74.78.74`.
- The memory returned by `GetCommandLineW` is `….2F.0.43.0.6D.0.64.0.41.0.72.0.67.0.73.0.2E.0.65.0.78.0.65.0.20.0.3F.0.3F.0.2E.0.74.0.78.0.74.0`.