diff --git a/lib/DDG/Goodie/Bin2Unicode.pm b/lib/DDG/Goodie/Bin2Unicode.pm index f742d6b8d58..914c827c7e5 100644 --- a/lib/DDG/Goodie/Bin2Unicode.pm +++ b/lib/DDG/Goodie/Bin2Unicode.pm @@ -14,19 +14,99 @@ triggers query => qr{^([01\s]{8,})(?:\s+(?:to\s+)?(?:unicode|text|ascii))?$}; my $MAX_CODE_PT = 1114111; +my %ctrl_chars = ( + 0 => 'Null character (NUL)', + 1 => 'Start of Heading (SOH)', + 2 => 'Start of Text (STX)', + 3 => 'End-of-text character (ETX)', + 4 => 'End-of-transmission character (EOT)', + 5 => 'Enquiry character (ENQ)', + 6 => 'Acknowledge character (ACK)', + 7 => 'Bell character (BEL)', + 8 => 'Backspace (BS)', + 9 => 'Horizontal tab (HT)', + 10 => 'Line feed (LF)', + 11 => 'Vertical tab (VT)', + 12 => 'Form feed (FF)', + 13 => 'Carriage return (CR)', + 14 => 'Shift Out (SO)', + 15 => 'Shift In (SI)', + 16 => 'Data Link Escape (DLE)', + 17 => 'Device Control 1 (DC1)', + 18 => 'Device Control 2 (DC2)', + 19 => 'Device Control 3 (DC3)', + 20 => 'Device Control 4 (DC4)', + 21 => 'Negative-acknowledge character (NAK)', + 22 => 'Synchronous Idle (SYN)', + 23 => 'End of Transmission Block (ETB)', + 24 => 'Cancel character (CAN)', + 25 => 'End of Medium (EM)', + 26 => 'Substitute character (SUB)', + 27 => 'Escape character (ESC)', + 28 => 'File Separator (FS)', + 29 => 'Group Separator (GS)', + 30 => 'Record Separator (RS)', + 31 => 'Unit Separator (US)', + 32 => 'Space (SP)', + 127 => 'Delete (DEL)', + 128 => 'Padding Character (PAD)', + 129 => 'High Octet Preset (HOP)', + 130 => 'Break Permitted Here (BPH)', + 131 => 'No Break Here (NBH)', + 132 => 'Index (IND)', + 133 => 'Next Line (NEL)', + 134 => 'Start of Selected Area (SSA)', + 135 => 'End of Selected Area (ESA)', + 136 => 'Character Tabulation Set (HTS)', + 137 => 'Character Tabulation with Justification (HTJ)', + 138 => 'Line Tabulation Set (VTS)', + 139 => 'Partial Line Forward (PLD)', + 140 => 'Partial Line Backward (PLU)', + 141 => 'Reverse Line Feed (RI)', + 142 => 'Single-Shift Two (SS2)', + 
143 => 'Single-Shift Three (SS3)', + 144 => 'Device Control String (DCS)', + 145 => 'Private Use 1 (PU1)', + 146 => 'Private Use 2 (PU2)', + 147 => 'Set Transmit State (STS)', + 148 => 'Cancel character (CCH)', + 149 => 'Message Waiting (MW)', + 150 => 'Start of Protected Area (SPA)', + 151 => 'End of Protected Area (EPA)', + 152 => 'Start of String (SOS)', + 153 => 'Single Graphic Character Introducer (SGCI)', + 154 => 'Single Character Introducer (SCI)', + 155 => 'Control Sequence Introducer (CSI)', + 156 => 'String Terminator (ST)', + 157 => 'Operating System Command (OSC)', + 158 => 'Private Message (PM)', + 159 => 'Application Program Command (APC)' +); + +my $zaahirs_hideout = '0' x 48; + handle matches => sub { my $q = $_; # orginal query my $bin_string = shift @_; # captured binary string + my $str; + if($bin_string eq $zaahirs_hideout){ + $str = q{Congratulations, you've discovered Zaahir's hideout!}; + goto DONE; + } my $want_ascii = $q =~ /\bascii\b/; my @bins = $bin_string =~ /([01]+|\s+)/g; - my $str; for my $b (@bins){ if($b =~ /^[01]+$/){ return if length($b) % 8; # Overflow/non-portable warnings expected my $i = oct("0b$b"); + if((exists $ctrl_chars{$i}) && (@bins == 1)){ + $str = $ctrl_chars{$i}; + $str = "Control character: $str" unless ($i == 32) || ($i == 127); + last; + } # Assume ascii if out of range or explicitly requested. # This will work for characters all in the same string # but will not print the right non-ascii characters *if* @@ -40,6 +120,10 @@ handle matches => sub { } } + # return if all control/space (https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes) + return if $str =~ /^[\p{Control} ]+$/; + + DONE: return "Binary '$bin_string' converted to " . $want_ascii ? 'ascii' : 'unicode' . 
" is '$str'", structured_answer => { id => 'bin2unicode', diff --git a/t/Bin2Unicode.t b/t/Bin2Unicode.t index c04f9f9a8d6..76ba4815568 100644 --- a/t/Bin2Unicode.t +++ b/t/Bin2Unicode.t @@ -29,6 +29,12 @@ sub gen_struc_ans { }; } +my %ctrl_tests; +for (1..32, 127..159){ + my $b = sprintf '%08b', $_; + $ctrl_tests{join ' ', $b, $b} = undef; +} + ddg_goodie_test( [qw( DDG::Goodie::Bin2Unicode )], '0110100001100101011011000110110001101111 to text' => test_zci(gen_struc_ans( @@ -58,8 +64,19 @@ ddg_goodie_test( '0100110101110101011000110110100001100001011100110010000001000111011100100110000101100011011010010110000101110011001000000100001101101111011011010111000001100001011100110100110111110000', 'Muchas Gracias CompasMð', 1)), + '00000000' => test_zci(gen_struc_ans( + '00000000', + '00000000', + 'Control character: Null character (NUL)', + 0)), + '000000000000000000100000' => test_zci(gen_struc_ans( + '000000000000000000100000', + '000000000000000000100000', + 'Space (SP)', + 0)), '010101' => undef, - '201 to text' => undef + '201 to text' => undef, + %ctrl_tests ); done_testing;