#!/usr/bin/perl
#
# Generate encoding rules for WebAuth data.
#
# This script is used during the WebAuth build process to transform annotated
# struct definitions in headers into C data structures that specify how to
# encode those structs as WebAuth tokens.

use 5.010;
use autodie;
use strict;
use warnings;

use Carp qw(croak);
use Readonly;

##############################################################################
# Global variables
##############################################################################

# The heading on the generated source file.  This is a non-interpolating
# here-document, so the text below is emitted exactly as written.  print_rules
# prints it first and then adds an #include for the source header unless that
# header is lib/internal.h, which the heading already includes.
Readonly my $HEADING => <<'END_HEADER';
/*
 * Automatically generated -- do not edit!
 *
 * This file was automatically generated from the encode comments on the
 * members of structs in the WebAuth source using the encoding-rules
 * script.  To make changes, modify either the encode comments or (more
 * rarely) the encoding-rules script and run it again.
 *
 * Copyright 2012, 2013
 *     The Board of Trustees of the Leland Stanford Junior University
 *
 * See LICENSE for licensing terms.
 */

#include <portable/system.h>

#include <lib/internal.h>
END_HEADER

# Mappings from C types to encoding types.  The keys are the C type exactly as
# written in the header once trailing whitespace has been removed, so pointer
# types include the space before the asterisk.  The values are the suffixes
# that print_rule appends to WA_TYPE_ in the generated source.  A member
# flagged with the repeat option is emitted as REPEAT instead of the mapped
# value, but parse_struct still requires its C type to be listed here.
Readonly my %TYPES => (
    'char *'        => 'STRING',
    'const char *'  => 'STRING',
    'int32_t'       => 'INT32',
    'uint32_t'      => 'UINT32',
    'unsigned long' => 'ULONG',
    'time_t'        => 'TIME',
    'void *'        => 'DATA',
    'const void *'  => 'DATA',
);

# Emacs cperl-mode can't handle using character classes for braces.  The
# regexes below therefore match braces with a backslash escape, which
# requires turning off the perlcritic policy that would otherwise object.
## no critic (RegularExpressions::ProhibitEscapedMetacharacters)

# The start of a struct definition: optional leading whitespace, the struct
# keyword, the struct name, and an opening brace, all on the same line.
#     $1 == struct name
Readonly my $STRUCT_REGEX => qr/ \A \s* struct \s+ (\S+) \s* \{ /xms;

# A struct member with a trailing encode comment.  The line must begin with
# whitespace, the semicolon must be followed by whitespace, and the comment
# must have a single space after its opening, after "encode:", and before its
# closing.  Members without such a comment do not match and are skipped by
# parse_struct.
#     $1 == C type of member (may have trailing whitespace)
#     $2 == member name
#     $3 == encoded name and any options (comma-separated)
Readonly my $MEMBER_REGEX => qr{
    \A \s+
       (.*?)                    # C type (1)
       ([\w_]+)                 # member name (2)
       ; \s+
       /[*]                     # trailing encode comment
         [ ] encode: [ ]        #   always starts with encode:
         (.*?)                  #   encoded name and any options (3)
         [ ]
       [*]/                     # end of comment
   }xms;

# A struct member that nests another encoded struct, declared as a pointer to
# that struct.  parse_struct applies this to the line immediately following a
# member flagged with the repeat option.
#     $1 == nested struct C type (including "struct")
#     $2 == member name
Readonly my $NESTED_REGEX =>
  qr{ \A \s+ (struct \s+ [\w_]+) \s* [*]([\w_]+) ; }xms;

# The end of a struct definition: a closing brace preceded by nothing but
# optional whitespace.
Readonly my $END_REGEX => qr/ \A \s* \} /xms;

## use critic

##############################################################################
# Functions
##############################################################################

# print with error checking and an explicit file handle.  autodie
# unfortunately can't help us with print because it can't be prototyped and
# hence can't be overridden.
#
# $fh   - Output file handle
# @args - Remaining arguments to print
#
# Returns: undef
#  Throws: Text exception, including the system error message, on output
#          failure
sub print_fh {
    my ($fh, @args) = @_;

    # Include $! so that the person running the build can tell why the write
    # failed (closed handle, full disk, and so forth).
    print {$fh} @args or croak("print failed: $!");
    return;
}

# say with error checking and an explicit file handle.  Like print, say can't
# be overridden by autodie, so the result is checked by hand.
#
# $fh   - Output file handle
# @args - Remaining arguments to say
#
# Returns: undef
#  Throws: Text exception, including the system error message, on output
#          failure
sub say_fh {
    my ($fh, @args) = @_;

    # Include $! so that the cause of the failed write is reported.
    say {$fh} @args or croak("say failed: $!");
    return;
}

# Read the body of a struct from an open C header and turn every member that
# carries an encode comment into an encoding rule.  The file handle must be
# positioned just after the line that opened the struct.  Parsing is done one
# line at a time and is therefore sensitive to coding style: each member has
# to be on its own line, and the whitespace has to be what the member regexes
# expect.
#
# $source    - Input file name (for error reporting)
# $source_fh - File handle for open C header source
#
# Returns: a reference to an array of encoding rules for that struct, where
#          each rule is a reference to an array holding the attribute name,
#          the encoding type, the encoded name, a reference to the hash of
#          options, and the nested struct C type (undef unless the type is
#          REPEAT)
#  Throws: String exceptions on parsing failure
sub parse_struct {
    my ($source, $source_fh) = @_;
    my @parsed;

  LINE:
    while (defined(my $text = <$source_fh>)) {

        # A closing brace ends the struct and with it the parse.
        if ($text =~ $END_REGEX) {
            return \@parsed;
        }

        # Members without an encode comment contribute nothing.
        my ($ctype, $name, $spec) = $text =~ $MEMBER_REGEX;
        if (!defined $ctype) {
            next LINE;
        }

        # The encode comment holds the encoded name followed by any flags.
        my @fields    = split(m{,\s*}xms, $spec);
        my $wire_name = shift @fields;
        my %flags;
        for my $flag (@fields) {
            $flags{$flag} = 1;
        }

        # Look up the encoder type for the C type, ignoring the whitespace
        # that separated the type from the member name.  The repeat flag
        # overrides the looked-up type, but the C type must still be known.
        $ctype =~ s{ \s+ \z }{}xms;
        my $kind = $TYPES{$ctype};
        if (!$kind) {
            die "$source:$.: unknown type $ctype\n";
        }
        if ($flags{repeat}) {
            $kind = 'REPEAT';
        }

        # Only TIME attributes may carry the creation flag.
        if ($flags{creation} && $kind ne 'TIME') {
            die "$source:$.: creation flag specified on non-TIME attribute"
              . " $name\n";
        }

        # A REPEAT member is the count for a nested struct that has to be
        # declared on the very next line.  The rule is recorded under the
        # name of that nested member rather than the name of the count.
        my ($nested_type, $nested_member);
        if ($kind eq 'REPEAT') {
            my $following = <$source_fh>;
            if (defined $following) {
                ($nested_type, $nested_member) = $following =~ $NESTED_REGEX;
            }
            if (!defined $nested_type) {
                die "$source:$.: missing repeated struct member for $name\n";
            }
            $name = $nested_member;
        }

        # Record the representation of this rule.
        push(@parsed, [$name, $kind, $wire_name, \%flags, $nested_type]);
    }

    # The input ran out before the closing brace of the struct.
    die "$source:$.: could not find end of struct\n";
}

# Spell out a Perl truth value as the C boolean literal used in the generated
# source.  Helper function for print_rule.
#
# $bool - Boolean value (anything Perl treats as true or false, including
#         undef)
#
# Returns: 'true' or 'false'
sub bool_as_string {
    my ($bool) = @_;
    if ($bool) {
        return 'true';
    }
    return 'false';
}

# Print out an encoding rule for a single attribute to the given file handle.
#
# $fh       - File handle to which to print the rule
# $struct   - Name of the struct for which we're printing rules
# $rule_ref - Reference to array representing a rule as follows:
#   [0] Name of the attribute
#   [1] Type of the attribute
#   [2] Attribute name encoded in the wire token
#   [3] Reference to hash of options
#   [4] C data type of nested structure (only used for REPEAT attributes)
#
# Returns: undef
#  Throws: I/O exceptions on print failure
sub print_rule {
    my ($fh, $struct, $rule_ref) = @_;
    my ($name, $type, $encode_name, $option_ref, $nest_type) = @{$rule_ref};

    # Do some preliminary formatting of the rule data.  The description is
    # the member name with underscores turned into spaces.
    my $desc = $name;
    $desc =~ tr{_}{ };
    my $creation = bool_as_string($option_ref->{creation});
    my $optional = bool_as_string($option_ref->{optional});
    my $ascii    = bool_as_string($option_ref->{ascii});

    # Print out the basic information.
    #<<<
    say_fh($fh, q[    {]);
    say_fh($fh, qq[        "$encode_name",]);
    say_fh($fh, qq[        "$desc",]);
    say_fh($fh, qq[        WA_TYPE_${type},]);
    #>>>

    # Print out the flags derived from the options.  The field width keeps
    # the trailing comments aligned for both true and false.
    #<<<
    say_fh($fh, sprintf('        %-6s /* optional */', $optional . q{,}));
    say_fh($fh, sprintf('        %-6s /* ascii    */', $ascii    . q{,}));
    say_fh($fh, sprintf('        %-6s /* creation */', $creation . q{,}));
    #>>>

    # Print out the offset.
    say_fh($fh, qq[        offsetof(struct $struct, $name),]);

    # Print out the additional metadata for this attribute.
    if ($type eq 'DATA') {
        # DATA attributes have an additional offset to the length attribute.
        #<<<
        say_fh($fh, qq[        offsetof(struct $struct, ${name}_len),]);
        say_fh($fh,  q[        0,]);
        say_fh($fh,  q[        NULL]);
        #>>>
    } elsif ($type eq 'REPEAT') {
        # Derive the name of the nested struct's encoding array the same way
        # print_rules names it: the struct name with a leading webauth_ or
        # wai_ replaced by wai_.  The struct keyword is removed first and
        # unconditionally so that a nested struct with neither prefix doesn't
        # leave "struct " inside the emitted identifier.
        my $nest_name = $nest_type;
        $nest_name =~ s{ \A struct \s+ }{}xms;
        $nest_name =~ s{ \A (?: webauth | wai ) _ }{wai_}xms;

        # Repeated attributes have a count offset and a nested size and
        # encoding.
        say_fh($fh, qq[        offsetof(struct $struct, ${name}_count),]);
        say_fh($fh, qq[        sizeof($nest_type),]);
        say_fh($fh, qq[        ${nest_name}_encoding]);
    } else {
        # Initialization placeholders for all other types.
        say_fh($fh, '        0,');
        say_fh($fh, '        0,');
        say_fh($fh, '        NULL');
    }

    # End of the encoding.
    say_fh($fh, '    },');
    return;
}

# Write the complete generated C source: the standard heading, an #include
# of the source header when needed, and one encoding array per struct.
#
# $fh        - File handle to which to print the rules
# $source    - Path to source header
# $rules_ref - Reference to hash of struct name to rule array
#
# Returns: undef
#  Throws: I/O exceptions on print failure
sub print_rules {
    my ($fh, $source, $rules_ref) = @_;

    # The heading already includes lib/internal.h, so only other headers
    # need an #include of their own.  A blank line ends the preamble.
    print_fh($fh, $HEADING);
    say_fh($fh, "#include <$source>") if $source ne 'lib/internal.h';
    print_fh($fh, "\n");

    # Emit the structs in sorted order so that the output is stable.
    my @structs = sort keys %{$rules_ref};
    for my $struct (@structs) {

        # The variable holding the rules is named after the struct, with a
        # leading webauth_ or wai_ normalized to wai_.
        (my $variable = $struct) =~ s{ \A (webauth|wai) _ }{wai_}xms;
        say_fh($fh, "const struct wai_encoding ${variable}_encoding[] = {");

        # One initializer per rule, in the order the members were parsed.
        my @rules = @{ $rules_ref->{$struct} };
        for my $rule_ref (@rules) {
            print_rule($fh, $struct, $rule_ref);
        }

        # Terminate the array with the end of rules marker.
        say_fh($fh, '    WA_ENCODING_END');
        say_fh($fh, '};');
    }
    return;
}

##############################################################################
# Main routine
##############################################################################

# Parse command-line arguments.  The first is the header to read and the rest
# are the names of the structs for which to generate encoding rules.
if (@ARGV < 2) {
    die "Syntax: encoding-rules <source> <struct> [<struct> ...]\n";
}
my $source = shift;
my %encode_struct = map { $_ => 1 } @ARGV;

# Parse the source file and build our encoding rules.  Structs that were not
# requested are ignored; for the others, parse_struct consumes the lines up
# to and including the end of the struct.
my %rules;
open(my $source_fh, '<', $source);
while (defined(my $line = <$source_fh>)) {
    if ($line =~ $STRUCT_REGEX) {
        my $name = $1;
        next if !$encode_struct{$name};
        $rules{$name} = parse_struct($source, $source_fh);
    }
}
close($source_fh);

# Generate the encoding rules.
print_rules(\*STDOUT, $source, \%rules);

# Standard output is buffered, so a write error (such as a full disk when the
# output is redirected to a file) may not be noticed until the data is
# flushed.  Close the handle explicitly so that autodie turns such a failure
# into an exception instead of letting the script exit successfully with
# truncated output.
close(\*STDOUT);

__END__

=for stopwords
Allbery WebAuth struct structs Kerberos keyrings encodings timestamps
sublicense MERCHANTABILITY NONINFRINGEMENT

=head1 NAME

encoding-rules - Generate encoding rules for WebAuth data

=head1 SYNOPSIS

B<encoding-rules> I<header> I<struct> [I<struct> ...]

=head1 DESCRIPTION

This script is used by WebAuth maintainers to generate encoding rules,
used by the wai_encode() and wai_decode() internal library functions, to
translate structs to and from the WebAuth data serialization format.  This
is used for token generation, Kerberos credential serialization, and other
places serialization is needed (such as keyrings and service token
caches).

B<encoding-rules> takes as arguments a source file that defines one or
more structs and then a list of structs for which to generate encodings.
It creates, from this, a C source file that defines, for each of those
structs, an array of wai_encoding structs that will translate that struct
to and from the WebAuth attribute serialization format.  That C source is
printed to standard output.

The encoding rules for a struct are based on the data type of the struct
members and then a comment at the end of the line defining that struct
member.  The comment must be in the form:

    /* encode: <attr>[, <flag>[, <flag> ...]] */

where <attr> is the attribute used in the token encoding (the key in the
key/value pair format of WebAuth token encoding).

The following optional flags are supported:

=over 4

=item C<ascii>

Only used for numeric and data attributes.  Encode the attribute in ASCII
rather than using a binary form.  For numeric attributes, this means
encoding the value as ASCII digits.  For data attributes, this means
encoding the value in hex.  The inverse is done when decoding.

=item C<creation>

Only valid for TIME attributes (struct members of type C<time_t>).  This
attribute represents a creation time.  If it is not set when encoding, the
current time will be encoded as the value of this attribute.

=item C<optional>

Indicates that this attribute is optional in the encoding.  If it is NULL
or 0, the attribute will be omitted when encoding.  If the attribute is
missing from an encoded form, this is not a problem.  Absent this flag,
numeric attributes will be encoded as 0 if the value is 0, NULL string and
data attributes are an error, and the attribute must be present when
parsing the encoded form or an error is reported.

=item C<repeat>

This attribute represents the count of a repeated nested struct, which
will be the next member in the struct and the next line of the C header
file.  The nested struct must have its own encoding rules defined.

=back

=head1 DIAGNOSTICS

=over 4

=item %s:%d: could not find end of struct

While parsing a struct with encoding rules, the end of the input file was
seen before the end of the struct definition.

=item %s:%d: creation flag specified on non-TIME attribute %s

The given struct attribute has a C type other than C<time_t>.  The
C<creation> flag is only supported for timestamps.

=item %s:%d: missing repeated struct member for %s

This struct member was flagged as the count of a repeated nested struct,
but the nested struct member was not the next line of the C header as was
expected.

=item %s:%d: unknown type %s

The specified line of the specified source file contains a struct member
definition whose type is not one of the recognized types.
B<encoding-rules> contains a mapping of C data types to WebAuth token
attribute types, which may need to be updated for this data type.

=back

In addition, failure to open the source file or to print out the encoding
rules will result in I/O exceptions.

=head1 AUTHOR

Russ Allbery <eagle@eyrie.org>

=head1 COPYRIGHT AND LICENSE

Copyright 2012, 2013 The Board of Trustees of the Leland Stanford Junior
University

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

=cut
