#!/usr/bin/perl

# This Perl script reads the XML language file given as its first argument and
# writes the generated XML language file to the path given as its second argument.
#
# * If the name of the language is 'HTML', then it creates the language 'PHP (HTML)'
#   which shall be used for PHP hl.
#
# * If the name of the language is something else (say '*'), it creates the language '*/PHP'.
#   This new language is the same as the old one, but is able to detect PHP everywhere.
#
# This script will correctly set extensions & mimetype, and will replace
# <IncludeRules context="##*"> by <IncludeRules context="##*/PHP">
#
# Generated languages need a language named 'PHP/PHP', which shall take care of PHP highlighting
# itself and which will be called every time something like <?php is encountered.
#
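# This script also supports Twig and does the same as for PHP. PHP is selected when the
# output file name ends in '-php.xml'; any other output file name selects Twig.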
#
# SPDX-FileCopyrightText: Jan Villat <jan.villat@net2000.ch>
# License: LGPL

# Both paths are mandatory. Without this check a missing argument reaches
# open() as undef, and three-argument open() then creates an anonymous
# temporary file instead of failing, so the script would silently do nothing.
if (@ARGV < 2)
{
  die "Usage: $0 <input.xml> <output.xml>\n";
}

open(my $input, '<:encoding(UTF-8)', $ARGV[0])
  or die "Could not open file '$ARGV[0]': $!";

open(my $output, '>:encoding(UTF-8)', $ARGV[1])
  or die "Could not open file '$ARGV[1]': $!";

# The output file name selects the embedded language: a name ending in
# '-php.xml' means PHP, anything else means Twig.
my $language = ($ARGV[1] =~ /-php\.xml$/) ? "PHP" : "Twig";

# Read the whole input into one string; every later step is a substitution
# applied to the complete document.
my $file = join("", <$input>);

close($input)
  or die "Could not close file '$ARGV[0]': $!";

# Banner marking the result as generated. It is inserted right before the
# <language> element here and printed once more at the end of the output.
my $warning = "\n\n<!-- ***** THIS FILE WAS GENERATED BY A SCRIPT - DO NOT EDIT ***** -->\n";

$file =~ s/(?=<language)/$warning\n\n\n/;

# Name of the syntax being converted. The search is confined to the
# <language> tag itself ([^>]) so that it also works when the tag spans
# several lines and never picks up the name="..." of a later element.
# Abort when there is no name: everything below depends on it.
my ($syntaxName) = $file =~ /<language[^>]*?name="([^"]+)"/
  or die "Could not find the name of the <language> element in '$ARGV[0]'\n";

# The HTML syntax is the "root": it becomes '<language> (HTML)' instead of
# getting the '/<language>' suffix.
my $root = ($syntaxName eq "HTML") ? 1 : 0;

if ($language eq "Twig")
{
  # Twig variants do not inherit the priority of the original syntax.
  $file =~ s/<language([^>]+)priority="[^"]*"/<language$1/s;
}

# Overwrite the value of one attribute of the <language> element.
# Only an attribute that is already present is changed; a missing attribute
# is not added.
my $set_language_attr = sub
{
  my ($attr, $value) = @_;
  $file =~ s/<language([^>]+)\Q$attr\E="[^"]*"/<language$1$attr="$value"/s;
  return;
};

if ($root == 1)
{
  # Root syntax (HTML): becomes the user-visible '<language> (HTML)' definition.
  $set_language_attr->("name", "$language (HTML)");
  $set_language_attr->("section", "Scripts");
  if ($language eq "PHP")
  {
    $set_language_attr->("extensions", '*.php;*.php3;*.wml;*.phtml;*.phtm;*.inc;*.ctp');
    $set_language_attr->("mimetype", 'text/x-php4-src;text/x-php3-src;text/vnd.wap.wml;application/x-php');

    # Use the C-style indenter. Replace an existing indenter attribute if
    # there is one, so the element never ends up carrying the attribute twice;
    # otherwise append it after the last attribute.
    unless ($file =~ s/<language([^>]+)indenter="[^"]*"/<language$1indenter="cstyle"/s)
    {
      $file =~ s/<language([^>]*)/<language$1 indenter="cstyle"/s;
    }
  }
  # Twig
  else
  {
    $set_language_attr->("extensions", '*.twig;*.html.twig;*.htm.twig');
    $set_language_attr->("mimetype", 'text/x-twig');
  }
}
else
{
  # Any other syntax: becomes '<name>/<language>', hidden by default.
  $file =~ s/<language([^>]+)hidden="[^"]*"/<language$1/s;
  $set_language_attr->("section", "Other");

  # Extra attribute text appended right after the new name attribute.
  my $extra = ' hidden="true"';
  my $mimetype = "";
  my $extensions = "";

  # Twig templates of these syntaxes have their own file extensions, so the
  # generated definitions are visible (priority instead of hidden).
  my %twig_extensions_for = (
    JavaScript => '*.js.twig;*.mjs.twig;*.cjs.twig',
    TypeScript => '*.ts.twig;*.mts.twig;*.cts.twig',
  );

  if ($language eq "Twig")
  {
    $mimetype = "text/x-twig";
    if (defined $syntaxName && exists $twig_extensions_for{$syntaxName})
    {
      $extra = ' priority="1"';
      $extensions = $twig_extensions_for{$syntaxName};
    }
  }

  $set_language_attr->("mimetype", $mimetype);
  $file =~ s/<language([^>]+)name="([^"]*)"/<language$1name="$2\/$language"$extra/s;

  # alternativeNames is a ';'-separated list of names: suffix every entry,
  # not only the last one, so none of them clashes with the original syntax.
  if ($file =~ /<language[^>]+alternativeNames="([^"]*)"/s)
  {
    my $names = $1;
    my $suffixed = join(';', map { "$_/$language" } grep { length } split(/;/, $names));
    $file =~ s/<language([^>]+)alternativeNames="[^"]*"/<language$1alternativeNames="$suffixed"/s;
  }

  $set_language_attr->("extensions", $extensions);
}

# Replace the content of every <list> by an <include> of the list with the
# same name in the original syntax, instead of duplicating the items.
$file =~ s/<list name="([^"]+)">.*?<\/list>/<list name="$1"><include>$1##$syntaxName<\/include><\/list>/gs;

# The generated file always declares kateversion 5.79.
$file =~ s/<language([^>]+)kateversion="[^"]*"/<language$1kateversion="5.79"/s;

# Drop every fallthrough="true" / fallthrough="1" attribute.
$file =~ s/ fallthrough="(true|1)"//gs;

if ($language eq "Twig")
{
  # remove Mustache syntax (only present in the root syntax)
  if ($root == 1)
  {
    $file =~ s/<context name="MustacheJS.*?<\/context>//gs;
    $file =~ s/<StringDetect attribute="Value" context="#pop#pop!MustacheJS" String="[^"]+[^\/]+\/>//gs;
  }
}
# PHP, for the root syntax or the Mustache syntax file. The dot is escaped so
# that only a file name really ending in 'mustache.xml' qualifies.
elsif ($root == 1 || $ARGV[0] =~ /mustache\.xml$/)
{
  # Remove the "Processing Instruction" rule, its itemData and the whole PI
  # context (presumably because '<?' has to start PHP instead).
  $file =~ s/<(?:RegExpr (attribute="Processing Instruction" context="PI"|context="PI" attribute="Processing Instruction")|itemData name="Processing Instruction")[^\/]+\/>|<context name="PI".*?<\/context>//gs;
}

# Suffix appended to the name of every included definition.
my $language_suffix = "/$language";

# Context included into every context to detect the embedded language:
# the local 'FindPHP' context for PHP, the '<language>/<language>'
# definition otherwise.
my $find_language = ($language eq "PHP")
  ? "FindPHP"
  : "##$language/$language";
# Build the replacement text for one <context> tag, adding an <IncludeRules>
# for $find_language unless the context name starts with 'Find'.
#
# Arguments:
#   1. the text of the tag: the complete opening tag (including '>') when the
#      second argument is true, otherwise a self-closing tag without its
#      trailing '/>'
#   2. true for an opening tag, false for a self-closing tag
#
# Returns the tag, followed by the IncludeRules line where applicable. A
# self-closing tag that receives the include is expanded into an
# opening/closing pair; one that does not gets its '/>' back.
sub insert_find_language
{
    my ($tag, $is_open_tag) = @_;

    # 'Find...' contexts are returned as they were.
    if ($tag =~ /name="Find/)
    {
        return $is_open_tag ? $tag : $tag . '/>';
    }

    my $include = "<IncludeRules context=\"$find_language\" />";

    return $is_open_tag
        ? "$tag\n$include"
        : "$tag>\n$include\n</context>";
}

# Point every include of another definition (context="...##Name") at the
# generated 'Name/<language>' variant. Definitions whose name starts with
# Alerts, Comments, Doxygen or Modelines are left untouched.
$file =~ s/<IncludeRules\s([^>]*)context="([^"#]*)##(?!Alerts|Comments|Doxygen|Modelines)([^"]+)"/<IncludeRules $1context="$2##$3$language_suffix"/g;

# Add the language-detection include to every context: first to contexts that
# have a body, then to self-closing ones. This runs after the substitution
# above, so the includes added here do not get the suffix.
$file =~ s/(<context\s[^>]*[^>\/]>)/insert_find_language($1, 1)/ge;
$file =~ s/(<context\s[^>]*[^>\/])\/>/insert_find_language($1, 0)/ge;

if ($language eq "PHP")
{
  # Remove the original StringDetect rules for '<?'. The '.*' is greedy but,
  # without /s, cannot cross a line break, so a match stays within one line.
  $file =~ s/<StringDetect\b.*String="&lt;\?"[^>]*>//g;

  # Context that hands over to the PHP/PHP definition when '<?' is ahead.
  # It is appended after the loop above, so it does not include itself.
  my $findphp = "<context name=\"FindPHP\" attribute=\"Normal Text\" lineEndContext=\"#stay\">\n<Detect2Chars context=\"##PHP/PHP\" char=\"&lt;\" char1=\"?\" lookAhead=\"true\" />\n</context>\n";
  $file =~ s/(?=<\/contexts\s*>)/$findphp/;
}

# Write the document followed by the banner. Output is buffered, so a write
# error may only show up at close(): check both.
print {$output} $file, $warning
  or die "Could not write to file '$ARGV[1]': $!";
close($output)
  or die "Could not close file '$ARGV[1]': $!";
