#!/usr/bin/env perl

use utf8;
use 5.008;
use strict;
use warnings;
use autodie;
use List::MoreUtils qw( any all );

# DEFAULT OPTIONS
my $exponent = 6; # default
#my $exponent = 1;
#my @parsers = (qw(MATE TURBO MALT MST GOLD));
#my @parsers = (qw(MATE TURBO MALT MST PARSITO GOLD));
my @parsers = (qw(MATE TURBO MALT MST PARSITO));
my %e_table;
my @e_types;
my $gold = 0; # GOLD IN VERTICAL (for eval. purposes)

my ($tokens,$sentences) = (0,0);
my @methods = (qw(MATE TURBO MALT MST PARSITO MultiP OneBest));
my (%cTAG,%cUAS,%cLAS,%cSENT_u,%cSENT_l); # Number of correct tags, UAS, LAS for each method.
my $cycles=0;

#@e_types =('POS:LAS'); # DEFAULT
@e_types =('POS2POS:UAS'); # DEFAULT
my $DIR;
my $E_FILE = './Err_table_parsers.tsv';

my (@forms,%lems,%tags,%afuns,%heads,%weights); my $ord=0;
my (@glems,@gtags,@gafuns,@gheads);
my (@ntags,@nlems,@nheads,@nafuns, @nweights);
my @edges; $edges[0]=();
my %edges_weights;
my %best_label;
my %edges_origparser;
my $la_opt='LAS';
my %weights_penalties;

my $INPUT_FILE;

binmode(STDOUT, ":utf8");

while ((defined $ARGV[0])&&($ARGV[0] =~ /^\-[hpledxt]$/)) {
    my $format_options = shift @ARGV;
#    print "$format_options\n";
    $format_options =~ s/^\-([hpledxt])$/$1/;
    if ($format_options eq 'h') {
	print "Spouští program na vertikálu s výstupy dvou a více parserů.\n";
	print "Pro každý parser musí být předem zpracována tabulka chyb.\n";
	print "Tabulka chyb má formát název Err_table_parsers.tsv, obsahuje údaje asi takto:\n";
	print "PARSER	název_typu	las/uas	hodnota	procento_úspěchu(UAS)\n";
	print "MST	POS2POS		LAS	AN	0.8326\n";
	print "MALT	DIST		UAS	1	0.9751\n";
	print "a podobně.\n";
	print "Včetně obecného řádku, kde místo názvu typu je ALL a místo hodnoty UAS nebo LAS.\n\n";
	print "./MaxSpanTree_MP.pl -p MST,MALT,TURBO -e POS,POS2POS -d /work/data/\n";
	print "Options:\n";
	print "-p parsers, oddělené čárkou; např. -p MST,MALT,TURBO (bez mezer)\n";
	print "-l LAS/UAS pro všechny parametry, default je LAS, lze také rozlišit za dvojtečkou u parametru -e (POS:LAS)\n";
	print "-l option musí být vždy PŘED option -e\n";
	print "-e chyby,které se použijí pro výpočet vah jednotlivých hran, oddělené čárkou (bez mezer):\n";
	print "POS (POS závislého), 2POS (POS řídícího), POS2POS (obojí), DIST (vzdálenost: 1(1), 2(2-3), 3(4-6), 4(7-10), 5(11+) ), aDIST\n";
	print "popř. také ALL nebo SENT option pro vážení podle úspěšnosti celého parseru (lze kombinovat).\n";
	print "lze také uvést LAS/UAS za dvojtečkou po chybovém parametru, např. DIST:UAS\n";
	print "-d DIRECTORY: vstupní adresář\n";
	print "Ve vstupním adresáři mají být soubory .vert, obsahující vertikálu asi takovouto:\n";
	print "forma\tlemma1\ttag1\thead1\tafun1\tlemma2\ttag2\thead2\tafun2\tlemma3\ttag3\thead3\tafun3(\tlemmaX\ttagX\theadX\tafunX)*\n";
	print "-t ERROR_FILE: tabulka chyb parserů (alternativní jméno či umístění).\n";
	print "-x exponent (váha více chybových hodnot klesá při vyšším exponentu velmi rychle)\n";

	exit;
    }
    elsif ($format_options eq 'l') {
	$la_opt = shift @ARGV;
	if ($la_opt !~ /^[LU]AS$/) { die "Option -l vyžaduje hodnotu LAS nebo UAS!\n"; }
    }
    elsif ($format_options eq 'e') {
	my $ee = shift @ARGV;
	@e_types=(); # Default neplatí.
	if ((not defined $ee)||($ee !~ /^(((ALL)|(SENT)|(POS)|(POS2POS)|(P2P)|(2POS)|(POSSUBPOS)|(POSSUBPOSCASE)|(POSCASE)|(PSbPC)|(PSbP)|(PC)|(a?DIST))(\:((UAS)|(LAS)))?)(\,((ALL)|(SENT)|(POS)|(POS2POS)|(P2P)|(2POS)|(POSSUBPOS)|(POSSUBPOSCASE)|(POSCASE)|(PSbPC)|(PSbP)|(PC)|(a?DIST))(\:((UAS)|(LAS)))?)*$/)) {
	    die "Chyby pro určení vah je třeba vybírat z množiny: {ALL, SENT, POS, POS2POS/P2P, 2POS, DIST, aDIST, PSbPC, PSbP, PC}, případně některý z parametrů doplnit :UAS nebo :LAS!\n"; }
	my @e_types0 = split /\,/, $ee;
	foreach my $ex (@e_types0) {
	    if ($ex =~ /^P2P/) { $ex =~ s/P2P/POS2POS/; }
	    if ($ex =~ /^PSbP/){ $ex =~ s/PSbP/POSSUBPOS/; }
	    if ($ex =~ /^PC/) { $ex =~ s/PC/POSCASE/; }
	    if ($ex =~ /^(.+)(\:((UAS)|(LAS)))$/) { push (@e_types, $ex); }
	    else { push (@e_types, 'ex' . ':' . $la_opt); }
	}
    }
    elsif ($format_options eq 'p') {
	my $pp = shift @ARGV;
	@parsers=(); # Default neplatí.
	if ((not defined $pp)||($pp !~ /^(MST)|(MALT)|(TURBO)|(MATE)|(PARS(ITO)?)(\,(MST)|(MALT)|(TURBO)|(MATE)|(PARS(ITO)?))*(GOLD)?$/)) {
	    die "Parsery je (zatím) nutno volit z množiny: {MST, MALT, TURBO, MATE, PARS/PARSITO}, popř. GOLD (jako poslední)!\n"; }
	@parsers = split /\,/, $pp;
	if ($pp =~ /.+GOLD$/) {
	    $gold=1;
	    @methods=@parsers;
	    pop @methods;
	    push (@methods,'MultiP','OneBest');
	}
    }
    elsif ($format_options eq 'x') {
	my $xx = shift @ARGV;
	if ($xx !~ /^[123456789]$/) {
	    die "Exponent musí být celé číslo 1 až 9!\n"; }
	$exponent = $xx;
#	print "Exponent $exponent\n";
    }
    elsif ($format_options eq 'd') {
	my $dd = shift @ARGV;
	$dd =~ s|^(.+)\/|$1|;
	if (not(-d $dd)) {
	    die "Input directory does not exist!\n"; }
	$DIR = $dd;
	print "Directory $dd\n";
    }
    elsif ($format_options eq 't') {
	my $tt = shift @ARGV;
	if (not(-e $tt)) {
	    die "Error file table does not exist!\n"; }
	$E_FILE = $tt;
    }
}
if (not defined $DIR) {
    die "Input directory not specified!\n"; }

{ foreach my $mx (@methods) {
    $cLAS{$mx}=0; $cUAS{$mx}=0; $cTAG{$mx}=0; $cSENT_u{$mx}=0; $cSENT_l{$mx}=0; }}

my $IN;

open $IN, '<:utf8', $E_FILE;
while (<$IN>) {
    chomp;
    my ($parser0,$type0,$lasuas0,$value0,$perc0) = split /\t/, $_;
    if (($parser0 =~ /^((MST)|(MALT)|(TURBO)|(MATE)|(PARSITO))$/)&&
    ((($type0 eq 'POS')&&($value0 =~ /^[NAPCVDRJTIXZ]$/))||
    (($type0 eq '2POS')&&($value0 =~ /^[NAPCVDRJTIXZ]$/))||
    (($type0 eq 'DIST')&&($value0 =~ /^\-?[0-9]$/))||
    ($type0 =~ /^POS((SUBPOS)|(CASE))+$/)||
    (($type0 eq 'POS2POS')&&($value0 =~ /^[NAPCVDRJTIXZ][NAPCVDRJTIXZ]$/))||
    (($type0 eq 'ALL')||($type0 eq 'SENT')))&&
    ($perc0 =~ /^0\.[0-9][0-9]+$/)) {
	if ($lasuas0 =~ /^[LU]AS$/) {
	    $type0 .= ':' . $lasuas0; }
	$e_table{$parser0}{$type0}{$value0}=$perc0;
    }
}
close $IN;

# ------------------------------------------------ -- -- -- -- -- -- --
# --------   Vlastní zpracování textu   ---------- -- -- -- -- -- -- --
# ------------------------------------------------ -- -- -- -- -- -- --
open my $ME, '>>:utf8', './MethodsEval.tsv';

my $ODIR = $DIR . '-out';
if (-d $ODIR) {
    my $dirO;
    opendir($dirO,$ODIR);
    foreach my $Xfile (readdir ($dirO)) {
	next if ($Xfile =~ /^\.{1,2}$/);
	$Xfile = $ODIR . '/' . $Xfile;
	unlink ($Xfile) or die "Target directory must be empty, but file $Xfile cannot be deleted!\n";
    }
    closedir $dirO;
}
else { mkdir $ODIR};

my $OUTPUT_FILE;
my $dirD;
#print "Opening directory:\n$INPUT_DIR\n.";
opendir($dirD, $DIR) or die "Error opening directory $DIR !\n";

print "@e_types\n";

foreach $INPUT_FILE (sort readdir($dirD)) {
    next if ($INPUT_FILE =~ /^\.{1,2}$/);
    next if ($INPUT_FILE !~ /.+\.x?vert$/);
    $OUTPUT_FILE = $ODIR . '/' . $INPUT_FILE;
    $INPUT_FILE = $DIR . '/' . $INPUT_FILE;
    open my $INFILE, '<:utf8', $INPUT_FILE or die "Cannot open file $INPUT_FILE for reading!\n";
    open my $OUFILE, '>:utf8', $OUTPUT_FILE or die "Cannot open file $OUTPUT_FILE for writing!\n";

# ============================================================
# NAČÍTÁNÍ TEXTU (CELÝ TEXT)
# Vycházíme z formátu
# form	lem	tag	afun	head	tag	afun	head	(tag	afun	head)*
    print "Opened $INPUT_FILE\n";
    while (<$INFILE>) {
	chomp;
	my $process_line = $_;
	#HOT FIX
	$process_line =~ s/\tC\-{13}\t/\tC=-------------\t/;
	if ($process_line eq '') {
	    if ($ord > 0) {
		$sentences++;
		process_sentence();
		evaluate_sentence();
		$ord = 0;
		empty_sentence_arrays();
	    }
	    next;
	}
	elsif ($process_line =~ /^[0-9]+\t[^\t\ \n]+(\t[^\t\ \n]+\t[NAPCVDRJTIXZYE][^\t\n\ ]{3}[\-1234567X][^\t\n\ ]{10,11}\t[0-9]+\t[^\t\ \n]+){2,6}$/) {
	    my ($o,$in_f,@ats) = split /\t/, $process_line;
	    if ($ord == 0) {
		$forms[0]=''; }
	    $ord++;
	    if ($o != $ord) { die "Chyba v cislovani tokenu \$o=$o, \$ord=$ord, $. $INPUT_FILE\n"; }
	    $forms[$ord]=$in_f;
	    foreach my $px (@parsers) {
		if ($px ne 'GOLD') {
		    my $le = shift @ats; my $tg = shift @ats;
		    my $he = shift @ats; my $la = shift @ats;
		    if ((defined $he) && ($he =~ /^[0-9]+$/) && (defined $la) && (defined $tg) && (defined $le)) {
			$lems{$px}[$ord]=$le; $tags{$px}[$ord]=$tg;
			$afuns{$px}[$ord]=$la; $heads{$px}[$ord]=$he;
		    }
		    else { die "Wrong input, line $. :\n$process_line\nfile: $OUTPUT_FILE\n"; }
		}
		else {
		    my $le = shift @ats; my $tg = shift @ats;
    		    my $he = shift @ats; my $la = shift @ats;
		    $glems[$ord]=$le; $gtags[$ord]=$tg; $gafuns[$ord]=$la; $gheads[$ord]=$he;
		}
	    }
	}
	else { die "WRONG vertical input, line $. :\n$process_line\nfile: $OUTPUT_FILE\n"; }
    }
    if ($ord > 0) { # Zapomenutá větička z dokončeného souboru (chyběl prázdný řádek na konci).
	process_sentence();
	evaluate_sentence();
	$ord = 0;
	empty_sentence_arrays();
    }
    close $INFILE;
    close $OUFILE;
    print "*";
}
print "\n";

# Výpis úspěšnosti, pokud se používá GOLD
my $settings = "Combination: $e_types[0]";
if ($#e_types > 0) { foreach my $iz (1 .. $#e_types) { $settings .= '+' . $e_types[$iz]; } }
$settings .= "\tEXP: $exponent	PARSERS:";
{ foreach my $px (@parsers) { next if ($px eq 'GOLD'); $settings .= ' ' . $px; } }
print $ME "Experiment settings: $settings\n";
#print $ME "method	UAS	LAS	SENT_UAS	SENT_LAS\n";
if (($tokens > 0)&&($sentences > 0)) {
    foreach my $me (@methods) {
#    foreach my $me ('MultiP','OneBest') {
	my $cu = sprintf "%.2f", $cUAS{$me}/$tokens*100;
	my $cl = sprintf "%.2f", $cLAS{$me}/$tokens*100;
	my $csu = sprintf "%.2f", $cSENT_u{$me}/$sentences*100;
	my $csl = sprintf "%.2f", $cSENT_l{$me}/$sentences*100;
	print $ME "$me\t$cu\t$cl\t$csu\t$csl\n";
    }
}
close $ME;

print "Cycles: $cycles per sent. $sentences\n";

# ============================================================
# VLASTNÍ ZPRACOVÁNÍ VĚTY
sub process_sentence {
    test_input_parses();
#    print "processing sentence:\"@forms\"\n";
    check_dependencies();
    calculate_weights();
#    print "weights calculated\n";
    calculate_edges_weights();
#    print "edges weights calculated\n";
#    print "forms \$\# $#forms\n";
    my $yy = $#forms;
    my $cycle_in_s=0;
    foreach my $x (1..$yy) {
	$tokens++;
	my $maxw = -1;
	if (defined $edges[$x]) {
	    my @teste = @{$edges[$x]};
	    my $teste_s = scalar(@teste);
	    foreach my $e (@{$edges[$x]}) {
		if (not defined $edges_weights{$x}{$e}) {
		    die "Interní chyba: nedefinovaná váha hrany:\"$x\" \"$e\", line $., $OUTPUT_FILE\n"; }
		if ($edges_weights{$x}{$e}>$maxw) {
		    $maxw=$edges_weights{$x}{$e};
		    $nheads[$x]=$e;
		    my $oparser=$edges_origparser{$x}{$e};
		    $nlems[$x]=$lems{$oparser}[$x];
		    $nafuns[$x]=choose_best_label($x,$e);
#		    $nafuns[$x]=$afuns{$oparser}[$x];
		    $ntags[$x]=$tags{$oparser}[$x];
		    $nweights[$x]=$maxw;
		}
	    }
	}
    }
    my $max_control=200;
    my $cycl_suspected=1;
    while (($max_control>0)&&($cycl_suspected==1)) {
	$max_control--;
	my @cycl = find_cycle();
	if ($#cycl > -1) {
	    $cycle_in_s=1;
	    my $solv = fix_cycle(@cycl);
	    if ($solv == 0) {
		print "Nepodařilo se opravit cyklus ve větě, řádek $. soubor $OUTPUT_FILE\n";
		foreach my $fx (@forms) { print "$fx ";}
		print "\n";
		print "Cyklus @cycl\n";
		foreach my $fcx (@cycl) { print "$forms[$fcx] ";}
		print "\n";
		die;
	    }
	}
	else { $cycl_suspected=0; }
    }
#    print "$sentences: @forms\n";
    if ($cycle_in_s == 1) { $cycles++; }
}

sub evaluate_sentence {
    my %tot_weights; { foreach my $px (@parsers) { next if ($px eq 'GOLD'); $tot_weights{$px}=0; }}
    my (%pTAG,%pUAS,%pLAS); { foreach my $px (@parsers) { next if ($px eq 'GOLD'); $pTAG{$px}=0; $pUAS{$px}=0; $pLAS{$px}=0; }}
    my (%sok_LAS,%sok_UAS); { foreach my $px (@parsers) { next if ($px eq 'GOLD'); $sok_UAS{$px}=1; $sok_LAS{$px}=1; }}
    $sok_LAS{'MultiP'}=1; $sok_UAS{'MultiP'}=1;
    foreach my $x (1..$#forms) {
	if ((substr($ntags[$x],0,1) eq substr($gtags[$x],0,1))&&(substr($ntags[$x],4,1) eq substr($gtags[$x],4,1))) {
	    $cTAG{'MultiP'}++;
	}
	if ($nheads[$x] == $gheads[$x]) {
	    if (not defined $nafuns[$x]) { print "nafuns $x ($forms[$x]) not defined!\nALL nafuns: @nafuns\n"; }
	    if ($nafuns[$x] eq $gafuns[$x]) { $cLAS{'MultiP'}++; }
	    else {$sok_LAS{'MultiP'}=0;}
	    $cUAS{'MultiP'}++;
	}
	else { $sok_UAS{'MultiP'}=0; $sok_LAS{'MultiP'}=0; }

	foreach my $px (@parsers) {
	    next if ($px eq 'GOLD');
	    $tot_weights{$px}+=$weights{$px}[$x];
	    if (not defined $gtags[$x]) {
		die "NOT DEFINED gtags $x, form $forms[$x], line $.\n"; }
	    if ($tags{$px}[$x] eq $gtags[$x]) { $cTAG{$px}++; $pTAG{$px}++; }
	    if ($heads{$px}[$x] == $gheads[$x]) {
		if ($afuns{$px}[$x] eq $gafuns[$x]) { $cLAS{$px}++; $pLAS{$px}++; }
		else { $sok_LAS{$px}=0; }
		$cUAS{$px}++; $pUAS{$px}++;
	    }
	    else { $sok_LAS{$px}=0; $sok_UAS{$px}=0; }
	}
    }
    foreach my $px (@parsers) {
	next if ($px eq 'GOLD');
	if ($sok_LAS{$px}) { $cSENT_l{$px}++; }
	if ($sok_UAS{$px}) { $cSENT_u{$px}++; }
    }
    if ($sok_LAS{'MultiP'}) { $cSENT_l{'MultiP'}++; }
    if ($sok_UAS{'MultiP'}) { $cSENT_u{'MultiP'}++; }

    my $high_w=0; my $best_p = '';
    foreach my $px (@parsers) {
	next if ($px eq 'GOLD');
	if ($tot_weights{$px}>$high_w) {
	    $high_w=$tot_weights{$px}; $best_p=$px; }
    }
    if ($best_p ne '') {
	$cTAG{'OneBest'}+=$pTAG{$best_p};
	$cUAS{'OneBest'}+=$pUAS{$best_p};
	$cLAS{'OneBest'}+=$pLAS{$best_p};
	$cSENT_l{'OneBest'}+=$sok_LAS{$best_p};
	$cSENT_u{'OneBest'}+=$sok_UAS{$best_p};
    }
}

sub calculate_weights {
    foreach my $x (1..$#forms) {
	foreach my $px (@parsers) {
	    next if ($px eq 'GOLD');
	    $weights{$px}[$x]=0;
	    foreach my $ex (@e_types) {
		if ($ex =~ /^((ALL)|(SENT))\:((LAS)|(UAS))$/) {
		    if ((defined $e_table{$px}{$ex}{'ALL'})&&($e_table{$px}{$ex}{'ALL'}>0.0001)&&($e_table{$px}{$ex}{'ALL'}<=1)) {
		        $weights{$px}[$x]+=($e_table{$px}{$ex}{'ALL'}**$exponent)*(10**$exponent); 
		    }
		    else {
			$weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^POS\:((LAS)|(UAS))$/) {
		    my $pos = substr($tags{$px}[$x],0,1);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$pos})&&($e_table{$px}{$ex}{$pos}>0.0001)&&($e_table{$px}{$ex}{$pos}<=1)) {
		        $weights{$px}[$x]+=($e_table{$px}{$ex}{$pos}**$exponent)*(10**$exponent); 
		    }
		    else {
			$weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^2POS\:((LAS)|(UAS))$/) {
		    my $hd = $heads{$px}[$x];
		    my $pos2 = '-';
		    if ($hd > 0) { $pos2 = substr($tags{$px}[$hd],0,1); }
		    if ((defined $e_table{$px}{$ex}{$pos2})&&($e_table{$px}{$ex}{$pos2}>0.0001)&&($e_table{$px}{$ex}{$pos2}<1)) {
		        $weights{$px}[$x]+=$e_table{$px}{$ex}{$pos2}**$exponent*(10**$exponent); }
		    else {
			$weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^POS2POS\:((LAS)|(UAS))$/) {
		    my $pos = substr($tags{$px}[$x],0,1);
		    my $hd = $heads{$px}[$x];
		    my $pos2 = '-';
		    if ($hd > 0) { $pos2 = substr($tags{$px}[$hd],0,1); }
		    my $pos2pos=$pos . $pos2;
		    if ((defined $e_table{$px}{$ex}{$pos2pos})&&($e_table{$px}{$ex}{$pos2pos}>0.0001)&&($e_table{$px}{$ex}{$pos2pos}<1)) {
		        $weights{$px}[$x]+=$e_table{$px}{$ex}{$pos2pos}**$exponent*(10**$exponent); }
		    else {
			$weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^POSSUBPOS\:((LAS)|(UAS))$/) {
		    my $possubpos = substr($tags{$px}[$x],0,2);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$possubpos})&&($e_table{$px}{$ex}{$possubpos}>0.0001)&&($e_table{$px}{$ex}{$possubpos}<=1)) {
		        $weights{$px}[$x]+=($e_table{$px}{$ex}{$possubpos}**$exponent)*(10**$exponent); 
		    }
		    else {
			$weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }

		elsif ($ex =~ /^POSCASE\:((LAS)|(UAS))$/) {
		    my $poscase = substr($tags{$px}[$x],0,1) . substr($tags{$px}[$x],4,1);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$poscase})&&($e_table{$px}{$ex}{$poscase}>0.0001)&&($e_table{$px}{$ex}{$poscase}<=1)) {
		        $weights{$px}[$x]+=($e_table{$px}{$ex}{$poscase}**$exponent)*(10**$exponent); 
		    }
		    else {
			$weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }

		elsif ($ex =~ /^POSSUBPOSCASE\:((LAS)|(UAS))$/) {
		    my $pscase = substr($tags{$px}[$x],0,1) . substr($tags{$px}[$x],4,1);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$pscase})&&($e_table{$px}{$ex}{$pscase}>0.0001)&&($e_table{$px}{$ex}{$pscase}<=1)) {
		        $weights{$px}[$x]+=($e_table{$px}{$ex}{$pscase}**$exponent)*(10**$exponent); 
		    }
		    else {
			$weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }

		elsif ($ex =~ /^a?DIST\:((LAS)|(UAS))$/) {
		    my $dist = 0;
		    my $hd = $heads{$px}[$x];
		    if ($hd > 0) {
			my $d = $hd - $x;
			# 1(1), 2(2-3), 3(4-6), 4(7-10), 5(11+)
			if ($hd < -10) { $dist = -5; }
			elsif ($hd > 10) { $dist = 5; }
			elsif ($hd < -6) { $dist = -4; }
			elsif ($hd > 6) { $dist = 4; }
			elsif ($hd < -3) { $dist = -3; }
			elsif ($hd > 3) { $dist = 3; }
			elsif ($hd < -1) { $dist = -2; }
			elsif ($hd > 1) { $dist = 2; }
			elsif ($hd == -1) { $dist = -1; }
			elsif ($hd == 1) { $dist = 1; }
		    }
		    if ($ex =~ /^DIST/) {
			if ((defined $e_table{$px}{$ex}{$dist})&&($e_table{$px}{$ex}{$dist}>0.0001)&&($e_table{$px}{$ex}{$dist}<1)) {
		    	    $weights{$px}[$x]+=$e_table{$px}{$ex}{$dist}**$exponent*(10**$exponent); }
			else {
			    $weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); } }
		    elsif ($ex =~ /^aDIST/) { # průměr z +dist a -dist
			my $aex = $ex; $aex =~ s/aDIST/DIST/;
			my $w = 0;
			if ((defined $e_table{$px}{$aex}{$dist})&&($e_table{$px}{$aex}{$dist}>0)&&($e_table{$px}{$aex}{$dist}<1)) {
			    $w+=$e_table{$px}{$aex}{$dist}; }
			if ((defined $e_table{$px}{$aex}{-$dist})&&($e_table{$px}{$aex}{-$dist}>0)&&($e_table{$px}{$aex}{-$dist}<1)) {
			    $w+=$e_table{$px}{$aex}{-$dist}; }
			$w = $w/2;
			if ($w > 0.0001) { $weights{$px}[$x]+=$w**$exponent*(10**$exponent); }
			else { $weights{$px}[$x]+=0.0001**$exponent*(10**$exponent); }
		    }
		}
	    }
	}
    }
}

sub calculate_edges_weights {
    foreach my $x (1 .. $#forms) {
	foreach my $px (@parsers) {
	    last if ($px eq 'GOLD');
	    my $e1 = $heads{$px}[$x];
	    my $w1 = $weights{$px}[$x];
	    if (defined $weights_penalties{$px}[$x]) {
		$w1 -= $w1 * $weights_penalties{$px}[$x]; }
	    if (defined $edges_weights{$x}{$e1}) {
		$edges_weights{$x}{$e1}+=$w1;
#		$best_label{$x}{$e1}=choose_best_label($x,$e1);
	    }
	    else {
		$edges_weights{$x}{$e1}=$w1;
		$edges_origparser{$x}{$e1}=$px;
		if (defined $edges[$x]) { push (@{$edges[$x]},$e1); }
		else { $edges[$x][0]=$e1; }
	    }
	}
    }
}
sub choose_best_label {
    my ($x, $h) = @_;
    my %lab_weights;
    foreach my $px (@parsers) {
	next if ($px eq 'GOLD');
	next if ($heads{$px}[$x] != $h);
	my $af = $afuns{$px}[$x];
	if (not defined $lab_weights{$af}) { $lab_weights{$af}=0; }
#	-------------------------------------- Zde se opakuje celá procedura výpočtu vah, ovšem jen pro LABELED!
	foreach my $exx (@e_types) {
		my $ex = $exx;
		$ex =~ s/UAS/LAS/;
		if ($ex =~ /^((ALL)|(SENT))\:LAS$/) {
		    if ((defined $e_table{$px}{$ex}{'ALL'})&&($e_table{$px}{$ex}{'ALL'}>0.0001)&&($e_table{$px}{$ex}{'ALL'}<=1)) {
		        $lab_weights{$af}+=($e_table{$px}{$ex}{'ALL'}**$exponent)*(10**$exponent); 
		    }
		    else {
			$lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^POS\:LAS$/) {
		    my $pos = substr($tags{$px}[$x],0,1);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$pos})&&($e_table{$px}{$ex}{$pos}>0.0001)&&($e_table{$px}{$ex}{$pos}<=1)) {
		        $lab_weights{$af}+=($e_table{$px}{$ex}{$pos}**$exponent)*(10**$exponent); 
		    }
		    else {
			$lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^2POS\:LAS$/) {
		    my $hd = $heads{$px}[$x];
		    my $pos2 = '-';
		    if ($hd > 0) { $pos2 = substr($tags{$px}[$hd],0,1); }
		    if ((defined $e_table{$px}{$ex}{$pos2})&&($e_table{$px}{$ex}{$pos2}>0.0001)&&($e_table{$px}{$ex}{$pos2}<1)) {
		        $lab_weights{$af}+=$e_table{$px}{$ex}{$pos2}**$exponent*(10**$exponent); }
		    else {
			$lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^POS2POS\:LAS$/) {
		    my $pos = substr($tags{$px}[$x],0,1);
		    my $hd = $heads{$px}[$x];
		    my $pos2 = '-';
		    if ($hd > 0) { $pos2 = substr($tags{$px}[$hd],0,1); }
		    my $pos2pos=$pos . $pos2;
		    if ((defined $e_table{$px}{$ex}{$pos2pos})&&($e_table{$px}{$ex}{$pos2pos}>0.0001)&&($e_table{$px}{$ex}{$pos2pos}<1)) {
		        $lab_weights{$af}+=$e_table{$px}{$ex}{$pos2pos}**$exponent*(10**$exponent); }
		    else {
			$lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }
		elsif ($ex =~ /^POSSUBPOS\:LAS$/) {
		    my $possubpos = substr($tags{$px}[$x],0,2);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$possubpos})&&($e_table{$px}{$ex}{$possubpos}>0.0001)&&($e_table{$px}{$ex}{$possubpos}<=1)) {
		        $lab_weights{$af}+=($e_table{$px}{$ex}{$possubpos}**$exponent)*(10**$exponent); 
		    }
		    else {
			$lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }

		elsif ($ex =~ /^POSCASE\:LAS$/) {
		    my $poscase = substr($tags{$px}[$x],0,1) . substr($tags{$px}[$x],4,1);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$poscase})&&($e_table{$px}{$ex}{$poscase}>0.0001)&&($e_table{$px}{$ex}{$poscase}<=1)) {
		        $lab_weights{$af}+=($e_table{$px}{$ex}{$poscase}**$exponent)*(10**$exponent); 
		    }
		    else {
			$lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }

		elsif ($ex =~ /^POSSUBPOSCASE\:LAS$/) {
		    my $pscase = substr($tags{$px}[$x],0,1) . substr($tags{$px}[$x],4,1);
		    if (not defined $tags{$px}[$x]) {
			die "Undef tags $px $x, line $., file $OUTPUT_FILE\n"; }
		    if ((defined $e_table{$px}{$ex}{$pscase})&&($e_table{$px}{$ex}{$pscase}>0.0001)&&($e_table{$px}{$ex}{$pscase}<=1)) {
		        $lab_weights{$af}+=($e_table{$px}{$ex}{$pscase}**$exponent)*(10**$exponent); 
		    }
		    else {
			$lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }

		elsif ($ex =~ /^a?DIST\:LAS$/) {
		    my $dist = 0;
		    my $hd = $heads{$px}[$x];
		    if ($hd > 0) {
			my $d = $hd - $x;
			# 1(1), 2(2-3), 3(4-6), 4(7-10), 5(11+)
			if ($hd < -10) { $dist = -5; }
			elsif ($hd > 10) { $dist = 5; }
			elsif ($hd < -6) { $dist = -4; }
			elsif ($hd > 6) { $dist = 4; }
			elsif ($hd < -3) { $dist = -3; }
			elsif ($hd > 3) { $dist = 3; }
			elsif ($hd < -1) { $dist = -2; }
			elsif ($hd > 1) { $dist = 2; }
			elsif ($hd == -1) { $dist = -1; }
			elsif ($hd == 1) { $dist = 1; }
		    }
		    if ($ex =~ /^DIST/) {
			if ((defined $e_table{$px}{$ex}{$dist})&&($e_table{$px}{$ex}{$dist}>0.0001)&&($e_table{$px}{$ex}{$dist}<1)) {
		    	    $lab_weights{$af}+=$e_table{$px}{$ex}{$dist}**$exponent*(10**$exponent); }
			else {
			    $lab_weights{$af}+=0.0001**$exponent*(10**$exponent); } }
		    elsif ($ex =~ /^aDIST/) { # průměr z +dist a -dist
			my $aex = $ex; $aex =~ s/aDIST/DIST/;
			my $w = 0;
			if ((defined $e_table{$px}{$aex}{$dist})&&($e_table{$px}{$aex}{$dist}>0)&&($e_table{$px}{$aex}{$dist}<1)) {
			    $w+=$e_table{$px}{$aex}{$dist}; }
			if ((defined $e_table{$px}{$aex}{-$dist})&&($e_table{$px}{$aex}{-$dist}>0)&&($e_table{$px}{$aex}{-$dist}<1)) {
			    $w+=$e_table{$px}{$aex}{-$dist}; }
			$w = $w/2;
			if ($w > 0.0001) { $lab_weights{$af}+=$w**$exponent*(10**$exponent); }
			else { $lab_weights{$af}+=0.0001**$exponent*(10**$exponent); }
		    }
		}
	}
    }
    my ($best_label, @rubbish) = (sort {$lab_weights{$b} <=> $lab_weights{$a}} keys %lab_weights);
    return $best_label;
}

sub find_cycle {
    my @cycle;
    my %c_parents; # x & all parents & grparents of token x. If head[y] is already in, a cycle has been detected.
    # Cycle again to register all cycle members...
    foreach my $x (1 .. $#forms) {
#	print "Foreach $x find cycle\n";
	if (not defined $nheads[$x]) {
	    die "Interní chyba (2): nový řídící člen item $x ($forms[$x]) není definován! $. $OUTPUT_FILE\n"; }
	next if ($nheads[$x] == 0);
	$c_parents{$x}=1;
	my $y = $x;
	my $cycle_detected=-1;
	while (($y != 0) && (not defined $c_parents{$nheads[$y]})) {
#	    print "TESTING cycle x=$x - y=$y - nheads\[y\]=$nheads[$y]\n";
	    if (not defined $nheads[$y]) {
		die "Interní chyba (3): nový řídící člen item $y ($forms[$y]) není definován! $. $OUTPUT_FILE\n"; }
	    $c_parents{$y}=1;
	    $y=$nheads[$y];
	    if (defined $c_parents{$y}) {
		$cycle_detected = $y;
#		print "Cycle_detected: $y\n";
		last;
	    }
	}
	if ($cycle_detected>-1) {
	    $y = $cycle_detected; my $z = $nheads[$y];
	    push (@cycle, $y);
	    while (($y != $z)&&(defined $nheads[$z])&&($nheads[$z]>0)) { # Druhá a třetí podmínka je teoreticky zbytečná
		push (@cycle, $z);
		$z = $nheads[$z];
	    }
	}
    }
    return (@cycle);
}

sub fix_cycle {
    my @cycle = @_;
    my %ci_members;
    my $ci_weight=0;
    foreach my $c (@cycle) {
	$ci_weight+=$nweights[$c];
	$ci_members{$c}=1;
    }
    my $new_weight;
    my $new_edge;
    my $best_head;
    my $best_node;
    my $best_weight = -9.9;
    foreach my $c (@cycle) {
	my $bestW = -10; my $bestE;
	foreach my $e (@{$edges[$c]}) {
	    next if (($ci_members{$e}||0)==1);
	    if ($edges_weights{$c}{$e}>$bestW) {
		$bestW=$edges_weights{$c}{$e};
		$bestE=$e;
	    }
	}
	$new_weight = $ci_weight-$nweights[$c]+$bestW;
	$new_edge = $bestE;
	if ($new_weight>$best_weight) {
	    $best_weight=$new_weight;
	    $best_head=$new_edge;
	    $best_node=$c;
	}
    }
    if ((defined $best_head)&&($best_weight>-9.9)) {
	$nheads[$best_node]=$best_head;
	$nweights[$best_node]=$best_weight;
	my $px = $edges_origparser{$best_node}{$best_head};
	$nafuns[$best_node]=$afuns{$px}[$best_node];
	return 1;
    }
    else { return 0; }
}

sub test_input_parses {
    foreach my $p0 (@parsers) {
	next if ($p0 eq 'GOLD');
	foreach my $z (1 .. $#forms) {
	    my ($c,$zh)=(0,$z); my @cyc = ($z);
	    # DETECT CYCLE
	    while ((defined $heads{$p0}[$zh])&&($heads{$p0}[$zh]>0)&&($heads{$p0}[$zh]!=$z)) {
		$zh = $heads{$p0}[$zh];
		if (not defined $heads{$p0}[$zh]) {
		    die "HEAD of item $zh, parser $p0, in the sentence @forms, line $., file $INPUT_FILE is not defined!\n"; }
		if ($zh == $z) { $c = 1; }
		elsif ($zh != 0) { push (@cyc, $zh); }
		else { @cyc = (); }
	    }
	    if ($c == 1) {
		print "Cycle found in sentence $sentences, file $INPUT_FILE, line $. (approx.), parser $p0\n";
		# CYCLE FOUND, BREAK CYCLE
		my $c0 = $z;
		foreach my $cx (@cyc) {
		    $weights_penalties{$p0}[$cx]=0.5;
		    if ($cx < $c0) { $c0 = $cx; }
		}
		# Oprava cyklu: rodičem členu cyklu nejvíce vlevo se stává token o 1 doleva, případně root (0).
		$heads{$p0}[$c0]=$c0-1;
		$weights_penalties{$p0}[$c0]=0.9;
	    }
	}
    }
}

sub check_dependencies {
    foreach my $p0 (@parsers) {
	next if ($p0 eq 'GOLD');
	foreach my $x (1 .. $#forms) {
	    my $y = $heads{$p0}[$x];
	    my $tx = $tags{$p0}[$x]; my $lx = $lems{$p0}[$x];
	    my $ty = '-'; my $ly = '-';
	    if ($y > 0) { $ty = $tags{$p0}[$y]; $ly = $lems{$p0}[$y]; }
	    if (($tx =~ /^V[Bpic]/)&&($ty =~ /^R[RV]/)&&
	    (($ty !~ /^(od)|(do)$/)||($forms[$x] ne 'nevidím'))) {
		# Sloveso v určitém tvaru závislé na předložce (primární), vyloučeno "od nevidím do nevidím"
#		if ($heads{$p0}[$x] == $gheads[$x]) {
#		    print "RULE 1 CLASHES WITH GOLD!!!: line $. word $x\n";
#		    foreach my $j (1 .. $#forms) { print " $j $forms[$j]"; }
#		    print "\n";
#		}
		$weights_penalties{$p0}[$x] = 0.9; }
	    if (($tx =~ /^P7/)&&($ty =~ /^NN/)&&($ly !~ /[nt]í$/)) {
		# Reflexivum závislé na substantivu, které není deverbativní
#		if ($heads{$p0}[$x] == $gheads[$x]) {
#		    print "RULE 2 CLASHES WITH GOLD!!!: line $. word $x\n";
#		    foreach my $j (1 .. $#forms) { print " $j $forms[$j]"; }
#		    print "\n";
#		}
		$weights_penalties{$p0}[$x] = 0.9; }
	    if (($tx =~ /^[NPC][NPH67n]/)&&($ty =~ /^R[RV]/)&&($ly !~ /^(naproti)|(vstříc)$/)&&($x < $y)&&
		($lx !~ /^(souvislost)|(rozdíl)|(ohled)|(srovnání)|(soulad)|(spolupráce)|(směr)|(vztah)|(porovnání)|(rozpor)|(závislost)|(čelo)|(spojení)|(shoda)|(přihlédnutí)|(poměr)|(zřetel)|(souhlas)|(spojitost)|(protiklad)|(konfrontace)$/)) {
		# Substantivum nebo osobní/refl. zájmeno závislé doprava na předložce, která není vstříc / naproti apod.
#		if ($heads{$p0}[$x] == $gheads[$x]) {
#		    print "RULE 3 CLASHES WITH GOLD!!!: line $. word $x\n";
#		    foreach my $j (1 .. $#forms) { print " $j $forms[$j]"; }
#		    print "\n";
#		}
		$weights_penalties{$p0}[$x] = 0.7; }
	    if (($ly eq 'být')&&
		(((($ty =~ /^VB.....[12]/)||($ty =~ /^Vc/))&&($heads{$p0}[$y]>0)&&($tags{$p0}[$heads{$p0}[$y]] =~ /^Vp/))||
#		(($ty =~ /^VB/)&&($heads{$p0}[$y]>0)&&($tags{$p0}[$heads{$p0}[$y]] =~ /^Vs/))||
		(($ty =~ /^VB......F/)&&($heads{$p0}[$y]>0)&&($tags{$p0}[$heads{$p0}[$y]] =~ /^Vf/)))) {
		# Jakékoli slovo závislé na pomocném slovese
#		if ($heads{$p0}[$x] == $gheads[$x]) {
#		    print "RULE 4 CLASHES WITH GOLD!!!: line $. word $x\n";
#		    foreach my $j (1 .. $#forms) { print " $j $forms[$j]"; }
#		    print "\n";
#		}
		$weights_penalties{$p0}[$x] = 0.9; }
	}
    }
}

# ==============================================================================

sub empty_sentence_arrays {
    @forms=(); %lems=(); %tags=(); %afuns=(); %heads=(); $ord=0;
    %weights=();
    @glems=(); @gtags=(); @gafuns=(); @gheads=();
    @edges=(); $edges[0]=(); %edges_weights=(); %edges_origparser=();
    @nlems=(); @ntags=(); @nheads=(); @nafuns=(); @nweights=();
    %weights_penalties=();
}


