/* Viterbi
 * 
 * This program implements the Viterbi algorithm for finding the most likely
 * sequence of hidden states in the Hidden Markov Model (HMM). Its inputs are
 * the HMM (vitulaz.txt) and an amino acid sequence (niz.txt).
 *
 * Copyright (c) 2006 Filip Niksic
 * 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 ***/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define maxn 1000

#define M 0
#define D 1
#define I 2

// Inicijalizacija matrice tipa mxn
double **dmatrica(int m,int n) {
    int i;
    double **A;

    A=(double **)malloc(m*sizeof(double *));
    A[0]=(double *)calloc(m*n,sizeof(double));
    for (i=1;i<m;i++) A[i]=A[i-1]+n;

    return A;
}

int **imatrica(int m,int n) {
    int i,**A;

    A=(int **)malloc(m*sizeof(int *));
    A[0]=(int *)calloc(m*n,sizeof(int));
    for (i=1;i<m;i++) A[i]=A[i-1]+n;

    return A;
}

// Oslobadjanje alocirane memorije
void brisi_matricu(void **A) {
    free(A[0]);
    free(A);
}

// Pridruzivanje aminokiselina |-> broj
int broj(char c){
    switch (c) {
	case 'A': return 0; break;
	case 'R': return 1; break;
	case 'N': return 2; break;
	case 'D': return 3; break;
	case 'B': return 3; break;
	case 'C': return 4; break;
	case 'Q': return 5; break;
	case 'E': return 6; break;
	case 'Z': return 6; break;
	case 'G': return 7; break;
	case 'H': return 8; break;
	case 'I': return 9; break;
	case 'L': return 10; break;
	case 'K': return 11; break;
	case 'M': return 12; break;
	case 'F': return 13; break;
	case 'P': return 14; break;
	case 'S': return 15; break;
	case 'T': return 16; break;
	case 'W': return 17; break;
	case 'Y': return 18; break;
	case 'V': return 19; break;
	default: return -1;
    }
}

int main(void) {
    FILE *fp;
    char *x,buffer[maxn]; // x je niz
    int n; // Duljina niza

    int d; // Duljina HMM modela
    double **T; // vjerojatnost tranzicije u modelu
    double **E; // vjerojatnost emisije

    double **V; // Tablica za Viterbija
    int **P; // Pointeri

    char *izlaz;

    // Ostale varijable
    int i,j,k,jj,kk;
    double f;

    /* Ucitavanje niza iz datoteke "niz.txt" */

    fp=fopen("niz.txt","r");

    fgets(buffer,maxn,fp);
    // Prvi znak u nizu koji ne predstavlja aminokiselinu
    // zamijenimo s \0.
    for (i=0;broj(buffer[i])!=-1;i++);
    buffer[i]='\0';

    n=strlen(buffer);
    x=(char *)malloc((n+2)*sizeof(char));
    strcpy(x+1,buffer);
    x[0]='-';

    fclose(fp);

    //printf("%s\n",x);

    /* Ucitavanje HMM modela iz datoteke "vitulaz.txt" */
    
    fp=fopen("vitulaz.txt","r");

    fscanf(fp,"%d",&d);

    /* T je ovakva matrica:
     *   T[3*korak+stanje1][stanje2]
     * E je ovakva matrica:
     *   E[2*korak+stanje][aminokiselina]
     **/
    T=dmatrica(3*(d+1),3);
    E=dmatrica(2*(d+1),20);

    for (i=0;i<=d;i++) {
	//procitamo tranzicije
	for (j=0;j<3;j++)
	    for (k=0;k<3;k++) {
		fscanf(fp,"%lf",&f);
		// Tu treba izvrsiti pretvorbu (bez privatizacije :)) jer su
		// tranzicije zadane u redosljedu M I D, a meni kasnije vise
		// odgovara M D I.
		jj=j;kk=k;
		if (j==1) jj=I;
		else if (j==2) jj=D;
		if (k==1) kk=I;
		else if (k==2) kk=D;
		T[3*i+jj][kk]=f;
	    }
	//procitamo emisije
	for (j=0;j<2;j++)
	    for (k=0;k<20;k++)
		fscanf(fp,"%lf",&E[2*i+j][k]);
    }

    fclose(fp);

    /* Ispis matrica */

    /*for (i=0;i<=d;i++) {
	for (j=0;j<3;j++)
	    for (k=0;k<3;k++)
		printf("%.2lf%c",T[3*i+j][k],(k==2?'\n':' '));
	for (j=0;j<2;j++)
	    for (k=0;k<20;k++)
		printf("%.2lf%c",E[2*i+j][k],(k==19?'\n':' '));
    }*/
	
    /* Implementacija Viterbijevog algoritma */

    V=dmatrica(3*(d+1)+1,n+1);
    P=imatrica(3*(d+1)+1,n+1);

    // Inicijalizacija
    V[0][0]=1;

    /* U sljedecim petljama je:
     *   i je korak
     *   j je stanje [M,D,I]
     **/
    for (i=0;i<=d;i++)
	for (j=0;j<3;j++)
	    for (k=0;k<=n;k++)
		if (V[3*i+j][k]>0) {
		    if (i==d) {
			/* Nalazimo se u zadnjem koraku. Ovo iziskuje
			* specijalni tretman.
			* Ne mozemo iz M u E ako su ostala slova koja
			* nisu emitirana. Moramo ih emitirati u I.
			* Ne mozemo iz D u E ako su ostala slova koja
			* nisu emitirana. Moramo ih emitirati u I.
			**/
			if (k<n) {
			    f=V[3*i+j][k]*T[3*i+j][I]*E[2*i+I/2][broj(x[k+1])];
			    if (f>V[3*i+I][k+1]) {
				V[3*i+I][k+1]=f;
				P[3*i+I][k+1]=j;
			    }
			}
			else {
			    f=V[3*i+j][k]*T[3*i+j][M];
			    if (f>V[3*(i+1)][k]) {
				V[3*(i+1)][k]=f;
				P[3*(i+1)][k]=j;
			    }
			}
		    }
		    else {
			if (k==n) {
			    // Tranzicija moguca jedino u sljedece
			    // D stanje.
			    f=V[3*i+j][k]*T[3*i+j][D];
			    if (f>V[3*(i+1)+D][k]) {
				V[3*(i+1)+D][k]=f;
				P[3*(i+1)+D][k]=j;
			    }
			}
			else {
			    // Tranzicija u I
			    f=V[3*i+j][k]*T[3*i+j][I]*E[2*i+I/2][broj(x[k+1])];
			    if (f>V[3*i+I][k+1]) {
				V[3*i+I][k+1]=f;
				P[3*i+I][k+1]=j;
			    }
			    // Tranzicija u M
			    f=V[3*i+j][k]*T[3*i+j][M]*E[2*(i+1)+M][broj(x[k+1])];
			    if (f>V[3*(i+1)+M][k+1]) {
				V[3*(i+1)+M][k+1]=f;
				P[3*(i+1)+M][k+1]=j;
			    }
			    // Tranzicija u D
			    f=V[3*i+j][k]*T[3*i+j][D];
			    if (f>V[3*(i+1)+D][k]) {
				V[3*(i+1)+D][k]=f;
				P[3*(i+1)+D][k]=j;
			    }
			}
		    }
		}

    /*for (i=0;i<=3*(d+1);i++)
	for (j=0;j<=n;j++)
	    printf("%.9lf%c",V[i][j],(j==n?'\n':' '));
    for (i=0;i<=3*(d+1);i++)
	for (j=0;j<=n;j++)
	    printf("%d%c",P[i][j],(j==n?'\n':' '));*/

    /* Valja napraviti traceback */
    izlaz=(char *)malloc((n+d+10)*sizeof(char));
    i=3*(d+1);j=n;k=0;
    i-=3-P[i][j];
    while (i>0 || j>0) {
	if (i%3==M) {
	    izlaz[k]='M';
	    i-=3-P[i][j];
	    j--;
	}
	else if (i%3==I) {
	    izlaz[k]='I';
	    i-=2-P[i][j];
	    j--;
	}
	else {
	    izlaz[k]='D';
	    i-=4-P[i][j];
	}
	k++;
    }
    
    printf("Prolaz: B->");
    for (i=k-1,j=0;i>=0;i--)
	printf("%c%d->",izlaz[i],(izlaz[i]=='I'?j:++j));
    printf("E\n");
	

    /* Oslobadjanje memorije, kraj */

    brisi_matricu((void *)T);
    brisi_matricu((void *)E);
    brisi_matricu((void *)V);
    brisi_matricu((void *)P);
    free(x);
    free(izlaz);

    return 0;
}
