/***************************************************************************
 *   Copyright (C) 2007 by Faubet Pierre   *
 *   pierre.faubet@e.ujf-grenoble.fr   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 ***************************************************************************/

#include <iostream>
#include <fstream>
#include <sstream>

#include "multilocus_genotypes.h"
#include "BIMrErrors.h"

/* Default constructor: never yields an object.
 *
 * It unconditionally throws BIMrError, right after invoking the
 * EXCEPTION_INFOS() macro (defined outside this file; presumably it records
 * where the error was raised -- TODO confirm).
 */
multilocus_genotypes::multilocus_genotypes()
{
  EXCEPTION_INFOS();
  throw BIMrError();
}

multilocus_genotypes::multilocus_genotypes(const string& infilename)
{
  int info=0;
  bool readgeno=false;

  inputfilename = infilename.substr(0,infilename.length()-4);
  string filename = inputfilename + ".gen";

  clog << "Read multilocus genotype data from file " << filename << endl;

// Open input file
  ifstream infile;
  infile.open(filename.data());
  if (infile.fail())
    {
      EXCEPTION_INFOS();
      throw OpenFileError();
    }

  string line;
  int allele;

  while (!infile.eof())
// 		    while (getline(infile,line,'='))
    {
      getline(infile,line,'=');

      /* Read number of individuals */
      if ((line.length() >= 13) && (line.substr(line.length()-13,line.length())=="[individuals]"))
        {
          getline(infile,line);
          istringstream size(line);
          if (size >> nbindiv)
            info++;
        }

      /* Read number of populations */
      else if ((line.length() >= 13) && (line.substr(line.length()-13,line.length())=="[populations]"))
        {

          getline(infile,line);
          istringstream size(line);
          if (size >> nbpop)
            info++;
        }

      /* Read number of loci */
      else if ((line.length() >= 6) && (line.substr(line.length()-6,line.length())=="[loci]"))
        {
          getline(infile,line);
          istringstream size(line);
          if (size >> nbloci)
            info++;
        }

      /* Read number of alleles */
      else if ((line.length() >= 9) && (line.substr(line.length()-9,line.length())=="[alleles]"))
        {
          getline(infile,line);
          istringstream size(line);
          if (size >> nballel)
            info++;

          /* Add null allele */
          nballel++;
        }

      /* Read multilocus genotypes */
      else if ((line.length() >= 11) && (line.substr(line.length()-11,line.length())=="[genotypes]"))
        {
          if (info!= 4)
            {
              cerr << "Sample sizes" << endl;
              EXCEPTION_INFOS();
              throw FileFormatError();
            }

          createarrays();
          readgeno=true;

          getline(infile,line);

          int h=0;
          while (!infile.eof() && (h < nbindiv))
            {
              getline(infile,line);
              if (line.length())
                {
                  /* Read individual's id */
                  string::size_type pos = line.find(',');
                  if (pos != string::npos)
                    {
                      id[h] = line.substr(0,pos);
                      line.erase(0,pos+1);
                    }
                  else
                    {
                      stringstream strstr;
                      strstr << h+1;
                      string num;
                      strstr >> num;
                      id[h] = "Ind#" + num;
                    }

                  if (line.length()==0)
                    {
                      EXCEPTION_INFOS();
                      throw FileFormatError();
                    }

                  /* Read source population */
                  istringstream geno(line);
                  string popidf;
                  geno >> popidf;
                  if (!popnames.count(popidf))
                    {
                      int currentsize = (int) popnames.size();
                      popnames[popidf] = currentsize;
                    }
                  S[h] = popnames[popidf];

                  /* Increment individual count in sampled population */
                  N[S[h]]++;

                  /* Multilocus genotype */
                  int j=0;
                  while ((geno >> allele) && (j < nbloci))
                    /*                      for (int j=0;j<nbloci;j++)*/
                    {
                      X[h][j][0] = allele / 100;
                      X[h][j][1] = allele % 100;

                      if (!((X[h][j][0] < nballel)&&(X[h][j][1] < nballel)))
                        {
                          cerr << "Allele coding" << endl;
                          cerr << "Individual " << h+1 << endl;
                          cerr << "Locus " << j+1 << endl;
                          EXCEPTION_INFOS();
                          throw FileFormatError();
                        }

                      /* Allele count and presence */
                      if ((X[h][j][0] == X[h][j][1]) && (allelecount[S[h]][j][X[h][j][0]] == 0))
                        {

                          allelecount[S[h]][j][X[h][j][0]] += 2;
                          is_at_locus[j][X[h][j][0]] = true;
                        }

                      else if ((X[h][j][0] == X[h][j][1]) && (allelecount[S[h]][j][X[h][j][0]] != 0))
                        allelecount[S[h]][j][X[h][j][0]] += 2;

                      else if (X[h][j][0] != X[h][j][1])
                        {
                          if (allelecount[S[h]][j][X[h][j][0]] == 0)
                            is_at_locus[j][X[h][j][0]] = true;

                          if (allelecount[S[h]][j][X[h][j][1]] == 0)
                            is_at_locus[j][X[h][j][1]] = true;

                          allelecount[S[h]][j][X[h][j][0]] += 1;
                          allelecount[S[h]][j][X[h][j][1]] += 1;
                        }
                      j++;
                    }
                  if (j!=nbloci)
                    {
                      cerr << "Individual " << h+1 << endl;
                      cerr << "Only " << j+1 << "loci scored" << endl;
                      EXCEPTION_INFOS();
                      throw FileFormatError();
                    }
                  h++;
                }
            }
          if (h!=nbindiv)
            {
              cerr << "Only " << h << " individuals observed, " << nbindiv << " expected" << endl;
              EXCEPTION_INFOS();
              throw FileFormatError();
            }
        }
    }
  if (!readgeno)
    {
      cerr << "Unable to find multilocus genotypes section." << endl;
      EXCEPTION_INFOS();
      throw FileFormatError();
    }


  infile.close();
  checkdata();

  string popfilename = inputfilename + ".names";
  ofstream popfile;
  popfile.open(popfilename.data());
  if (popfile.fail())
    {
      EXCEPTION_INFOS();
      throw OpenFileError();
    }

  map<string,int>::iterator popid;
  for (popid=popnames.begin();popid!=popnames.end();popid++)
    popfile << popid->second+1 << " " << popid->first << endl;

  clog << "Read multilocus genotype data ... (done)" << endl;
}

/* Release every array allocated by createarrays().
 *
 * createarrays() only allocates S and X when nbindiv != 0, so they are only
 * released under the same condition; the other arrays are always allocated.
 */
multilocus_genotypes::~multilocus_genotypes()
{
  clog << "Drop genetic data" << endl;

  /* Per-individual data: source populations and genotypes */
  if (nbindiv != 0)
    {
      delete [] S;

      for (int h=0;h<nbindiv;h++)
        {
          for (int j=0;j<nbloci;j++)
            delete [] X[h][j];

          delete [] X[h];
        }
      delete [] X;
    }
  delete [] id;

  /* Per-population data: allele counts and sample sizes */
  for (int q=0;q<nbpop;q++)
    {
      for (int j=0;j<nbloci;j++)
        delete [] allelecount[q][j];
      delete [] allelecount[q];
    }
  delete [] allelecount;
  delete [] N;

  /* Per-locus data: allele presence flags */
  for (int j=0;j<nbloci;j++)
    delete [] is_at_locus[j];
  delete [] is_at_locus;

  clog << "Drop genetic data ... (done)" << endl;
}

void multilocus_genotypes::createarrays()
{
  clog << "Allocate memory for multilocus genotype data" << endl;
//   clog << "Number of individuals: " << nbindiv << endl;
//   clog << "Number of populations: " << nbpop << endl;
//   clog << "Number of loci: " << nbloci << endl;
//   clog << "Number of alleles: " << nballel-1 << endl;

  if (nbindiv != 0)
    {
      S = new int [nbindiv];
      if (!S)
        {
          cerr << "Source population" << endl;
          EXCEPTION_INFOS();
          throw MemError();
        }

      X = new int **[nbindiv];
      if (X)
        for (int h=0;h<nbindiv;h++)
          {
            X[h] = new int *[nbloci];
            if (X[h])
              {
                for (int j=0;j<nbloci;j++)
                  {
                    X[h][j] = new int [2];
                    if (!X[h][j])
                      {
                        cerr << "Genotype of individual " << h+1 << " at locus " << j+1 << endl;
                        EXCEPTION_INFOS();
                        throw MemError();
                      }
                  }
              }
            else
              {
                cerr << "Multilocus genotype of individual " << h+1 << endl;
                EXCEPTION_INFOS();
                throw MemError();
              }
          }
      else
        {
          cerr << "Multilocus genotypes data" << endl;
          EXCEPTION_INFOS();
          throw MemError();
        }
    }

  allelecount = new int **[nbpop];
  if (allelecount)
    {
      for (int q=0;q<nbpop;q++)
        {
          allelecount[q] = new int *[nbloci];
          if (allelecount[q])
            {
              for (int j=0;j<nbloci;j++)
                {
                  allelecount[q][j] = new int [nballel];
                  if (allelecount[q][j])
                    for (int k=0;k<nballel;k++)
                      allelecount[q][j][k] = 0;
                  else
                    {
                      cerr << "Allelele count in population " << q+1 << " at locus " << j+1 << endl;
                      EXCEPTION_INFOS();
                      throw MemError();
                    }
                }
            }
          else
            {
              cerr << "Allelele count in population " << q+1 << endl;
              EXCEPTION_INFOS();
              throw MemError();
            }
        }
    }
  else
    {
      cerr << "Allele count" << endl;
      EXCEPTION_INFOS();
      throw MemError();
    }

  is_at_locus = new bool *[nbloci];
  if (is_at_locus)
    {
      for (int j=0;j<nbloci;j++)
        {
          is_at_locus[j] = new bool [nballel];
          if (is_at_locus[j])
            {
              for (int k=0;k<nballel;k++)
                is_at_locus[j][k] = false;
            }
          else
            {
              EXCEPTION_INFOS();
              throw MemError();
            }
        }
    }
  else
    exit(0);

  N = new int [nbpop];
  if (N)
    for (int q=0;q<nbpop;q++)
      N[q] = 0;
  else
    {
      EXCEPTION_INFOS();
      throw MemError();
    }

  id = new string [nbindiv];
  if (!id)
    {
      EXCEPTION_INFOS();
      throw MemError();
    }
}


void multilocus_genotypes::checkdata()
{
  nodata = (nbindiv == 0);

  missingdata = true;
  if (!nodata)
    for (int q=0;q<nbpop;q++)
      for (int j=0;j<nbloci;j++)
        missingdata = (allelecount[q][j][0] == 2*N[q]) && missingdata;

  if (nodata || missingdata)
    for (int j=0;j<nbloci;j++)
      for (int k=0;k<nballel;k++)
        is_at_locus[j][k] = (k != 0);
}


void multilocus_genotypes::write(const string& outfil)
{
  /* Output file */
  ofstream out;
  out.open(outfil.data());
  if (out.fail())
    {
      EXCEPTION_INFOS();
      throw OpenFileError();
    }

  out << "Genetic data" << endl;
  out << "Number of populations: " << nbpop << endl;
  out << "Number of loci: " << nbloci << endl;
  out << "Number of alleles: " << nballel-1 << endl;
  out << "Number of individuals: " << nbindiv << endl;

  out << endl;
  out << "All/Loc";
  for (int j=0;j<nbloci;j++)
    out << '\t' << "loc-" << j+1;
  out << endl;
  for (int k=0;k<nballel;k++)
    {
      out << k;
      for (int j=0;j<nbloci;j++)
        out << '\t' << is_at_locus[j][k];
      out << endl;
    }

  out << endl;
  for (int l=0;l<nbpop;l++)
    out << "# of individuals in population " << l+1 << " is " << N[l] << endl;

  out << endl;
  out << "Data: " << (missingdata ? "no" : "yes" ) << endl;

  out << endl;
  out << "Ind" << '\t' << "S";
  for (int j=0;j<nbloci;j++)
    out << '\t' << "loc" << j+1;
  out << endl;
  for (int h=0;h<nbindiv;h++)
    {
      out << id[h] << '\t' << S[h]+1;
      for (int j=0;j<nbloci;j++)
        out << '\t' << X[h][j][0] << ',' << X[h][j][1];
      out << endl;
    }
  out.close();
}
