=begin
------------------------------------------------------------------
  ref.rb
------------------------------------------------------------------
  started on 07MAR2020
  modified on 09MAR2020 for qscores
  last modified on 22MAR2020 for fastq header for mira
------------------------------------------------------------------
  all rights reserved to TOQUE
------------------------------------------------------------------
  Ref class handles a character array of a reference genome for
  simulating Illumina reads.
------------------------------------------------------------------
  This Ruby script depends on "bio" gem packages.
  gem install bio
------------------------------------------------------------------
=end

require 'bio'

class Array
  def mean
    self.inject(:+) /self.length
  end
end

def intVectorSum v1, v2
  v2.each_with_object(v1).with_index(0){|(elm, rslt),idx|
    rslt[idx] = rslt[idx].to_i + elm
  }
end

class Ref
  def initialize refname, rs, rlen, ilen, cov, imr=0, dmr=0
    @rs = rs
    srand(@rs)
    @rlen = rlen
    @ilen = ilen
    @cov = cov
    @imr = imr
    @dmr = dmr
    @ref = Array.new
    File.open(refname){|fh|
      fh.each{|line|
        line.chomp.split("").each{|bs| @ref << bs} if !line.include?('>')
      }
    }
    @covers = Array.new(@ref.size,0)
    @reads1 = Array.new
    @reads2 = Array.new    
    @head = 0
  end

  def checkCoverage cov
    if @covers.min >= cov then
      true
    else
      false
    end
  end

  def makeReads alpha=0.5
    continue = true
    while(continue) do
      @head += (@rlen*(1 + alpha - 2*alpha*rand)).round
      @head = @head - @ref.size if @head > (@ref.size - 1)
      tail = @head + @rlen - 1
      if tail > (@ref.size - 1) then
        @reads1 << (@ref[@head..(@ref.size - 1)] + @ref[0..(tail-@ref.size)]).join
        @covers[@head..(@ref.size - 1)] = intVectorSum(\
          @covers[@head..(@ref.size - 1)], Array.new(@ref.size - @head,1))
        @covers[0..(tail-@ref.size)] = intVectorSum(\
          @covers[0..(tail-@ref.size)], Array.new(tail - @ref.size + 1,1))
      else
        @reads1 << @ref[@head..tail].join
        @covers[@head..tail] = intVectorSum(\
          @covers[@head..tail],Array.new(tail - @head + 1, 1))
      end
      head = @head+@rlen+@ilen-1
      head = head - @ref.size if head > (@ref.size - 1)
      tail = head + @rlen - 1
      if tail > (@ref.size - 1) then
        dna = (@ref[head..(@ref.size - 1)] + @ref[0..(tail-@ref.size)]).join
        dna = Bio::Sequence.auto(dna).reverse_complement
        @reads2 << dna.seq.upcase
        @covers[head..(@ref.size - 1)] = intVectorSum(
          @covers[head..(@ref.size - 1)], Array.new(@ref.size - head,1))
        @covers[0..(tail-@ref.size)] = intVectorSum(
          @covers[0..(tail-@ref.size)], Array.new(tail - @ref.size + 1,1))
      else
        dna = @ref[head..tail].join
        dna = Bio::Sequence.auto(dna).reverse_complement
        @reads2 << dna.seq.upcase
        @covers[head..tail] = intVectorSum(
          @covers[head..tail],Array.new(tail - head + 1,1))
      end
      continue = false if self.checkCoverage @cov
    end
  end

  def writeReads
    qscores = ["i"]
    dirName = "r"+@rs.to_s
    Dir.mkdir(dirName) if !Dir.exists?(dirName)
    reads = [@reads1, @reads2]
    2.times{|i|
      fname = "sim"+@rlen.to_s+"i"+@ilen.to_s+"c"+@cov.to_s+"_"+(i+1).to_s+".fq"
      of = File.open(dirName + "/" + fname, "w")
      count = 1
      reads[i].each{|r|
##        of << "@r#{count}_NC_001422.1 phi-X174, complete genome_ln380_#0/#{i+1}\n" \
##        num1 = 2200 - 2*rand(200)
##        num2 = @ilen + 5 - 2*rand(5)
        of << "@read_#{@ilen}_#{count}/#{i+1} gi|96263272|ref|NC_001422.1| 1 2000 90 #{@ilen} \n" \
           << r << "\n+\n"
        @rlen.times{
          of << qscores[rand(qscores.size)]
        }
        of << "\n"
        count += 1
      }
    }
  end
end
