
=head1 LICENSE

  Copyright (c) 1999-2011 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <ensembl-dev@ebi.ac.uk>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=head1 NAME

Bio::EnsEMBL::Funcgen::RegulatoryFeature - A module to represent a regulatory feature 
mapping as generated by the eFG regulatory build pipeline.

=head1 SYNOPSIS

 use Bio::EnsEMBL::Registry;
 use Bio::EnsEMBL::Funcgen::RegulatoryFeature;

 my $reg = Bio::EnsEMBL::Registry->load_adaptors_from_db(-host    => 'ensembldb.ensembl.org',
                                                         -user    => 'anonymous');

 my $pset_adaptor = $reg->get_adaptor($species, 'funcgen', 'RegualtoryFeature');


 ### Creating/storing a RegulatoryFeature Set ###

 my $feature = Bio::EnsEMBL::Funcgen::RegulatoryFeature->new(
    -SLICE         => $chr_1_slice,
    -START         => 1_000_000,
	-END           => 1_000_024,
    -STRAND        => 0,
    -DISPLAY_LABEL => $text,
    -FEATURE_SET   => $fset,
    -FEATURE_TYPE  => $reg_ftype,
    -_ATTRIBUTE_CACHE => \%attr_cache,
 ); 


=head1 DESCRIPTION

A RegulatoryFeature object represents the genomic placement of a combined regulatory
feature generated by the eFG analysis pipeline, which may have originated from one or 
many separate annotated or supporting features.

=cut


package Bio::EnsEMBL::Funcgen::RegulatoryFeature;

use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( throw );
use strict;
use warnings;

use base qw(Bio::EnsEMBL::Funcgen::SetFeature); #@ISA


=head2 new

  Arg [-SLICE]             : Bio::EnsEMBL::Slice - The slice on which this feature is.
  Arg [-START]             : int - The start coordinate of this feature relative to the start of the slice
                             it is sitting on. Coordinates start at 1 and are inclusive.
  Arg [-END]               : int -The end coordinate of this feature relative to the start of the slice
                    	     it is sitting on. Coordinates start at 1 and are inclusive.
  Arg [-DISPLAY_LABEL]     : string - Display label for this feature
  Arg [-BINARY_STRING]     : string - Regulatory Build binary string
  Arg [-PROJECTED]         : boolean - Flag to specify whether this feature has been projected or not
  Arg [-FEATURE_SET]       : Bio::EnsEMBL::Funcgen::FeatureSet - Regulatory Feature set
  Arg [-FEATURE_TYPE]      : Bio::EnsEMBL::Funcgen::FeatureType - Regulatory Feature sub type
  Arg [-ATTRIBUTE_CACHE]   : HASHREF of feature class dbID|Object lists
  Arg [-dbID]              : (optional) int - Internal database ID.
  Arg [-ADAPTOR]           : (optional) Bio::EnsEMBL::DBSQL::BaseAdaptor - Database adaptor.

  Example    : my $feature = Bio::EnsEMBL::Funcgen::RegulatoryFeature->new(
										                                  -SLICE         => $chr_1_slice,
									                                      -START         => 1_000_000,
									                                      -END           => 1_000_024,
									                                      -DISPLAY_LABEL => $text,
									                                      -FEATURE_SET   => $fset,
                                                                          -FEATURE_TYPE  => $reg_ftype,
                                                                          -ATTRIBUTE_CACHE => \%attr_cache,
                                                                         );


  Description: Constructor for RegulatoryFeature objects.
  Returntype : Bio::EnsEMBL::Funcgen::RegulatoryFeature
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;
  #hardcode strand as always 0
  my $self = $class->SUPER::new(@_, -strand => 0);

  my ($stable_id, $attr_cache, $bin_string, $projected)
    = rearrange(['STABLE_ID', 'ATTRIBUTE_CACHE', 'BINARY_STRING', 'PROJECTED'], @_);

  $self->is_projected($projected)      if defined $projected;
  $self->binary_string($bin_string)    if defined $bin_string;
  $self->stable_id($stable_id)         if $stable_id;
  $self->attribute_cache($attr_cache) if $attr_cache;

  return $self;
}


=head2 display_label

  Arg [1]    : string - display label
  Example    : my $label = $feature->display_label();
  Description: Getter and setter for the display label of this feature.
  Returntype : String
  Exceptions : None
  Caller     : General
  Status     : Medium Risk

=cut

sub display_label {
  my $self = shift;
  $self->{'display_label'} = shift if @_;


  if(! defined $self->{'display_label'}){
	$self->{'display_label'}  = $self->feature_type->name.' Regulatory Feature';

	if( defined $self->cell_type ){
	  $self->{'display_label'} .= ' - '.$self->cell_type->name;
	}
  }

  return  $self->{'display_label'};
}


=head2 binary_string

  Arg [1]    : optional string - binary string from regualtory build
  Example    : my $bin_string = $feature->binary_string();
  Description: Getter and setter for the binary_string for this feature.
  Returntype : String
  Exceptions : None
  Caller     : General
  Status     : At Risk - May change to BLOB

=cut

sub binary_string{
  my ($self, $bin_string)  = @_;
  $self->{'binary_string'} = $bin_string if defined $bin_string;

  return $self->{'binary_string'};
}


=head2 stable_id

  Arg [1]    : (optional) string - stable_id e.g ENSR00000000001
  Example    : my $stable_id = $feature->stable_id();
  Description: Getter and setter for the stable_id attribute for this feature. 
  Returntype : string
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub stable_id {
  my $self             = shift;
  $self->{'stable_id'} = shift if @_;

  return $self->{'stable_id'};
}


=head2 regulatory_attributes

  Arg [1]    : (optional) list of constituent features
  Example    : print "Regulatory Attributes:\n\t".join("\n\t", (map $_->feature_type->name, @{$feature->regulatory_attributes()}))."\n";
  Description: Getter and setter for the regulatory_attributes for this feature. 
  Returntype : ARRAYREF
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut


sub regulatory_attributes{
  my ($self, $feature_class) = @_;

  #Incorporating the MFs like this does cause some redundancy in the DB
  #But will speed up display of the RegFeat image including the MFs
  #Redefine the cache to have class keys e.g. TFBS, OpenChromatin, Histone Mods
  #Can't do this as we need the table key to be able to fetch the features
  #Really need something to be able to draw the image first, then create the zmenu details later.

  my %adaptors = (
				  'annotated' => $self->adaptor->db->get_AnnotatedFeatureAdaptor,
				  'motif'     => $self->adaptor->db->get_MotifFeatureAdaptor,
				  #external
				 );

  my @fclasses;

  if(defined $feature_class){

	if(exists $adaptors{lc($feature_class)}){
	  @fclasses = (lc($feature_class));
	}
	else{
	  throw("The feature class you specified is not valid:\t$feature_class\n".
			"Please use one of:\t".join(', ', keys %adaptors));
	}
  }
  else{
	@fclasses = keys %adaptors;
  }

  foreach my $fclass(@fclasses){
	#Now structured as hash to facilitate faster has_attribute method
	#Very little difference to array based cache

	my @attr_dbIDs = keys %{$self->{'attribute_cache'}{$fclass}};

	
	if(scalar(@attr_dbIDs) > 0){
	  
	  if( ! ( ref($self->{'regulatory_attributes'}{$fclass}->[0])  &&
			  ref($self->{'regulatory_attributes'}{$fclass}->[0])->isa('Bio::EnsEMBL::Feature') )){

		$adaptors{$fclass}->force_reslice(1);	#So we don't lose attrs which aren't on the slice
		$self->{'regulatory_attributes'}{$fclass} = $adaptors{$fclass}->fetch_all_by_dbID_list(\@attr_dbIDs, $self->slice);
		$adaptors{$fclass}->force_reslice(0);

		#Problems here with attrs not being returning when they do not lie on dest slice
		#i.e. core projected to cell line, but dest slice only over laps a region of the core which
		#actually has no attrs.
		#either use the feature_Slice and reslice everthing to the dest slice
		#or skip test in attr obj_frm_sth?
		#

		#This method transfers to the query slice, do not use fetch_by_dbID
		#It also should use _final_clause
		#This is currently only specified in the MotifFeatureAdaptor
		#as these are required to be sorted to relate to the structure string

		#but we are stll storing in has where order is not preserved!!
		#so this will not match order of underlying strcture!

		#separate so we can have ordered array returned
		#do we need redundant caches?
		#defo need db id cache for 'has' methods
		
		#foreach my $attr(@{$fclass_attrs}){
		#  $self->{'regulatory_attributes'}{$fclass}{$attr->dbID} = $attr;
		#}
	  }
	}
	else{
	  $self->{'regulatory_attributes'}{$fclass} = [];
	}
  }

  return [ map { @{$self->{'regulatory_attributes'}{$_}} } @fclasses ];
}

=head2 has_attribute

  Arg [1]    : Attribute Feature dbID
  Arg [2]    : Attribute Feature class e.g. motif or annotated
  Example    : if($regf->has_attribute($af->dbID, 'annotated'){ #do something here }
  Description: Identifies whether this RegualtoryFeature has a given attribute
  Returntype : Boolean
  Exceptions : Throws if args are not defined
  Caller     : General
  Status     : At Risk

=cut


sub has_attribute{
  my ($self, $dbID, $fclass) = @_;

  throw('Must provide a dbID and a Feature class argument') if ! $dbID && $fclass;

  return exists ${$self->attribute_cache}{$fclass}{$dbID};

}

=head2 get_focus_attributes

  Arg [1]    : None
  Example    : my @focus_attrs = @{$regf->get_focus_attributes};
  Description: Getter for the focus features of this RegualtoryFeature, used to defined the core region
  Returntype : ARRAYREF
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_focus_attributes{
  my $self = shift;

  if(! exists $self->{'focus_attributes'} ||
	 ! @{$self->{'focus_attributes'}}){
	$self->_sort_attributes;
  }


  return $self->{'focus_attributes'};
}


=head2 get_nonfocus_attributes

  Arg [1]    : None
  Example    : my @non_focus_attrs = @{$regf->get_nonfocus_attributes};
  Description: Getter for the non-focus features of this RegulatoryFeature, used to defined 
               the non core region i.e. the whiskers.
  Returntype : ARRAYREF
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_nonfocus_attributes{
  my $self = shift;

  #Test focus here as we may not have any nonfocus
  #But focus will show that we have sorted already
  if(! exists $self->{'focus_attributes'} ||
	 ! @{$self->{'focus_attributes'}}){
	$self->_sort_attributes;
  }

  return $self->{'nonfocus_attributes'};
}


sub _sort_attributes{
  my $self = shift;

  $self->{'focus_attributes'} = [];
  $self->{'nonfocus_attributes'} = [];

  foreach my $attrf(@{$self->regulatory_attributes}){

	if($attrf->isa('Bio::EnsEMBL::Funcgen::MotifFeature') ||
	   $attrf->feature_set->is_focus_set){
	  push @{$self->{'focus_attributes'}}, $attrf;
	}
	else{
	  push @{$self->{'nonfocus_attributes'}}, $attrf;
	}
  }

  return;
}


=head2 attribute_cache

  Arg [1]    : optional - HASHREF of attribute table keys with values as either a list of attribute 
               feature dbIDs or objects. If passing object, any MotifFeature objects should be in position
               order with respect to the slice.
  Example    : $feature->attribute_cache(\%attribute_feature_info);
  Description: Setter for the regulatory_attribute cache for this feature. This is a short cut method used by the 
               regulatory build and the webcode to avoid unnecessary fetching and enable enable lazy loading 
  Returntype : HASHREF
  Exceptions : Throws if trying to overwrite existing cache
  Caller     : RegulatoryFeatureAdaptor.pm and build_regulatory_features.pl
  Status     : At Risk

=cut


sub attribute_cache{
  my ($self, $attr_hash) = @_;

#  if(! defined $attr_hash){
#	$self->regulatory_attributes; #Fetch the attrs?
#
#
#	#Do we need to do this now we have separated the caches?
#
#  }
  if(defined $attr_hash){

	foreach my $fclass(keys %{$attr_hash}){

	  if(exists $self->{'attribute_cache'}{$fclass}){
		throw("You are trying to overwrite a pre-existing regulatory attribute cache entry for feature class:\t$fclass");
	  }
	  else{
		$self->{'attribute_cache'}{$fclass} = $attr_hash->{$fclass};
	  }
	}
  }

  return $self->{'attribute_cache'} || {};
}


=head2 bound_start

  Example    : my $bound_start = $feature->bound_start();
  Description: Getter for the bound_start attribute for this feature.
               Gives the 5' most start value of the underlying attribute
               features.
  Returntype : string
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub bound_start {
  my $self = shift;
  $self->get_underlying_structure if ! defined $self->{'bound_start'};

  return $self->{'bound_start'};
}


=head2 bound_end

  Example    : my $bound_end = $feature->bound_start();
  Description: Getter for the bound_end attribute for this feature.
               Gives the 3' most end value of the underlying attribute
               features.
  Returntype : string
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub bound_end {
  my $self = shift;
  $self->get_underlying_structure if ! defined $self->{'bound_end'};

  return $self->{'bound_end'};
}


=head2 is_projected

  Arg [1]    : optional - boolean
  Example    : if($regf->is_projected){ #do something different here }
  Description: Getter/Setter for the projected attribute.
  Returntype : boolean
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub is_projected {
  my $self           = shift;
  $self->{projected} = shift if @_;

  return $self->{'projected'};
}


=head2 get_underlying_structure

  Example    :  $self->get_underlying_structure() if(! exists $self->{'bound_end'});
  Description: Getter for the bound_end attribute for this feature.
               Gives the 3' most end value of the underlying attribute
               features.
  Returntype : string
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

#This should really be precomputed and stored in the DB to avoid the MF attr fetch
#Need to be aware of projecting here, as these will expire if we project after this method is called

sub get_underlying_structure{
  my $self = shift;

  if(! defined $self->{underlying_structure}){

	my @attrs = @{$self->regulatory_attributes()};

	if(! @attrs){
	  throw('No underlying regulatory_attribute features to get_underlying_structure for dbID '.$self->dbID);
	  #This should never happen even with a projection build
	}
	else{


	  #We only need to set the bounds when storing on full slice/seq_region values
	  #else they should be fetched from the DB

	  if(! defined $self->{'bound_start'}){

		my (@start_ends);

		foreach my $attr(@attrs){
		  push @start_ends, ($attr->start, $attr->end);
		}

		#Accounts for core region, where data may be absent on this cell type
		push @start_ends, ($self->start, $self->end);

		@start_ends = sort { $a <=> $b } @start_ends;

		$self->{'bound_end'} = pop @start_ends;
		$self->{'bound_start'} = shift @start_ends;

		#Need to account for projection build here
		#i.e. attr extremeties may not extend past core start/end

		if($self->is_projected){
		  $self->{'bound_end'}   = $self->end   if $self->end   > $self->{'bound_end'};
		  $self->{'bound_start'} = $self->start if $self->start < $self->{'bound_start'};
		}
	  }

	  #Now deal with MotifFeature loci
	  my @mf_loci;

	  foreach my $mf(@{$self->regulatory_attributes('motif')}){
		push @mf_loci, ($mf->start, $mf->end);
	  }

	  $self->{underlying_structure} = [$self->{'bound_start'}, $self->start, @mf_loci, $self->end, $self->{'bound_end'}];
	}
  }

  return $self->{underlying_structure};
}




1;

__END__
