# Das adaptor for GUS =head1 NAME DAS::GUS - DAS-style accession to a gus database =head1 SYNOPSIS # Open up a feature database $db = DAS::GUS->new( -dsn => 'dbi:Oracle:sid=GUSDEV;host=localhost;port=1521', -user => 'user', -pass => 'pass', ); @segments = $db->segment ( -name => 'AAEE01000001', -start => 1, -end => 1000000 ); # segments are Bio::Das::SegmentI - compliant objects # fetch a list of features @features = $db->features( -type=>['type1', 'type2', 'type3'] ); # invoke a callback over features $db->features( -type=>['type1', 'type2', 'type3'], -callback => sub { ... } ); # get all featur types @types = $db->types # count types %types = $db->types( -enumerate=>1 ); @feature = $db->get_feature_by_name( $class=>$name ); @feature = $db->get_feature_by_target( $target_name ); @feature = $db->get_feature_by_attribute( $att1=>$value1, $att2=>$value2 ); $feature = $db->get_feature_by_id( $id ); $error = $db->error; =cut =head1 AUTHOR Name: Haiming Wang Email: hwang@uga.edu =cut #' package DAS::GUS; use strict; use DAS::GUS::Segment; use DAS::Util::SqlParser; use Bio::Root::Root; use Bio::DasI; use Bio::PrimarySeq; use DBI; use Carp qw(longmess); use vars qw($VERSION @ISA); use constant DEBUG => 0; $VERSION = 0.11; @ISA = qw(Bio::Root::Root Bio::DasI); =head2 new Title : new Usage : $db = DAS::GUS->new ( -dsn => 'dbi:Oracle:sid=GUSDEV;host=localhost;port=1521', -user => 'user', -pass => 'pass', ); Function : Open up a Bio::DB::DasI interface to a GUS database Returns : a new DAS::GUS object =cut sub new { my $proto = shift; my $self = bless {}, ref($proto) || $proto; my %arg = @_; my $dsn = $arg{-dsn}; my $username = $arg{-user}; my $password = $arg{-pass}; my $sqlfile = $arg{-sqlfile}; my $dbh = DBI->connect( $dsn, $username, $password ) or $self->throw("unable to open db handle"); # solve oracle clob problem $dbh->{LongTruncOk} = 0; $dbh->{LongReadLen} = 10000000; $self->dbh($dbh); $self->parser(DAS::Util::SqlParser->new($sqlfile)); return $self; } =head2 dbh Title : dbh Usage : $obj->dbh($newval) Function : get a database handle Returns : value of dbh (a scalar) Args : on set, new value (a scalar or undef, optional) =cut sub dbh { my $self = shift; return $self->{'dbh'} = shift if @_; return $self->{'dbh'}; } =head2 parser Title : parser Usage : $obj->parser($parserObj) Function : get a sql parser object Returns : a sql parser object Args : =cut sub parser { my $self = shift; return $self->{'parser'} = shift if @_; return $self->{'parser'}; } =head2 segment Title : segment Usage : $db->segment(@args) Function : create a segment object Returns : segment object(s) Args : see below This method generates a Bio::Das::SegmentI object (see L). The segment can be used to find overlapping features and the raw sequence. When making the segment() call, you specify the ID of a sequence landmark (e.g. an accession number, a clone or contig), and a positional range relative to the landmark. If no range is specified, then the entire region spanned by the landmark is used to generate the segment. Arguments are -option=Evalue pairs as follows: -name ID of the landmark sequence. -class A namespace qualifier. It is not necessary for the database to honor namespace qualifiers, but if it does, this is where the qualifier is indicated. -version Version number of the landmark. It is not necessary for the database to honor version, but if it does, this is where the version is indicated. -start Start of the segment relative to landmark. Positions follow standard 1-based sequence rules. If not specified, defaults to the beginning of the landmark. -end End of the segment relative to the landmark. If not specified, defaults to the end of the landmark. -atts Attribute of reference sequence The return value is a list of Bio::Das::SegmentI objects. If the method is called in a scalar context and there are no more than one segments that satisfy the request, then it is allowed to return the segment. Otherwise, the method must throw a "multiple segment exception". =cut sub segment { my $self = shift; my ( $name,$base_start,$stop,$end,$class,$version,$atts ) = $self->_rearrange([qw(NAME START STOP END CLASS VERSION ATTS )], @_); $end ||= $stop; return DAS::GUS::Segment->new($name, $self, $base_start, $end, $atts); } =head2 features Title : fetures Usage : $db->features(@args) Function : get all features, possibly filtered by type Returns : a list of DAS::GUS::Segment::Feature objects Args : see below Status : public This routine will retrieve features in the database regardless of position. It can be used to return all features, or a subset based on their method and source. Arguments are -option=Evalue pairs as follows: -type List of feature types to return. Argument is an array of i reference containing strings of the format "method:source" -callback A callback to invoke on each feature. The subroutine will be passed each Bio::SeqFeatureI object in turn. -attributes A has reference containing attributes to match. -iterator Whether to return an iterator across the features Types are indicated using the nomenclature "method:source". Either of these fields can be omitted, in which case a wildcard is used for the missing field. Type names without the colon (e.g. "exon") are interpreted as the method name and a source wild card. Regular expression are allowed in either field, as in: "similarity:BLAST.*". The -attributes argument is a hashref containing one or more attributes to match against: -attributes => { Gene => 'abc-1', Note => 'confirmed' } Attribute matching is simple exact string match, and multiple attributes are ANDed together. If one provides a callback, it will be invoked on each feature in turn. If the callback returns a false value, iteration will be interrupted. When a callback is provided, the method returns undef. =cut sub features { my $self = shift; my ( $type, $types, $callback, $attributes, $iterator ) = $self->_rearrange([qw(TYPE TYPES CALLBACK ATTRIBUTES ITERATOR)], @_ ); $type ||= $types; my @features = DAS::GUS::Segment->features( -type => $type, -attributes => $attributes, -callback => $callback, -iterator => $iterator, -factory => $self, ); return @features; } =head2 get_feature_by_name Title : get_feature_by_name Usage : $db->get_feature_by_name($class => $name) Function: fetch features by their name Returns : a list of DAS::GUS::Segment::Feature objects Args : the class and the name of the desired feature Status : public Note : You need to modify _feature_get() in Browser.pm currently. Find "return unless @segments;" and change it to "return @segments;" Debug it later. Also see multiple_choice() in gbrowse cgi script for getting features\'s attributes =cut sub get_feature_by_name { my $self = shift; my ( $name, $class, $ref, $base_start, $stop ) = $self->_rearrange([qw(NAME CLASS REF START END)], @_); my ( @segs, @features, $sth, $segment, $seg_name ); if( $name ) { $name =~ s/[?*]\s*$/%/; # replace * with % in the sql # get features by locus name, genbank accession number or # protein function keywords my $query = $self->parser->getSQL("GUS.pm", "get_feature_by_name"); $query =~ s/(\$\w+)/eval $1/eg; $query =~ s/\*/\%/g; my $un = uc($name); $query =~ s/\?/\'\%$un\%\'/g; $sth = $self->dbh->prepare($query); $sth->execute(); while(my $hashref = $sth->fetchrow_hashref) { $seg_name = $$hashref{'CTG_NAME'}; # if this is a segment, return a segment object return $self->segment($seg_name) if($seg_name =~ /$name/i); $segment = $self->segment($seg_name); my $feat = DAS::GUS::Segment::Feature->new( $self, $segment, # parent $seg_name, # the source sequence $$hashref{'STARTM'}, # start $$hashref{'END'}, # end $$hashref{'TYPE'}.':'.$$hashref{'SOURCE'}, $$hashref{'SCORE'}, # score $$hashref{'STRAND'}, # strand $$hashref{'PHASE'}, # phase $$hashref{'NAME'}, # group $$hashref{'ATTS'}, # attributes $$hashref{'NAME'}, # unique_name $$hashref{'FEATURE_ID'} # feature_id ); push @features, $feat; } } if (@features) { @features; } else { (); } } sub default_class { return 'Sequence' } # Compatible with other gbrowse related scripts - e.g. das sub aggregators { return } sub absolute { return } package DAS::GUSIterator; sub new { my $package = shift; my $features = shift; return bless $features, $package; } sub next_seq { my $self = shift; return unless @$self; my $next_feature = shift @$self; return $next_feature; } 1;