#!/tools/perl/current/bin/perl ##################################################################### # Program : dumpAnnotation.pl # # Author : Shuai Weng # # Date : Sept. 2000 # # Description : This script is used for sgd curators to dump # # the annotations out of Oracle into the # # sgd_associations flat file. # # # ##################################################################### use strict; use lib "/share/daisy/www-data/lib/common"; use Login qw (ConnectToDatabase); my $dbh; my $database = "sgd"; my $datadir = "/share/sgd/data"; my $cvsdir = "/share/go/saved/today/go/SGD_GO_files"; my %sgdid; my %locus; my %feature; my %goevCode; my %goAspect; my %alias; ##################################################################### $dbh = &ConnectToDatabase($database); ##################################################################### ##################################################################### my $datafile = "$datadir/gene_association.sgd"; open(OUT, ">$datafile") || die "Can't open '$datafile' for writing:$!\n"; # # split RCS header lines or else RCS will add a date and revision number # print OUT "\!version: \$"; print OUT "Revision: \$\n"; print OUT "\!date: \$"; print OUT "Date: \$\n"; ##################################################################### &setSgdidFeatureLocusAlias(); &setGoevCode(); &setGoAspect(); ##################################################################### my %annot4locus; my %annot4feat; my @locus; my @feature; my $sth = $dbh->prepare(" SELECT ace_object, primary_key, tab_name FROM prod.acelink WHERE tab_name = 'GO_LOCUS_GOEV' OR tab_name = 'GO_FEAT_GOEV' "); $sth->execute; my $annotArrayRef = $sth->fetchall_arrayref(); foreach my $annotRef (@$annotArrayRef) { my ($ref, $priKey, $tabNm) = @$annotRef; my ($goid, $id, $goevNo) = split(/::/, $priKey); if ($tabNm =~ /locus/i) { my $locusNm = $locus{$id}; push(@locus, $locusNm); $annot4locus{$locusNm} .= "\t${goid}::${ref}::$goevCode{$goevNo}"; } elsif ($tabNm =~ /feat/i) { my $featNm = $feature{$id}; push(@feature, $featNm); $annot4feat{$featNm} .= "\t${goid}::${ref}::$goevCode{$goevNo}"; } } my @sortedFeature = sort @feature; my @sortedLocus = sort @locus; my %count; foreach my $locusNm (@sortedLocus) { my $annots = $annot4locus{$locusNm}; $annots =~ s/^\t//; my @annot = split(/\t/, $annots); my $sgdid = $sgdid{$locusNm}; foreach my $annot (@annot) { if ($count{"$annot:$locusNm"}) { next; } $count{"$annot:$locusNm"}++; my ($goid, $ref, $goevCode) = split(/::/, $annot); my $goAspect = $goAspect{$goid}; my $goidLen = length($goid); my $n = 7 - $goidLen; for (my $i = 1; $i <= $n; $i++) { $goid = "0".$goid; } my $aliasNm = $alias{$locusNm}; $aliasNm =~ s/^\|//; print OUT "SGD\t$sgdid\t$locusNm\t\tGO:$goid\tSGD:$ref\t$goevCode\t\t$goAspect\t\t$aliasNm\n"; } } foreach my $featNm (@sortedFeature) { my $annots = $annot4feat{$featNm}; $annots =~ s/^\t//; my @annot = split(/\t/, $annots); my $sgdid = $sgdid{"\U$featNm"}; foreach my $annot (@annot) { if ($count{"$annot:$featNm"}) { next; } $count{"$annot:$featNm"}++; my ($goid, $ref, $goevCode) = split(/::/, $annot); my $goAspect = $goAspect{$goid}; my $goidLen = length($goid); my $n = 7 - $goidLen; for (my $i = 1; $i <= $n; $i++) { $goid = "0".$goid; } my $aliasNm = $alias{"\U$featNm"}; $aliasNm =~ s/^\|//; print OUT "SGD\t$sgdid\t$featNm\t\tGO:$goid\tSGD:$ref\t$goevCode\t\t$goAspect\t\t$aliasNm\n"; } } close (OUT); $dbh->disconnect; ################# CVS ############################################### my $dstr = `date +%Y%m%d`; chomp $dstr; print STDERR "\n\n################################################################\n"; print STDERR "# $dstr\n\n"; system("/sbin/sh -c '/bin/diff $datadir/gene_association.sgd $datadir/gene_association.sgd.old > $datadir/gene_association.sgd.diff 2> /dev/null'"); if (-z "$datadir/gene_association.sgd.diff") { print STDERR "$datadir/gene_association.sgd.diff zero size, update not necessary\n"; system("/bin/rm $datadir/gene_association.sgd.diff $datadir/gene_association.sgd"); exit; } my $err = system("/bin/cp $datadir/gene_association.sgd $cvsdir/"); if ($err) { die "dumpAnnotation.pl: cp failed: $!\n"; } $ENV{'PATH'} .= ":/tools/gnu/bin"; $ENV{'USER'} = "shuai"; $ENV{'LOGNAME'} = "shuai"; $err = system("/tools/gnu/bin/cvs -d /share/go/cvs commit -m '$dstr' $cvsdir/gene_association.sgd"); if ($err) { system("/bin/rm $datadir/gene_association.sgd.diff"); die "dumpAnnotation.pl: cvs update failed: $!\n"; } system("/bin/rm $datadir/gene_association.sgd.diff"); system("/bin/mv $datadir/gene_association.sgd $datadir/gene_association.sgd.old"); exit; ################################################################### ###################################################################### sub setSgdidFeatureLocusAlias { ###################################################################### my $sth = $dbh->prepare(" SELECT F.sgdid, F.feature_no, F.feature_name, F.locus_no, L.locus_name FROM prod.feature F, prod.locus L WHERE F.locus_no = L.locus_no(+) "); $sth->execute; while(my($sgdid, $featNo, $featNm, $locusNo, $locusNm) = $sth->fetchrow()) { $sgdid{"\U$featNm"} = $sgdid; $feature{$featNo} = $featNm; if ($locusNm) { $sgdid{$locusNm} = $sgdid; $locus{$locusNo} = $locusNm; $alias{$locusNm} = $featNm; } } $sth->finish; $sth = $dbh->prepare(" SELECT locus_no, locus_name, sgdid FROM prod.locus WHERE sgdid is not null "); $sth->execute; while(my ($locusNo, $locusNm, $sgdid) = $sth->fetchrow()) { $sgdid{$locusNm} = $sgdid; $locus{$locusNo} = $locusNm; } $sth->finish; $sth = $dbh->prepare(" SELECT A.alias_name, L.locus_name FROM prod.alias A, prod.locus L, prod.locus_alias LA WHERE L.locus_no = LA.locus_no AND LA.alias_no = A.alias_no "); $sth->execute; while(my($aliasNm, $locusNm) = $sth->fetchrow()) { $alias{$locusNm} .= "|$aliasNm"; } $sth->finish; $sth = $dbh->prepare(" SELECT A.alias_name, F.feature_name FROM prod.alias A, prod.feature F WHERE A.feature_no = F.feature_no AND A.feature_no is not null "); $sth->execute; while(my($aliasNm, $featNm) = $sth->fetchrow()) { $alias{"\U$featNm"} .= "|$aliasNm"; } $sth->finish; } ###################################################################### sub setGoevCode { ###################################################################### my $sth = $dbh->prepare(" SELECT go_evidence_no, evidence_code FROM prod.go_evidence "); $sth->execute; while(my ($No, $code) = $sth->fetchrow()) { $goevCode{$No} = $code; } $sth->finish; } ###################################################################### sub setGoAspect { ###################################################################### my $sth = $dbh->prepare(" SELECT goid, go_aspect FROM prod.go "); $sth->execute; while(my($goid, $goAspect) = $sth->fetchrow()) { $goAspect{$goid} = $goAspect; } } ###################################################################### sub END { ###################################################################### if (defined $dbh) { $dbh->disconnect;} } ######################################################################