#!/usr/bin/perl ###################################################################### # Script Name: loadGoPath.pl # Author: Shuai Weng # Date created: March 2002 # Description: This script is used to parse the ontology files # and load or update go child/parent relationship info # in database. # # ###################################################################### use strict; use DBI; my $database = shift; my $ontologyDir = shift; my $logDir = shift; if (!$database || !$ontologyDir || !$logDir) { print "Usage: loadGoPath.pl databaseName ontologyDir logDir\n"; print "Example: loadGoPath.pl sdev /share/ftp/pub/go/ontology/ /share/sdev/logs/\n\n"; exit; } ### To set your database user name my $dbuser = "ABC"; ### To set the password for $dbuser my $passwd = "passwd4ABC"; ### To set your database schema name my $schema = "prod"; ### You may want to change the following DB name to ### whatever database you are using. my $dbh = DBI->connect("dbi:Oracle:$database", $dbuser, $passwd, { RaiseError=>1, AutoCommit=>0 }); ### To set the log file name. Of course you can change the name to ### whatever you like. my $logFile = $logDir."goPath.log"; ### To set the ontology files under $ontologyDir. my @ontologyFile = ('function.ontology', 'process.ontology', 'component.ontology'); ### hashes for storing the info my %parent; my %relationship; my %recordInDB; my %recordInFile; my %goidInDB; ### a few variables for the summary section my ($updateEntryNum, $insertEntryNum, $deleteEntryNum); open(LOG, ">$logFile") || die "Can't open '$logFile' for writing:$!\n"; print LOG `date`, "\n"; print LOG "Retrieving go_path info from database ...\n"; &retrieveGoPathInfoFromDB; print LOG "Parsing ontology files ...\n"; &parseOntology; print LOG "Updating go_path table ...\n"; &updateGoPath; &printSummaryInfo; print LOG `date`; close(LOG); $dbh->disconnect; exit; ################################################################ sub retrieveGoPathInfoFromDB { ################################################################ # This method simply retrieves the go_path info from database # and stores them in a hash for late use. ### change the table name if it is necessary my $sth = $dbh->prepare_cached(" SELECT go_path_no, child_goid, ancestor_goid, generation, ancestor_path, relationship_type FROM $schema.go_path "); $sth->execute; while(my($goPathNo, $childGoid, $ancestorGoid, $generation, $ancestorPath, $relationship) = $sth->fetchrow()) { my $key = $childGoid."\t".$ancestorGoid."\t".$generation."\t". $ancestorPath; $recordInDB{$key} = $relationship || 1; } $sth->finish; ############################################################ $sth = $dbh->prepare_cached(" SELECT goid FROM $schema.go "); $sth->execute; while(my($goid) = $sth->fetchrow()) { $goidInDB{$goid}++; } $sth->finish; } #################################################################### sub parseOntology { #################################################################### # This method is used to parse the ontology files. foreach my $file (@ontologyFile) { my $ontologyFile = $ontologyDir.$file; &readOntologyFile($ontologyFile); } } #################################################################### sub readOntologyFile { #################################################################### # This method simply splits the ontology file into an array of super # groups and calls 'processGroup' method to retrieve info from each # group. my ($ontologyFile) = @_; $ontologyFile =~ /([^\/]+)$/; my $ontologyFileTmp = $logDir.$1; # This is something you may want to change if you are not using # UNIX. system("/usr/ucb/sed 's/\ $ontologyFileTmp"); open(IN, "$ontologyFileTmp") || die "Can't open '$ontologyFileTmp' for reading:$!\n"; my $spaceNum = 1; while() { my @group = split(/\15 {$spaceNum}:/, $_); my $topGoid = shift (@group); $topGoid =~ s/^.*GO:0*([0-9]+).*$/$1/; foreach my $group (@group) { &processGroup($topGoid, $group, $spaceNum); } } close(IN); unlink($ontologyFileTmp); } #################################################################### sub processGroup { #################################################################### # This method will recursively retrieve information for the top node # and all its descendants in the given group. my ($parentGoid, $group, $space) = @_; $space++; my @group = split(/\15 {$space}:/, $group); my $currEntry = shift(@group); my ($goid, $relationship, $isObsolete) = &getRelationshipAndGoid($currEntry); if ($isObsolete || !$goid || !$goidInDB{$goid}) { return; } if (!$relationship{$goid."::".$parentGoid} && $space != 2) { if ($parent{$goid}) { $parent{$goid} .= "\t"; } $parent{$goid} .= $parentGoid; $relationship{$goid."::".$parentGoid} = $relationship; } foreach my $group (@group) { &processGroup($goid, $group, $space); } } #################################################################### sub getRelationshipAndGoid { #################################################################### # This method simply retrieves the goid for the given node. my ($line) = @_; my ($nodeName, $goid, $others) = split(/\ \;\ /, $line); my $relationship; if ($nodeName =~ /^\prepare_cached(" INSERT INTO $schema.go_path(go_path_no, child_goid, ancestor_goid, generation, ancestor_path, relationship_type) VALUES($schema.gopathno_seq.nextval, ?, ?, ?, ?, ?) "); eval { $sth->execute($childGoid, $parentGoid, $generation, $ancestorPath, $relationship); }; if ($@) { print LOG "The new entry for child_goid = $childGoid, ancestor_goid = $parentGoid, generation = $generation, ancestor_path = '$ancestorPath', and relationship_type = '$relationship' can not be inserted into database. See error message below:\n$@\n"; } else { print LOG "The new entry for child_goid = $childGoid, ancestor_goid = $parentGoid, generation = $generation, ancestor_path = '$ancestorPath', and relationship_type = '$relationship' has been inserted into database.\n"; $insertEntryNum++; $dbh->commit; } $sth->finish; } ################################################################ sub updateEntry { ################################################################ # update available entry in database... my ($childGoid, $parentGoid, $generation, $ancestorPath, $relationship) = @_; ### change table name if it is necessary. my $sth = $dbh->prepare_cached(" UPDATE $schema.go_path SET relationship_type = ? WHERE child_goid = ? AND ancestor_goid = ? AND ancestor_path = ? AND generation = ? "); eval { $sth->execute($relationship, $childGoid, $parentGoid, $ancestorPath, $generation); }; if ($@) { print LOG "The entry for child_goid = $childGoid, ancestor_goid = $parentGoid, ancestor_path = '$ancestorPath', and generation = $generation can not be updated. See error message below:\n$@\n"; } else { print LOG "The entry for child_goid = $childGoid, ancestor_goid = $parentGoid, ancestor_path = '$ancestorPath', and generation = $generation has been updated.\n"; $updateEntryNum++; $dbh->commit; } $sth->finish; } ################################################################ sub deleteEntry { ################################################################ # delete old entry from database ... my ($key) = @_; my ($childGoid, $ancestorGoid, $generation, $ancestorPath, $relationship) = split(/\t/, $key); ### change table name if it is necessary. my $sth = $dbh->prepare_cached(" DELETE from $schema.go_path WHERE child_goid = ? AND ancestor_goid = ? AND generation = ? AND ancestor_path = ? "); eval { $sth->execute($childGoid, $ancestorGoid, $generation, $ancestorPath); }; if ($@) { print LOG "The entry for child_goid = $childGoid, ancestor_goid = $ancestorGoid, generation = $generation, and ancestor_path = '$ancestorPath' can not be deleted from go_path table. See error message below:\n$@\n"; } else { print LOG "The entry for child_goid = $childGoid, ancestor_goid = $ancestorGoid, generation = $generation, and ancestor_path = '$ancestorPath' has been deleted from go_path table.\n"; $deleteEntryNum++; $dbh->commit; } $sth->finish; } ################################################################ sub printSummaryInfo { ################################################################ if ($insertEntryNum) { print LOG "\nTotal $insertEntryNum row(s) have been inserted into go_path table.\n"; } if ($updateEntryNum) { print LOG "\nTotal $updateEntryNum row(s) have been updated in go_path table.\n"; } if ($deleteEntryNum) { print LOG "\nTotal $deleteEntryNum row(s) have been deleted from go_path table.\n"; } print LOG "\n"; } ################################################################