#!/usr/bin/perl
######################################################################
# Script Name:  parseOntology.pl
# Author:       Shuai Weng
# Date created: Sept. 2003
#
# Description:  This script is used to parse a given ontology file
#               and write child/parent relationship info into
#               a flat file.
#
# Usage:        parseOntology.pl ontologyFile outFile
#
######################################################################
use strict;
use warnings;

my $ontologyFile = shift;
my $outFile      = shift;

if (!$ontologyFile || !$outFile) {
    print "\nUsage: parseOntology.pl ontologyFile outFile\n";
    print "Example: parseOntology.pl /share/ftp/pub/go/ontology/function.ontology /share/sgd/data/function.ontology.path\n\n";
    exit;
}

# File-level state shared by the subroutines below.
my %parent;          # child goid => tab-separated list of direct parent goids
my %relationship;    # "child::parent" => relationship recorded for that edge
my %nodeName;        # goid => go term (node name)
my $goAspect = '';   # printed in the last column of every output row
my $outFh;           # output handle, opened in writeGoPath, used by findAncestor

#### read info from ontology file
readOntologyFile();

#### write path info into a flat file
writeGoPath();

exit;

####################################################################
sub readOntologyFile {
####################################################################
# This method splits the ontology file into an array of super
# groups and calls 'processGroup' to retrieve info from each group.
#
# The splitting code expects every entry to be introduced by
# "\15" + N spaces + ":" where N is the entry's indentation depth.
# The file is therefore normalised in memory first: line ends are
# replaced by "\15" and a ":" is inserted after the leading spaces
# of every indented line.
#
# NOTE(review): the original did this normalisation with an external
# sed command writing a temporary file; that command was truncated in
# the source this was restored from. The in-process version below is
# derived from what the split patterns require - please confirm it
# against a real ontology file.

    open(my $in, '<', $ontologyFile)
        or die "Can't open '$ontologyFile' for reading:$!\n";

    my $text = '';
    while (my $line = <$in>) {
        $line =~ s/[\r\n]+\z//;          # drop the line terminator
        $line =~ s/^( +)(?=\S)/$1:/;     # mark the entry start after its indent
        $text .= $line . "\15";
    }
    close($in);

    my $spaceNum = 1;
    my @group    = split(/\15 {$spaceNum}:/, $text);

    # Everything before the first depth-1 entry holds the root node;
    # the greedy match keeps the last GO id found in that piece.
    my $topGoid = shift(@group);
    if (!defined $topGoid) {
        return;                          # empty ontology file
    }
    $topGoid =~ s/^.*GO:0*([0-9]+).*$/$1/;

    foreach my $group (@group) {
        processGroup($topGoid, $group, $spaceNum);
    }
    return;
}

####################################################################
sub processGroup {
####################################################################
# This method will recursively retrieve information for the top node
# and all its descendants in the given group.
#
#   $parentGoid - goid of the node the group hangs under
#   $group      - text of the group: its top entry followed by the
#                 entries of all its descendants
#   $space      - indentation depth of the parent; the entries of
#                 this group's children sit one level deeper
#
# Fills %parent and %relationship. A group whose top entry is
# obsolete or has no goid is skipped together with its descendants.

    my ($parentGoid, $group, $space) = @_;

    $space++;

    my @group     = split(/\15 {$space}:/, $group);
    my $currEntry = shift(@group);

    my ($goid, $relationship, $isObsolete)
        = getRelationshipAndGoid($currEntry);

    if ($isObsolete || !$goid) {
        return;
    }

    # No parent is recorded when $space is 2, i.e. for the entries
    # handed over directly by readOntologyFile. 'exists' (rather
    # than the truth of the stored value) keeps an edge whose
    # relationship is empty from being appended more than once.
    my $edge = $goid . "::" . $parentGoid;
    if (!exists $relationship{$edge} && $space != 2) {
        if ($parent{$goid}) {
            $parent{$goid} .= "\t";
        }
        $parent{$goid} .= $parentGoid;
        $relationship{$edge} = $relationship;
    }

    foreach my $childGroup (@group) {
        processGroup($goid, $childGroup, $space);
    }
    return;
}

####################################################################
sub getRelationshipAndGoid {
####################################################################
# This method retrieves the goid and go_term (node name) for the
# given node, together with the node's relationship to its parent.
#
# Returns ($goid, $relationship, $isObsolete), or an empty list when
# the entry carries no GO id. $goid is the numeric id with the
# "GO:" prefix and leading zeros removed (writeGoPath only accepts
# all-digit ids). Also fills %nodeName and may set $goAspect.
#
# NOTE(review): everything after the first relationship test was
# lost from the source this was restored from. The logic below is
# reconstructed from how the return values, %nodeName and $goAspect
# are used elsewhere in this file; the literal values ('part of',
# 'is a', the aspect words) and the obsolete test are assumptions -
# confirm them against the consumer of the output file.

    my ($line) = @_;

    if (!defined $line) {
        return;
    }

    my ($nodeName, $goid) = split(/\ \;\ /, $line);
    if (!defined $nodeName || !defined $goid) {
        return;
    }

    # The first character of the entry encodes the relationship.
    my $relationship = '';
    if ($nodeName =~ /^\</) {
        $relationship = 'part of';
    }
    elsif ($nodeName =~ /^\%/) {
        $relationship = 'is a';
    }
    $nodeName =~ s/^[<%\$]//;
    $nodeName =~ s/\s+\z//;

    # Keep the first GO id of the field, without prefix/leading zeros.
    if ($goid !~ /GO:0*([0-9]+)/) {
        return;
    }
    $goid = $1;

    # presumably obsolete terms are grouped under nodes whose name
    # contains "obsolete"; verify against the ontology file.
    my $isObsolete = ($nodeName =~ /obsolete/i) ? 1 : 0;

    # presumably the aspect is named by the ontology's root term
    # (e.g. "molecular_function" -> "function"); verify.
    if (!$isObsolete
        && $nodeName =~ /^(?:molecular|biological|cellular)[_ ](function|process|component)$/i) {
        $goAspect = lc($1);
    }

    $nodeName{$goid} = $nodeName;

    return ($goid, $relationship, $isObsolete);
}

####################################################################
sub writeGoPath {
####################################################################
# This method writes one row per (child, ancestor) pair into the
# output file: a header line, then for every child its direct
# parents (generation 1) followed by all further ancestors found by
# 'findAncestor'.

    open($outFh, '>', $outFile)
        or die "Can't open '$outFile' for writing:$!\n";

    print $outFh "child_goid\tancestor_goid\tgeneration\tancestor_path\trelationship\tchild_go_term\tchild_go_aspect\n";

    foreach my $childGoid (sort (keys %parent)) {

        if ($childGoid !~ /^[0-9]+$/) {
            next;
        }

        my $childTerm = defined $nodeName{$childGoid}
                      ? $nodeName{$childGoid}
                      : '';

        my @parentGoid = split(/\t/, $parent{$childGoid});

        foreach my $parentGoid (@parentGoid) {

            my $generation   = 1;
            my $relationship = $relationship{$childGoid."::".$parentGoid};
            if (!defined $relationship) {
                $relationship = '';
            }
            my $ancestorPath = $parentGoid;

            print $outFh "$childGoid\t$parentGoid\t$generation\t$ancestorPath\t$relationship\t$childTerm\t$goAspect\n";

            if ($parent{$parentGoid}) {
                findAncestor($childGoid, $parentGoid,
                             ++$generation, $ancestorPath);
            }
        }
    }

    # Buffered write errors only surface when the handle is closed.
    close($outFh)
        or die "Can't close '$outFile' after writing:$!\n";
    return;
}

################################################################
sub findAncestor {
################################################################
# This method is used to retrieve the ancestor goids of a child
# beyond its direct parents, writing one row per ancestor.
#
#   $childGoid    - goid the rows are written for
#   $parentGoid   - node whose parents are visited in this call
#   $i            - generation number of those parents
#   $ancestorList - "::"-joined path from the child's direct parent
#                   down to $parentGoid
#
# Must be called while the handle opened by writeGoPath is open.
# The relationship column is left empty for these rows.

    my ($childGoid, $parentGoid, $i, $ancestorList) = @_;

    my $childTerm = defined $nodeName{$childGoid}
                  ? $nodeName{$childGoid}
                  : '';

    my @ancestorGoid = split(/\t/, $parent{$parentGoid});

    foreach my $ancestorGoid (@ancestorGoid) {

        my $generation   = $i;
        my $ancestorPath = $ancestorList;

        if ($ancestorPath) {
            $ancestorPath .= "::";
        }
        $ancestorPath .= $ancestorGoid;

        print $outFh "$childGoid\t$ancestorGoid\t$generation\t$ancestorPath\t\t$childTerm\t$goAspect\n";

        if ($parent{$ancestorGoid}) {
            findAncestor($childGoid, $ancestorGoid,
                         ++$generation, $ancestorPath);
        }
    }
    return;
}

################################################################