====== Processing JSON with Perl ====== ===== The task ===== If we have an array of JSON morphological analyses, like this: [{"RDF":{"Annotation":{"about":"urn:TuftsMorphologyService:usu:morpheus","hasTarget":{"Description":{"about":"urn:word:usu"}},"hasBody":[{"resource":"urn:uuid:e8e23662-6141-4b39-8ae2-ff25322dcdba"},{"resource":"urn:uuid:4ff79224-742c-46ff-8871-9f48c16d7d54"}],"title":null,"creator":{"Agent":{"about":"org.perseus:tools:morpheus.v1"}},"created":"29\nDec\n2012\n20:40:04\nGMT","Body":[{"about":"urn:uuid:e8e23662-6141-4b39-8ae2-ff25322dcdba","type":{"resource":"cnt:ContentAsXML"},"rest":{"entry":{"uri":null,"dict":{"hdwd":{"lang":"lat","$":"usus#2"},"pofs":{"order":3,"$":"noun"},"decl":"4th","gend":"masculine"},"infl":{"term":{"lang":"lat","stem":"u_s","suff":"u_"},"pofs":{"order":3,"$":"noun"},"decl":"4th","case":{"order":3,"$":"ablative"},"gend":"masculine","num":"singular","stemtype":"us_us"}}}},{"about":"urn:uuid:4ff79224-742c-46ff-8871-9f48c16d7d54","type":{"resource":"cnt:ContentAsXML"},"rest":{"entry":{"uri":null,"dict":{"hdwd":{"lang":"lat","$":"utor"},"pofs":{"order":3,"$":"noun"},"gend":"neuter"},"infl":{"term":{"lang":"lat","stem":"us","suff":"u_"},"pofs":{"order":3,"$":"noun"},"case":{"order":5,"$":"dative"},"gend":"neuter","mood":"supine","num":"singular","stemtype":"pp4"}}}}]}}}, {"RDF":{"Annotation":{"about":"urn:TuftsMorphologyService:ut:morpheus","hasTarget":{"Description":{"about":"urn:word:ut"}},"hasBody":{"resource":"urn:uuid:72303a08-7a9e-434d-bc75-e170070cdcae"},"title":null,"creator":{"Agent":{"about":"org.perseus:tools:morpheus.v1"}},"created":"29\nDec\n2012\n20:40:04\nGMT","Body":{"about":"urn:uuid:72303a08-7a9e-434d-bc75-e170070cdcae","type":{"resource":"cnt:ContentAsXML"},"rest":{"entry":{"uri":null,"dict":{"hdwd":{"lang":"lat","$":"ut"},"pofs":{"order":7,"$":"adverb"}},"infl":{"term":{"lang":"lat","stem":"ut"},"pofs":{"order":7,"$":"adverb"},"stemtype":"adverb","morph":"indeclform"}}}}}}}] ... 
and we want to get at specific contents of JSON objects, e. g. to traverse the route to the word which was sent to the service: 'RDF'/'Annotation'/'hasTarget'/'Description'/'about' or to the lemma of that word: 'RDF'/'Annotation'/'Body'/'rest'/'entry'/'dict'/'hdwd'/'$' or to the part-of-speech identification: 'RDF'/'Annotation'/'Body'/'rest'/'entry'/'dict'/'pofs'/'$' ... how to do this? We have also to account for the possibilities that (1) a word won't be recognized, (2) that there'll be several probable lemmata. ===== The script ===== Yesterday and today, internet, the Llama book, and much experimenting taught us how to do it in Perl. The following script, for whatever it's worth, actually //works// on three possible responses to a Latin word query (no identification, an unambiguous lemma, an ambiguous one), and on any amount of Morphology Service JSON.

#!/usr/bin/perl -w
# persjson.pl - turn Morphology Service JSON into CSV rows.
#
# usage: perl persjson.pl FILE.json
#
# Input:  a file holding a JSON array of annotation objects (a single
#         annotation object is accepted as well).
# Output: one CSV row per analysis on STDOUT, with the fields
#           query, lemma, qualification
#         where "qualification" is
#           - the part of speech, when Body is a single object,
#           - the literal VERBUM AMBIGUUM, once per entry, when Body is an array,
#           - empty, with the lemma replaced by FORMA NON RECOGNITA,
#             when there is no Body at all.
# Dies:   when no file name is given; read_file and decode_json raise their
#         own errors for unreadable files and malformed JSON.
use strict;                     # Good practice
use warnings;                   # Good practice
use JSON qw( decode_json );     # From CPAN
use Data::Dumper;               # Perl core module (only for the debug line below)
use File::Slurp 'read_file';    # From CPAN

# Quote a single CSV field. An undefined value becomes an empty field and
# embedded double quotes are doubled, so a row always stays well-formed.
sub _csv_field {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/"/""/g;
    return '"' . $value . '"';
}

# Build one complete CSV row (including the trailing newline) from its fields.
sub _csv_row {
    my @fields = @_;
    return join( ',', map { _csv_field($_) } @fields ) . "\n";
}

# Return the lemma stored in one Body entry (undef when the entry has none).
sub _lemma_of {
    my ($body) = @_;
    return $body->{'rest'}{'entry'}{'dict'}{'hdwd'}{'$'};
}

# The file name is mandatory; fail with a usage hint instead of a confusing
# error from read_file.
my $jsonfile = $ARGV[0];
die "usage: perl persjson.pl FILE.json\n" unless defined $jsonfile;

# read in the file:
my $json = read_file($jsonfile);

# Decode the entire JSON:
my $decoded_json = decode_json $json;

# for testing purposes; uncomment to inspect the decoded structure.
# print Dumper $decoded_json;

# decode_json returns character strings, so encode them again on the way out;
# otherwise non-ASCII lemmata are written incorrectly.
binmode STDOUT, ':encoding(UTF-8)';

# The top level is normally a list of annotations, not a hash; wrap a lone
# annotation so that both shapes are handled by the same loop.
my @items = ref($decoded_json) eq 'ARRAY' ? @$decoded_json : ($decoded_json);

# a list variable to hold the result:
my @result;

for my $item (@items) {

    # Anything that is not an object cannot be an annotation; skip it.
    next unless ref($item) eq 'HASH';

    # get the query:
    my $query = $item->{'RDF'}{'Annotation'}{'hasTarget'}{'Description'}{'about'};

    # is there a Body element?
    my $body = $item->{'RDF'}{'Annotation'}{'Body'};

    # Body is a non-empty array (of hashes): several candidate lemmata.
    if ( ref($body) eq 'ARRAY' && @$body ) {
        for my $entry (@$body) {
            push @result, _csv_row( $query, _lemma_of($entry), 'VERBUM AMBIGUUM' );
        }
    }

    # Body is a hash: exactly one lemma, report its part of speech.
    elsif ( ref($body) eq 'HASH' ) {
        push @result,
          _csv_row( $query, _lemma_of($body),
            $body->{'rest'}{'entry'}{'dict'}{'pofs'}{'$'} );
    }

    # No Body (or an empty list of analyses): the form was not identified.
    else {
        push @result, _csv_row( $query, 'FORMA NON RECOGNITA', '' );
    }
}

# put it all together:
print @result;