====== Processing JSON with Perl ======
===== The task =====
If we have an array of JSON morphological analyses, like this:
[{"RDF":{"Annotation":{"about":"urn:TuftsMorphologyService:usu:morpheus","hasTarget":{"Description":{"about":"urn:word:usu"}},"hasBody":[{"resource":"urn:uuid:e8e23662-6141-4b39-8ae2-ff25322dcdba"},{"resource":"urn:uuid:4ff79224-742c-46ff-8871-9f48c16d7d54"}],"title":null,"creator":{"Agent":{"about":"org.perseus:tools:morpheus.v1"}},"created":"29\nDec\n2012\n20:40:04\nGMT","Body":[{"about":"urn:uuid:e8e23662-6141-4b39-8ae2-ff25322dcdba","type":{"resource":"cnt:ContentAsXML"},"rest":{"entry":{"uri":null,"dict":{"hdwd":{"lang":"lat","$":"usus#2"},"pofs":{"order":3,"$":"noun"},"decl":"4th","gend":"masculine"},"infl":{"term":{"lang":"lat","stem":"u_s","suff":"u_"},"pofs":{"order":3,"$":"noun"},"decl":"4th","case":{"order":3,"$":"ablative"},"gend":"masculine","num":"singular","stemtype":"us_us"}}}},{"about":"urn:uuid:4ff79224-742c-46ff-8871-9f48c16d7d54","type":{"resource":"cnt:ContentAsXML"},"rest":{"entry":{"uri":null,"dict":{"hdwd":{"lang":"lat","$":"utor"},"pofs":{"order":3,"$":"noun"},"gend":"neuter"},"infl":{"term":{"lang":"lat","stem":"us","suff":"u_"},"pofs":{"order":3,"$":"noun"},"case":{"order":5,"$":"dative"},"gend":"neuter","mood":"supine","num":"singular","stemtype":"pp4"}}}}]}}},
{"RDF":{"Annotation":{"about":"urn:TuftsMorphologyService:ut:morpheus","hasTarget":{"Description":{"about":"urn:word:ut"}},"hasBody":{"resource":"urn:uuid:72303a08-7a9e-434d-bc75-e170070cdcae"},"title":null,"creator":{"Agent":{"about":"org.perseus:tools:morpheus.v1"}},"created":"29\nDec\n2012\n20:40:04\nGMT","Body":{"about":"urn:uuid:72303a08-7a9e-434d-bc75-e170070cdcae","type":{"resource":"cnt:ContentAsXML"},"rest":{"entry":{"uri":null,"dict":{"hdwd":{"lang":"lat","$":"ut"},"pofs":{"order":7,"$":"adverb"}},"infl":{"term":{"lang":"lat","stem":"ut"},"pofs":{"order":7,"$":"adverb"},"stemtype":"adverb","morph":"indeclform"}}}}}}}]
... and we want to get at specific contents of the JSON objects, e.g. to follow the path to the word that was sent to the service:
'RDF'/'Annotation'/'hasTarget'/'Description'/'about'
or to the lemma of that word:
'RDF'/'Annotation'/'Body'/'rest'/'entry'/'dict'/'hdwd'/'$'
or to the part-of-speech identification:
'RDF'/'Annotation'/'Body'/'rest'/'entry'/'dict'/'pofs'/'$'
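... how do we do this? We also have to account for the possibilities that (1) a word won't be recognized (then there is no 'Body' element at all), or (2) there will be several possible lemmata (then 'Body' is an array of objects instead of a single object, as in the first analysis above).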
===== The script =====
Yesterday and today, the internet, the Llama book, and much experimenting taught us how to do it in Perl. The following script, for whatever it's worth, actually //works// on the three possible responses to a Latin word query (no identification, an unambiguous lemma, an ambiguous one), and on any amount of Morphology Service JSON.
#!/usr/bin/perl -w
# persjson.pl - extract query, lemma and part of speech from Morphology
#               Service JSON and print them as CSV
# usage: perl persjson.pl FILE.json
#
# Output (STDOUT, UTF-8), one CSV row per analysis, every field quoted:
#   "query","lemma","part of speech"    - Body is a single object
#   "query","lemma","VERBUM AMBIGUUM"   - one row per element of a Body array
#   "query","FORMA NON RECOGNITA",""    - there is no Body (word not recognized)
use JSON qw( decode_json ); # From CPAN
use Data::Dumper; # Perl core module
use strict; # Good practice
use warnings; # Good practice
use File::Slurp 'read_file';

# Follow a chain of hash keys starting at $node.
# Returns the value found at the end of the path, or undef (in scalar
# context) as soon as a step is missing or the current node is not a hash.
# Unlike a chained $x->{a}{b}{c}, this never autovivifies intermediate
# hashes and never dies under "strict refs" when a node is a plain string.
sub dig {
    my ($node, @path) = @_;
    for my $key (@path) {
        return unless ref($node) eq 'HASH' && exists $node->{$key};
        $node = $node->{$key};
    }
    return $node;
}

# Build one CSV line (terminated by "\n") from the given field values.
# Every field is wrapped in double quotes; embedded double quotes are
# doubled, and undefined fields become empty strings.
sub csv_row {
    my @fields = @_;
    my @quoted;
    for my $field (@fields) {
        $field = '' unless defined $field;
        $field =~ s/"/""/g;
        push @quoted, '"' . $field . '"';
    }
    return join(',', @quoted) . "\n";
}

# Key paths inside one Body object:
my @lemma_path = ('rest', 'entry', 'dict', 'hdwd', '$');
my @pofs_path  = ('rest', 'entry', 'dict', 'pofs', '$');

# read in the file (fail early, with a hint, when no file name was given):
my $jsonfile = $ARGV[0];
die "usage: perl persjson.pl FILE.json\n" unless defined $jsonfile;
my $json = read_file( $jsonfile );

# decode_json returns character strings, so encode them again on output;
# otherwise non-Latin-1 lemmata cause "Wide character" warnings.
binmode STDOUT, ':encoding(UTF-8)';

# a list variable to hold the result:
my @result;

# Decode the entire JSON:
my $decoded_json = decode_json $json;

# for testing purposes; comment this when done.
# print Dumper $decoded_json;

# The top level is expected to be a list of analyses. A single analysis
# object is accepted too and treated as a one-element list.
my @items;
if (ref($decoded_json) eq 'ARRAY') {
    @items = @$decoded_json;
}
elsif (ref($decoded_json) eq 'HASH') {
    @items = ($decoded_json);
}
else {
    die "$jsonfile: expected a JSON array of analyses at top level\n";
}

for my $item (@items) {
    # get the query (the word that was sent to the service):
    my $query = dig($item, 'RDF', 'Annotation', 'hasTarget', 'Description', 'about');

    # is there a Body element?
    my $body = dig($item, 'RDF', 'Annotation', 'Body');

    # Body is an array (of hashes): several candidate lemmata
    if (ref($body) eq 'ARRAY') {
        for my $candidate (@$body) {
            my $lemma = dig($candidate, @lemma_path);
            # result as CSV, fields: query, lemma, qualification:
            push @result, csv_row($query, $lemma, 'VERBUM AMBIGUUM');
        }
    }
    # Body is a hash: exactly one analysis
    elsif (ref($body) eq 'HASH') {
        my $lemma = dig($body, @lemma_path);
        my $pofs  = dig($body, @pofs_path);
        # result as CSV, fields: query, lemma, part of speech:
        push @result, csv_row($query, $lemma, $pofs);
    }
    # there's no Body (unidentified):
    else {
        push @result, csv_row($query, 'FORMA NON RECOGNITA', '');
    }
}

# put it all together:
print @result;