Homepage Forums RetroPie Project Peoples Projects Improved(?) Scraper Script Reply To: Improved(?) Scraper Script

#83283
rev138
Participant

Looks like that failed to attach, here it is:

#!/usr/bin/perl

###
### Point this script at a ROM/directory full of ROMs and it will generate a 
### gamelist.xml file for EmulationStation and download the box cover art.
###
### 20141202 
###

use strict;
use warnings;

use Cwd qw( getcwd abs_path );
use Digest::MD5;
use File::Path 'make_path';
use File::Temp 'tempfile';
use Getopt::Long;
use Image::Magick;
use IO::File;
use JSON;
use LWP::Simple 'getstore';
use LWP::UserAgent;
use XML::Simple;

# defaults for every option that takes a value; GetOptions overwrites these
# entries in place and adds 'no-images' / 'stdout' when those flags are given
my $opts = {
        'api-key'       => '7TTRM4MNTIKR2NNAGASURHJOZJ3QXQC5',  # RetroPie's API key
        'api-url'       => 'http://api.archive.vg/2.0',         # VG Archive API
        'downloads'     => getcwd . '/downloaded_images',       # Folder for downloaded box cover art
        'in-file'       => getcwd . '/gamelist.xml',
        'out-file'      => getcwd . '/gamelist.xml',
};

# parse the command line into $opts; --help has its own handler, which prints
# the usage text and exits. GetOptions returns false when it meets an unknown
# or malformed option, so stop instead of scraping with settings the user did
# not intend.
GetOptions(
        $opts,
        'api-key|k=s',
        'api-url|u=s',
        'downloads|d=s',
        'in-file|i=s',
        'out-file|o=s',
        'help|h'        => \&help,
        'no-images|n',
        'stdout|s',
) or die "Invalid command line; run with --help for usage\n";

# build the list of ROM files from every command line argument (the shell may
# already have expanded a wildcard into many arguments):
#   - a directory contributes the plain files directly inside it
#   - an existing path is taken literally, so names containing spaces or glob
#     characters survive
#   - anything else is treated as a glob pattern
my @files;

foreach my $arg ( @ARGV ){
        if( -d $arg ){
                opendir( my $dh, $arg ) or die "Can't read directory $arg: $!\n";
                push @files, sort( grep { -f $_ } map { "$arg/$_" } readdir $dh );
                closedir( $dh );
        }
        elsif( -e $arg ){
                push @files, $arg;
        }
        else {
                push @files, glob( $arg );
        }
}

my $game_list = {};
my $xs = XML::Simple->new;

# read in the existing gamelist if there is one
if( -r $opts->{'in-file'} ){
        my $in_file = IO::File->new( $opts->{'in-file'} ) or die "Can't read $opts->{'in-file'}: $!\n";

        # ForceArray keeps 'game' an array ref even when the file holds a single entry
        my $existing = $xs->XMLin( $in_file, SuppressEmpty => 1, ForceArray => [ 'game' ], KeyAttr => { 'name' => "+name" } );

        # an empty <gameList> does not parse to a hash, so check before dereferencing
        if( ref $existing eq 'HASH' and ref $existing->{'game'} eq 'ARRAY' ){
                foreach my $game ( @{$existing->{'game'}} ){
                        next unless ref $game eq 'HASH' and defined $game->{'path'};

                        # entries whose ROM no longer exists are dropped
                        $game_list->{$game->{'path'}} = $game if -e $game->{'path'};
                }
        }
}

my $ua = LWP::UserAgent->new;

# ;)
$ua->agent('RetroPie Scraper Browser');

# ensure file paths are absolute (abs_path yields undef for paths it cannot resolve)
@files = grep { defined $_ } map { abs_path( $_ ) } @files;

# look every ROM up in the API by MD5 digest, fetch its box cover and
# set/overwrite its entry in $game_list
foreach my $filename ( @files ){
        # get the MD5 digest for the ROM
        my $md5 = get_md5( $filename );
        # look up the ROM by its digest
        my $response = $ua->get( $opts->{'api-url'} . '/Game.getInfoByMD5/xml/' . $opts->{'api-key'} . "/$md5" );

        # a failed API request aborts the run before the gamelist is rewritten
        die $response->code . ' ' . $response->message . "\n" unless $response->is_success;

        my $data = XMLin( $response->decoded_content );

        # make sure the API returned data in the format we expect; ROMs it does
        # not describe keep whatever entry they already had
        next unless ref $data eq 'HASH'
                and ref $data->{'games'} eq 'HASH'
                and ref $data->{'games'}->{'game'} eq 'HASH';

        my $game_data = $data->{'games'}->{'game'};
        my $rating = 0;
        my $image_file;

        print "Found $game_data->{'title'}\n" unless $opts->{'stdout'};

        # accept whole and decimal ratings ("7", "7.5", ".5"); anything else stays 0
        if( defined $game_data->{'rating'} and $game_data->{'rating'} =~ /\A(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/ ){
                $rating = $game_data->{'rating'};
        }

        # candidate cover URLs, large version first; only plain strings count,
        # which skips the hash refs this code has always guarded against
        my @cover_urls = grep { defined $_ and not ref $_ } ( $game_data->{'box_front'}, $game_data->{'box_front_small'} );

        # get the box cover if any (never when --no-images was given)
        if( @cover_urls and not $opts->{'no-images'} ){
                # download into a private temp file rather than a predictable
                # /tmp name derived from remote data
                my ( $temp_fh, $temp_file ) = tempfile( 'es_scraper_XXXXXX', TMPDIR => 1, UNLINK => 1 );
                close( $temp_fh );

                # getstore returns the HTTP status code; only 2xx counts as a download
                my $response_code = '';

                foreach my $url ( @cover_urls ){
                        $response_code = getstore( $url, $temp_file );
                        last if $response_code =~ /^2[0-9]{2}$/;
                }

                # if that didn't work, try google
                if( $response_code !~ /^2[0-9]{2}$/ ){
                        my $search = join ' ', ( grep { defined $_ and not ref $_ } ( $game_data->{'title'}, $game_data->{'system_title'} ) ), 'box art';
                        my $google_result = google_image_search( $ua, $search );
                        $response_code = getstore( $google_result, $temp_file ) if defined $google_result and length $google_result;
                }

                # how about now?
                if( $response_code =~ /^2[0-9]{2}$/ ){
                        my $im = Image::Magick->new;

                        # PerlMagick methods return a message instead of dying
                        my $status = $im->Read( $temp_file );
                        warn "Problem reading box cover for $filename: $status\n" if defined $status and "$status";

                        # the object holds no images when the read failed outright
                        if( @$im ){
                                # post-processed file location
                                my $target = $opts->{'downloads'} . "/$md5.jpg";

                                make_path( $opts->{'downloads'} );
                                # scale to 350px width
                                $im->AdaptiveResize( geometry => '350x' );
                                # write out the scaled image in JPEG format at 50% quality
                                $status = $im->Write( filename => $target, compression => 'JPEG', quality => 50 );
                                warn "Problem writing $target: $status\n" if defined $status and "$status";

                                # only reference the cover when a file really exists
                                $image_file = $target if -s $target;
                        }
                }

                # remove the temp file whether or not a cover was produced
                unlink $temp_file;
        }

        # set/overwrite the attributes of the current rom
        $game_list->{$filename}->{'name'} = $game_data->{'title'};
        $game_list->{$filename}->{'path'} = $filename;
        $game_list->{$filename}->{'image'} = $image_file if defined $image_file;
        $game_list->{$filename}->{'description'} = $game_data->{'description'};
        $game_list->{$filename}->{'developer'} = $game_data->{'developer'};
        # NOTE(review): assumes the API response carries a 'publisher' field - confirm;
        # the developer is used when it does not, which is what was always stored before
        $game_list->{$filename}->{'publisher'} = defined $game_data->{'publisher'} ? $game_data->{'publisher'} : $game_data->{'developer'};
        $game_list->{$filename}->{'genre'} = $game_data->{'genre'};
        $game_list->{$filename}->{'rating'} = $rating;
}

# manually printing this because getting XML::Simple to reproduce the same formatting is baffling
unless( $opts->{'stdout'} ){
        # three-argument open: the file name is never parsed for a mode
        open( STDOUT, '>', $opts->{'out-file'} ) or die "Can't write to $opts->{'out-file'}: $!\n";
}

# sort key for each entry: its name, or '' when an entry has none
my %sort_name = map { $_ => ( defined $game_list->{$_}->{'name'} ? "$game_list->{$_}->{'name'}" : '' ) } keys %$game_list;

print "<gameList>\n";

# entries are ordered by name, then by path so equal names sort the same way every run
foreach my $file ( sort { $sort_name{$a} cmp $sort_name{$b} or $a cmp $b } keys %$game_list ){
        print "\t<game>\n";

        # one child element per attribute, in alphabetical order
        foreach my $key ( sort keys %{$game_list->{$file}} ){
                print "\t\t" . $xs->XMLout( { $key => $game_list->{$file}->{$key} }, NoAttr => 1, KeepRoot => 1 );
        }

        print "\t</game>\n";
}

print "</gameList>\n";

# buffered write errors (e.g. a full disk) only surface when the handle is closed
close( STDOUT ) or die "Can't finish writing the gamelist: $!\n";

###

# Return the MD5 digest of a file's contents as a lowercase hex string.
#
#   $filename - path of the file to digest
#
# Dies with the file name and OS error when the file cannot be opened.
sub get_md5 {
        my ( $filename ) = @_;

        open( my $fh, '<', $filename ) or die "Can't read $filename: $!\n";
        # digest the raw bytes; no newline or encoding translation
        binmode( $fh );

        my $md5 = Digest::MD5->new->addfile( $fh )->hexdigest;

        close( $fh );

        return $md5;
}

# Ask the Google AJAX image search endpoint for one image matching a query.
#
#   $ua            - object with an LWP::UserAgent style get() method
#   $search_string - free-text query (e.g. game title plus "box art")
#
# Returns the URL of the first result, or undef (empty list in list context)
# when the request fails or the reply holds no usable result.
#
# NOTE(review): this v1.0 AJAX search endpoint is believed to have been
# retired by Google - confirm it still answers before relying on it.
sub google_image_search {
        my ( $ua, $search_string ) = @_;

        return unless defined $search_string;

        # percent-encode the query so spaces, '&', '#' and non-ASCII characters
        # cannot break or truncate the request URL
        my $query = $search_string;
        utf8::encode( $query );
        $query =~ s/([^A-Za-z0-9\-_.~])/sprintf( '%%%02X', ord $1 )/ge;

        my $response = $ua->get( 'https://ajax.googleapis.com/ajax/services/search/images?v=1.0&rsz=1&q=' . $query );
        return unless $response->is_success;

        my $data = from_json( $response->decoded_content );

        # check every level of the reply before dereferencing it
        return unless ref $data eq 'HASH' and ref $data->{'responseData'} eq 'HASH';

        my $results = $data->{'responseData'}->{'results'};
        return unless ref $results eq 'ARRAY' and @$results and ref $results->[0] eq 'HASH';

        return $results->[0]->{'url'};
}

# Print the usage summary to STDOUT and end the program.
# Arguments passed by the option parser are ignored.
sub help {
        # one element per line of output
        my @usage = (
                "usage: es_scraper.pl [OPTIONS] /path/to/roms",
                "options:",
                "\t--api-key\tVG Archive API key",
                "\t--api-url\tVG Archive API URL",
                "\t--downloads\tBox cover art download folder",
                "\t--in-file\tgamelist XML file to read in",
                "\t--no-images\tSkip downloading box covers",
                "\t--out-file\tgamelist XML file to write out",
                "\t--stdout\tWrite to stdout instead of --out-file",
                "",
                "All options have sane defaults",
        );

        print map { "$_\n" } @usage;

        exit;
}