Homepage › Forums › RetroPie Project › Peoples Projects › Improved(?) Scraper Script › Reply To: Improved(?) Scraper Script
12/02/2014 at 21:19
#83283
rev138
Participant
Looks like that failed to attach; here it is:
#!/usr/bin/perl
###
### Point this script at a ROM/directory full of ROMs and it will generate a
### gamelist.xml file for EmulationStation and download the box cover art.
###
### 20141202
###
use strict;
use warnings;
use Cwd qw( getcwd abs_path );
use Digest::MD5;
use File::Path 'make_path';
use File::Temp 'tempfile';
use Getopt::Long;
use IO::File;
use Image::Magick;
use JSON;
use LWP::Simple 'getstore';
use LWP::UserAgent;
use XML::Simple;
# Default settings; every entry can be overridden from the command line.
my $opts = {
    'api-key'   => '7TTRM4MNTIKR2NNAGASURHJOZJ3QXQC5',   # RetroPie's API key
    'api-url'   => 'http://api.archive.vg/2.0',          # VG Archive API
    'downloads' => getcwd() . '/downloaded_images',      # Folder for downloaded box cover art
    'in-file'   => getcwd() . '/gamelist.xml',           # Existing gamelist to merge with
    'out-file'  => getcwd() . '/gamelist.xml',           # Where the merged gamelist is written
};

# Parsed values are stored back into $opts under the option's long name.
# Stop on an unrecognised or malformed option: carrying on with the defaults
# could, for example, overwrite the gamelist the user meant to redirect.
GetOptions(
    $opts,
    'api-key|k=s',
    'api-url|u=s',
    'downloads|d=s',
    'in-file|i=s',
    'out-file|o=s',
    'help|h' => sub { help() },
    'no-images|n',
    'stdout|s',
) or die "Invalid command line; run with --help for usage\n";
# Build the list of ROMs to scrape from every command-line argument (not just
# the first one, so a wildcard already expanded by the shell works too).
my @files;
foreach my $arg ( @ARGV ){
    # an argument naming something that exists is taken literally, so paths
    # containing spaces or glob characters survive; anything else is a pattern
    my @matches = -e $arg ? ( $arg ) : glob( $arg );
    foreach my $path ( @matches ){
        if( -d $path ){
            # a directory stands for the (non-hidden) regular files inside it
            opendir( my $dir, $path ) or die "Can't read directory $path: $!\n";
            push @files, grep { -f $_ } map { "$path/$_" } sort grep { !/^\./ } readdir $dir;
            closedir $dir;
        }
        elsif( -f $path ){
            push @files, $path;
        }
        else {
            warn "Skipping $path: not a regular file\n";
        }
    }
}
my $game_list = {};
my $xs = XML::Simple->new;
# read in the existing gamelist if there is one
if( -r $opts->{'in-file'} ){
    # explicit read mode: the one-argument form would let a file name that
    # starts with '>' or '|' choose the open mode itself
    my $in_file = IO::File->new( $opts->{'in-file'}, 'r' ) or die "Can't read $opts->{'in-file'}: $!\n";
    # ForceArray keeps <game> a list even when the file holds a single entry
    my $existing = $xs->XMLin( $in_file, SuppressEmpty => 1, ForceArray => [ 'game' ], KeyAttr => { 'name' => "+name" } );
    $in_file->close;
    my $games = ref $existing eq 'HASH' ? $existing->{'game'} : undef;
    foreach my $game ( ref $games eq 'ARRAY' ? @$games : () ){
        next unless ref $game eq 'HASH' and defined $game->{'path'} and not ref $game->{'path'};
        # entries whose ROM has disappeared are dropped
        $game_list->{$game->{'path'}} = $game if -e $game->{'path'};
    }
}
my $ua = LWP::UserAgent->new;
# identify ourselves to the API with a custom user agent string
$ua->agent('RetroPie Scraper Browser');
# ensure file paths are absolute, and scrape each ROM only once
my %seen_path;
@files = grep { defined $_ and not $seen_path{$_}++ } map { abs_path( $_ ) } @files;
# PerlMagick reports problems through the string value of a method's return
# value. The number embedded in that message is a severity code: 400 and above
# are errors, lower codes are only warnings.
my $magick_failed = sub {
    my ( $status ) = @_;
    return 0 unless defined $status and "$status" =~ /(\d+)/;
    return $1 >= 400 ? 1 : 0;
};

# Scrape every ROM: look it up by MD5, fetch and scale its box cover, and
# record the result in $game_list under the ROM's absolute path.
foreach my $filename ( @files ){
    # get the MD5 digest for the ROM
    my $md5 = get_md5( $filename );
    next unless defined $md5;

    # look up the ROM by its digest; an HTTP-level failure aborts the run
    my $response = $ua->get( $opts->{'api-url'} . '/Game.getInfoByMD5/xml/' . $opts->{'api-key'} . "/$md5" );
    die $response->code . ' ' . $response->message . "\n" unless $response->is_success;

    my $content = $response->decoded_content;
    next unless defined $content and length $content;
    my $data = XMLin( $content );

    # make sure the API returned data in the format we expect
    next unless ref $data eq 'HASH'
        and ref $data->{'games'} eq 'HASH'
        and ref $data->{'games'}->{'game'} eq 'HASH';
    my $game_data = $data->{'games'}->{'game'};

    print "Found $game_data->{'title'}\n" unless $opts->{'stdout'};

    # accept a plain decimal number of any length; anything else counts as 0
    my $rating = 0;
    if( defined $game_data->{'rating'} and not ref $game_data->{'rating'}
        and $game_data->{'rating'} =~ /\A(?:[0-9]+\.?[0-9]*|\.[0-9]+)\z/ ){
        $rating = $game_data->{'rating'};
    }

    # cover URLs the API actually supplied, best first (XML::Simple turns an
    # empty element into a hash ref, so refs are skipped)
    my @cover_urls = grep { defined $_ and not ref $_ } @{$game_data}{ 'box_front', 'box_front_small' };

    # get the box cover if any
    my $image_file;
    if( not $opts->{'no-images'} and @cover_urls ){
        # download into a private temp file with an unpredictable name
        my ( $temp_fh, $temp_file ) = tempfile( 'es_scraper_XXXXXX', TMPDIR => 1, UNLINK => 1 );
        close $temp_fh;

        # try the full-size cover first, then the small version
        my $downloaded = 0;
        foreach my $url ( @cover_urls ){
            if( getstore( $url, $temp_file ) =~ /^(?:2|3)[0-9]{2}$/ ){
                $downloaded = 1;
                last;
            }
        }
        # if that didn't work, try google
        unless( $downloaded ){
            my $google_result = google_image_search( $ua, $game_data->{'title'} . ' ' . $game_data->{'system_title'} . ' box art' );
            if( defined $google_result and length $google_result ){
                $downloaded = 1 if getstore( $google_result, $temp_file ) =~ /^(?:2|3)[0-9]{2}$/;
            }
        }
        # how about now?
        if( $downloaded ){
            my $target = $opts->{'downloads'} . "/$md5.jpg";
            my $im = Image::Magick->new;
            my $status = $im->Read( $temp_file );
            # scale to 350px width
            $status = $im->AdaptiveResize( geometry => '350x' ) unless $magick_failed->( $status );
            unless( $magick_failed->( $status ) ){
                # write out the scaled image in JPEG format at 50% quality
                make_path( $opts->{'downloads'} );
                $status = $im->Write( filename => $target, compression => 'JPEG', quality => 50 );
            }
            if( $magick_failed->( $status ) ){
                warn "Could not process box cover for $filename: $status\n";
            }
            else {
                # only reference the image once it really exists on disk
                $image_file = $target;
            }
        }
        # remove the temp file whether or not the download worked
        unlink $temp_file;
    }

    # set/overwrite the attributes of the current rom
    my $entry = $game_list->{$filename} ||= {};
    $entry->{'name'} = $game_data->{'title'};
    $entry->{'path'} = $filename;
    $entry->{'image'} = $image_file if defined $image_file;
    $entry->{'description'} = $game_data->{'description'};
    $entry->{'developer'} = $game_data->{'developer'};
    # use the publisher when the API supplies one, else fall back to the developer
    $entry->{'publisher'} = ( defined $game_data->{'publisher'} and not ref $game_data->{'publisher'} )
        ? $game_data->{'publisher'}
        : $game_data->{'developer'};
    $entry->{'genre'} = $game_data->{'genre'};
    $entry->{'rating'} = $rating;
}
# manually printing this because getting XML::Simple to reproduce the same formatting is baffling
unless( $opts->{'stdout'} ){
    # three-argument open: the file name can never be mistaken for an open mode
    open( STDOUT, '>', $opts->{'out-file'} ) or die "Can't write to $opts->{'out-file'}: $!";
}
# sort by display name; entries without a usable name sort first, and the
# path breaks ties so the output order is repeatable
my %sort_name = map {
    my $name = $game_list->{$_}->{'name'};
    ( $_ => ( defined $name and not ref $name ) ? $name : '' )
} keys %$game_list;
print "<gameList>\n";
foreach my $file ( sort { $sort_name{$a} cmp $sort_name{$b} or $a cmp $b } keys %$game_list ){
    print "\t<game>\n";
    # one child element per attribute, in alphabetical order
    foreach my $key ( sort keys %{$game_list->{$file}} ){
        print "\t\t" . $xs->XMLout( { $key => $game_list->{$file}->{$key} }, NoAttr => 1, KeepRoot => 1 );
    }
    print "\t</game>\n";
}
print "</gameList>\n";
unless( $opts->{'stdout'} ){
    # buffered write errors (e.g. a full disk) only show up when closing
    close( STDOUT ) or die "Can't finish writing $opts->{'out-file'}: $!\n";
}
###
# Compute the MD5 digest of a file's contents.
#
# Takes the path of the file to hash and returns the digest as a hexadecimal
# string. Dies with a descriptive message if the file cannot be opened.
sub get_md5 {
    my ( $filename ) = @_;
    # lexical handle and checked open, so a missing or unreadable file is
    # reported by name instead of failing later inside Digest::MD5
    open( my $fh, '<', $filename ) or die "Can't open $filename for hashing: $!\n";
    # ROMs are binary data: hash the raw bytes, never newline-translated text
    binmode( $fh );
    my $ctx = Digest::MD5->new;
    $ctx->addfile( $fh );
    close( $fh );
    return $ctx->hexdigest;
}
# Ask Google's AJAX image search for the first image matching a query.
#
# Takes a user agent object (anything with an LWP::UserAgent-style get method)
# and the search string. Returns the URL of the first result, or nothing
# (undef in scalar context) when the request fails, the body is not valid
# JSON, or there are no results.
#
# NOTE(review): this endpoint has reportedly been retired by Google - confirm
# whether the fallback can still return results.
sub google_image_search {
    my ( $ua, $search_string ) = @_;
    return unless defined $search_string;

    # percent-encode the query as UTF-8 bytes so characters such as '&', '#'
    # and '+' in a game title cannot truncate or corrupt the query string
    my $query = $search_string;
    utf8::encode( $query );
    $query =~ s/([^A-Za-z0-9\-._~])/sprintf( '%%%02X', ord $1 )/ge;

    my $response = $ua->get( 'https://ajax.googleapis.com/ajax/services/search/images?v=1.0&rsz=1&q=' . $query );
    # explicit empty return: falling off the end would hand the caller a
    # defined false value that passes its "defined" check
    return unless $response->is_success;

    # from_json dies on malformed input; treat that the same as "no result"
    my $data = eval { from_json( $response->decoded_content ) };
    return unless ref $data eq 'HASH' and ref $data->{'responseData'} eq 'HASH';

    my $results = $data->{'responseData'}->{'results'};
    return unless ref $results eq 'ARRAY' and @$results and ref $results->[0] eq 'HASH';
    return $results->[0]->{'url'};
}
# Print the usage summary to standard output and end the program.
# Takes no arguments and never returns.
sub help {
    # the heredoc is double-quoted, so each \t below is emitted as a real tab
    print <<"END_OF_USAGE";
usage: es_scraper.pl [OPTIONS] /path/to/roms
options:
\t--api-key\tVG Archive API key
\t--api-url\tVG Archive API URL
\t--downloads\tBox cover art download folder
\t--in-file\tgamelist XML file to read in
\t--no-images\tSkip downloading box covers
\t--out-file\tgamelist XML file to write out
\t--stdout\tWrite to stdout instead of --out-file

All options have sane defaults
END_OF_USAGE
    exit;
}