AppStore のレビューとか順位とか #
仕事でちょっと作ったスクリプトが、一年を経過しエライことになってきて、メンテ不能になってきた。XML からデタラメに引っこ抜いてるので頻繁に色々修正せなあかんしね
で、せめてこんな感じで使えるようにしたいなぁ、と
use strict;
use warnings;
use AppStore::Scraper;
use Data::Dumper;

# 'wait' is the number of seconds the scraper sleeps after finishing each
# app/store pair.
my $scraper = AppStore::Scraper->new(wait => 5);

# Fetch base info, genre rank, total rank and reviews for every app/store pair.
my $info = $scraper->app_info(
    app           => ['322894440','331069023','331225704'],  # numeric app ids
    store         => ['jp','us'],   # store keys known to the module
    lang          => 9,             # sent as the second half of X-Apple-Store-Front
    ident         => 'ipad',        # 'ipad', anything else means iPhone
    review_number => 100,           # maximum number of reviews to return
    review_order  => 4              # 4 = Most Recent
);

# Result is keyed as $info->{<app id>}->{<store key>}.
warn Dumper $info;
仕事中にこんなことやってるのも時間もったいないので、ひとまず家でガワだけ作った。中身は整理しきれてないけど、動いているので、ベタッと貼っとく。長いけど
動いているだけなので、誰かキレイに修正して ><
2010/06/18: bug fix しましたよ
2010/06/23: wait 秒数指定できるようにしたよ
2011/01/04: iPhone / iPad を指定できるようにしたよ
2011/02/03: レビューの取得件数とオーダーを指定できるようにしたよ
2011/02/17: その国のストアで公開されてない App の場合はスルーするようにしたよ
package AppStore::Scraper;
use strict;
use utf8;
use warnings;
use Data::Dumper;
use LWP::UserAgent;
use XML::Simple;
# Constructor.
#
# Accepts either a hash reference or a flat key/value list.  Recognised option:
#   wait - seconds to sleep after each app/store pair in app_info (default 1).
#          An explicit 0 is honoured and disables the pause.
#
# Returns the blessed scraper object with a configured LWP::UserAgent in
# $self->{ua}.
sub new {
    my $class = shift;
    my @args = @_;
    my $args_ref = ref $args[0] eq 'HASH' ? $args[0] : {@args};
    my $self = bless {}, ref $class || $class;

    $self->{__STORE_CODES} = _init_countries();
    $self->{__URL_PREF} = 'http://ax.itunes.apple.com/WebObjects/MZStore.woa/wa/';
    # NOTE(review): the closing ')' is missing from this User-Agent string.
    # It is kept byte-for-byte because the server-side effect of changing it
    # cannot be verified from here.
    $self->{__UA} = 'iTunes/9.1.1 (Macintosh; Intel Mac OS X 10.6.3';

    $self->{ua} = LWP::UserAgent->new();
    $self->{ua}->timeout(30);
    $self->{ua}->env_proxy;
    $self->{ua}->agent( $self->{__UA} );

    # Fall back to 1 second only when no usable value was given, so that
    # wait => 0 is not silently turned into 1.
    my $wait = $args_ref->{wait};
    $self->{__WAIT} = ( defined $wait and length $wait ) ? $wait : '1';

    # Parser handed to XML::Simple through $XML::Simple::PREFERRED_PARSER.
    # Alternatives that were tried:
    # $self->{__XML_PREFERRED_PARSER} = 'XML::SAX::PurePerl';
    $self->{__XML_PREFERRED_PARSER} = 'XML::Parser';
    # $self->{__XML_PREFERRED_PARSER} = 'XML::SAX::Expat';
    # $self->{__XML_PREFERRED_PARSER} = 'XML::LibXML::SAX';

    return $self;
}
# Gather everything known about each requested app in each requested store.
#
# Arguments are passed straight through to app_base_info.  For every
# app/store record it returns, the genre rank, total rank and reviews are
# looked up and merged into the record together with the store's display
# name.  The object then sleeps for the configured wait time.
#
# Returns a hash reference shaped as { <app id> => { <store key> => { ... } } }.
sub app_info {
    my ($self, @args) = @_;

    # get info from app page
    my $base_info = $self->app_base_info(@args);

    my %result;
    for my $app_id ( keys %$base_info ) {
        for my $store_key ( keys %{ $base_info->{$app_id} } ) {
            my $info   = $base_info->{$app_id}->{$store_key};
            my @lookup = ( app => $app_id, info => $info );

            my $genre_rank = $self->genre_rank(@lookup);
            my $total_rank = $self->total_rank(@lookup);
            my $reviews    = $self->app_reviews(@lookup);

            # Start from the base record; the computed fields win on key clashes.
            my %record = %$info;
            $record{genre_rank} = $genre_rank;
            $record{total_rank} = $total_rank;
            $record{reviews}    = $reviews;
            $record{store_name} = $self->{__STORE_CODES}->{$store_key}->{name};
            $result{$app_id}->{$store_key} = \%record;

            sleep $self->{__WAIT};
        }
    }
    return \%result;
}
# Fetch the basic record for every requested app in every requested store.
#
# Arguments (hash reference or key/value list) are checked by _validate_args.
# For each app/store pair two requests are made: the viewSoftware page, and
# then the first http:// URL found in that response.  From the second
# document the following keys are collected when present:
#   genre_id, artist_id, app_name, genre_name, price, review_url, stars
# plus store_code, lang, ident, review_number and review_order copied from
# the validated arguments.
#
# A pair is skipped silently when a fetch fails, when the follow-up URL is
# not on (ax.)itunes.apple.com, or when the document has no 'iTunes' key.
#
# Returns a hash reference shaped as { <app id> => { <store key> => { ... } } }.
sub app_base_info {
    my $self = shift;
    my @args = @_;
    my $args = $self->_validate_args(@args);
    my $ret = {};
    for my $app ( @{$args->{apps}} ) {
        STORE: for my $store ( keys %{$args->{stores}} ) {
            my $tmp;
            $tmp->{store_code}    = $args->{stores}->{$store}->{code};
            $tmp->{lang}          = $args->{lang};
            $tmp->{ident}         = $args->{ident};
            $tmp->{review_number} = $args->{review_number};
            $tmp->{review_order}  = $args->{review_order};

            my $uri = $self->{__URL_PREF} . 'viewSoftware?id=' . $app . '&mt=8';
            my $xmlobj = $self->_get_xml($uri, $tmp->{store_code}, $tmp->{lang});
            # Nothing to follow if the first fetch produced no document.
            next STORE unless defined $xmlobj;

            # The first response is only used to discover the real page URL:
            # take the first quoted http:// string in its dump.
            for my $line ( split /\n/, Dumper( $xmlobj ) ) {
                if ( $line =~ /'(http:\/\/[^']+)'/ ) {
                    $uri = $1;
                    last;
                }
            }
            next STORE unless $uri =~ m|^http://(?:ax\.)?itunes\.apple\.com|;
            $xmlobj = $self->_get_xml($uri, $tmp->{store_code}, $tmp->{lang});

            #
            # genre_id, artist_id, app_name, genre_name
            #
            next STORE unless ref $xmlobj eq 'HASH' and exists $xmlobj->{iTunes};
            $tmp->{genre_id}  = $xmlobj->{genreId};
            $tmp->{artist_id} = $xmlobj->{artistId};

            # Trim both ends independently; a single greedy pattern would
            # leave trailing whitespace behind.
            my $app_name = $xmlobj->{iTunes};
            if ( defined $app_name and not ref $app_name ) {
                $app_name =~ s/^\s+//;
                $app_name =~ s/\s+$//;
            }
            $tmp->{app_name} = $app_name;

            # XML::Simple gives a hash (not an array) when there is exactly
            # one PathElement, so accept both shapes.
            my $path = $xmlobj->{Path};
            my $elements = ref $path eq 'HASH' ? $path->{PathElement} : undef;
            my @path_elements = ref $elements eq 'ARRAY' ? @$elements
                              : ref $elements eq 'HASH'  ? ($elements)
                              :                            ();
            my $genre_id = ( defined $tmp->{genre_id} and not ref $tmp->{genre_id} )
                         ? $tmp->{genre_id} : '';
            for my $c ( @path_elements ) {
                next unless ref $c eq 'HASH' and defined $c->{content};
                # (?!\d) stops a short id from matching the start of a longer one.
                if ( $c->{content} =~ m|/genre/(?:.*/)?id\Q$genre_id\E(?!\d)| ) {
                    $tmp->{genre_name} = $c->{displayName};
                    last;
                }
            }

            #
            # price
            #
            for my $line ( split /\n/, Dumper( $xmlobj->{View} ) ) {
                if ( $line =~ /buyParams.*price=(\d+)/ ) {
                    $tmp->{price} = $1;
                    last;
                }
            }

            # The walks below depend on the exact page layout.  They are
            # wrapped in eval so that an unexpected node type loses only the
            # review/star data instead of aborting the whole run.
            my $treetmp = eval {
                $xmlobj->{View}->{ScrollView}->{VBoxView}->{View}->{MatrixView}->{VBoxView}->[0]->{View}->{MatrixView}->{VBoxView}->[0]->{VBoxView}->[1];
            };
            $treetmp = {} unless ref $treetmp eq 'HASH';

            #
            # review
            #
            if ( eval { $treetmp->{VBoxView}->{HBoxView} } ) {
                $tmp->{review_url} = eval {
                    $treetmp->{VBoxView}->{HBoxView}->[0]->{VBoxView}->{HBoxView}->[0]->{VBoxView}->[0]->{GotoURL}->{url};
                };
            }

            #
            # star
            #
            # stars->[4-$i] is filled from TextView->[$i]; three known page
            # layouts (p1..p3) are tried in turn.
            $tmp->{stars} = [];
            if ( ref $treetmp->{View} eq 'HASH' ) {
                my $s = eval { $treetmp->{View}->{View}->{View}->{VBoxView}->{Test} };
                for my $i ( 0 .. 4 ) {
                    if ( ref $s eq 'ARRAY' ) {
                        eval{$tmp->{stars}->[4-$i] = $s->[1]->{VBoxView}->[0]->{MatrixView}->{VBoxView}->[1]->{TextView}->[$i]->{SetFontStyle}->{content};}; # p1 obsolete ?
                    }
                    elsif ( ref $s eq 'HASH' ) {
                        eval{$tmp->{stars}->[4-$i] = $s->{VBoxView}->[1]->{MatrixView}->{VBoxView}->[1]->{TextView}->[$i]->{SetFontStyle}->{content};}; # p2
                        unless ( $tmp->{stars}->[4-$i] ) {
                            for my $key ( keys %$s ) {
                                next unless ( ref $s->{$key} eq 'HASH' );
                                eval{$tmp->{stars}->[4-$i] = $s->{$key}->{VBoxView}->[0]->{MatrixView}->{VBoxView}->[1]->{TextView}->[$i]->{SetFontStyle}->{content};}; #p3
                                last if $tmp->{stars}->[4-$i];
                            }
                        }
                    }
                }
            }
            # Drop the key when no slot holds a true value (empty or all false).
            delete $tmp->{stars} unless grep { $_ } @{ $tmp->{stars} };

            $ret->{$app}->{$store} = $tmp;
        }
    }
    return $ret;
}
#
# for rank
#
# Rank of an app inside its own genre's top list.
#
# Takes the same arguments as _get_rank and forwards them unchanged.  This
# must stay a real sub call: _get_rank looks at its caller's name to decide
# whether to add the genre filter.
sub genre_rank {
    my ($self, @rank_args) = @_;
    return $self->_get_rank(@rank_args);
}
# Rank of an app in the overall top list.
#
# Takes the same arguments as _get_rank and forwards them unchanged.
sub total_rank {
    my ($self, @rank_args) = @_;
    return $self->_get_rank(@rank_args);
}
# Build the top-list URL for a price class and device.
#
#   $price - true for a paid app, false for a free one
#   $ident - 'ipad' selects the iPad lists; anything else the iPhone lists
#
# popId values: iPhone uses 30 (paid) / 27 (free); the iPad lists sit 17
# higher, i.e. 47 / 44.
sub _rank_uri {
    my ($self, $price, $ident) = @_;

    my $list_id = 27;
    $list_id = 30 if $price;
    $list_id = $list_id + 17 if $ident eq 'ipad';

    return join '', $self->{__URL_PREF}, 'viewTop?id=25209&popId=', $list_id;
}
# Shared implementation behind genre_rank and total_rank.
#
# Arguments (hash reference or key/value list):
#   app   - numeric app id to look for
#   info  - optional record as produced by app_base_info; when absent it is
#           fetched via app_base_info using the 'app' and 'store' arguments
#   store - store key, only used when 'info' is absent
#
# The genre filter is appended only when the sub that called this one has a
# name ending in 'genre_rank'.
#
# Returns the 1-based position of the app among the 'salableAdamId=' entries
# of the top list, or undef when it is not listed or no data is available.
# Exactly one scalar is always returned.
sub _get_rank {
    my $self = shift;
    my @args = @_;
    my $args_ref = ref $args[0] eq 'HASH' ? $args[0] : {@args};

    # caller(1) is empty when this sub is invoked from file level.
    my $caller = (caller(1))[3];
    $caller = '' unless defined $caller;

    my $ret;

    my $info = $args_ref->{info};
    unless ( $info ) {
        my $base_info = $self->app_base_info($args_ref);
        # app_base_info keys its result by lower-cased store name.
        my $store = $args_ref->{store};
        $store = ( defined $store and not ref $store ) ? lc $store : '';
        $info = $base_info->{ $args_ref->{app} }->{ $store };
    }
    # No record (e.g. app not available in that store): nothing to look up.
    return $ret unless ref $info eq 'HASH';

    my $uri = $self->_rank_uri( $info->{price}, $info->{ident} );
    $uri .= '&genreId=' . $info->{genre_id} if $caller =~ /genre_rank$/;

    my $xmlobj = $self->_get_xml($uri, $info->{store_code}, $info->{lang});
    return $ret unless ref $xmlobj eq 'HASH';

    # Guard the walk so that an unexpected node type yields "no rank"
    # instead of a fatal error.
    my $listing = eval { $xmlobj->{View}->{ScrollView}->{VBoxView}->{View} };

    # NOTE(review): the position is derived from the line order of the
    # Data::Dumper output; this presumably relies on the entries living in
    # arrays - confirm against a real response.
    my $position = 0;
    for my $line ( split /\n+/, Dumper($listing) ) {
        next unless $line =~ /salableAdamId=(\d+)/;
        $position++;
        next unless $1 == $args_ref->{app};
        $ret = $position;
        last;
    }
    return $ret;
}
#
# for reviews
#
# Collect reviews for one app in one store.
#
# Arguments (hash reference or key/value list):
#   app   - numeric app id
#   info  - optional record as produced by app_base_info; when absent it is
#           fetched via app_base_info using the 'app' and 'store' arguments
#   store - store key, only used when 'info' is absent
#
# The record supplies review_url (optional), review_order, review_number,
# store_code and lang.
#
# Returns an array reference of { message => ..., date => ... } hashes,
# holding at most review_number entries.
sub app_reviews {
    my $self = shift;
    my @args = @_;
    my $args_ref = ref $args[0] eq 'HASH' ? $args[0] : {@args};
    my $ret = [];

    my $info = $args_ref->{info};
    unless ( $info ) {
        my $base_info = $self->app_base_info($args_ref);
        # app_base_info keys its result by lower-cased store name.
        my $store = $args_ref->{store};
        $store = ( defined $store and not ref $store ) ? lc $store : '';
        $info = $base_info->{ $args_ref->{app} }->{ $store };
    }
    # No record (e.g. app not available in that store): no reviews.
    return $ret unless ref $info eq 'HASH';

    # Same default ordering as _validate_args uses.
    my $order = defined $info->{review_order} ? $info->{review_order} : 1;
    my $limit = $info->{review_number} || 0;

    my $uri = $info->{review_url} || $self->{__URL_PREF} . 'viewContentsUserReviews?pageNumber=0&type=Purple+Software&id='.$args_ref->{app}.'&sortOrdering='.$order;
    $uri =~ s|sortOrdering=\d+|sortOrdering=$order|x;

    # pagination
    if ( $uri =~ /(?:\?|&)pageNumber=\d+/ ) {
        # Every page that does not end the loop adds at least one review, so
        # $limit pages are always enough.  Bounding the loop also guarantees
        # termination if a callee skips the rest of an iteration.
        for my $page ( 0 .. $limit - 1 ) {
            last if scalar(@$ret) >= $limit;
            $uri =~ s|pageNumber=\d+|pageNumber=$page|;
            my $tmp = $self->_app_reviews($uri, $info->{store_code}, $info->{lang});
            last unless ref $tmp eq 'ARRAY' and scalar(@$tmp);
            push @$ret, @$tmp;
        }
    }
    else {
        $ret = $self->_app_reviews($uri, $info->{store_code}, $info->{lang});
    }

    # Trim the overshoot from the last page.
    @$ret = splice @$ret, 0, $limit;
    return $ret;
}
# Fetch one review page and turn it into a list of review hashes.
#
#   $uri        - review page URL
#   $store_code - passed to _get_xml
#   $lang       - passed to _get_xml
#
# The review container may come back as a single hash (one review) or as an
# array of hashes; anything else produces an empty result.
#
# Returns an array reference of { message => ..., date => ... } hashes.
sub _app_reviews {
    my ($self, $uri, $store_code, $lang) = @_;

    my $xmlobj = $self->_get_xml($uri, $store_code, $lang);
    my $treetmp = $xmlobj->{View}->{ScrollView}->{VBoxView}->{View}->{MatrixView}->{VBoxView}->[0]->{VBoxView}->{VBoxView};

    # Normalise both shapes to a plain list of review nodes.
    my @nodes;
    if ( ref $treetmp eq 'ARRAY' ) {
        @nodes = @$treetmp;
    }
    elsif ( ref $treetmp eq 'HASH' ) {
        @nodes = ($treetmp);
    }

    my @reviews;
    for my $node ( @nodes ) {
        my ($date, $mes) = $self->_get_review_message( $node );
        push @reviews, { message => $mes, date => $date };
    }
    return \@reviews;
}
# Pull the date and the message text out of one review node.
#
#   $args - hash reference for a single review
#
# The message is TextView->SetFontStyle->content; an array of lines is joined
# with newlines.  The date is the last "\n<whitespace>"-separated field of the
# second HBoxView's text (or of its last element when that text is an array).
#
# Returns the list ($date, $message); $date is undef when no text was found.
sub _get_review_message {
    my ($self, $args) = @_;

    my $body   = $args->{TextView}->{SetFontStyle}->{content};
    my $byline = $args->{HBoxView}->[1]->{TextView}->{SetFontStyle}->{content} || '';

    # Only the last element matters when the byline came back as a list.
    $byline = $byline->[-1] if ref $byline eq 'ARRAY';

    my $date;
    if ( $byline ) {
        chomp $byline;
        $date = ( split /\n\s+/, $byline )[-1];
    }

    $body = join "\n", @{$body} if ref $body eq 'ARRAY';

    return ($date, $body);
}
#
# common
#
# Check and normalise the caller's options.
#
# Accepts a hash reference or a key/value list with:
#   app           - one numeric app id or an array reference of them (required)
#   store         - one store key or an array reference of them; matched
#                   case-insensitively; all known stores when omitted
#   lang          - digits, default 1
#   ident         - 'ipad', otherwise 'iphone'
#   review_number - digits, default 25
#   review_order  - digits, default 1
#                   (1 Most Helpful, 2 Most Favourable, 3 Most Critical,
#                    4 Most Recent)
#
# Dies when no app is given, when an app id is not numeric, or when a store
# key is unknown.
#
# Returns a hash reference with the keys apps, stores, lang, ident,
# review_number and review_order.
sub _validate_args {
    my ($self, @args) = @_;
    my $opt = ref $args[0] eq 'HASH' ? $args[0] : {@args};

    # Target apps: always handled as a list.
    die 'app code MUST be needed' unless $opt->{app};
    my @app_ids = ref $opt->{app} eq 'ARRAY' ? @{ $opt->{app} } : ( $opt->{app} );
    for my $app_id ( @app_ids ) {
        die 'app code MUST be numerical: ', $app_id unless $app_id =~ m|^\d+$|;
    }

    # Target stores: the requested subset, or every known store.
    my $stores;
    if ( $opt->{store} ) {
        my $wanted = $opt->{store};
        my @names  = ref $wanted eq 'ARRAY' ? @{$wanted} : ($wanted);
        for my $name ( @names ) {
            my $key = lc $name;
            unless ( exists $self->{__STORE_CODES}->{ $key } ) {
                die 'cannot found appstore on "', $key, '"';
            }
            $stores->{ $key } = $self->{__STORE_CODES}->{ $key };
        }
    }
    else {
        $stores = $self->{__STORE_CODES};
    }

    # Digit-only options share one rule: use the value if it is all digits,
    # otherwise fall back to the default.
    my $digits_or = sub {
        my ($key, $default) = @_;
        return $opt->{$key} if exists $opt->{$key} and $opt->{$key} =~ /^\d+$/;
        return $default;
    };

    my $ident = 'iphone';
    $ident = 'ipad' if exists $opt->{ident} and $opt->{ident} eq 'ipad';

    return {
        apps          => [@app_ids],
        stores        => $stores,
        lang          => $digits_or->( 'lang',          1 ),
        ident         => $ident,
        review_number => $digits_or->( 'review_number', 25 ),
        review_order  => $digits_or->( 'review_order',  1 ),
    };
}
# Fetch a URL and parse the response body with XML::Simple.
#
#   $uri   - URL to request
#   $store - store front code
#   $lang  - language code
#
# $store and $lang are sent together as the X-Apple-Store-Front header
# ("<store>-<lang>").
#
# Returns the structure produced by XMLin, or nothing (undef in scalar
# context) after a warning when the request fails, the response is not XML,
# or the body cannot be parsed.
#
# Failures are reported with a plain return rather than loop control: a
# 'next' here would act on whatever loop the caller happens to be in, and is
# a fatal error when there is none.
sub _get_xml {
    my $self = shift;
    my ($uri,$store,$lang) = @_;
    $self->{ua}->default_header('X-Apple-Store-Front' => $store . '-' . $lang);
    my $res = $self->{ua}->get( $uri );

    # Error Check
    unless ( $res->is_success ) {
        warn 'request failed: ', $uri, ': ', $res->status_line, ': ', $store, '-', $lang;
        return;
    }
    # The header may be absent altogether.
    my $content_type = $res->headers->header('Content-Type');
    $content_type = '' unless defined $content_type;
    unless ( $content_type =~ m|/xml| ) {
        warn 'content is not xml: ', $uri, ': ', $content_type, ': ', $store, '-', $lang;
        return;
    }

    local $XML::Simple::PREFERRED_PARSER = $self->{__XML_PREFERRED_PARSER};
    # XMLin dies on malformed input; treat that like any other failed fetch.
    my $xmlobj = eval { XMLin( $res->content ) };
    if ( $@ ) {
        warn 'cannot parse xml: ', $uri, ': ', $@;
        return;
    }
    return $xmlobj;
}
# Build the table of known stores.
#
# Returns a hash reference keyed by lower-case two-letter store key; each
# value is { name => <display name>, code => <store front code> }.  The code
# is what callers send in the X-Apple-Store-Front header.
#
# New Zealand is listed under 'nz' (its ISO 3166-1 code).  The former key
# 'nu' is kept as an alias so existing callers keep working; note that a run
# over all stores therefore visits New Zealand under both keys.
sub _init_countries {
    my @stores = (
        # key   display name            store front code
        [ 'jp', 'Japan',                143462 ],
        [ 'us', 'United States',        143441 ],
        [ 'ar', 'Argentine',            143505 ],
        [ 'au', 'Australia',            143460 ],
        [ 'be', 'Belgium',              143446 ],
        [ 'br', 'Brazil',               143503 ],
        [ 'ca', 'Canada',               143455 ],
        [ 'cl', 'Chile',                143483 ],
        [ 'cn', 'China',                143465 ],
        [ 'co', 'Colombia',             143501 ],
        [ 'cr', 'Costa Rica',           143495 ],
        [ 'hr', 'Croatia',              143494 ],
        [ 'cz', 'Czech Republic',       143489 ],
        [ 'dk', 'Denmark',              143458 ],
        [ 'de', 'Germany',              143443 ],
        [ 'sv', 'El Salvador',          143506 ],
        [ 'es', 'Spain',                143454 ],
        [ 'fi', 'Finland',              143447 ],
        [ 'fr', 'France',               143442 ],
        [ 'gr', 'Greece',               143448 ],
        [ 'gt', 'Guatemala',            143504 ],
        [ 'hk', 'Hong Kong',            143463 ],
        [ 'hu', 'Hungary',              143482 ],
        [ 'in', 'India',                143467 ],
        [ 'id', 'Indonesia',            143476 ],
        [ 'ie', 'Ireland',              143449 ],
        [ 'il', 'Israel',               143491 ],
        [ 'it', 'Italia',               143450 ],
        [ 'kr', 'Korea',                143466 ],
        [ 'kw', 'Kuwait',               143493 ],
        [ 'lb', 'Lebanon',              143497 ],
        [ 'lu', 'Luxembourg',           143451 ],
        [ 'my', 'Malaysia',             143473 ],
        [ 'mx', 'Mexico',               143468 ],
        [ 'nl', 'Nederland',            143452 ],
        [ 'nz', 'New Zealand',          143461 ],
        [ 'no', 'Norway',               143457 ],
        [ 'at', 'Osterreich',           143445 ],
        [ 'pk', 'Pakistan',             143477 ],
        [ 'pa', 'Panama',               143485 ],
        [ 'pe', 'Peru',                 143507 ],
        [ 'ph', 'Philippines',          143474 ],
        [ 'pl', 'Poland',               143478 ],
        [ 'pt', 'Portugal',             143453 ],
        [ 'qa', 'Qatar',                143498 ],
        [ 'ro', 'Romania',              143487 ],
        [ 'ru', 'Russia',               143469 ],
        [ 'sa', 'Saudi Arabia',         143479 ],
        [ 'ch', 'Switzerland',          143459 ],
        [ 'sg', 'Singapore',            143464 ],
        [ 'sk', 'Slovakia',             143496 ],
        [ 'si', 'Slovenia',             143499 ],
        [ 'za', 'South Africa',         143472 ],
        [ 'lk', 'Sri Lanka',            143486 ],
        [ 'se', 'Sweden',               143456 ],
        [ 'tw', 'Taiwan',               143470 ],
        [ 'th', 'Thailand',             143475 ],
        [ 'tr', 'Turkey',               143480 ],
        [ 'ae', 'United Arab Emirates', 143481 ],
        [ 'uk', 'United Kingdom',       143444 ],
        [ 've', 'Venezuela',            143502 ],
        [ 'vn', 'Vietnam',              143471 ],
    );

    my %store_for;
    for my $row ( @stores ) {
        my ($key, $name, $code) = @$row;
        $store_for{$key} = { name => $name, code => $code };
    }

    # Legacy alias: 'nu' was the key used for New Zealand before 'nz'.
    $store_for{nu} = { %{ $store_for{nz} } };

    return \%store_for;
}
1;
[うめゆき] (2010-06-19 23:37:06)
本題とそれますが、縦長のキャプチャってどうやってとってるんですの?
[ひげまる] (2010-06-20 01:48:11)
ん? 縦長キャプチャって、どれのことですの?
[うめゆき] (2010-06-20 10:00:08)
609行ものプログラムの画面をどうやってとっているのかしらと思いまして。
コマンド+シフト+4 space なんですねー8-D
[ひげまる] (2010-06-20 11:14:04)
あー、そういうことか
画像じゃないですよ? つ http://code.google.com/p/syntaxhighlighter/
[うめゆき] (2010-06-20 16:13:27)
お!参考になります。