Browse Source

Initial checkin of orangelogin.pl, a script to scrape the Orange

website and download PDF bills.
master
Gavan Fantom 14 years ago
commit
e116955e5c
  1. 145
      orangelogin.pl

145
orangelogin.pl

@@ -0,0 +1,145 @@
#!/usr/pkg/bin/perl
use WWW::Mechanize;
# User agent shared, as a package variable, with the helper subs defined
# later in this file.  autocheck is requested explicitly so that a failed
# request or a missing link/form aborts the run instead of being ignored.
our $mech = WWW::Mechanize->new( autocheck => 1 );

# The only command-line argument is the path of a "name = value" config file.
my $configfile = $ARGV[0];
error("Usage: $0 configfile") unless defined($configfile);

# Parse the config file into %options.  Three-argument open with a lexical
# handle keeps characters such as '>' or '|' in the path from being
# interpreted as an open mode.
# NOTE(review): everything after a '#' is discarded as a comment, so a value
# containing '#' (e.g. a password) cannot be expressed -- confirm acceptable.
my %options;
open(my $config_fh, '<', $configfile)
    or die "Cannot open $configfile: $!";
while (my $line = <$config_fh>) {
    chomp $line;                 # no newline
    $line =~ s/#.*//;            # no comments
    $line =~ s/^\s+//;           # no leading white
    $line =~ s/\s+$//;           # no trailing white
    next unless length $line;    # anything left?

    # Split on the first '=' only, so the value itself may contain '='.
    my ($var, $value) = split(/\s*=\s*/, $line, 2);
    $options{$var} = $value;
}
close $config_fh;
# Pull the three mandatory settings out of %options in one go, then refuse
# to continue if any of them was absent from the config file.
($username, $password, $downloaddir)
    = @options{ 'username', 'password', 'downloaddir' };
defined($username)    or die "username not set";
defined($password)    or die "password not set";
defined($downloaddir) or die "downloaddir not set";
# Start at the public homepage and follow the link into the account area.
print "Fetching homepage\n";
$mech->get("https://www.orange.co.uk/");
$mech->follow_link( text_regex => qr/mobile account/ );

# The first form on the resulting page takes the configured credentials.
print "Logging in\n";
my %credentials = (
    txtMSISDN   => $username,
    txtPassword => $password,
);
$mech->submit_form( form_number => 1, fields => \%credentials );

# The next page carries a form that JavaScript would submit on load;
# submit it by hand with its fields left as they are.
print "Following intermediate login form\n";
$mech->submit_form( form_number => 1, fields => {} );

print "Following view your bills link\n";
$mech->follow_link( text_regex => qr/view your bills/ );

# Work through any intermediate "processing" page before reading the links.
follow_processing();
# Each "download PDF" link on the current page is treated as one invoice.
my @links = $mech->find_all_links( text_regex => qr/download PDF/ );
for my $link (@links) {
    # Use the absolute URL: downloadbill() navigates $mech to other pages,
    # so a relative href could be resolved against the wrong page for every
    # invoice after the first.
    my $url = $link->url_abs();

    # The invoice number in the query string doubles as the local file name.
    my ($invoice) = $url =~ /leg_invoice=(\d+)/;
    unless (defined $invoice) {
        # Without a number the target would collapse to "<dir>/.pdf".
        print "Skipping link without invoice number: $url\n";
        next;
    }

    my $filename = $downloaddir . '/' . $invoice . ".pdf";
    if (-e $filename) {
        # Already fetched on an earlier run.
        print "Skipping download of invoice $invoice\n";
        next;
    }

    # print "Downloading invoice $invoice to $filename from $url\n";
    print "Downloading invoice $invoice to $filename\n";
    downloadbill($url, $filename);
    #$mech->mirror($url, $filename);
}
#$mech->follow_link( text_regex => qr/log out/ );
#print $mech->content();
#$mech->dump_forms( undef, $absolute );
#$mech->dump_links( undef, $absolute );
# follow_processing()
#
# Handles an intermediate "processing" page.  While the page currently held
# by $mech contains a script assignment of the form
#     var sURL = "...";
# the quoted target is fetched, relative to the directory of the current
# page, with a one-second pause before each fetch and at most 10 fetches.
#
# Takes no arguments and returns nothing.  It returns as soon as the current
# page no longer contains such an assignment (leaving $mech on that page),
# or after the 10th fetch if the page is still redirecting, in which case a
# warning is written to STDERR.
sub follow_processing {
    my $max_attempts = 10;
    my $attempts     = 0;

    while (1) {
        my ($url) = $mech->content() =~ /var sURL = "(.*)";/;

        unless (defined $url) {
            # Only announce completion if we actually had to wait.
            print "Processing finished.\n" if $attempts > 0;
            return;
        }

        if ($attempts >= $max_attempts) {
            print STDERR "Still processing after $max_attempts attempts; giving up.\n";
            return;
        }

        $attempts++;
        sleep(1);

        # The target is lifted from page source, so undo entity-escaped '&'.
        $url =~ s/&amp;/&/g;
        # print "Attempt $attempts. Found URL: $url\n";
        print "Waiting for processing. Attempt $attempts.\n";

        # Resolve the target against the current page's directory: drop the
        # last path segment of the current URI and append the target.
        my $newurl = $mech->uri();
        $newurl =~ s/\/[^\/]*$//;
        $newurl = $newurl . "/" . $url;
        # print "I think I'm going to $newurl\n";
        $mech->get($newurl);
    }
}
# downloadbill($url, $filename)
#
# Fetches one bill: loads $url, lets follow_processing() work through any
# intermediate page, locates the "View your bill" link on the resulting
# page and mirrors that link's absolute URL to $filename.
#
#   $url      - URL of the invoice's download link
#   $filename - local path the bill is written to
#
# Returns the response object from the transfer on success.  If there is no
# "View your bill" link, or the transfer is not successful, it prints
# "Unable to download bill" and returns nothing.
sub downloadbill {
    my ($url, $filename) = @_;

    print "Following download link\n";
    $mech->get($url);
    follow_processing();

    print "Following View your bill link\n";
    my $billlink = $mech->find_link( text_regex => qr/View your bill/ );
    if (!defined($billlink)) {
        print "Unable to download bill\n";
        return;
    }

    my $billurl = $billlink->url_abs();
    print "Downloading bill\n";
    my $response = $mech->mirror($billurl, $filename);

    # mirror() reports the outcome through the response it returns; a 304
    # (Not Modified) means the local copy is already current, which is fine.
    unless (defined $response
        && ($response->is_success || $response->code == 304)) {
        print "Unable to download bill\n";
        return;
    }
    return $response;
}
# error($message)
#
# Writes $message followed by a newline to STDERR, then terminates the
# script with exit status 1.  Never returns.
sub error {
    my $message = shift;
    print STDERR "${message}\n";
    exit 1;
}
Loading…
Cancel
Save