Screen scraping to retrieve bills and keep account active on the Orange website.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

136 lines
2.8 KiB

#!/usr/pkg/bin/perl
use WWW::Mechanize;
# Main script: read the config file, log in to the Orange website, open the
# list of recent bills and download every bill PDF that is not yet present in
# the download directory.
#
# Usage: <script> configfile
#
# The config file holds "name = value" lines; "#" starts a comment and blank
# lines are ignored. Required names: username, password, downloaddir.
# NOTE(review): because "#" always starts a comment, a value containing "#"
# (for example in a password) is truncated.
#
# NOTE(review): strict/warnings are not enabled file-wide here because the
# subs below rely on the undeclared package variable $mech.

# The browser object stays a package variable: follow_processing() and
# downloadbill() read it as the global $mech.
our $mech = WWW::Mechanize->new();

my $configfile = $ARGV[0];
error("Usage: $0 configfile") unless defined($configfile);

# Three-argument open with a lexical handle, so that special characters in
# the file name cannot be interpreted as an open mode.
my %options;
open(my $config, '<', $configfile)
    or die "Cannot open config file '$configfile': $!\n";
while (my $line = <$config>) {
    chomp $line;
    $line =~ s/#.*//;     # no comments
    $line =~ s/^\s+//;    # no leading white
    $line =~ s/\s+$//;    # no trailing white
    next unless length $line;    # anything left?
    my ($var, $value) = split(/\s*=\s*/, $line, 2);
    $options{$var} = $value;
}
close $config;

for my $required (qw(username password downloaddir)) {
    error("$required not set") unless defined($options{$required});
}
my ($username, $password, $downloaddir)
    = @options{qw(username password downloaddir)};

# Fail before logging in rather than at the first download.
error("downloaddir '$downloaddir' is not a directory") unless -d $downloaddir;

print "Fetching homepage\n";
$mech->get("https://www.orange.co.uk/");
$mech->follow_link( text_regex => qr/mobile account/ );

print "Logging in\n";
$mech->submit_form(
    form_number => 1,
    fields      => {
        LOGIN    => $username,
        PASSWORD => $password,
    }
);

# The next page relies on JavaScript to submit its form when it loads;
# submit that form by hand to complete the login.
print "Following intermediate login form\n";
$mech->submit_form(
    form_number => 1,
    fields      => {},
);
follow_processing();

print "Following see your recent bills link\n";
$mech->follow_link( text_regex => qr/see your recent bills/ );
follow_processing();

my @links = $mech->find_all_links( text_regex => qr/download PDF/ );
for my $link (@links) {
    my $url = $link->url();
    my ($invoice) = $url =~ /leg_invoice=(\d+)/;

    # Without an invoice number every such link would map to the same
    # "<downloaddir>/.pdf" file, so skip it instead.
    unless (defined $invoice) {
        print "Skipping link without an invoice number: $url\n";
        next;
    }

    my $filename = $downloaddir . '/' . $invoice . ".pdf";
    if (-e $filename) {
        print "Skipping download of invoice $invoice\n";
        next;
    }

    # The link leads to an intermediate page, so downloadbill() is used
    # instead of mirroring $url directly.
    print "Downloading invoice $invoice to $filename\n";
    downloadbill($url, $filename);
}
# Follow the site's JavaScript "processing" redirects.
#
# While the current page of the global $mech contains a script line of the
# form `var sURL = "...";`, fetch that URL (relative to the directory of the
# current URI) and look again. Sleeps one second before each request and makes
# at most 10 requests. Takes no arguments; the return value is not meaningful.
sub follow_processing {
    my $max_attempts = 10;

    # Declared with its own `my`: `my $url, $attempts;` only declares $url
    # and would leave $attempts as a package global.
    my $attempts = 0;

    while (1) {
        # [^"]* stops at the closing quote, so a later `";` on the same line
        # cannot be swallowed into the captured URL.
        my ($url) = $mech->content() =~ /var sURL = "([^"]*)";/;

        unless (defined($url)) {
            # Only announce completion if there was something to wait for.
            print "Processing finished.\n" if $attempts > 0;
            return;
        }

        if ($attempts >= $max_attempts) {
            print STDERR "Still processing after $max_attempts attempts; giving up waiting.\n";
            return;
        }

        $attempts++;
        sleep(1);
        $url =~ s/&amp;/&/g;    # the URL is HTML-escaped in the page source
        print "Waiting for processing. Attempt $attempts.\n";

        # Replace everything after the last "/" of the current URI with the
        # redirect target.
        my $newurl = $mech->uri();
        $newurl =~ s/\/[^\/]*$//;
        $newurl = $newurl . "/" . $url;
        $mech->get($newurl);
    }
}
# Download a single bill.
#
# Parameters:
#   $url      - URL of the bill's "download PDF" link
#   $filename - local path the bill is saved to
#
# Fetches $url with the global $mech, waits for any "processing" redirects,
# then mirrors the target of the "View your bill" link to $filename.
# Returns nothing if that link is missing, otherwise the response object
# returned by mirror(). A failed transfer is reported on STDOUT.
sub downloadbill {
    my ($url, $filename) = @_;

    print "Following download link\n";
    $mech->get($url);
    follow_processing();

    print "Following View your bill link\n";
    my $billlink = $mech->find_link( text_regex => qr/View your bill/ );
    if (!defined($billlink)) {
        print "Unable to download bill\n";
        return;
    }

    my $billurl = $billlink->url_abs();
    print "Downloading bill\n";
    my $response = $mech->mirror($billurl, $filename);

    # mirror() answers 304 when the local copy is already current; any other
    # non-success status means the bill was not saved.
    unless ($response->is_success || $response->code == 304) {
        print "Unable to download bill: ", $response->status_line, "\n";
    }
    return $response;
}
# Report a fatal problem and stop: writes the given message followed by a
# newline to STDERR, then ends the script with exit status 1.
sub error {
    my $message = shift;
    print STDERR $message, "\n";
    exit 1;
}