use strict;
#use locale;
use DateTime;
#use DateTime::Duration;
use DateTime::Format::DateParse;
use LWP::Simple;
use Encode;
use HTML::Entities;


# Forum member ids already handled during this run.
# Value 1 = member shows no skill icons, 2 = member has skills and a
# posts/<id>.html file that later posts get appended to.
my %users = ();

# Output tables: one tab-separated row per member with skills (out) and
# the bare ids of members without skills (out2).  The bareword handle
# names are kept because the rest of the script prints to them.
open(out, '>', 'users_with_skills.csv') or die "can not open file users_with_skills.csv: $!\n";
open(out2, '>', 'users.csv') or die "can not open file users.csv: $!\n";

# Fetch the first page of the topic only to learn how many pages exist.
print "open page for page number\n";
my $firstpage = get("http://www.bigbluecup.com/yabb/index.php?topic=30351.0");
if (!defined $firstpage) {
	# LWP::Simple::get returns undef on any failure; keep going with the
	# single-page default instead of silently pretending all is well.
	warn "could not fetch the first topic page, assuming a single page\n";
}
my $pageraw = encode("iso-8859-1", decode("utf8", $firstpage));
$pageraw = '' unless defined $pageraw;
$pageraw =~ s/\r//g;

# The page navigation line after the "bot" anchor ends with a link whose
# text is a number; that number is taken as the page count.
my $pagecount = 1;
my @page = split(/\n/, $pageraw);
for my $line (@page) {
	if ($line =~ /<a name="bot">.*>(\d+)<\/a> *$/) {
		$pagecount = $1;
	}
}

# Run state shared with the scraping loop below.
my $count = 0;                 # number of members with skills written so far
my $lastuser = 0;              # id of the most recently seen poster (0 = guest)
my $lastdate = '';             # formatted date of the most recently seen post
my $dt_now = DateTime->now;    # reference time for "Today"/"Yesterday" dates

# Download $url and return its content as a list of lines (carriage
# returns removed, split on newlines).  The content goes through the same
# utf8 -> iso-8859-1 conversion as everywhere else in this script.
# Returns an empty list, after a warning, when the download fails.
sub fetch_lines {
	my ($url) = @_;
	my $raw = get($url);
	if (!defined $raw) {
		warn "could not fetch $url\n";
		return;
	}
	my $text = encode("iso-8859-1", decode("utf8", $raw));
	$text =~ s/\r//g;
	return split(/\n/, $text);
}

# Turn a "<b>Today</b> at HH:MM" or "<b>Yesterday</b> at HH:MM" string
# into a DateTime in UTC, relative to $dt_now.  Returns undef when $text
# uses neither of these relative forms.
sub relative_forum_date {
	my ($text, $dt_now) = @_;
	my ($days_ago, $hour, $minute);
	if ($text =~ /<b>Today<\/b> at (\d\d):(\d\d)/) {
		($days_ago, $hour, $minute) = (0, $1, $2);
	}
	elsif ($text =~ /<b>Yesterday<\/b> at (\d\d):(\d\d)/) {
		($days_ago, $hour, $minute) = (1, $1, $2);
	}
	else {
		return;
	}
	my $dt = DateTime->new(
		year => $dt_now->year,
		month => $dt_now->month,
		day => $dt_now->day,
		hour => $hour,
		minute => $minute,
		time_zone => 'UTC'
	);
	$dt->subtract(days => 1) if $days_ago;
	return $dt;
}

# Consume the poster info lines that follow a profile link from @$page,
# up to and including the line that is exactly '</span>' once tabs are
# removed (or until the page runs out of lines), and extract from them:
#   - the personal text: the last line that starts with plain text
#     followed by <br />
#   - the skills: the alt texts of the images on the skills icon line
# Returns ($personaltext, \@skills).
sub read_poster_block {
	my ($page) = @_;

	my @profiledata = ();
	# Stop on a missing line as well as on '</span>': without the defined
	# check a page lacking the terminator would loop forever.
	while (defined(my $data = shift(@$page))) {
		chomp($data);
		$data =~ s/\t//g;
		push(@profiledata, $data) if ($data);   # false (e.g. empty) lines are skipped
		last if ($data eq '</span>');
	}

	my @skills = ();
	my $personaltext = '';
	# Other line shapes in this block (bold title/position lines, the
	# avatar <img> line, the contact links) are not used.
	for my $item (@profiledata) {
		if ($item =~ /^([^<]+)<br \/>/) { # personal text
			$personaltext = $1;
		}
		if ($item =~ /^<br \/><img src="http:\/\/www\.bigbluecup\.com\/images\//) { # skills
			for my $tag (split(/</, $item)) {
				if ($tag =~ /^img src="[^"]+" alt="([^"]+)"/) {
					push(@skills, encode_entities(decode_entities($1)));
				}
			}
		}
	}
	return ($personaltext, \@skills);
}

# Download the profile page of member $id and
#   - write the profile summary table to posts/$id.html (replacing any
#     existing file), with a %skills% placeholder inserted before the
#     closing </table>
#   - collect the profile fields and the list of games
# Returns (\%data, $games).  %data may contain the keys posts, registered,
# active, gender, age, location and locatime.  $games is a string of
# "url¤title¤description;;" entries.
# Dies when posts/$id.html can not be written.
sub scrape_profile {
	my ($id, $dt_now) = @_;

	my %read = ();     # $read{field} is true while the value cell of that field is awaited
	my %data = ();
	my $games = "";

	my @profile = fetch_lines("http://www.bigbluecup.com/yabb/index.php?action=profile;u=".$id);
	my $tabledepth = 0;     # nesting depth of tables inside the summary table
	my $profileread = 0;    # true while inside the summary table
	my $profilehtml = '';
	while (defined(my $line = shift(@profile))) {
		# --- copy the summary table to posts/$id.html ---
		if ($line =~ /<table border="0" cellpadding="4" cellspacing="1" align="center" class="bordercolor">/) {
			$profileread = 1;
		}
		elsif ($line =~ /<table/i and $profileread) {
			$tabledepth++;
		}
		$profilehtml .= "$line\n" if ($profileread);
		if ($line =~ /<\/table>/i and $profileread) {
			if ($tabledepth == 0) {
				$profileread = 0;
				unlink("posts/$id.html");
				open(my $outpost, '>', "posts/$id.html") or die "can not open file posts/$id.html: $!\n";
				$profilehtml =~ s/(<\/table>)$/%skills%$1/;
				print {$outpost} "<div id=\"profile\">$profilehtml</div>\n\n";
				close($outpost) or die "can not close file posts/$id.html: $!\n";
			}
			else {
				$tabledepth--;
			}
		}

		# --- profile fields: a label cell, then the value cell on a later line ---
		# The order of this chain matters: the first branch that applies wins.
		if ($line =~ /<td><b>Posts: <\/b><\/td>/) {
			$read{posts} = 1;
		}
		elsif ($read{posts} and $line =~ /^\s*<td>([^<]+)<\/td>\s*$/) {
			$data{posts} = $1;
			$read{posts} = 0;
		}
		elsif ($line =~ /<td><b>Date Registered: <\/b><\/td>/) {
			$read{registered} = 1;
		}
		elsif ($read{registered} and $line =~ /^\s*<td>(.+)<\/td>\s*$/) {
			$data{registered} = $1;
			my $dt_then = relative_forum_date($data{registered}, $dt_now);
			# Values mentioning "transferred" are not dates and stay as raw text.
			if (!$dt_then and $data{registered} !~ /transferred/i) {
				$dt_then = DateTime::Format::DateParse->parse_datetime($data{registered}." UTC");
			}
			$data{registered} = $dt_then->iso8601() if ($dt_then);
			$read{registered} = 0;
		}
		elsif ($line =~ /<td><b>Last Active: <\/b><\/td>/) {
			$read{active} = 1;
		}
		elsif ($read{active} and $line =~ /^\s*<td>(.+)<\/td>\s*$/) {
			# Stored as the number of days between now and the last activity.
			$data{active} = $1;
			my $dt_then = relative_forum_date($data{active}, $dt_now);
			if (!$dt_then) {
				$dt_then = DateTime::Format::DateParse->parse_datetime($data{active}." UTC");
			}
			if ($dt_then) {
				$data{active} = $dt_now->delta_days($dt_then)->delta_days;
			}
			else {
				# Unparsable date: keep the raw text instead of dying mid-run.
				warn "can not parse last active date '$data{active}' of user $id\n";
			}
			$read{active} = 0;
		}
		elsif ($line =~ /<td><b>Gender: <\/b><\/td>/) {
			$read{gender} = 1;
		}
		elsif ($read{gender} and $line =~ /^\s*<td>([^<]*)<\/td>\s*$/) {
			$data{gender} = $1;
			$read{gender} = 0;
		}
		elsif ($line =~ /<td><b>Age:<\/b><\/td>/) {
			$read{age} = 1;
		}
		elsif ($read{age} and $line =~ /^\s*<td>([^<]*)<\/td>\s*$/) {
			$data{age} = $1;
			$read{age} = 0;
		}
		elsif ($line =~ /<td><b>Location:<\/b><\/td>/) {
			$read{location} = 1;
		}
		elsif ($read{location} and $line =~ /^\s*<td>([^<]*)<\/td>\s*$/) {
			$data{location} = $1;
			$read{location} = 0;
		}
		elsif ($line =~ /<td><b>Local Time:<\/b><\/td>/) {
			$read{locatime} = 1;
		}
		elsif ($read{locatime} and $line =~ /^\s*<td>([^<]*)<\/td>\s*$/) {
			# Stored as the signed hour component of (local time - now).
			$data{locatime} = $1;
			my $dt_then = DateTime::Format::DateParse->parse_datetime($data{locatime}." UTC");
			if ($dt_then) {
				my $dt_duration = $dt_then - $dt_now;
				if ($dt_duration->is_positive) {
					$data{locatime} = $dt_duration->hours();
				}
				else {
					$data{locatime} = -$dt_duration->hours();
				}
			}
			else {
				# Empty or unparsable cell: keep the raw text instead of dying.
				warn "can not parse local time '$data{locatime}' of user $id\n";
			}
			$read{locatime} = 0;
		}
		# (The "Language" row is not captured.)
		elsif ($line =~ /My games:/ and $line =~ /I've worked on the following:/) {
			for my $entry (split(/<br \/><a target="_blank" /, $line)) {
				if ($entry =~ /href="([^"]+)"><b>([^<]+)<\/b><\/a><br \/><i>([^<]+)<\/i>/) {
					my @parts = ($1, $2, $3);   # link, title, description
					$games .= join("¤", map { encode_entities(decode_entities($_)) } @parts).";;";
				}
			}
		}
	}
	return (\%data, $games);
}

# Per-user HTML fragments are written to posts/; create the directory up
# front so the run does not die at the first member with skills.
unless (-d 'posts') {
	mkdir('posts') or die "can not create directory posts: $!\n";
}

# Walk all pages of the topic.  The topic offset in the URL advances by
# 20 per page.
for my $pageindex (0 .. $pagecount - 1) {
	print "opening page ".($pageindex + 1)."/$pagecount\n";
	sleep(1);   # be polite to the server
	my @page = fetch_lines("http://www.bigbluecup.com/yabb/index.php?topic=30351.".($pageindex * 20));
	while (defined(my $line = shift(@page))) {
		# Post body: appended to the file of the last seen poster, but only
		# when that poster is a member with skills.
		if ($line =~ /<div class="post">/) {
			if (defined $users{$lastuser} and $users{$lastuser} == 2) {
				open(my $outpost, '>>', "posts/$lastuser.html") or die "can not open file posts/$lastuser.html: $!\n";
				$line =~ s/^\s+//;
				print {$outpost} "<div class=\"date\">$lastdate</div>$line\n";
				close($outpost) or die "can not close file posts/$lastuser.html: $!\n";
			}
		}
		# Post date line; remembered for the post body that follows.
		# NOTE(review): this pattern does not match dates containing markup
		# (e.g. a bold "Today"); such posts keep the previous date - confirm
		# whether the topic pages can contain them.
		if ($line =~ /<span class="smalltext">&#171; <b>[^<]* on:<\/b> ([^<]+) &#187;<\/span><\/td>/) {
			my $rawdate = $1;
			my $dt_then = DateTime::Format::DateParse->parse_datetime($rawdate." UTC");
			if ($dt_then) {
				# "YYYY-MM-DD HH:MM " - the seconds are replaced by a space
				$lastdate = $dt_then->iso8601();
				$lastdate =~ s/T/ /;
				$lastdate =~ s/:\d\d$/ /;
			}
			else {
				# Unparsable date: fall back to the raw text instead of dying.
				warn "can not parse post date '$rawdate'\n";
				$lastdate = $rawdate;
			}
		}
		if ($line =~ /<b>Guest<\/b><br \/>/) {
			$lastuser = 0;
		}
		if ($line =~ /<b><a href="http:\/\/www\.bigbluecup\.com\/yabb\/index\.php\?action=profile;u=(\d+)" title="View the profile of [^"]+">([^<]+)<\/a><\/b><br \/>/) {
			my ($id, $name) = ($1, $2);
			$lastuser = $id;
			next if ($users{$id});   # member already handled

			my ($personaltext, $skills) = read_poster_block(\@page);
			if (scalar(@$skills)) {
				print "getting profile for $name...\n";
				sleep(1);
				my ($data, $games) = scrape_profile($id, $dt_now);

				$name = encode_entities(decode_entities($name));
				$personaltext = encode_entities(decode_entities($personaltext));
				$data->{location} = encode_entities(decode_entities($data->{location}));

				# One row per member; every field, including the last one, is
				# followed by a tab.
				my @fields = (
					$id, $name, $personaltext,
					@{$data}{qw(posts registered active gender age location locatime)},
					$games,
					@$skills
				);
				print out join('', map { "$_\t" } @fields), "\n";

				$users{$id} = 2;
				$count++;
			}
			else {
				$users{$id} = 1;
				print out2 "$id\n";
			}
		}
	}
}
print "finished with $count members data read\n";
# Buffered write errors only show up at close, so check it.
close(out) or die "can not close file users_with_skills.csv: $!\n";
close(out2) or die "can not close file users.csv: $!\n";

#system("perl htx.pl -mu posts");