<?php
/* 	OpenDb - Open Media Lending Database
	Copyright (C) 2001,2002 by Jason Pell

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
	
	Example searches:
		Search for 'Rambo' to return a list of various titles.
		Search for '12 Angry Men' to get an exact title match.
		Search for 'faddsda' to trigger a search error.
*/
include_once("./functions/SitePlugin.class.inc");

//
// Search for 'Shitters' to return no titles.
// Search for 'Cunt' to get a single title.
// Search for 'Filthy Fuckers' to get an exact title match.
// Search for 'Chocolat' will return a single 'Most Popular' and multiple actual entries.
//

/**
 * Site plugin that scrapes IMDb search and title pages.
 *
 * queryListing() turns a title search (or a known IMDb id) into listing
 * rows; queryItem() parses a title page (plus the separate plot page) into
 * item attributes.  All parsing is plain string/regex matching against the
 * HTML of the fetched pages.
 */
class imdb extends SitePlugin
{
	/**
	 * HTML of the last page fetched by this plugin.
	 *
	 * When a title search is redirected straight to a title page, the page
	 * is left in this buffer so that queryItem() can parse it without
	 * fetching it a second time.
	 */
	var $_pageBuffer;
	
	/**
	 * Old-style (class name) constructor; passes the site type through to
	 * the SitePlugin constructor.
	 */
	function imdb($site_type)
	{
		parent::SitePlugin($site_type);
	}
	
	/**
	 * Build the listing for a search.
	 *
	 * Only $search_vars_r is used: 'imdb_id' short-circuits the search,
	 * otherwise 'title' is looked up on IMDb.
	 *
	 * @return TRUE when the search ran (even if it produced no rows),
	 *         FALSE when the search page could not be fetched.
	 */
	function queryListing($page_no, $items_per_page, $offset, $s_item_type, $search_vars_r)
	{
		// A known IMDb id needs no search at all.
		if(isset($search_vars_r['imdb_id']) && strlen($search_vars_r['imdb_id'])>0)
		{
			$this->addListingRow(NULL, NULL, NULL, array('imdb_id'=>$search_vars_r['imdb_id']));
			return TRUE;
		}
		
		$search_title = isset($search_vars_r['title']) ? $search_vars_r['title'] : '';
		$this->_pageBuffer = $this->fetchURI("http://www.imdb.com/find?q=".rawurlencode(strtolower($search_title)).";more=tt");
		
		if(!is_string($this->_pageBuffer) || strlen($this->_pageBuffer)==0)
		{
			return FALSE;
		}
		
		// An exact match is redirected to the title page itself.  The buffer is
		// deliberately kept so that queryItem() can reuse the page.
		if(preg_match("!http://us.imdb.com/title/tt([^/]+)/!", $this->getFetchedURILocation(), $regs))
		{
			$this->addListingRow(NULL, NULL, NULL, array('imdb_id'=>$regs[1]));
			return TRUE;
		}
		
		$pageBlock = $this->_pageBuffer;
		
		// Section headings look like:
		//<p><b>Titles (Exact Matches)</b> (Displaying 4 Results)
		if(preg_match_all("/<b>([a-zA-Z]+) \(([a-zA-Z]+) Matches\)<\/b> \(Displaying ([0-9]+) Result[s]*\)<ol>/m", $pageBlock, $gmatches))
		{
			// we need to know what match types to support (exact, partial, approx).
			// A non-array config value means every match type is accepted.
			$match_type_r = $this->getConfigValue('title_search_match_types');
			
			for($i = 0; $i < count($gmatches[0]); $i++)
			{
				if(is_array($match_type_r) && !in_array(strtolower($gmatches[2][$i]), $match_type_r))
				{
					continue;
				}
				
				$start = strpos($pageBlock, $gmatches[0][$i]);
				if($start === FALSE)
				{
					continue;
				}
				$start += strlen($gmatches[0][$i]);
				
				// The section runs up to its closing </ol>; if that is missing
				// use the rest of the page rather than a bogus negative length.
				$end = strpos($pageBlock, "</ol>", $start);
				if($end === FALSE)
					$search_block = substr($pageBlock, $start);
				else
					$search_block = substr($pageBlock, $start, $end-$start);
				
				if(preg_match_all("/<li>[\s]*<a href=\"([^\"]+)\"[^\>]*>(.*)<\/li>/Um", $search_block, $matches))
				{
					for($j = 0; $j < count($matches[1]); $j++)
					{
						// Only links to title pages are listing entries.
						if(preg_match("!/title/tt([^/]+)/!", $matches[1][$j], $imdb_id_match))
						{
							$entry = $this->_parseListingEntry($matches[2][$j]);
							$this->addListingRow($entry['title'], NULL, $entry['comments'], array('imdb_id'=>$imdb_id_match[1]));
						}
					}
				}
			}
			
			// The search page is of no use to queryItem(), so force a fresh fetch.
			$this->_pageBuffer = NULL;
			return TRUE;
		}
		
		//else no results found
		return TRUE;
	}
	
	/**
	 * Split the HTML of one search result entry into its title and its
	 * "also known as" comment.
	 *
	 * Everything before the first <br> is the title.  Whatever follows is
	 * split on the "<br>&#160;aka " marker and every non-empty piece becomes
	 * an "...aka <name>" comment; multiple pieces are joined with a space.
	 *
	 * @return array with keys 'title' (string) and 'comments' (string or NULL).
	 */
	function _parseListingEntry($entry_html)
	{
		$index = strpos($entry_html, "<br>");
		if($index === FALSE)
		{
			return array(
				'title'=>trim(convert_html_numeric_codes(strip_tags($entry_html))),
				'comments'=>NULL);
		}
		
		$title = trim(convert_html_numeric_codes(strip_tags(substr($entry_html, 0, $index))));
		
		$aka_r = array();
		$segment_r = explode("<br>&#160;aka ", substr($entry_html, $index));
		foreach($segment_r as $segment)
		{
			$aka = trim(convert_html_numeric_codes(strip_tags($segment)));
			if(strlen($aka)>0)
			{
				$aka_r[] = "...aka ".$aka;
			}
		}
		
		$comments = NULL;
		if(count($aka_r)>0)
		{
			$comments = implode(" ", $aka_r);
		}
		
		return array('title'=>$title, 'comments'=>$comments);
	}
	
	/**
	 * Parse the IMDb title page (and plot page) of one item into item
	 * attributes: title, year, imageurl, director, genre, imdbrating,
	 * actors, run_time, age_rating and plot.
	 *
	 * The title page is walked front to back: $end is a cursor holding the
	 * end of the last block that was parsed, and most blocks are searched
	 * for from that position onwards.  A block whose end delimiter cannot
	 * be found is skipped and leaves the cursor where it was.
	 *
	 * @param $search_attributes_r  'imdb_id' identifies the title to fetch.
	 * @param $s_item_type          not used by this plugin.
	 * @return FALSE when no title page is available, TRUE otherwise.
	 */
	function queryItem($search_attributes_r, $s_item_type)
	{
		// if no page supplied, then get it.
		if(!is_string($this->_pageBuffer) || strlen($this->_pageBuffer)==0)
		{
			$this->_pageBuffer = $this->fetchURI("http://us.imdb.com/Title?".$search_attributes_r['imdb_id']);
		}
		
		// no sense going any further here.
		if(!is_string($this->_pageBuffer) || strlen($this->_pageBuffer)==0)
			return FALSE;
		
		$page = $this->_pageBuffer;
		
		// Set to default, as we may not enter some of the if tests.
		$end = 0;
		
		//title extraction block
		if(preg_match("/<title>(.*)<\/title>/", $page, $regs))
		{
			//get rid of double quotes - some movies such as "Rambo" include quotes in title.
			$title = str_replace("\"", "", convert_html_numeric_codes(strip_tags($regs[1])));
			
			// Split off the trailing "(year)"; a same-year disambiguation
			// suffix such as "(2004/I)" is accepted and dropped.
			if(preg_match("/(.*)\(([0-9]+)(\/[IVX]+)?\)/", $title, $matches))
			{
				$this->addItemAttribute('title', trim($matches[1]));
				$this->addItemAttribute('year', $matches[2]);
			}
		}
		
		//year extraction block - only needed if the page title gave no year.
		if(!is_numeric($this->getItemAttribute('year')))
		{
			$start = strpos($page, "/Sections/Years/", $end);
			if($start !== FALSE)
			{
				$year = substr($page, $start+16, 4);// 16="/Sections/Years/"
				
				// Move past, without running off the end of the page.
				$end = min($start + 20, strlen($page)); //20="/Sections/Years/????"
				
				$this->addItemAttribute('year', $year);
			}
		}
		
		//image src extraction block
		if(strpos($page, "alt=\"No poster or movie still available\"", $end) === FALSE)
		{
			//<a name="poster" href="photogallery" title="&#34;Band of Brothers&#34;"><img border="0" alt="&#34;Band of Brothers&#34;" title="&#34;Band of Brothers&#34;" src="http://ia.imdb.com/media/imdb/01/I/56/50/31m.jpg" height="122" width="100"></a>
			// [^>]* keeps each part of the match inside its own tag, so the src
			// captured is that of the poster image itself.
			if(preg_match("/<a name=\"poster\" href=\"[^\"]+\" [^>]*><img [^>]* src=\"([^\"]+)\"/", $page, $matches))
			{
				if(starts_with($matches[1], 'http://'))
					$this->addItemAttribute('imageurl', $matches[1]);
				else
					$this->addItemAttribute('imageurl', 'http://'.$matches[1]);
			}
		}
		
		// director extraction block - the text of the first link after "Directed by".
		$start = strpos($page, "Directed by", $end);
		if($start !== FALSE)
		{
			$name_start = strpos($page, "\">", $start);
			$name_end = FALSE;
			if($name_start !== FALSE)
			{
				$name_start += 2;//2="\">"
				$name_end = strpos($page, "</a>", $name_start);
			}
			
			if($name_end !== FALSE)
			{
				$director = substr($page, $name_start, $name_end-$name_start);
				$this->addItemAttribute('director', convert_html_numeric_codes(strip_tags($director)));
				
				$end = $name_end;
			}
		}
		
		// genre extraction block.
		$start = strpos($page, "Genre:</b>", $end);
		if($start !== FALSE)
		{
			// Move past start text.
			$start += 10;//10="Genre:</b>"
			
			$genre_end = strpos($page, "<br>", $start);
			if($genre_end !== FALSE)
			{
				$this->addItemAttribute('genre', $this->_parseGenres(substr($page, $start, $genre_end-$start)));
				
				$end = $genre_end;
			}
		}
		
		//
		// IMDB User rating
		//
		$start = strpos($page, '<b class="ch">User Rating:</b>', $end);
		if($start !== FALSE)
		{
			$start = strpos($page, '<a href="/rg/title-tease/rating-stars/title/', $start);
			if($start !== FALSE)
			{
				//<b>3.9/10</b>
				if(preg_match("/<b>([0-9.]+)\/10<\/b>/", substr($page, $start), $regs))
				{
					$this->addItemAttribute('imdbrating', $regs[1]);
				}
			}
		}
		
		//actor extraction block - changed from first billed, to Cast overview, as this always starts
		// the block anyway.  All we need is the start.
		$start = strpos($page, "Cast overview", $end);
		
		//fix by Dick Balaska
		if($start === FALSE)
		{
			$start = strpos($page, "redited cast:", $end);
		}
		
		if($start === FALSE)
		{
			$start = strpos($page, "Cast:", $end);
		}
		
		if($start !== FALSE)
		{
			// The cast block ends at the first of these headings that is present.
			$cast_end = strpos($page, "Also Known As", $start);
			if($cast_end === FALSE)
				$cast_end = strpos($page, "Runtime", $start);
			if($cast_end === FALSE)
				$cast_end = strpos($page, "Country", $start); // if no runtime
			
			if($cast_end !== FALSE)
			{
				// It is too hard to do it any other way, so we will get the block of
				// actors so we can parse separately.
				$this->addItemAttribute('actors', $this->_parseActors(substr($page, $start, $cast_end-$start)));
				
				$end = $cast_end;
			}
		}
		
		//length extraction block
		$start = strpos($page, "Runtime:</b>", $end);
		if($start !== FALSE)
		{
			$start += 12;//12="Runtime:</b>"
			
			$runtime_end = strpos($page, "<br>", $start);
			if($runtime_end !== FALSE)
			{
				$length = trim(substr($page, $start, $runtime_end-$start));
				
				// Look for first numeric characters, to represent runtime.
				// Ignore any country specific runtime, at this stage!
				if(preg_match("/([0-9]+)/", $length, $matches))
				{
					$this->addItemAttribute('run_time', $matches[1]);
				}
				
				$end = $runtime_end;
			}
		}
		
		//Certification extraction block
		$start = strpos($page, "Certification:</b>", $end);
		if($start !== FALSE)
		{
			$start += 18;//18="Certification:</b>"
			
			$cert_end = strpos($page, "</td>", $start);
			if($cert_end !== FALSE)
			{
				// Now get the block which we can process.
				$certBlock = trim(substr($page, $start, $cert_end-$start));
				
				// Ensure we have a valid value here!
				$age_certification_codes_r = $this->getConfigValue('age_certification_codes');
				
				// Default to USA if not defined!
				if(!is_array($age_certification_codes_r))
					$age_certification_codes_r = array("USA");
				
				// Codes are tried in configured order; the first one that
				// yields a non-empty rating wins.
				foreach($age_certification_codes_r as $age_code)
				{
					$startOfRating = laststrpos($certBlock, $age_code.":");
					if($startOfRating === FALSE)
						continue;
					
					// Move match along past actual CODE: part!
					$startOfRating += strlen($age_code.":");
					
					$endOfRating = strpos($certBlock, "</a>", $startOfRating);
					if($endOfRating === FALSE)
						continue;
					
					$certCode = trim(substr($certBlock, $startOfRating, $endOfRating-$startOfRating));
					if(strlen($certCode)==0)
						continue;
					
					if($certCode == 'Unrated')
						$certCode = 'NR';
					
					$this->addItemAttribute('age_rating', $certCode);
					break;
				}
			}
		}
		
		// Plot summaries live on their own page; each one becomes a 'plot' attribute.
		$plotPage = $this->fetchURI("http://us.imdb.com/Plot?".$search_attributes_r['imdb_id']);
		if(is_string($plotPage))
		{
			$plot_r = $this->_parsePlots($plotPage);
			foreach($plot_r as $plot)
			{
				$this->addItemAttribute('plot', $plot);
			}
		}
		unset($plotPage);
		
		return TRUE;
	}
	
	/**
	 * Turn the HTML that follows "Genre:</b>" into a list of genre names.
	 *
	 * Tags and the "(more)" link text are removed, " / " separators are
	 * dropped, "Sci-Fi" is renamed to "ScienceFiction", and the remainder
	 * is split on whitespace.  Empty entries are never returned.
	 *
	 * @return array of genre strings (possibly empty).
	 */
	function _parseGenres($genre_html)
	{
		// Get rid of all the html - a quick hack!
		$genre = strip_tags($genre_html);
		
		// Drop the "(more)" link text before splitting so that it cannot
		// leave an empty element behind.
		$genre = str_replace("(more)", "", $genre);
		
		// If composite genre, get rid of / as we do not need it.
		$genre = str_replace(" / ", " ", $genre);
		
		// Expand Sci-Fi to OpenDb matching value.
		$genre = str_replace("Sci-Fi", "ScienceFiction", $genre);
		
		return preg_split("/[\s]+/", trim($genre), -1, PREG_SPLIT_NO_EMPTY);
	}
	
	/**
	 * Collect the link text of every <a href="/name...> link in the cast
	 * block and return the names as one ", " separated string.
	 *
	 * Links without text are skipped and a trailing ", (more)" entry is
	 * removed.  Parsing stops at the first link whose markup is incomplete.
	 *
	 * @return string of actor names, '' when none were found.
	 */
	function _parseActors($actorsBlock)
	{
		$actor_r = array();
		
		$start = strpos($actorsBlock, "<a href=\"/name");
		while($start !== FALSE)
		{
			$name_start = strpos($actorsBlock, "\">", $start);
			if($name_start === FALSE)
				break;
			$name_start += 2;//2="\">"
			
			$name_end = strpos($actorsBlock, "<", $name_start);
			if($name_end === FALSE)
				break;
			
			// Strip any html, this is a failsafe in case the parse fails...
			$actor = trim(substr($actorsBlock, $name_start, $name_end-$name_start));
			$actor = convert_html_numeric_codes(strip_tags($actor));
			
			// Ensure we do not get empty actor.
			if(strlen($actor)>0)
			{
				$actor_r[] = $actor;
			}
			
			// $name_end is always beyond $start, so the scan keeps moving forward.
			$start = strpos($actorsBlock, "<a href=\"/name", $name_end);
		}
		
		return str_replace(", (more)", "", implode(", ", $actor_r));
	}
	
	/**
	 * Extract the text of every <p class="plotpar"> paragraph on the plot
	 * page, with tags removed and runs of tabs / line breaks collapsed to a
	 * single space.  Parsing stops at a paragraph that has no closing </p>.
	 *
	 * @return array of plot strings (possibly empty).
	 */
	function _parsePlots($plotPage)
	{
		$marker = "<p class=\"plotpar\">";
		$plot_r = array();
		
		$start = strpos($plotPage, $marker);
		while($start !== FALSE)
		{
			$start += strlen($marker);
			
			$plot_end = strpos($plotPage, "</p>", $start);
			if($plot_end === FALSE)
				break;
			
			$plot = convert_html_numeric_codes(strip_tags(trim(substr($plotPage, $start, $plot_end-$start))));
			
			// Replace any tabs or newlines with spaces.
			$plot_r[] = preg_replace("/[\r\n\t]+/", " ", $plot);
			
			$start = strpos($plotPage, $marker, $plot_end);
		}
		
		return $plot_r;
	}
}
?>
