From 728eb2a2664e6891c759e1260ce6d5119d4c9854 Mon Sep 17 00:00:00 2001 From: Stephen Griffiths Date: Mon, 23 Sep 2013 17:38:41 +0100 Subject: [PATCH] Improved harvesting and added card class --- DAL.php | 44 +++++------ card_extractors.php | 168 ++++++++++++++++++++++++++++++++---------- card_object.php | 47 ++++++++++++ create_card_table.sql | 4 +- database_updates.sql | 5 ++ index.php | 41 +++++------ 6 files changed, 224 insertions(+), 85 deletions(-) create mode 100644 card_object.php create mode 100644 database_updates.sql diff --git a/DAL.php b/DAL.php index de6d0a5..0bbe501 100644 --- a/DAL.php +++ b/DAL.php @@ -6,18 +6,18 @@ function sqlString($s, $db){ return trim($db->real_escape_string($s)); } - function DB_create_card($cardObject, $db){ - $name = $cardObject["name"]; - $mana_cost = ( isset($cardObject["mana_cost"]) ? $cardObject["mana_cost"] : null); - $converted_mana_cost = ( isset($cardObject["converted_mana_cost"]) ? $cardObject["converted_mana_cost"] : null); - $types = $cardObject["types"]; - $card_text = $cardObject["card_text"]; - $flavor_text = ( isset($cardObject["flavor_text"]) ? $cardObject["flavor_text"] : null ); - $power_toughness = ( isset($cardObject["power_toughness"]) ? $cardObject["power_toughness"] : null); - $expansion = $cardObject["expansion"]; - $rarity = $cardObject["rarity"]; - $card_number = $cardObject["card_number"]; - $artist = $cardObject["artist"]; + function DB_create_card($card, $db){ + $name = $card->get('name'); + $mana_cost = $card->has('mana_cost') ? $card->get('mana_cost') : null; + $converted_mana_cost = $card->has('converted_mana_cost') ? $card->get('converted_mana_cost') : null; + $types = $card->get('types'); + $card_text = $card->has('card_text') ? $card->get('card_text') : null; + $flavor_text = $card->has('flavor_text') ? $card->get('flavor_text') : null; + $power_toughness = $card->has('power_toughness') ? $card->get('power_toughness') : null; + $expansion = $card->get('expansion'); + $rarity = $card->get('rarity'); + $card_number = $card->get('card_number'); + $artist = $card->get('artist'); return DB_insert_card($name, $mana_cost, $converted_mana_cost, $types, $card_text, $flavor_text, $power_toughness, $expansion, $rarity, $card_number, $artist, $db); } @@ -60,26 +60,20 @@ function SQL_create_card($name, $mana_cost, $converted_mana_cost, $types, $card_ return $SQL; } - function DB_card_exists($name, $db){ - $result = $db->query( SQL_card_exists($name, $db) ); + function DB_existing_card($name, $db){ + $result = $db->query( SQL_existing_card($name, $db) ); switch ($result->num_rows){ case 0: - return false; + return null; break; default: - $fetchedCard = (array) $result->fetch_object(); - $fetchedCard["ID"] = ""; - $cardObject = array(); - foreach ($fetchedCard as $key => $value){ - if ($value){ - $cardObject[$key] = $value; - } - } - return $cardObject; + $fetchedArray = (array) $result->fetch_object(); + $card = new card($fetchedArray); + return $card; } } - function SQL_card_exists($name, $db){ + function SQL_existing_card($name, $db){ global $CardTable; $name = sqlString($name, $db); return "Select * from $CardTable diff --git a/card_extractors.php b/card_extractors.php index 1e8b09d..f3b63a7 100644 --- a/card_extractors.php +++ b/card_extractors.php @@ -1,47 +1,139 @@
", " _ ", $l); + $v = str_replace('
', ' _ ', $l); //Replace all images of icons with their alt text $v = preg_replace("#\#", '{$1}', $v); + //Replace colour icons with a shorter version {Blue} -> {U} + // but only if we can find a brace in the text (this is a mild optimisation) + if (strpos($v, '{') !== false){ + $v = str_replace('{Blue}', '{U}', $v); + $v = str_replace('{Black}', '{B}', $v); + $v = str_replace('{White}', '{W}', $v); + $v = str_replace('{Green}', '{G}', $v); + $v = str_replace('{Red}', '{R}', $v); + $v = str_replace('{Variable Colorless}', '{X}', $v); + } + //Strip out remaining tags and destroy whitespace - return trim( strip_tags($v) ); + return trim(strip_tags($v)); +} + +function tidy_line($l){ + return str_replace("\r", ' ', $l); } function download_card($name_search){ + $card = new card(null); $name_search = urlencode($name_search); $url = "http://gatherer.wizards.com/Pages/Search/Default.aspx?name=+[$name_search]"; - $page = file_get_contents( $url ); - $lines = explode( PHP_EOL, $page ); + $lines = download_page($url); + + //Loop through lines of the html until the line contains "" + for ($i = 2; strpos($lines[$i], '') === false; $i++){} + //Now $lines[$i-1] contains the page title. + // If the page title has "Card Search" there may be a single matching card, + // for example "Forest", or it may be multiple results + if (strpos($lines[$i-1], 'Card Search') === false){ + //Just a normal card page: parse the download + $card = add_data_from_lines($card, $lines); + return $card; + } + else{ + //Look for the searchTermDisplay and the number of results in it + // (Use the existing instance of $i to save time); + for (true; strpos($lines[$i], 'searchTermDisplay">') === false; $i++){} + $l = $lines[$i]; + + //Number of results is in brackets on this line, like (7) + $openBracket = strpos($l, '(') + 1; + if ($openBracket === false){ + $card->set_error('The card search results look strange. Can not return a card.'); + return $card; + } + + $numberOfResults = substr($l, $openBracket, strpos($l, ')') - $openBracket); + if ($numberOfResults == 0){ + $card->set_error('No cards match that search.'); + return $card; + } + + //More than zero results - let's try to find an exact card. + $mvid = matching_multiverse_id($name_search, $lines); + if ($mvid){ + $lines = download_page("http://gatherer.wizards.com/Pages/Card/Details.aspx?multiverseid=$mvid"); + $card = add_data_from_lines($card, $lines); + } + else{ + $card->set_error('Multiple options'); + } + return $card; + } +} + +function matching_multiverse_id($name, $lines){ + //Loop through lines of the html (from 200) + // until one contains a link to the exact card name (or end of document [minus 30 lines]) + for ($i = 200; strpos($lines[$i], ">$name") === false && $i < count($lines) - 30; $i++){} + + $l = $lines[$i]; + $mvidStart = strpos($l, '?multiverseid='); + //Was it found? (Otherwise, we hit the end of the document) + if ($mvidStart === false){ return false; } + + $mvidStart += 14; // Where 6 is the length of ?multiverseid= + $mvidEnd = strpos($l, '"', $mvidStart); + $multiverseId = substr($l, $mvidStart, $mvidEnd - $mvidStart); + return $multiverseId; +} + +//Return an array of lines representing a web page +function download_page($url){ + $page = file_get_contents($url); + return explode(PHP_EOL, $page); +} +function add_data_from_lines($card, $lines){ + + //var_dump($lines); + //The key we found on this iteration, and for which we seek a value on the next $flag = null; //Boolean - is that value entirely HTML? $htmlFlag = false; - + //Boolean - does this non-HTML line need tidying/sanitising? + $untidy = false; + $found_result = false; $i = -1; - foreach( $lines as $line ) { + foreach($lines as $line) { $i++; - //All junk before 300 lines and after 500th line. - if ($i < 300){ continue; } + //All junk before 300th and after 500th line. + if ($i < 300){ continue; } elseif ($i > 500){ break; } //Get the line with no markup or space - $line = trim( strip_tags($line) ); + $line = trim(strip_tags($line)); //If it still has content if ($line){ //If we flagged this line up as having a value, put it in the object. if ($flag){ - $cardObject[$flag] = $line; + if ($untidy){ + $card->set($flag, tidy_line($line)); + } + else{ + $card->set($flag, $line); + } + //And clear the flag, ready for next. $flag = null; + $untidy = false; $htmlFlag = false; //We have found a result by now. $found_result = true; @@ -50,44 +142,45 @@ function download_card($name_search){ //Normally not an html value, so default to false. $htmlFlag = false; switch ($line){ - case "Card Name:": - $flag = "name"; + case 'Card Name:': + $flag = 'name'; break; - case "Mana Cost:": - $flag = "mana_cost"; + case 'Mana Cost:': + $flag = 'mana_cost'; $htmlFlag = true; break; - case "Converted Mana Cost:": - $flag = "converted_mana_cost"; + case 'Converted Mana Cost:': + $flag = 'converted_mana_cost'; break; - case "Types:": - $flag="types"; + case 'Types:': + $flag='types'; break; - case "Card Text:": + case 'Card Text:': //Plain text with interspersed icons (maybe), treat as HTML - $flag = "card_text"; + $flag = 'card_text'; $htmlFlag = true; break; - case "Flavor Text:": - $flag = "flavor_text"; + case 'Flavor Text:': + $flag = 'flavor_text'; + $untidy = true; break; - case "Watermark:": - $flag = "watermark"; + case 'Watermark:': + $flag = 'watermark'; break; - case "P/T:": - $flag = "power_toughness"; + case 'P/T:': + $flag = 'power_toughness'; break; - case "Expansion:": - $flag = "expansion"; + case 'Expansion:': + $flag = 'expansion'; break; - case "Rarity:": - $flag = "rarity"; + case 'Rarity:': + $flag = 'rarity'; break; - case "Card Number:": - $flag = "card_number"; + case 'Card Number:': + $flag = 'card_number'; break; - case "Artist:": - $flag = "artist"; + case 'Artist:': + $flag = 'artist'; break; } } @@ -101,7 +194,7 @@ function download_card($name_search){ // They fall after an opening tag, so we skip that line (+1) $l = $lines[$i+1]; - $cardObject[$flag] = get_line_content($l); + $card->set($flag, get_line_content($l)); //echo " >>>> VALUE HTML [$value] ----- \n"; $flag=null; $htmlFlag=false; @@ -111,9 +204,10 @@ function download_card($name_search){ //Done parsing every line of the page. // Check if we found anything, otherwise it wasn't a card page. if (!$found_result){ - $cardObject["error"] = "No card with that name"; + $card->set_error('No card with that name'); } - return $cardObject; + return $card; } + ?> \ No newline at end of file diff --git a/card_object.php b/card_object.php new file mode 100644 index 0000000..f9d88cf --- /dev/null +++ b/card_object.php @@ -0,0 +1,47 @@ +c = null; + if (isset($fetchedArray)){ + foreach ($fetchedArray as $key => $value){ + if ($value){ + $this->set($key, $value); + } + } + } + $this->clear_error(); + } + + function set($property, $value){ + $this->c[$property] = $value; + } + function get($property){ + return $this->c[$property]; + } + function has($property){ + return isset($this->c[$property]); + } + function json(){ + return json_encode($this->c); + } + + function set_error($e){ + if ($this->no_error()){ + $this->c['error'] = $e; + } + } + function clear_error(){ + unset($this->c['error']); + } + function no_error(){ + return empty($this->c['error']); + } + +} +?> \ No newline at end of file diff --git a/create_card_table.sql b/create_card_table.sql index 140a3a9..0a07b98 100644 --- a/create_card_table.sql +++ b/create_card_table.sql @@ -1,11 +1,11 @@ create table mtg_cards ( ID SERIAL, primary key(ID), -name varchar(50) not null, +name varchar(141) not null, mana_cost varchar(50), converted_mana_cost int, types varchar(50) not null, -card_text varchar(255) not null, +card_text varchar(500) not null, flavor_text varchar(255), power_toughness varchar(10), expansion varchar(50), diff --git a/database_updates.sql b/database_updates.sql new file mode 100644 index 0000000..26f7752 --- /dev/null +++ b/database_updates.sql @@ -0,0 +1,5 @@ +alter table mtg_cards +modify column name varchar(141) not null + +alter table mtg_cards +modify column name varchar(500) not null \ No newline at end of file diff --git a/index.php b/index.php index d638aa0..3acaf0e 100644 --- a/index.php +++ b/index.php @@ -1,6 +1,6 @@ set_error('No paramaters in name search.'); } } else{ - $cardObject["error"] = "No search parameters. Use ?name="; + $card->set_error('No search parameters. Use ?name='); } if ($name_search){ - $cardObject = DB_card_exists($name_search, $db); - if ( $cardObject ){ - if ($metrics) { $cardObject["from_cache"] = "true"; } + $card = DB_existing_card($name_search, $db); + if ($card){ + if ($metrics) { $card->set('from_cache', 'true'); } } else{ - $cardObject = download_card($name_search); - - //If we got a real card, cache it and mark it. - if ( !isset($cardObject["error"]) ){ + $card = download_card($name_search); + //If we got a real card (no card errors), + // cache it, and add appropriate metadata + if ($card->no_error()){ //Save - $committed = DB_create_card($cardObject, $db); + $committed = DB_create_card($card, $db); if ($metrics){ - $cardObject["from_cache"] = "false"; - $cardObject["into_cache"] = $committed ? "success" : "failure"; + $card->set('from_cache', 'false'); + $card->set('into_cache', ($committed ? 'success' : 'failure')); } } } } -elseif (!$cardObject["error"]){ - $cardObject["error"] = "No results"; +else{ + $card->set_error('No results'); } -$cardObject["request_time"] = stopTiming() . " seconds"; +$card->set('request_time', stopTiming() . ' seconds'); header('content-Type: application/json'); header("Access-Control-Allow-Origin: *"); -echo json_encode($cardObject); +echo $card->json(); ?> \ No newline at end of file