> > PHP Crawler: -

PHP Crawler: -


27 2011, 15:55. : Mysterious Master
Crawler (), , , , - , .

Google, , - . , HTML, . -, . .

, , - , , . , , .

, : , . , . , , - . , , , , , ( ). , .

, !



PHP Linux crontab, .
MySQL - , .



, . , , Zappos ( !). , Nike Free Run+. fopen , fgets feof . fopen ( phpinfo). :

<?php
// Download the product page into $content so the following snippets can
// scan it for prices. Remote URLs only work with fopen() when the
// allow_url_fopen setting is enabled (check phpinfo()).
$fp = fopen("http://www.zappos.com/nike-free-run-black-victory-green-anthracite-white?zlfid=111", "r");
if ($fp === false) {
    return false; // the page could not be opened, nothing to crawl
}

$content = "";

// Loop on the result of fgets() rather than on feof() alone: if a read
// fails or times out without the stream reaching end-of-file, feof() keeps
// returning false and a feof()-only loop would never terminate.
while (($line = fgets($fp, 1024)) !== false) {
    $content .= $line;
}
fclose($fp); // the remote stream is no longer needed
?>

, $content, , , CSS JS, Zappos .

, .

? , , $, , $ .

. Zappos , . . javascript, .

Regex :

<?php
//our fopen, fgets here

// Collect every dollar amount found in $content. With PREG_SET_ORDER each
// entry of $prices is one match, and $prices[n][0] is the full matched text.
// The pattern accepts optional ",ddd" thousands groups so that amounts such
// as "$1,299.99" are matched as well as "$85.00".
preg_match_all("/([$][0-9]*(?:,[0-9]{3})*[.][0-9]{2})/", $content, $prices, PREG_SET_ORDER);

// The first amount on the page is treated as the main price; only print it
// when the page actually contained one.
if (!empty($prices)) {
    echo $prices[0][0]."<br />";
} else {
    echo "No price found<br />";
}
?>

, . . , Zappos .

MySQL

. zappos. 4 :

ID
Date , . , .
Value ,
Other_values , , , , - , .

phpmyadmin spy, zappos:

-- One row per crawl of the product page.
--   ID           auto-incremented primary key
--   Date         day the price was recorded
--   Value        main price found on the page
--   Other_Values the remaining prices found on the page, comma-separated
CREATE TABLE IF NOT EXISTS `zappos` (
`ID` int(5) NOT NULL AUTO_INCREMENT,
`Date` date NOT NULL,
`Value` float NOT NULL,
`Other_Values` char(100) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`ID`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=3;

, . , PHP, - .

, . , .
mysql_connect, mysql_select_db spy, mysql_query .

<?php

// Store today's scraped prices in the `zappos` table and then list every
// stored row. Expects $prices as filled by preg_match_all() with
// PREG_SET_ORDER (each $prices[n][0] is one matched price string).
// NOTE: this uses the legacy mysql_* extension, which was removed in PHP 7;
// port to mysqli or PDO with prepared statements when upgrading.

// Without at least one match there is no main price to store.
if (empty($prices)) {
    die('No price found on the page');
}

// Build the comma-terminated list of all prices found on the page, stripped
// of "$", thousands separators and spaces so each entry is a plain number
// that can later be split with explode(',').
$otherValues = "";
foreach ($prices as $price) {
    $otherValues .= str_replace(array("$", ",", " "), '', $price[0]);
    $otherValues .= ",";
}

//if someday Zappos changes his order (or you change the site you want to spy), just change here
$mainPrice = str_replace(array("$", ",", " "), '', $prices[0][0]);

// Current date in YYYY-MM-DD format, as expected by the `Date` column.
$date = date('Y-m-d');

$dbhost= 'localhost';
$dbuser= 'root';
$dbpass= '';
$dbname= "spy";
$dbtable = "zappos";

$conn = mysql_connect($dbhost, $dbuser, $dbpass)
    or die ('Error connecting to mysql');
echo "<br />Connected to MySQL<br />";

$selected = mysql_select_db($dbname, $conn)
    or die( mysql_error() );
echo "Connected to Database<br />";

// Escape every value that goes into the SQL text; the data originates from
// a remote page and must not be trusted to stay purely numeric.
$safeDate = mysql_real_escape_string($date, $conn);
$safePrice = mysql_real_escape_string($mainPrice, $conn);
$safeOthers = mysql_real_escape_string($otherValues, $conn);

//save data
$insert = mysql_query("
INSERT INTO `$dbname`.`$dbtable` (
`ID` ,
`Date` ,
`Value` ,
`Other_values`
)
VALUES (
NULL , '$safeDate', '$safePrice', '$safeOthers'
);
", $conn)
    or die( mysql_error() );

//get data
$results = mysql_query("SELECT * FROM $dbtable", $conn)
    or die( mysql_error() );

// Print the stored rows while the connection is still open.
while ($row = mysql_fetch_array($results, MYSQL_ASSOC)) {
    echo "ID :{$row['ID']} " .
        "Date : {$row['Date']} " .
        "Value : {$row['Value']}";
    echo "<br />";
}

// Release the connection only after the result set has been consumed.
mysql_close($conn);

?>

crontab

, crontab Linux, . , , , - , .

, , 1:00. net.tuts+ , cron, , , .

, , :

# Two alternative ways to run the crawler once a day. Install only ONE of
# them; with both active the script runs twice and stores two rows per day.

#here we load php and get the physical address of the file
#0 2 * * * says that it should run in minute zero, hour two, any day of month, any month and any day of week
0 2 * * * /usr/bin/php /www/virtual/username/cron.php > /dev/null 2>&1

#my favorite, with wget the page is processed as it were loaded in a common browser
#-q silences wget and -O /dev/null discards the response, so no cron.php.N files pile up and cron sends no mail
0 2 * * * wget -q -O /dev/null http://whereismycronjob/cron.php



PHP Crawler:  -

, . - , .

jQuery- gvChart. , . , . ( -):

<?php
// Render the 15 most recent stored prices as an HTML table with the id
// 'real': the header row holds the day of each sample, the body row holds
// the prices.
// NOTE: this uses the legacy mysql_* extension, which was removed in PHP 7.
$dbhost= 'localhost';
$dbuser= 'root';
$dbpass= '';
$dbname= "spy";
$dbtable = "zappos";

$conn = mysql_connect($dbhost, $dbuser, $dbpass)
    or die ('Error connecting to mysql');

$selected = mysql_select_db($dbname, $conn)
    or die( mysql_error() );

//get data
$results = mysql_query("SELECT * FROM $dbtable ORDER BY `ID` DESC LIMIT 15", $conn)
    or die( mysql_error() );

// Copy the rows into plain arrays while the connection is still open.
$dates= array();
$values = array();
while ($row = mysql_fetch_array($results, MYSQL_ASSOC)) {
    $dates[] = "{$row['Date']}";
    $values[] = "{$row['Value']}";
}

mysql_close($conn);

echo "<table id='real'>";
echo "<caption>Real Prices on Zappos.com</caption>";
echo "<thead>";
echo "<tr>";
echo "<th></th>";
// Label for the data row: "YYYY-MM" of the last date printed in the header.
// It stays empty when the table has no rows, instead of reading an
// undefined variable.
$period = "";
foreach ($dates as $date) {
    $parts = explode('-', $date);
    echo "<th>" . $parts[2] . "</th>"; // day of month
    $period = $parts[0] . "-" . $parts[1];
}
echo "</tr>";
echo "</thead>";
echo "<tbody>";
echo "<tr>";
echo "<th>" . $period . "</th>";
foreach ($values as $value) {
    echo "<td>" . $value . "</td>";
}
echo "</tr>";
echo "</tbody>";
echo "</table>"; // the original markup never closed the table
?>

?

, . , , url-, .

? ...

! .