diff options
author | Stephan Poehlsen <stephan@poehlsen.net> | 2005-11-23 02:25:01 +0000 |
---|---|---|
committer | Stephan Poehlsen <stephan@poehlsen.net> | 2005-11-23 02:25:01 +0000 |
commit | e362e8beb12b18b2ef1988770128fbdaec9e265d (patch) | |
tree | 0dcc220d7900c4caf3cfc6adcd57915db0864c9b | |
parent | 72af90dbe17e3e0a1534f85e3f7af4e49ec7ac0f (diff) |
erste Version vom Webinterface
git-svn-id: file:///home/lennart/svn/public/sse/trunk@34 5fbabb74-0606-0410-a5e4-b5cc6a42724e
-rw-r--r-- | sse.sql | 2 | ||||
-rw-r--r-- | www/.htaccess | 3 | ||||
-rw-r--r-- | www/_main.inc.php | 33 | ||||
-rw-r--r-- | www/file.php | 123 | ||||
-rw-r--r-- | www/index.php | 210 |
5 files changed, 370 insertions, 1 deletions
@@ -67,5 +67,5 @@ CREATE TABLE crawler (
 ) ENGINE=InnoDB;
 
 INSERT INTO crawler (id, name, url) VALUES
-  (1, "Tango", "http://poehlsen.org/sse/"),
+  (1, "Tango", "http://www.poehlsen.org/sse/"),
   (2, "Soleil", "http://soleil.ethium.org:8000/sse/");
diff --git a/www/.htaccess b/www/.htaccess
new file mode 100644
index 0000000..8f64995
--- /dev/null
+++ b/www/.htaccess
@@ -0,0 +1,3 @@
+RewriteEngine on
+RewriteRule show/ file.php
+RewriteRule txt/ file.php
diff --git a/www/_main.inc.php b/www/_main.inc.php
new file mode 100644
index 0000000..9068d69
--- /dev/null
+++ b/www/_main.inc.php
@@ -0,0 +1,72 @@
+<?php
+/**
+ * Shared helpers for the Source Search Engine web interface: database
+ * access, search string tokenising and magic-quotes clean-up.
+ */
+
+/**
+ * Escape a value for use inside a quoted SQL string literal.
+ *
+ * @param string $s raw value
+ * @return string escaped value, without surrounding quotes
+ */
+function db_escape($s) { return mysql_escape_string($s); }
+
+/**
+ * Open a persistent connection to the MySQL server on localhost and
+ * select the "sse" database.
+ *
+ * @return bool true on success, false when connecting or selecting the
+ *              database failed
+ */
+function db_connect() {
+    // NOTE(review): the credentials are hard-coded in this file.
+    if (!mysql_pconnect('localhost', 'sse_ro', 'Shu5ahdo')) {
+        return false;
+    }
+    return mysql_select_db('sse');
+}
+
+/**
+ * Split a search string into keywords.
+ *
+ * Everything except ASCII letters, digits, "_" and whitespace is removed,
+ * then the remainder is split on whitespace.
+ *
+ * @param string $q raw search string
+ * @return array list of non-empty keywords
+ */
+function split_q($q) {
+    $q = preg_replace('/[^a-zA-Z0-9_\s]/', '', trim($q));
+    return preg_split('/\s/', $q, -1, PREG_SPLIT_NO_EMPTY);
+}
+
+/**
+ * Apply stripslashes() to a string, or to every key and value of a
+ * (possibly nested) array.
+ *
+ * @param mixed $value string or array
+ * @return mixed the unescaped copy
+ */
+function stripslashes_deep($value) {
+    if (!is_array($value)) return stripslashes($value);
+    $n = array();
+    foreach ($value as $k => $v) {
+        $n[stripslashes($k)] = stripslashes_deep($v);
+    }
+    return $n;
+}
+
+// With magic_quotes_gpc enabled the request data arrives with added
+// slashes; remove them so the rest of the code sees the raw values.
+if (get_magic_quotes_gpc()) {
+    $_REQUEST = array_map('stripslashes_deep', $_REQUEST);
+    $_POST = array_map('stripslashes_deep', $_POST);
+    $_GET = array_map('stripslashes_deep', $_GET);
+    $_COOKIE = array_map('stripslashes_deep', $_COOKIE);
+}
+
+// Default number of results per page.
+$num_default = 25;
+
+?>
\ No newline at end of file
diff --git a/www/file.php b/www/file.php
new file mode 100644
index 0000000..8949c05
--- /dev/null
+++ b/www/file.php
@@ -0,0 +1,165 @@
+<?php
+/**
+ * Source file viewer of the Source Search Engine.
+ *
+ * /show/<provider record id>/<path>?q=... prints the file as a numbered
+ * XHTML listing with the search keywords highlighted;
+ * /txt/<provider record id>/<path> prints it as plain text.
+ */
+error_reporting(E_ALL);
+require_once('_main.inc.php');
+
+/**
+ * Replace the control bytes 0x01-0x08 and 0x0a-0x1f by spaces; tabs are kept.
+ *
+ * @param string $line raw line
+ * @return string the line with those bytes blanked
+ */
+function blank_control_chars($line) {
+    // strtr() only maps as many bytes as the shorter of its two strings,
+    // so a character class is used to cover the whole range.
+    return preg_replace('/[\x01-\x08\x0a-\x1f]/', ' ', $line);
+}
+
+/**
+ * HTML-escape one line and wrap every keyword match in
+ * <span class="foundN">, N being the keyword's position modulo 5.
+ *
+ * The raw text is split at the matches and each piece is escaped on its
+ * own, so a keyword can neither match inside an entity such as "&amp;"
+ * nor inside the markup added for another keyword.
+ *
+ * @param string $line     raw line
+ * @param array  $keywords lower-case keywords
+ * @return string HTML fragment
+ */
+function highlight_line($line, $keywords) {
+    if (!count($keywords)) return htmlentities($line);
+
+    $alternatives = array();
+    foreach ($keywords as $k) {
+        $alternatives[] = preg_quote($k, '/');
+    }
+    // With one capture group the pieces alternate: text, match, text, ...
+    $parts = preg_split('/('.implode('|', $alternatives).')/i', $line, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+    $html = '';
+    foreach ($parts as $idx => $part) {
+        if ($idx % 2 == 0) {
+            $html .= htmlentities($part);
+        } else {
+            $class = array_search(strtolower($part), $keywords) % 5;
+            $html .= '<span class="found'.$class.'">'.htmlentities($part).'</span>';
+        }
+    }
+    return $html;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// locate the file
+
+$format = 'show';
+$prid = '';
+$f_path = '';
+if (preg_match('/\/(show|txt)\/([^\:]+\:[^\/]+)\/([^\?]+)/', $_SERVER['REQUEST_URI'], $mat)) {
+    $format = $mat[1];
+    // REQUEST_URI is still percent-encoded; decode before querying the database.
+    $prid = rawurldecode($mat[2]);
+    $f_path = rawurldecode($mat[3]);
+}
+
+db_connect();
+$query = "SELECT p.path AS ppath, f.path AS fpath ".
+" FROM provider_record AS pr, package AS p, file AS f ".
+" WHERE pr.id='".db_escape($prid)."' AND pr.package_id=p.id AND f.package_id=p.id AND f.path='".db_escape($f_path)."'";
+
+$res = mysql_query($query);
+if ($res === false) {
+    die(mysql_error().'<br />'.$query.mysql_errno());
+}
+if (!($r = mysql_fetch_array($res))) {
+    // The "HTTP/" form makes PHP set the response code itself; a plain
+    // "Status:" header is only interpreted by CGI servers.
+    header('HTTP/1.0 404 Not Found');
+    echo('404 Not found!');
+    exit;
+}
+
+// NOTE(review): package.path is used as a printf() template - presumably
+// it holds a %s placeholder for the file path; confirm against the crawler.
+$file = sprintf($r['ppath'], $r['fpath']);
+
+// Open before any output so a failure does not leave half a page behind.
+$fh = @fopen($file, 'r');
+if (!$fh) die('failed to open file');
+
+
+//////////////////////////////////////////////////////////////////////////
+// txt
+
+if ($format == 'txt') {
+    header('Content-type: text/plain');
+
+    // Loop on the fgets() result rather than feof() so that no extra empty
+    // line is printed after the last one.
+    while (($n = fgets($fh, 4096)) !== false) {
+        echo(blank_control_chars(rtrim($n))."\n");
+    }
+    fclose($fh);
+
+    exit;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// show
+echo <<<PRINTHEADER
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"/>
+<title>Source Search Engine</title>
+<style type="text/css">
+ol {font-size:12px;margin:0;padding:0;}
+li {margin:0 0 0 50px;padding:0;}
+pre {font-size:12px;padding:0;margin:0;}
+.found0 {background-color:#ffff00;font-weight:bold;}
+.found1 {background-color:#ff66ff;font-weight:bold;}
+.found2 {background-color:#ff9966;font-weight:bold;}
+.found3 {background-color:#00ff00;font-weight:bold;}
+.found4 {background-color:#00ffff;font-weight:bold;}
+</style>
+</head>
+<body>
+PRINTHEADER;
+
+$q = ((isset($_REQUEST['q']) and is_string($_REQUEST['q'])) ? $_REQUEST['q'] : '');
+$keywords = array_map('strtolower', split_q($q));
+
+$found = false;
+$i = 0;
+echo('<ol>');
+while (($n = fgets($fh, 4096)) !== false) {
+    $i++;
+    $code = rtrim(highlight_line(blank_control_chars($n), $keywords));
+
+    echo('<li>');
+    // anchor for the "#first" links of the result list: first line with a match
+    if (!$found and strpos($code, '<span class="found') !== false) {
+        $found = true;
+        echo('<a id="first"></a>');
+    }
+    // compare with '' so that a line consisting of "0" is not blanked
+    if ($code === '') $code = ' ';
+    printf("<a id=\"l%d\"></a><pre>%s</pre></li>\n", $i, $code);
+}
+echo('</ol>');
+fclose($fh);
+
+echo <<<PRINTFOOTER
+</body>
+</html>
+PRINTFOOTER;
+
+?>
\ No newline at end of file
diff --git a/www/index.php b/www/index.php
new file mode 100644
index 0000000..235266c
--- /dev/null
+++ b/www/index.php
@@ -0,0 +1,199 @@
+<?php
+/**
+ * Search page of the Source Search Engine.
+ *
+ * Reads the request parameters q, subword, num and start, prints the
+ * search form, looks the keywords up in the word table and prints one
+ * page of matching files followed by pagination links.
+ */
+error_reporting(E_ALL);
+require_once('_main.inc.php');
+
+/**
+ * Build the SQL comparison of w.text against one keyword.
+ *
+ * @param string $word    keyword as returned by split_q()
+ * @param mixed  $subword truthy for a prefix match, falsy for an exact match
+ * @return string SQL fragment
+ */
+function word_condition($word, $subword) {
+    if (!$subword) {
+        return "w.text='".db_escape($word)."'";
+    }
+    // split_q() lets "_" through, which LIKE treats as a one-character
+    // wildcard; escape it (and "%") so that it only matches itself.
+    return "w.text LIKE '".addcslashes(db_escape($word), '%_')."%'";
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// parse arguments
+
+// text: search field; anything but a plain string (e.g. q[]=x) counts as empty
+$q = ((isset($_REQUEST['q']) and is_string($_REQUEST['q'])) ? $_REQUEST['q'] : '');
+$keywords = split_q($q);
+
+// checkbox: subword
+$subword = ((isset($_REQUEST['subword']) and $_REQUEST['subword'])
+    ? 'checked' : false);
+
+// case-sensitive search is not offered by the form and stays switched off
+$case = false;
+
+// results per page, at most 100
+$num = $num_default;
+if (isset($_REQUEST['num'])) {
+    $t = intval($_REQUEST['num']);
+    if ($t > 100) $t = 100;
+    if ($t > 0) $num = $t;
+}
+
+// start with result XXX
+$start = 0;
+if (isset($_REQUEST['start'])) {
+    $t = intval($_REQUEST['start']);
+    if ($t > 0) $start = $t;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// create form fields
+
+$escaped_q = htmlentities($q);
+$title = 'Source Search Engine'.($q !== '' ? ' - '.$escaped_q : '');
+
+// PHP_SELF can carry client-supplied path info, so it is escaped before
+// it is printed into attributes; a trailing "index.php" is dropped for looks.
+$ME = htmlentities(preg_replace('/index\.php$/', '', $_SERVER['PHP_SELF']));
+$subword_checked = ($subword ? ' checked="checked" ' : '');
+$hidden_form = ($num != $num_default ? '<input type="hidden" name="num" value="'.$num.'" />' : '');
+
+echo <<<ECHOFORM
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"/>
+<title>$title</title>
+</head>
+<body>
+
+<div class="formbox">
+<form action="$ME" method="get">$hidden_form
+<p><input type="text" name="q" value="$escaped_q" /> <input type="submit" value="Search" /></p>
+<p><input type="checkbox" id="subword" name="subword" value="1" $subword_checked /> <label for="subword">subwords</label></p>
+</form>
+</div>
+
+ECHOFORM;
+
+
+//////////////////////////////////////////////////////////////////////////
+// create sql query
+
+$sql = '';
+if (count($keywords)) {
+    $where_parts = array();
+    foreach ($keywords as $e) {
+        $where_parts[] = word_condition($e, $subword);
+    }
+    $where = " (".implode(' OR ', $where_parts).") ".
+        ($subword ? '' : " AND w.type='word' ");
+
+    // NOTE(review): ORDER BY w.cnt sorts by the value of one row per group;
+    // the aggregated "cnt" column may have been intended - confirm.
+    $sql = "SELECT COUNT(*)/COUNT(DISTINCT pr.id) AS keywords_cnt, ".
+        " SUM(w.cnt)/COUNT(DISTINCT pr.id) AS cnt, " .
+        " f.path, f.language_id, pr.id AS prid, pr.name AS package_name ".
+        " FROM word AS w, file AS f, package AS p, provider_record AS pr ".
+        " WHERE p.id=pr.package_id AND ".$where.
+        " AND w.file_id=f.id AND f.package_id=p.id ".
+        " AND f.crawler_id=w.crawler_id AND f.crawler_id=p.crawler_id ".
+        " GROUP BY f.crawler_id, f.id ".
+        " ORDER BY w.cnt DESC ";
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// query DB
+
+$total = 0;
+$res = false;
+if ($sql) {
+    db_connect();
+
+    // use LIMIT ==> two queries: 1. total count, 2. results
+    // or mysql query cache ==> one query (dump data)
+    $res = mysql_query($sql);
+    if ($res === false) {
+        echo('<p class="error">'.mysql_errno().': '.htmlentities(mysql_error())."<br />\n".htmlentities($sql)."</p>\n");
+    } else {
+        $total = mysql_num_rows($res);
+    }
+}
+
+$max_page = max(1, (int) ceil($total/$num));
+$max_start = ($max_page-1)*$num;
+
+// align to a page boundary so that the page numbers below stay whole
+$start -= $start % $num;
+
+// behind last result page
+if ($start > $max_start) $start = $max_start;
+
+
+//////////////////////////////////////////////////////////////////////////
+// show results
+
+if ($total) {
+
+    echo('<p>Results: '.$total."</p>\n".
+        '<ul class="results">'."\n");
+
+    $i = 0;
+    while ($r = mysql_fetch_array($res)) {
+        if ($i == $start+$num) break;
+        $i++;
+        if ($i <= $start) continue;
+
+        $r['keywords_cnt'] = intval($r['keywords_cnt']);
+        if ($r['keywords_cnt'] < 2) $r['keywords_cnt'] = '';
+        // q goes into a query string, hence urlencode() rather than htmlentities()
+        echo('<li>'.htmlentities($r['package_name']).': '.
+            '<a href="show/'.htmlentities($r['prid']).'/'.htmlentities($r['path']).'?q='.urlencode($q).'#first">'.
+            htmlentities($r['path']).'</a> '.
+            '('.intval($r['cnt']).' hits'.
+            ($r['keywords_cnt'] ? ' - '.$r['keywords_cnt'].' different words' : '').
+            ')'."</li>\n");
+    }
+
+    echo("</ul>\n");
+}
+
+//////////////////////////////////////////////////////////////////////////
+// links
+if ($max_page > 1) {
+
+    // the URL is printed into href attributes, so "&" is written as "&amp;"
+    $l = $ME.'?q='.urlencode($q).
+        ($case ? '&amp;case=1' : '').
+        ($subword ? '&amp;subword=1' : '').
+        ($num != $num_default ? '&amp;num='.$num : '').
+        '&amp;start=';
+
+    echo('<ul class="links">'."\n".
+        ($start ? '<li><a href="'.$l.'0">first</a>'."</li>\n" : '').
+        ($start ? '<li><a href="'.$l.max(0, $start-$num).'">previous</a>'."</li>\n" : ''));
+
+    $lnkcnt = 10;
+    for ($i = max(0,$start-($lnkcnt*$num)); $i <= min($max_start, $start+($lnkcnt*$num)); $i+=$num) {
+        echo('<li><a href="'.$l.$i.'"'.($i == $start ? ' class="active" ' : '').'>'.(($i/$num)+1).'</a>'."</li>\n");
+    }
+
+    echo(($start < $max_start ? '<li><a href="'.$l.min($max_start, $start+$num).'">next</a>'."</li>\n" : '').
+        ($start < $max_start ? '<li><a href="'.$l.$max_start.'">last</a>'."</li>\n" : '').
+        "</ul>\n");
+
+}
+
+echo("</body>\n</html>");
+?>
\ No newline at end of file |