From e362e8beb12b18b2ef1988770128fbdaec9e265d Mon Sep 17 00:00:00 2001 From: Stephan Poehlsen Date: Wed, 23 Nov 2005 02:25:01 +0000 Subject: erste Version vom Webinterface git-svn-id: file:///home/lennart/svn/public/sse/trunk@34 5fbabb74-0606-0410-a5e4-b5cc6a42724e --- sse.sql | 2 +- www/.htaccess | 3 + www/_main.inc.php | 33 +++++++++ www/file.php | 123 ++++++++++++++++++++++++++++++++ www/index.php | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 www/.htaccess create mode 100644 www/_main.inc.php create mode 100644 www/file.php create mode 100644 www/index.php diff --git a/sse.sql b/sse.sql index 03117dd..86a39d6 100644 --- a/sse.sql +++ b/sse.sql @@ -67,5 +67,5 @@ CREATE TABLE crawler ( ) ENGINE=InnoDB; INSERT INTO crawler (id, name, url) VALUES - (1, "Tango", "http://poehlsen.org/sse/"), + (1, "Tango", "http://www.poehlsen.org/sse/"), (2, "Soleil", "http://soleil.ethium.org:8000/sse/"); diff --git a/www/.htaccess b/www/.htaccess new file mode 100644 index 0000000..8f64995 --- /dev/null +++ b/www/.htaccess @@ -0,0 +1,3 @@ +RewriteEngine on +RewriteRule show/ file.php +RewriteRule txt/ file.php diff --git a/www/_main.inc.php b/www/_main.inc.php new file mode 100644 index 0000000..9068d69 --- /dev/null +++ b/www/_main.inc.php @@ -0,0 +1,33 @@ + $v) { + $n[stripslashes($k)] = stripslashes_deep($v); + } + return $n; +} + +if (get_magic_quotes_gpc()) { + $_REQUEST = array_map('stripslashes_deep', $_REQUEST); + $_POST = array_map('stripslashes_deep', $_POST); + $_GET = array_map('stripslashes_deep', $_GET); + $_COOKIE = array_map('stripslashes_deep', $_COOKIE); +} + +$num_default = 25; + +?> \ No newline at end of file diff --git a/www/file.php b/www/file.php new file mode 100644 index 0000000..8949c05 --- /dev/null +++ b/www/file.php @@ -0,0 +1,123 @@ +'.$sql.mysql_errno()); +} +if (!($r = mysql_fetch_array($res))) { + header('Status: 404 Not Found'); + echo('404 Not found!'); + exit; +} + + + +$file = sprintf($r['ppath'], $r['fpath']); + + + +////////////////////////////////////////////////////////////////////////// +// txt + +if ($format == 'txt') { + header('Content-type: text/plain'); + + $fh = @fopen($file, 'r'); + if (!$fh) die('failed to open file'); + + while (!feof($fh)) { + $n = rtrim(fgets($fh, 4096)); + $n = strtr($n, "\x1\x2\x3\x4\x5\x6\x7\x8\xa\xb\xc\xd\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + ' '); + echo($n."\n"); + } + @fclose($fh); + + exit; +} + + + + +////////////////////////////////////////////////////////////////////////// +// show +echo << + + + +Source Search Engine + + + +PRINTHEADER; + +$q = (isset($_REQUEST['q']) ? $_REQUEST['q'] : ''); +$s = split_q($q); +$pattern = array(); +$replacement = array(); +$i = 0; +foreach ($s as $v) { + $pattern[] = '/('.$v.')/i'; + $replacement[] = '$1'; + $i++; +} + +$fh = @fopen($file, 'r'); +if (!$fh) die('failed to open file'); + +$found = false; +$i = 0; +echo('
    '); +while (!feof($fh)) { + echo('
  1. '); + $n = fgets($fh, 4096); + $n = strtr($n, "\x1\x2\x3\x4\x5\x6\x7\x8\xa\xb\xc\xd\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + ' '); + $n = htmlentities($n); + $o = preg_replace($pattern, $replacement, $n); + if (!$found and $o != $n) { + $found = true; + echo(''); + } + $i++; + //printf("%4d: %s\n", $i, $i, $i, rtrim($o)); + $code = rtrim($o); + if (!$code) $code = ' '; + printf("
    %s
  2. \n", $i, $code); + +} +echo('
'); +fclose($fh); + +echo << + +PRINTFOOTER; + +?> \ No newline at end of file diff --git a/www/index.php b/www/index.php new file mode 100644 index 0000000..235266c --- /dev/null +++ b/www/index.php @@ -0,0 +1,210 @@ + 100) $t = 100; + if ($t > 0) $num = $t; +} + +// start with result XXX +$start = 0; +if (isset($_REQUEST['start'])) { + $t = intval($_REQUEST['start']); + if ($t > 0) $start = $t; +} + + + +////////////////////////////////////////////////////////////////////////// +// create form fields + +$title = 'Source Search Engine'.($q ? ' - '.htmlentities($q) : ''); + +$ME = $_SERVER['PHP_SELF']; +$escaped_q = htmlentities($q); +$subword_checked = ($subword ? ' checked="checked" ' : ''); +$case_checked = ($case ? ' checked="checked" ' : ''); +$hidden_form = ($num != $num_default ? '' : ''); + +// prettiness hack +$ME = preg_replace('/index\.php$/', '', $ME); + +echo << + + + +$title + + + +
+
$hidden_form +

+

+
+
+ +ECHOFORM; + + + +////////////////////////////////////////////////////////////////////////// +// create sql query + +$sql = ''; +while ($q) { + + $s = split_q($q); + + if (!count($s)) { + break; + } + + if (count($s) == 1) { + $where = ($subword + ? " w.text LIKE '".db_escape($s[0])."%' " + : " w.text='".db_escape($s[0])."' AND w.type='word' " + ); + } else { + $where_parts = array(); + foreach ($s as $e) { + $where_parts[] = ($subword + ? "w.text LIKE '".db_escape($e)."%'" + : "w.text='".db_escape($e)."'"); + } + + $where = " (".implode(' OR ', $where_parts).") ". + ($subword ? '' : " AND w.type='word' "); + } + + + $sql = "SELECT COUNT(*)/COUNT(DISTINCT pr.id) AS keywords_cnt, ". + " SUM(w.cnt)/COUNT(DISTINCT pr.id) AS cnt, " . + " f.path, f.language_id, pr.id AS prid, pr.name AS package_name ". + " FROM word AS w, file AS f, package AS p, provider_record AS pr ". + " WHERE p.id=pr.package_id AND ".$where. + " AND w.file_id=f.id AND f.package_id=p.id ". + " AND f.crawler_id=w.crawler_id AND f.crawler_id=p.crawler_id ". + " GROUP BY f.crawler_id, f.id ". + " ORDER BY w.cnt DESC "; + + break; +} + + + + +////////////////////////////////////////////////////////////////////////// +// query DB + +$total = 0; +while (isset($sql) and $sql) { + + db_connect(); + + // use LIMIT ==> two queries: 1. total count, 2. results + // or mysql query cache ==> one query (dump data) + + $res = mysql_query($sql); + if ($res === false) { + echo('

'.mysql_errno().': '.mysql_error()."
\n".$sql."

\n"); + break; + } + $total = mysql_num_rows($res); + break; +} + + +$max_page = max(1, ceil($total/$num)); +$max_start = ($max_page-1)*$num; + +// behind last result page +if ($start > $max_start) $start = $max_start; + +// start not aligned +//$start = floor($start/$num)*$num; + + + + + +////////////////////////////////////////////////////////////////////////// +// show results + +if ($total) { + + echo('

Results: '.$total."

\n". + '
    '."\n"); + + $i = 0; + while ($r = mysql_fetch_array($res)) { + //var_dump($r); exit; + if ($i == $start+$num) break; + $i++; + if ($i <= $start) continue; + + + $r['keywords_cnt'] = intval($r['keywords_cnt']); + if ($r['keywords_cnt'] < 2) $r['keywords_cnt'] = ''; + echo('
  • '.htmlentities($r['package_name']).': '. + ''. + htmlentities($r['path']).' '. + '('.intval($r['cnt']).' hits'. + ($r['keywords_cnt'] ? ' - '.$r['keywords_cnt'].' different words' : ''). + ')'."
  • \n"); + } + + echo("
\n"); +} + +////////////////////////////////////////////////////////////////////////// +// links +if ($max_page > 1) { + + $l = $ME.'?q='.urlencode($q). + ($case ? '&case=1' : ''). + ($subword ? '&subword=1' : ''). + ($num != $num_default ? '&num='.$num : ''). + '&start='; + + echo('\n"); + +} + + + + +echo("\n"); +?> \ No newline at end of file -- cgit