summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephan Poehlsen <stephan@poehlsen.net>2005-11-23 02:25:01 +0000
committerStephan Poehlsen <stephan@poehlsen.net>2005-11-23 02:25:01 +0000
commite362e8beb12b18b2ef1988770128fbdaec9e265d (patch)
tree0dcc220d7900c4caf3cfc6adcd57915db0864c9b
parent72af90dbe17e3e0a1534f85e3f7af4e49ec7ac0f (diff)
erste Version vom Webinterface
git-svn-id: file:///home/lennart/svn/public/sse/trunk@34 5fbabb74-0606-0410-a5e4-b5cc6a42724e
-rw-r--r--sse.sql2
-rw-r--r--www/.htaccess3
-rw-r--r--www/_main.inc.php33
-rw-r--r--www/file.php123
-rw-r--r--www/index.php210
5 files changed, 370 insertions, 1 deletions
diff --git a/sse.sql b/sse.sql
index 03117dd..86a39d6 100644
--- a/sse.sql
+++ b/sse.sql
@@ -67,5 +67,5 @@ CREATE TABLE crawler (
) ENGINE=InnoDB;
INSERT INTO crawler (id, name, url) VALUES
- (1, "Tango", "http://poehlsen.org/sse/"),
+ (1, "Tango", "http://www.poehlsen.org/sse/"),
(2, "Soleil", "http://soleil.ethium.org:8000/sse/");
diff --git a/www/.htaccess b/www/.htaccess
new file mode 100644
index 0000000..8f64995
--- /dev/null
+++ b/www/.htaccess
@@ -0,0 +1,3 @@
+RewriteEngine on
+RewriteRule show/ file.php
+RewriteRule txt/ file.php
diff --git a/www/_main.inc.php b/www/_main.inc.php
new file mode 100644
index 0000000..9068d69
--- /dev/null
+++ b/www/_main.inc.php
@@ -0,0 +1,33 @@
+<?php
+
+function db_escape($s) { return mysql_escape_string($s); }
+
+function db_connect() {
+ mysql_pconnect('localhost', 'sse_ro', 'Shu5ahdo');
+ mysql_select_db('sse');
+}
+
+function split_q($q) {
+ $q = preg_replace('/[^a-zA-Z0-9_\s]/', '', trim($q));
+ return preg_split('/\s/', $q, -1, PREG_SPLIT_NO_EMPTY);
+}
+
+function stripslashes_deep($value) {
+ if (!is_array($value)) return stripslashes($value);
+ $n = array();
+ foreach ($value as $k => $v) {
+ $n[stripslashes($k)] = stripslashes_deep($v);
+ }
+ return $n;
+}
+
+if (get_magic_quotes_gpc()) {
+ $_REQUEST = array_map('stripslashes_deep', $_REQUEST);
+ $_POST = array_map('stripslashes_deep', $_POST);
+ $_GET = array_map('stripslashes_deep', $_GET);
+ $_COOKIE = array_map('stripslashes_deep', $_COOKIE);
+}
+
+$num_default = 25;
+
+?> \ No newline at end of file
diff --git a/www/file.php b/www/file.php
new file mode 100644
index 0000000..8949c05
--- /dev/null
+++ b/www/file.php
@@ -0,0 +1,123 @@
+<?php
+error_reporting(E_ALL);
+require_once('_main.inc.php');
+
+$format = 'show';
+$prid = '';
+$f_path = '';
+if (preg_match('/\/(show|txt)\/([^\:]+\:[^\/]+)\/([^\?]+)/', $_SERVER['REQUEST_URI'], $mat)) {
+ $format = $mat[1];
+ $prid = $mat[2];
+ $f_path = $mat[3];
+}
+
+db_connect();
+$query = "SELECT p.path AS ppath, f.path AS fpath ".
+" FROM provider_record AS pr, package AS p, file AS f ".
+" WHERE pr.id='".db_escape($prid)."' AND pr.package_id=p.id AND f.package_id=p.id AND f.path='".db_escape($f_path)."'";
+
+$res = mysql_query($query); // look up the requested file path for the given provider record
+if ($res === false) {
+ die(mysql_error().'<br />'.$query.mysql_errno()); // the statement is held in $query; $sql is never defined in this script
+}
+if (!($r = mysql_fetch_array($res))) { // no row: the provider record / file path pair is unknown
+ header('HTTP/1.0 404 Not Found'); // an "HTTP/" status line sets the response code under every SAPI; a bare "Status:" header is CGI-specific
+ echo('404 Not found!');
+ exit;
+}
+
+
+
+$file = sprintf($r['ppath'], $r['fpath']);
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// txt
+
+if ($format == 'txt') {
+ header('Content-type: text/plain');
+
+ $fh = @fopen($file, 'r');
+ if (!$fh) die('failed to open file');
+
+ while (($line = fgets($fh, 4096)) !== false) { // fgets() returns false at EOF, so no phantom blank line follows the last one
+ $n = rtrim($line);
+ $n = strtr($n, "\x1\x2\x3\x4\x5\x6\x7\x8\xa\xb\xc\xd\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+ str_repeat(' ', 30)); // one blank per listed control character; strtr() ignores the surplus of the longer argument
+ echo($n."\n");
+ }
+ @fclose($fh);
+
+ exit;
+}
+
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// show
+echo <<<PRINTHEADER
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"/>
+<title>Source Search Engine</title>
+<style type="text/css">
+ol {font-size:12px;margin:0;padding:0;}
+li {margin:0 0 0 50px;padding:0;}
+pre {font-size:12px;padding:0;margin:0;}
+.found0 {background-color:#ffff00;font-weight:bold;}
+.found1 {background-color:#ff66ff;font-weight:bold;}
+.found2 {background-color:#ff9966;font-weight:bold;}
+.found3 {background-color:#00ff00;font-weight:bold;}
+.found4 {background-color:#00ffff;font-weight:bold;}
+</style>
+</head>
+<body>
+PRINTHEADER;
+
+$q = (isset($_REQUEST['q']) ? $_REQUEST['q'] : '');
+$s = split_q($q);
+$pattern = array();
+$replacement = array();
+$i = 0;
+foreach ($s as $v) {
+ $pattern[] = '/('.$v.')/i';
+ $replacement[] = '<span class="found'.($i%5).'">$1</span>';
+ $i++;
+}
+
+$fh = @fopen($file, 'r');
+if (!$fh) die('failed to open file');
+
+$found = false;
+$i = 0;
+echo('<ol>');
+// fgets() returns false at EOF; testing its result avoids an empty trailing <li> after the last line
+while (($n = fgets($fh, 4096)) !== false) {
+ echo('<li>');
+ $n = strtr($n, "\x1\x2\x3\x4\x5\x6\x7\x8\xa\xb\xc\xd\xe\xf\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+ str_repeat(' ', 30)); // one blank per listed control character (\x9, tab, is not in the list)
+ $n = htmlentities($n);
+ $o = preg_replace($pattern, $replacement, $n); // wrap each search term in its "foundN" span
+ if (!$found and $o != $n) {
+ $found = true;
+ echo('<a id="first"></a>'); // anchor for the first line that contains a hit
+ }
+ $i++;
+ // $i is the 1-based line number, exposed below as the anchor id "l<N>"
+ $code = rtrim($o);
+ if ($code === '') $code = '&nbsp;'; // strict test: a line holding just "0" is falsy but not empty
+ printf("<a id=\"l%d\"></a><pre>%s</pre></li>\n", $i, $code);
+
+}
+echo('</ol>');
+fclose($fh);
+
+echo <<<PRINTFOOTER
+</body>
+</html>
+PRINTFOOTER;
+
+?> \ No newline at end of file
diff --git a/www/index.php b/www/index.php
new file mode 100644
index 0000000..235266c
--- /dev/null
+++ b/www/index.php
@@ -0,0 +1,210 @@
+<?php
+error_reporting(E_ALL);
+require_once('_main.inc.php');
+
+
+//////////////////////////////////////////////////////////////////////////
+// parse arguments
+
+// text: search field
+$q = (isset($_REQUEST['q']) ? $_REQUEST['q'] : '');
+
+// checkbox: subword
+$subword = ((isset($_REQUEST['subword']) and $_REQUEST['subword'])
+ ? 'checked' : false);
+
+// checkbox: casesensitiv
+$case = ((isset($_REQUEST['case']) and $_REQUEST['case'])
+ ? 'checked' : false);
+$case = false;
+
+// results per page
+$num = $num_default;
+if (isset($_REQUEST['num'])) {
+ $t = intval($_REQUEST['num']);
+ if ($t > 100) $t = 100;
+ if ($t > 0) $num = $t;
+}
+
+// start with result XXX
+$start = 0;
+if (isset($_REQUEST['start'])) {
+ $t = intval($_REQUEST['start']);
+ if ($t > 0) $start = $t;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// create form fields
+
+$title = 'Source Search Engine'.($q ? ' - '.htmlentities($q) : '');
+
+$ME = $_SERVER['PHP_SELF']; // request-derived: may carry client-chosen path info after the script name
+$escaped_q = htmlentities($q);
+$subword_checked = ($subword ? ' checked="checked" ' : '');
+$case_checked = ($case ? ' checked="checked" ' : '');
+$hidden_form = ($num != $num_default ? '<input type="hidden" name="num" value="'.$num.'" />' : '');
+
+// prettiness hack: drop a trailing "index.php", then escape because $ME is echoed into action="" and href="" attributes
+$ME = htmlentities(preg_replace('/index\.php$/', '', $ME));
+
+echo <<<ECHOFORM
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"/>
+<title>$title</title>
+</head>
+<body>
+
+<div class="formbox">
+<form action="$ME" method="get">$hidden_form
+<p><input type="text" name="q" value="$escaped_q" /> <input type="submit" value="Search" /></p>
+<p><input type="checkbox" id="subword" name="subword" value="1" $subword_checked /> <label for="subword">subwords</label></p>
+</form>
+</div>
+
+ECHOFORM;
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// create sql query
+
+$sql = '';
+while ($q) {
+
+ $s = split_q($q);
+
+ if (!count($s)) {
+ break;
+ }
+
+ if (count($s) == 1) { // single term: one comparison, no OR group
+ $where = ($subword
+ ? " w.text LIKE '".str_replace('_', '\\_', db_escape($s[0]))."%' " // "_" passes split_q() but is a LIKE wildcard, so match it literally
+ : " w.text='".db_escape($s[0])."' AND w.type='word' "
+ );
+ } else {
+ $where_parts = array();
+ foreach ($s as $e) {
+ $where_parts[] = ($subword
+ ? "w.text LIKE '".str_replace('_', '\\_', db_escape($e))."%'" // same literal-underscore handling as the single-term case
+ : "w.text='".db_escape($e)."'");
+ }
+
+ $where = " (".implode(' OR ', $where_parts).") ".
+ ($subword ? '' : " AND w.type='word' ");
+ }
+
+
+ $sql = "SELECT COUNT(*)/COUNT(DISTINCT pr.id) AS keywords_cnt, ".
+ " SUM(w.cnt)/COUNT(DISTINCT pr.id) AS cnt, " .
+ " f.path, f.language_id, pr.id AS prid, pr.name AS package_name ".
+ " FROM word AS w, file AS f, package AS p, provider_record AS pr ".
+ " WHERE p.id=pr.package_id AND ".$where.
+ " AND w.file_id=f.id AND f.package_id=p.id ".
+ " AND f.crawler_id=w.crawler_id AND f.crawler_id=p.crawler_id ".
+ " GROUP BY f.crawler_id, f.id ".
+ " ORDER BY w.cnt DESC ";
+
+ break;
+}
+
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// query DB
+
+$total = 0;
+while (isset($sql) and $sql) {
+
+ db_connect();
+
+ // use LIMIT ==> two queries: 1. total count, 2. results
+ // or mysql query cache ==> one query (dump data)
+
+ $res = mysql_query($sql);
+ if ($res === false) {
+ echo('<p class="error">'.mysql_errno().': '.mysql_error()."<br />\n".$sql."</p>\n");
+ break;
+ }
+ $total = mysql_num_rows($res);
+ break;
+}
+
+
+$max_page = max(1, ceil($total/$num));
+$max_start = ($max_page-1)*$num;
+
+// behind last result page
+if ($start > $max_start) $start = $max_start;
+
+// start not aligned
+//$start = floor($start/$num)*$num;
+
+
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// show results
+
+if ($total) {
+
+ echo('<p>Results: '.$total."</p>\n".
+ '<ul class="results">'."\n");
+
+ $i = 0;
+ while ($r = mysql_fetch_array($res)) {
+ // page window: skip the first $start rows, stop once $num rows have been shown
+ if ($i == $start+$num) break;
+ $i++;
+ if ($i <= $start) continue;
+
+
+ $r['keywords_cnt'] = intval($r['keywords_cnt']);
+ if ($r['keywords_cnt'] < 2) $r['keywords_cnt'] = ''; // only mention the word count when several distinct words matched
+ echo('<li>'.htmlentities($r['package_name']).': '.
+ '<a href="show/'.htmlentities($r['prid']).'/'.htmlentities($r['path']).'?q='.urlencode($q).'#first">'. // q is URL-encoded like the pager links; urlencode() output needs no further HTML escaping
+ htmlentities($r['path']).'</a> '.
+ '('.intval($r['cnt']).' hits'.
+ ($r['keywords_cnt'] ? ' - '.$r['keywords_cnt'].' different words' : '').
+ ')'."</li>\n");
+ }
+
+ echo("</ul>\n");
+}
+
+//////////////////////////////////////////////////////////////////////////
+// links
+if ($max_page > 1) {
+
+ $l = $ME.'?q='.urlencode($q).
+ ($case ? '&amp;case=1' : '').
+ ($subword ? '&amp;subword=1' : '').
+ ($num != $num_default ? '&amp;num='.$num : '').
+ '&amp;start=';
+
+ echo('<ul class="links">'."\n".
+ ($start ? '<li><a href="'.$l.'0">first</a>'."</li>\n" : '').
+ ($start ? '<li><a href="'.$l.max(0, $start-$num).'">previous</a>'."</li>\n" : ''));
+
+ $lnkcnt = 10;
+ for ($i = max(0,$start-($lnkcnt*$num)); $i <= min($max_start, $start+($lnkcnt*$num)); $i+=$num) {
+ echo('<li><a href="'.$l.$i.'"'.($i == $start ? ' class="active" ' : '').'>'.(($i/$num)+1).'</a>'."</li>\n");
+ }
+
+ echo(($start < $max_start ? '<li><a href="'.$l.min($max_start, $start+$num).'">next</a>'."</li>\n" : '').
+ ($start < $max_start ? '<li><a href="'.$l.$max_start.'">last</a>'."</li>\n" : '').
+ "</ul>\n");
+
+}
+
+
+
+
+echo("</body>\n</html>");
+?> \ No newline at end of file