diff options
Diffstat (limited to 'src/Search')
| -rw-r--r-- | src/Search/AbstractFTSExpr.php | 31 | ||||
| -rw-r--r-- | src/Search/CharSource.php | 33 | ||||
| -rw-r--r-- | src/Search/FTSExpr.php | 30 | ||||
| -rw-r--r-- | src/Search/FTSLogicOp.php | 46 | ||||
| -rw-r--r-- | src/Search/FTSNotExpr.php | 29 | ||||
| -rw-r--r-- | src/Search/LogicOp.php | 78 | ||||
| -rw-r--r-- | src/Search/NotOp.php | 32 | ||||
| -rw-r--r-- | src/Search/Pagination.php | 14 | ||||
| -rw-r--r-- | src/Search/ParseError.php | 9 | ||||
| -rw-r--r-- | src/Search/Parser.php | 295 | ||||
| -rw-r--r-- | src/Search/SQLSearchExpr.php | 11 | ||||
| -rw-r--r-- | src/Search/SearchExpr.php | 14 | ||||
| -rw-r--r-- | src/Search/SearchResult.php | 160 | ||||
| -rw-r--r-- | src/Search/TagExpr.php | 42 | ||||
| -rw-r--r-- | src/Search/TrueExpr.php | 25 | 
15 files changed, 849 insertions, 0 deletions
// src/Search/AbstractFTSExpr.php

namespace Micropoly\Search;

/**
 * Base class for search expressions that are rendered as one full-text MATCH
 * against the note_contents table.
 */
abstract class AbstractFTSExpr implements SearchExpr
{
    /**
     * Builds the query string that gets bound to the MATCH parameter.
     */
    abstract protected function fts4Query(): string;

    /**
     * Renders this expression as an SQL condition with a single bound parameter.
     *
     * @param string $bindPrefix Prefix put in front of the "match" parameter name.
     * @param bool $singleFTS When true, MATCH directly against the "nc" alias;
     *                        otherwise match through a rowid sub-select.
     * @return SQLSearchExpr
     */
    public function toSQL(string $bindPrefix, bool $singleFTS): SQLSearchExpr
    {
        $paramName = "{$bindPrefix}match";
        $result = new SQLSearchExpr();

        if ($singleFTS) {
            $result->sql = "nc.note_contents MATCH :{$paramName}";
        } else {
            $result->sql = "n.content_row IN (
                SELECT rowid
                FROM note_contents
                WHERE note_contents MATCH :{$paramName}
            )";
        }

        $result->bindings[$paramName] = $this->fts4Query();

        return $result;
    }

    /**
     * An FTS expression always counts as exactly one full-text query.
     */
    public function countFTSQueries(): int
    {
        return 1;
    }
}
// src/Search/CharSource.php

namespace Micropoly\Search;

/**
 * Character-by-character reader over a string with one-step push-back.
 *
 * The input is split into multibyte characters once in the constructor, so
 * each getNext() call is a plain array lookup instead of an mb_substr() scan
 * from the start of the string.
 */
class CharSource
{
    /** @var string[] The input, one multibyte character per element. */
    private array $chars;

    /** Index of the character that the next getNext() call returns. */
    private int $i = 0;

    /** Number of characters in the input. */
    private int $len;

    public function __construct(string $s)
    {
        $this->chars = mb_str_split($s);
        $this->len = count($this->chars);
    }

    /**
     * Returns the next character and advances, or null once the input is
     * exhausted (the position is not advanced past the end).
     */
    public function getNext(): ?string
    {
        if ($this->i >= $this->len)
            return null;

        return $this->chars[$this->i++];
    }

    /**
     * Steps back by one character; never moves before the start of the input.
     */
    public function unget(): void
    {
        $this->i = max(0, $this->i - 1);
    }
}
// src/Search/FTSExpr.php

namespace Micropoly\Search;

/**
 * A single full-text search term (word or quoted phrase).
 */
class FTSExpr extends AbstractFTSExpr
{
    private string $term;

    public function __construct(string $term)
    {
        $this->term = $term;
    }

    public function getTerm(): string
    {
        return $this->term;
    }

    /**
     * Wraps the term in double quotes, doubling any embedded double quote.
     *
     * NOTE(review): confirm that the FTS query syntax in use treats a doubled
     * quote inside a phrase as an escaped quote.
     */
    protected function fts4Query(): string
    {
        return '"' . str_replace('"', '""', $this->term) . '"';
    }

    /**
     * Renders the term as a quoted string with embedded double quotes and
     * backslashes escaped by a backslash.
     */
    public function toString(): string
    {
        // The callback receives the array of matches, not the matched string;
        // prefix the captured character with a backslash.
        $escaped = preg_replace_callback(
            '/(["\\\\])/',
            fn (array $m): string => '\\' . $m[1],
            $this->term
        );

        return '"' . $escaped . '"';
    }
}
// src/Search/FTSLogicOp.php

namespace Micropoly\Search;

/**
 * Logical combination of two full-text expressions that is evaluated inside
 * a single full-text query string.
 */
class FTSLogicOp extends AbstractFTSExpr
{
    private string $op;
    private AbstractFTSExpr $a;
    private AbstractFTSExpr $b;

    /**
     * @param string $op One of the LogicOp::OP_* constants.
     * @param AbstractFTSExpr $a Left operand.
     * @param AbstractFTSExpr $b Right operand.
     * @throws \DomainException If $op is not accepted by LogicOp::checkOp().
     */
    public function __construct(string $op, AbstractFTSExpr $a, AbstractFTSExpr $b)
    {
        if (!LogicOp::checkOp($op))
            throw new \DomainException("{$op} is not a valid operator");

        $this->op = $op;
        $this->a = $a;
        $this->b = $b;
    }

    /** Operator text used inside the full-text query; AND is the empty string. */
    private const FTSOPS = [
        LogicOp::OP_AND => "",
        LogicOp::OP_OR => "OR",
    ];

    /**
     * Joins both operands' query strings with the mapped operator, wrapped in
     * parentheses.
     */
    protected function fts4Query(): string
    {
        // AND maps to "" which is falsy, so assert that the key exists rather
        // than asserting the looked-up value.
        assert(array_key_exists($this->op, self::FTSOPS));
        $ftsop = self::FTSOPS[$this->op];

        return "({$this->a->fts4Query()} {$ftsop} {$this->b->fts4Query()})";
    }

    public function toString(): string
    {
        return "({$this->a->toString()} FTS-{$this->op} {$this->b->toString()})";
    }
}
// src/Search/FTSNotExpr.php

namespace Micropoly\Search;

/**
 * Negation of a full-text expression, rendered with a leading "-" in the
 * full-text query string.
 */
class FTSNotExpr extends AbstractFTSExpr
{
    /** The expression being negated. */
    private AbstractFTSExpr $operand;

    /**
     * @param AbstractFTSExpr $expr The expression to negate.
     */
    public function __construct(AbstractFTSExpr $expr)
    {
        $this->operand = $expr;
    }

    /**
     * Prefixes the operand's query string with "-".
     */
    protected function fts4Query(): string
    {
        return "-" . $this->operand->fts4Query();
    }

    public function toString(): string
    {
        return "(FTS-NOT " . $this->operand->toString() . ")";
    }
}
// src/Search/LogicOp.php

namespace Micropoly\Search;

/**
 * Binary AND/OR combination of two arbitrary search expressions, rendered as
 * an SQL boolean expression.
 */
class LogicOp implements SearchExpr
{
    public const OP_AND = "and";
    public const OP_OR = "or";

    /** SQL keyword for each supported operator. */
    private const SQLOPS = [
        self::OP_AND => "AND",
        self::OP_OR => "OR",
    ];

    private string $op;
    private SearchExpr $a;
    private SearchExpr $b;

    /**
     * @param string $op One of the OP_* constants.
     * @param SearchExpr $a Left operand.
     * @param SearchExpr $b Right operand.
     * @throws \DomainException If $op is not a supported operator.
     */
    public function __construct(string $op, SearchExpr $a, SearchExpr $b)
    {
        if (!self::checkOp($op))
            throw new \DomainException("{$op} is not a valid operator");

        $this->op = $op;
        $this->a = $a;
        $this->b = $b;
    }

    /**
     * Creates the most specific node for the operands: an FTSLogicOp when both
     * sides are full-text expressions, a plain LogicOp otherwise.
     */
    public static function build(string $op, SearchExpr $a, SearchExpr $b): SearchExpr
    {
        return $a instanceof AbstractFTSExpr && $b instanceof AbstractFTSExpr
            ? new FTSLogicOp($op, $a, $b)
            : new self($op, $a, $b);
    }

    /**
     * @param string $op
     * @return bool True if $op is one of the OP_* constants.
     */
    public static function checkOp(string $op): bool
    {
        return in_array($op, [
            self::OP_AND,
            self::OP_OR,
        ], true);
    }

    public function getA(): SearchExpr { return $this->a; }
    public function getB(): SearchExpr { return $this->b; }
    public function getOp(): string { return $this->op; }

    public function toString(): string
    {
        return "({$this->a->toString()}) {$this->op} ({$this->b->toString()})";
    }

    /**
     * Renders both operands and joins them with the SQL operator.
     *
     * Each side gets its own bind prefix ("a_" / "b_" in front of the incoming
     * prefix) so the two operands use different parameter names.
     *
     * @param string $bindPrefix Prefix for bound parameter names.
     * @param bool $singleFTS Passed through unchanged to both operands.
     * @return SQLSearchExpr
     */
    public function toSQL(string $bindPrefix, bool $singleFTS): SQLSearchExpr
    {
        $sqlex = new SQLSearchExpr();

        $a = $this->a->toSQL("a_$bindPrefix", $singleFTS);
        $b = $this->b->toSQL("b_$bindPrefix", $singleFTS);

        // The constructor validated the operator, so the key must exist.
        assert(isset(self::SQLOPS[$this->op]));
        $sqlop = self::SQLOPS[$this->op];

        $sqlex->sql = "(({$a->sql}) {$sqlop} ({$b->sql}))";
        $sqlex->bindings = array_merge($a->bindings, $b->bindings);

        return $sqlex;
    }

    /**
     * Sum of the full-text query counts of both operands.
     */
    public function countFTSQueries(): int
    {
        return $this->a->countFTSQueries() + $this->b->countFTSQueries();
    }
}
// src/Search/NotOp.php

namespace Micropoly\Search;

/**
 * SQL-level negation of an arbitrary search expression.
 */
class NotOp implements SearchExpr
{
    /** The expression being negated. */
    private SearchExpr $inner;

    public function __construct(SearchExpr $expr)
    {
        $this->inner = $expr;
    }

    public function toString(): string
    {
        return "not (" . $this->inner->toString() . ")";
    }

    /**
     * Renders the wrapped expression and wraps its SQL in NOT (...); the
     * bindings of the wrapped expression are returned unchanged.
     *
     * NOTE(review): verify that the database accepts a negated MATCH on the
     * "nc" alias when $singleFTS is true.
     */
    public function toSQL(string $bindPrefix, bool $singleFTS): SQLSearchExpr
    {
        $rendered = $this->inner->toSQL($bindPrefix, $singleFTS);
        $rendered->sql = "(NOT (" . $rendered->sql . "))";

        return $rendered;
    }

    /**
     * Same count as the wrapped expression.
     */
    public function countFTSQueries(): int
    {
        return $this->inner->countFTSQueries();
    }
}
// src/Search/Pagination.php

namespace Micropoly\Search;

/**
 * Pagination state for search results. Currently only holds the default page
 * size and a page number; no behaviour is implemented yet.
 */
class Pagination
{
    /** Number of results per page when nothing else is requested. */
    public const DEFAULT_PER_PAGE = 25;

    /** Current page number; starts at 1. */
    private int $page = 1;
}
// src/Search/ParseError.php

namespace Micropoly\Search;

use Exception;

/**
 * Thrown by the search Parser when the query string cannot be parsed.
 */
class ParseError extends Exception
{
}
// src/Search/Parser.php

namespace Micropoly\Search;

use Generator;
use Iterator;

/**
 * Tokenizer and parser for search queries.
 *
 * Tokens are two-element arrays [type, data], where type is one of the TOK_*
 * constants. parse() folds the token stream into a SearchExpr tree.
 */
class Parser
{
    public const TOK_PAROPEN = "(";
    public const TOK_PARCLOSE = ")";
    public const TOK_TAG = "#";
    public const TOK_WORD = '"';
    public const TOK_OP = "op";
    public const TOK_PROP = ":";

    /**
     * Yields the multibyte characters of $input one at a time.
     * Not referenced elsewhere in this class.
     */
    private static function iterChars(string $input): Iterator
    {
        // Compute the length once instead of on every loop iteration.
        $len = mb_strlen($input);
        for ($i = 0; $i < $len; $i++)
            yield mb_substr($input, $i, 1);
    }

    /**
     * Splits a query string into tokens.
     *
     * @param string $input
     * @return Iterator Yields [type, data] pairs.
     * @throws ParseError
     */
    public static function tokenize(string $input): Iterator
    {
        $chars = new CharSource($input);
        yield from self::tokenize_normal($chars);
    }

    /**
     * Returns the iterator's current item and advances it, or null when the
     * iterator is exhausted.
     */
    private static function getItemAndAdvance(Iterator $input)
    {
        if (!$input->valid())
            return null;
        $out = $input->current();
        $input->next();
        return $out;
    }

    /**
     * Top-level tokenizer state: collects bare words and dispatches to the
     * string and tag tokenizers.
     *
     * @param CharSource $input
     * @return Iterator
     * @throws ParseError
     */
    private static function tokenize_normal(CharSource $input): Iterator
    {
        $buf = "";

        // Emits the buffered word (as an operator token for and/or/not, as a
        // word token otherwise) and resets the buffer.
        $yieldBufAndClear = function () use (&$buf) {
            if ($buf !== "") {
                switch ($buf) {
                    case "and":
                    case "or":
                    case "not":
                        yield [self::TOK_OP, $buf];
                        break;
                    default:
                        yield [self::TOK_WORD, $buf];
                }
            }
            $buf = "";
        };

        for (;;) {
            $c = $input->getNext();
            if ($c === null) {
                break;
            }

            switch ($c) {
                case '\\':
                    // A backslash takes the next character literally; a
                    // trailing backslash is kept as-is.
                    $next = $input->getNext();
                    if ($next === null) {
                        $buf .= $c;
                        break 2;
                    }
                    $buf .= $next;
                    break;

                case ' ':
                case "\t":
                    yield from $yieldBufAndClear();
                    break;

                case '"':
                    yield from $yieldBufAndClear();
                    yield from self::tokenize_string($input);
                    break;

                case ':':
                    // A colon turns the buffered word into a property token;
                    // a colon with an empty buffer is dropped.
                    if ($buf !== "") {
                        yield [self::TOK_PROP, $buf];
                        $buf = "";
                    }
                    break;

                case '(':
                    yield from $yieldBufAndClear();
                    yield [self::TOK_PAROPEN, null];
                    break;

                case ')':
                    yield from $yieldBufAndClear();
                    yield [self::TOK_PARCLOSE, null];
                    break;

                case '#':
                    yield from $yieldBufAndClear();
                    yield from self::tokenize_tag($input);
                    break;

                default:
                    $buf .= $c;
            }
        }

        yield from $yieldBufAndClear();
        return;
    }

    /**
     * Parses a query string into an expression tree.
     *
     * Adjacent expressions without an explicit operator are combined with AND.
     * "not" toggles negation of the next expression. Parentheses group
     * sub-expressions.
     *
     * @param string $input
     * @return SearchExpr|null Null when the input contains no expression.
     * @throws ParseError On two binary operators in a row, property tokens
     *                    (not yet supported), empty or unbalanced parentheses,
     *                    or an unclosed string.
     */
    public static function parse(string $input): ?SearchExpr
    {
        $tokens = self::tokenize($input);

        $stack = [];
        $cur = null;
        $binOp = null;
        $negated = false;

        // Appends an expression to the current group, applying any pending
        // negation and binary operator.
        $putExpr = function (SearchExpr $expr) use (&$cur, &$binOp, &$negated) {
            if ($negated) {
                $expr = new NotOp($expr);
            }

            $cur = $cur === null
                ? $expr
                : LogicOp::build($binOp ?? LogicOp::OP_AND, $cur, $expr);

            $binOp = null;
            $negated = false;
        };

        $setBinOp = function ($op) use (&$binOp) {
            if ($binOp !== null)
                throw new ParseError("Unexpected logic operator $op");

            $binOp = $op;
        };

        for (;;) {
            $token = self::getItemAndAdvance($tokens);
            if ($token === null)
                break;

            [$ttyp, $tdata] = $token;

            switch ($ttyp) {

                case self::TOK_TAG:
                    $putExpr(new TagExpr($tdata));
                    break;
                case self::TOK_OP:
                    switch ($tdata) {
                        case "and":
                            $setBinOp(LogicOp::OP_AND);
                            break;
                        case "or":
                            $setBinOp(LogicOp::OP_OR);
                            break;
                        case "not":
                            $negated = !$negated;
                            break;
                        default:
                            throw new \DomainException("Unexpected data for TOK_OP: $tdata");
                    }
                    break;
                case self::TOK_WORD:
                    $putExpr(new FTSExpr($tdata));
                    break;
                case self::TOK_PROP:
                    // TODO(laria): Implement this
                    throw new ParseError("Not yet supported");
                case self::TOK_PAROPEN:
                    // Save the state of the enclosing group and start a fresh one.
                    $stack[] = [$cur, $binOp, $negated];
                    $cur = null;
                    $binOp = null;
                    $negated = false;
                    break;
                case self::TOK_PARCLOSE:
                    if (empty($stack))
                        throw new ParseError("Unexpected closing parenthesis");

                    // "()" has no content to append; report it as a parse
                    // error instead of passing null to $putExpr.
                    if ($cur === null)
                        throw new ParseError("Empty parentheses");

                    $parContent = $cur;
                    [$cur, $binOp, $negated] = array_pop($stack);
                    $putExpr($parContent);
                    break;
            }
        }

        if (!empty($stack))
            throw new ParseError("Unclosed parenthesis");

        return $cur;
    }

    /**
     * Reads a double-quoted string; the opening quote has already been
     * consumed. A backslash takes the next character literally.
     *
     * @param CharSource $input
     * @return Generator Yields a single TOK_WORD token with the string content.
     * @throws ParseError If the input ends before the closing quote.
     */
    private static function tokenize_string(CharSource $input): Generator
    {
        $content = "";
        for (;;) {
            $c = $input->getNext();
            if ($c === null)
                throw new ParseError("Unclosed string encountered");

            switch ($c) {
                case '\\':
                    $next = $input->getNext();
                    if ($next === null)
                        throw new ParseError("Unclosed string encountered");

                    $content .= $next;
                    break;

                case '"':
                    yield [self::TOK_WORD, $content];
                    return;

                default:
                    $content .= $c;
            }
        }
    }

    /**
     * Reads a tag name; the leading "#" has already been consumed. The tag
     * ends at whitespace, a parenthesis, another "#", or the end of input.
     * A "#" without a name is emitted as the word "#".
     *
     * @param CharSource $input
     * @return Iterator
     */
    private static function tokenize_tag(CharSource $input): Iterator
    {
        $tag = "";

        $yieldTag = function () use (&$tag) {
            if ($tag === "")
                yield [self::TOK_WORD, "#"];
            else
                yield [self::TOK_TAG, $tag];
        };

        for (;;) {
            $c = $input->getNext();
            if ($c === null) {
                yield from $yieldTag();
                return;
            }

            switch ($c) {
                case '\\':
                    $next = $input->getNext();
                    // Check the character after the backslash (not $c, which
                    // is never null here): a trailing backslash is kept
                    // literally as part of the tag.
                    if ($next === null) {
                        $tag .= '\\';
                        yield [self::TOK_TAG, $tag];
                        return;
                    }
                    $tag .= $next;
                    break;

                case ' ':
                case "\t":
                    yield from $yieldTag();
                    return;

                case '(':
                case ')':
                case '#':
                    // Leave the delimiter for the caller to tokenize.
                    $input->unget();
                    yield from $yieldTag();
                    return;

                default:
                    $tag .= $c;
            }
        }
    }
}
// src/Search/SQLSearchExpr.php

namespace Micropoly\Search;

/**
 * Plain container for a rendered SQL condition and its bound parameters.
 */
class SQLSearchExpr
{
    /** SQL condition text; has no default and must be assigned before it is read. */
    public string $sql;

    /** Bound parameter values, keyed by parameter name. */
    public array $bindings = [];
}
// src/Search/SearchExpr.php

namespace Micropoly\Search;

/**
 * A node of a parsed search query.
 */
interface SearchExpr
{
    /**
     * Returns a textual representation of the expression.
     */
    public function toString(): string;

    /**
     * Renders the expression as an SQL condition plus bound parameters.
     *
     * @param string $bindPrefix Prefix for the bound parameter names.
     * @param bool $singleFTS Rendering mode flag handed down to full-text
     *                        expressions.
     * @return SQLSearchExpr
     */
    public function toSQL(string $bindPrefix, bool $singleFTS): SQLSearchExpr;

    /**
     * Returns how many full-text queries the expression contains.
     */
    public function countFTSQueries(): int;
}
// src/Search/SearchResult.php

namespace Micropoly\Search;

use LogicException;
use Micropoly\DbQuery;
use Micropoly\Esc;
use Micropoly\Models\Note;
use SQLite3;

/**
 * A note found by a search, together with the byte ranges of its content that
 * should be highlighted.
 */
class SearchResult
{
    private Note $note;

    /** @var array[] List of [start, end] ranges (inclusive offsets into the content). */
    private array $highlights = [];

    private function __construct(Note $note, array $highlights)
    {
        $this->note = $note;
        $this->highlights = $highlights;
    }

    /**
     * Runs a search. Expressions with exactly one full-text query take the
     * path that also collects highlight offsets; everything else takes the
     * generic path without highlights.
     *
     * NOTE(review): the single-query path is also chosen when that one
     * full-text query sits below a NOT or OR node; confirm the generated SQL
     * is accepted by the database in those cases.
     *
     * @param SQLite3 $db
     * @param SearchExpr $expr
     * @return self[]
     */
    public static function search(SQLite3 $db, SearchExpr $expr): array
    {
        return $expr->countFTSQueries() === 1
            ? self::searchFTS($db, $expr)
            : self::searchComplex($db, $expr);
    }

    /**
     * Generic search: selects matching note ids and loads the notes; results
     * carry no highlights.
     */
    private static function searchComplex(SQLite3 $db, SearchExpr $expr): array
    {
        $sqlSearchExpr = $expr->toSQL("", false);

        $query = new DbQuery("
            SELECT
                n.id
            FROM notes n
            INNER JOIN note_contents nc
                ON nc.rowid = n.content_row
            WHERE {$sqlSearchExpr->sql}
        ");

        foreach ($sqlSearchExpr->bindings as $k => $v)
            $query->bind($k, $v);

        $ids = array_map(fn ($row) => $row[0], $query->fetchRows($db));
        $notes = Note::byIds($db, $ids);
        return array_map(fn ($note) => new self($note, []), $notes);
    }

    /**
     * True if $point lies inside the inclusive range [$start, $end].
     */
    private static function highlightRangeContains(array $range, int $point): bool
    {
        [$start, $end] = $range;
        return $start <= $point && $point <= $end;
    }

    /**
     * True if the two inclusive ranges share at least one position.
     */
    private static function areHighlightsOverlapping(array $a, array $b): bool
    {
        [$aStart, $aEnd] = $a;
        [$bStart, $bEnd] = $b;

        return self::highlightRangeContains($a, $bStart)
            || self::highlightRangeContains($a, $bEnd)
            || self::highlightRangeContains($b, $aStart)
            || self::highlightRangeContains($b, $aEnd);
    }

    /**
     * Converts a space-separated offsets string into a sorted list of
     * non-overlapping [start, end] ranges.
     *
     * The string is read in groups of four integers; the third value of each
     * group is used as the start offset and the fourth as the length. An
     * incomplete trailing group (including an empty input string) is ignored.
     *
     * @param string $offsets
     * @return array[] List of [start, end] pairs, ordered by start.
     */
    private static function parseOffsetsToHighlights(string $offsets): array
    {
        $values = array_map("intval", explode(" ", $offsets));
        $count = count($values);

        $highlights = [];
        // Only walk complete groups of four so no undefined index is read.
        for ($base = 0; $base + 3 < $count; $base += 4) {
            $off = $values[$base + 2];
            $len = $values[$base + 3];

            if ($off < 0 || $len <= 0)
                continue;

            $highlights[] = [$off, $off + $len - 1];
        }

        // Order by start ascending; for equal starts, the longer range first.
        usort($highlights, fn ($a, $b) => ($a[0] <=> $b[0]) ?: ($b[1] <=> $a[1]));

        // Merge overlapping ranges in one pass: because the list is sorted by
        // start, a range can only overlap the most recently kept range.
        $merged = [];
        foreach ($highlights as $range) {
            $last = count($merged) - 1;
            if ($last >= 0 && self::areHighlightsOverlapping($merged[$last], $range)) {
                $merged[$last][1] = max($merged[$last][1], $range[1]);
            } else {
                $merged[] = $range;
            }
        }

        return $merged;
    }

    /**
     * Single full-text query search: selects note ids together with the
     * offsets() of the match and turns the offsets into highlight ranges.
     *
     * @return self[]
     * @throws LogicException If a matched id is missing from the loaded notes.
     */
    private static function searchFTS(SQLite3 $db, SearchExpr $expr): array
    {
        $sqlSearchExpr = $expr->toSQL("", true);
        $query = new DbQuery("
            SELECT
                n.id,
                offsets(nc.note_contents) AS offsets
            FROM notes n
            INNER JOIN note_contents nc
                ON nc.rowid = n.content_row
            WHERE {$sqlSearchExpr->sql}
        ");
        foreach ($sqlSearchExpr->bindings as $k => $v)
            $query->bind($k, $v);

        $offsets = $query->fetchIndexedValues($db, "offsets", "id");

        $notes = Note::byIds($db, array_keys($offsets));

        $out = [];
        foreach ($offsets as $id => $offString) {
            if (!isset($notes[$id]))
                throw new LogicException("Note '{$id}' not loaded but found?");

            $out[] = new self($notes[$id], self::parseOffsetsToHighlights($offString));
        }

        return $out;
    }

    /**
     * Returns the note content escaped for HTML, with every highlight range
     * wrapped in <b> tags. Ranges are applied as byte offsets via substr().
     */
    public function renderHighlightedContent(): string
    {
        $out = "";
        $content = $this->note->getContent();
        $lastOff = 0;
        foreach ($this->highlights as [$start, $end]) {
            $out .= Esc::e(substr($content, $lastOff, $start - $lastOff), Esc::HTML_WITH_BR);
            $out .= '<b>' . Esc::e(substr($content, $start, $end - $start + 1), Esc::HTML_WITH_BR) . '</b>';

            $lastOff = $end + 1;
        }

        $out .= Esc::e(substr($content, $lastOff), Esc::HTML_WITH_BR);

        return $out;
    }

    public function getNote(): Note { return $this->note; }
}
// src/Search/TagExpr.php

namespace Micropoly\Search;

/**
 * Matches notes that carry a given tag.
 */
class TagExpr implements SearchExpr
{
    private string $tag;

    public function __construct(string $tag)
    {
        $this->tag = $tag;
    }

    public function getTag(): string { return $this->tag; }

    public function toString(): string
    {
        return "#" . $this->tag;
    }

    /**
     * Renders an EXISTS sub-select on the tags table, correlated with the
     * outer "n" alias; the tag is passed as a bound parameter.
     *
     * @param string $bindPrefix Prefix put in front of the "tag" parameter name.
     * @param bool $singleFTS Not used by this expression.
     * @return SQLSearchExpr
     */
    public function toSQL(string $bindPrefix, bool $singleFTS): SQLSearchExpr
    {
        $paramName = "{$bindPrefix}tag";

        $result = new SQLSearchExpr();
        $result->bindings[$paramName] = $this->tag;
        $result->sql = "EXISTS (
            SELECT 1
            FROM tags t
            WHERE t.tag = :{$paramName}
                AND t.note_id = n.id
        )";

        return $result;
    }

    /**
     * A tag lookup involves no full-text query.
     */
    public function countFTSQueries(): int
    {
        return 0;
    }
}
// src/Search/TrueExpr.php

namespace Micropoly\Search;

/**
 * Expression whose SQL condition is the constant "1".
 */
class TrueExpr implements SearchExpr
{
    public function toString(): string
    {
        return "<TrueExpr>";
    }

    /**
     * Returns the constant condition "1" without any bindings; both arguments
     * are ignored.
     */
    public function toSQL(string $bindPrefix, bool $singleFTS): SQLSearchExpr
    {
        $constant = new SQLSearchExpr();
        $constant->sql = "1";

        return $constant;
    }

    /**
     * Contains no full-text query.
     */
    public function countFTSQueries(): int
    {
        return 0;
    }
}
\ No newline at end of file | 
