//// -*- mode: javascript; coding: utf-8 -*-
// ==UserScript==
// @name           Search on the History
// @author         noriaki
// @namespace      noriaki
// @license        The MIT License
// @version        0.1.0
// @released       2009-10-12 00:00:00
// @updated        2009-10-13 02:01:00
// @compatible     Greasemonkey
// @include        about:blank
// @include        http://*
// @require        http://jqueryjs.googlecode.com/files/jquery-1.3.2.min.js
// @require        http://blog.fulltext-search.biz/files/gm/gm-config-helper.js
// @require        http://blog.fulltext-search.biz/files/gm/gm-update-checker.js
// @require        http://blog.fulltext-search.biz/files/gm/md5.js
// @require        http://blog.fulltext-search.biz/files/gm/extract-content-all.js
// ==/UserScript==

jQuery(function($) {
    // Persistent stores used by the rest of the script. Each one is a
    // ConfigHelper created with its own key: 'words', 'links' and 'meta'.
    var db = {};
    db.words = new ConfigHelper({ key: 'words' });
    db.links = new ConfigHelper({ key: 'links' });
    db.meta = new ConfigHelper({ key: 'meta' });

    // Write both counters back on start-up, substituting 0 for any falsy
    // (e.g. never-set) value so later code can add to them directly.
    var counters = ['Npage', 'Nword'];
    for(var c = 0; c < counters.length; c++) {
        db.meta.val(counters[c], db.meta.val(counters[c]) || 0);
    }

    // The two blank pages host the search UI; every other page is indexed,
    // but only when this script runs in the top-level window (not a frame).
    var onSearchPage =
        location.href === "about:blank" ||
        location.href === "http://blog.fulltext-search.biz/files/gm/blank.html";
    if(onSearchPage) {
        searcher();
    } else if(window.top === window) {
        gatherer();
    }

    // Gathering Page.
    //
    // Indexes the current page once. The page is identified by the MD5 hash
    // of url(location); if db.links already holds a record for that hash the
    // function returns false without doing anything else. Otherwise the main
    // content is extracted with ExtractContentJS, sent to YahooMAService, and
    // the returned <word> elements are stored as postings in db.words. The
    // page record goes to db.links and the counters in db.meta are updated.
    //
    // Returns false when the page is already indexed, undefined otherwise.
    // All storage writes happen asynchronously in the service callback.
    function gatherer() {
        var page_url = url(location),
            hex_url = MD5_hexhash(page_url);

        // Check the index *before* running the extractor so that revisiting
        // an indexed page costs a single lookup instead of a full extraction.
        if(db.links.val(hex_url) !== undefined) { return false; }

        var ex = new ExtractContentJS.LayeredExtractor();
        ex.addHandler(ex.factory.getHandler('Heuristics'));
        var res = ex.extract(document);
        if(!res.isSuccess) { return; }

        var ymas = new YahooMAService();
        ymas.appid = [
            'aVbQ5lmxg66SMbUsfmtKr',
            'nGRdpXI3p6Wampp',
            'wCWD7Q4UA7f4lHyB6gJpKKAsLL7ZrE8J.9cA--'
        ].join('_');
        ymas.ua = 'Mozilla/4.0 (compatible) Greasemonkey (Search on the History)';
        // NOTE(review): '9' is passed through as the service's `filter`
        // parameter; presumably a part-of-speech code - confirm against the
        // MAService documentation.
        ymas.filter = '9';
        var sentence = res.content.asTextFragment();

        var link = {
            url: page_url,
            title: res.title,
            body: sentence,
            updateat: (new Date()).getTime()
        };

        ymas.sentence = sentence;
        ymas.execute(function(xml) {

            var data = $(xml),
                words = data.find('word');

            // Number of <word> elements returned for this page; search()
            // passes it to tfidf() as the per-page normaliser.
            link.nuot = words.size();
            words.each(function() {
                var entry = $(this),
                    sf = entry.find('surface').text(),
                    hex_sf = MD5_hexhash(sf);

                var word = db.words.val(hex_sf);
                if(word === undefined) {
                    word = {
                        surface: sf,
                        pages: []
                    };
                }
                word.pages.push({
                    key: hex_url,
                    // A missing or non-numeric <count> must not put NaN
                    // into the index; fall back to 0.
                    freq: parseInt(entry.find('count').text(), 10) || 0
                });

                db.words.val(hex_sf, word);
            });
            db.links.val(hex_url, link);
            db.meta.val('Npage', db.meta.val('Npage') + 1);
            db.meta.val('LastUpdate', (new Date).getTime());
            // Same NaN guard as above: a response without <filtered_count>
            // would otherwise turn the running total into NaN.
            db.meta.val('Nword', db.meta.val('Nword') +
                        (parseInt(data.find('filtered_count').text(), 10) || 0));

            //log(db.links.val());
            //log(db.words.val());
            //log(db.meta.val());

        });
    }

    // Building Search Page.
    //
    // Appends the search UI to <body>: a form (#s) with a text box (#q) and
    // a submit button (#sbmt), a status paragraph (#message) and a result
    // list (#ret). Submitting the form runs search() on the text box value,
    // shows its message and renders one <dt>/<dd> pair per returned link.
    function searcher() {
        $('body')
        .append(
            $('<form>')
            .attr({
                id: 's',
                action: 'javascript:void(0);',
                // <form> has no `type` attribute; `method` is what was meant.
                method: 'post'
            })
            .append(
                $('<input type="text">')
                .attr({
                    id: 'q',
                    name: 'q'
                })
            )
            .append(
                $('<input type="submit">')
                .attr({
                    id: 'sbmt',
                    name: 'sbmt'
                })
                .val('検索')
            )
            .submit(function() {
                var results = search($('#q').val()),
                    list = $('#ret');

                $('#message').text(results.message);

                // Remove the hits of the previous query; without this every
                // search appended to (or, on a miss, left behind) old results.
                list.empty();

                if(results.stat) {
                    $.each(results.links, function(i,doc) {
                        list
                        .append(
                            $('<dt>')
                            .append(
                                $('<a>')
                                .attr({
                                    // Stored URLs carry no scheme (see url()).
                                    href: 'http://' + doc.url
                                })
                                .text(doc.title)
                            )
                        )
                        .append(
                            $('<dd>')
                            .append(
                                $('<p>')
                                .text(doc.body)
                            )
                        );
                    });
                }

                // The query is handled entirely in-page, so cancel the
                // browser's own form submission.
                return false;
            })
        )
        .append(
            $('<p>')
            .attr({
                id: 'message'
            })
        )
        .append(
            $('<dl>')
            .attr({
                id: 'ret'
            })
        );
    }

    // Looks up a single term in the index and ranks the pages containing it.
    //
    // query: the term typed by the user; surrounding whitespace is ignored.
    //
    // Returns an object with:
    //   stat    - true when at least one page was found, false otherwise
    //   message - "success." or "no results."
    //   word    - { surface } of the matched term (only when stat is true)
    //   links   - page records from db.links, each extended with a `tfidf`
    //             score and sorted by that score, highest first (only when
    //             stat is true)
    function search(query) {
        var term = $.trim(query),
            word = db.words.val(MD5_hexhash(term)),
            ret = { stat: false, message: "no results." };

        if(word === undefined) { return ret; }

        var Dfreq = word.pages.length,
            Npage = db.meta.val('Npage'),
            links = [];

        $.each(word.pages, function(i,page) {
            var doc = db.links.val(page.key);
            // A posting may point at a page record that is no longer (or was
            // never) stored; skip it instead of throwing on `doc.tfidf`.
            if(doc === undefined || doc === null) { return; }
            doc.tfidf = tfidf(page.freq, doc.nuot, Npage, Dfreq);
            links.push(doc);
        });

        if(links.length === 0) { return ret; }

        // sort by tfidf(DESC)
        links.sort(function(a,b) { return b.tfidf - a.tfidf; });

        ret.stat = true;
        ret.message = "success.";
        ret.word = {
            surface: word.surface
        };
        ret.links = links;

        return ret;
    }

    // Relevance score of one page for one term.
    //
    //   freq  - occurrences of the term on the page
    //   nuot  - number of terms stored for the page (per-page normaliser)
    //   Npage - total number of indexed pages
    //   Dfreq - number of pages containing the term
    //
    // Computes log2(freq + 1) / log2(nuot) * (log2(Npage / Dfreq) + 1).
    // Always returns a finite number so it is safe to use in a sort
    // comparator; inputs that would yield NaN or +/-Infinity score 0.
    function tfidf(freq, nuot, Npage, Dfreq) {
        // log2(1) is 0, so a page with a single stored term used to divide
        // by zero. Clamp the normaliser to at least 1 (the value for nuot=2).
        var norm = Math.max(log2(nuot), 1);
        var score = log2(freq + 1) / norm * (log2(Npage / Dfreq) + 1);
        return isFinite(score) ? score : 0;
    }

    // Base-2 logarithm of x.
    function log2(x) { return Math.LOG2E * Math.log(x); }

    // Menu command that wipes the index: every stored value is deleted,
    // except "meta", whose counters are reset to 0 and whose LastUpdate is
    // set to the current time.
    GM_registerMenuCommand('SotH - Clear all INDEX', function() {
        var stored = GM_listValues();
        for(var n = 0; n < stored.length; n++) {
            var name = stored[n];
            if(name !== "meta") {
                GM_deleteValue(name);
                continue;
            }
            db.meta.val('Npage', 0);
            db.meta.val('Nword', 0);
            db.meta.val('LastUpdate', (new Date()).getTime());
        }
    });

    // Canonical, scheme-less address of a Location-like object:
    // host (including a non-default port) + pathname + search.
    // `hostname` alone drops the port, so pages served on e.g. :8080 were
    // keyed and later linked without it. Falls back to `hostname` for
    // objects that do not provide `host`.
    function url(l) { return (l.host || l.hostname) + l.pathname + l.search; }

    // Debug helper: when the page exposes a console through unsafeWindow,
    // passes this call's `arguments` object to its log() as a single value.
    // Does nothing when no console is present.
    function log() {
        var pageConsole = unsafeWindow.console;
        if(!pageConsole) { return; }
        pageConsole.log(arguments);
    }
});

// Yahoo! JAPAN - Developer Network [ MAService ]
//
// Small client for the MAService "parse" endpoint.
//
// Usage: create an instance (optionally passing an object whose properties
// are copied onto it), set at least `appid` and `sentence`, then call
// execute(callback). Recognised properties:
//   appid    - application id (required)
//   sentence - text to analyse (required)
//   results  - defaults to 'uniq'
//   response - defaults to 'surface'
//   filter   - defaults to '1|2|3|4|5|9|10|13'
//   ua       - User-Agent header, defaults to a generic Greasemonkey string
function YahooMAService() { this.initialize.apply(this, arguments); };
YahooMAService.prototype = {
    // Sets the endpoint and copies every property of `options` onto the
    // instance.
    initialize: function(options) {
        this.END_POINT = 'http://jlp.yahooapis.jp/MAService/V1/parse';
        // `var` keeps the loop variable local; without it the loop created
        // an implicit global `k` (a ReferenceError in strict mode).
        for(var k in options) this[k] = options[k];
    },

    // Returns the form-encoded request body built from the instance
    // properties, applying the defaults listed above.
    toQueryString: function() {
        var e = encodeURIComponent;
        var query = [];
        query.push('appid='+e(this.appid));
        query.push('sentence='+e(this.sentence));
        query.push('results='+e(this.results ? this.results : 'uniq'));
        query.push('response='+e(this.response ? this.response : 'surface'));
        query.push('filter='+ e(this.filter ? this.filter : '1|2|3|4|5|9|10|13'));
        return query.join('&');
    },

    // POSTs the request via GM_xmlhttpRequest and calls
    // callback(responseText) when it loads. Does nothing when `callback` is
    // not a function or when `appid` / `sentence` is missing. An over-long
    // `sentence` is truncated, and the truncated text is written back to
    // this.sentence.
    execute: function(callback) {
        if(typeof callback != 'function') return;
        if(!this.appid || !this.sentence) return;
        var self = this,
            sentence = this.sentence,
            // Measure with the same encoder toQueryString() uses.
            // encodeURI leaves characters such as '&', '=', '+' and '/'
            // unescaped, so it under-counted the real body size.
            encode = encodeURIComponent;
        var LIMIT = 100000 - 100; // Limit: 100KB - (query parameter).length

        // truncate sentence for Yahoo! MA Services LIMIT
        // `scale` estimates encoded chars per source char and is raised by
        // 5% per pass so the cut keeps shrinking until the text fits.
        for(var stl = encode(sentence).length, scale = stl / sentence.length;
            stl > LIMIT; scale *= 1.05) {
            sentence = sentence.slice(0, LIMIT / scale);
            // slice() works on UTF-16 units and may cut a surrogate pair in
            // half; a trailing lone high surrogate makes the encoder throw
            // URIError, so drop it.
            if(/[\uD800-\uDBFF]$/.test(sentence)) {
                sentence = sentence.slice(0, -1);
            }
            stl = encode(sentence).length;
        }
        this.sentence = sentence;

        GM_xmlhttpRequest({
            method: 'POST',
            headers: {
                'User-Agent': self.ua ? self.ua : 'Mozilla/4.0 (compatible) Greasemonkey',
                // for POST Request
                'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
            },
            url: self.END_POINT,
            data: self.toQueryString(),
            onload: function(xhr) { callback(xhr.responseText); },
            onerror: function(xhr) { console.log(xhr.status + ': ' + xhr.responseText); }
        });
    }
};

/*
new UpdateChecker({
    script_name: 'Search on the History'
    ,script_url: 'http://doko.r08.jp/tools/createsitehelper.user.js'
    ,current_version: '0.1.0'
});
*/
