// const Emitter = require('events');
const cheerio = require('cheerio');
const needle = require('needle');
const Util = require('../util');
// const agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ' +
// 'Chrome/39.0.2171.95 Safari/537.36 MicroMessenger/6.5.2.501 NetType/WIFI WindowsWechat';
// Desktop Chrome (macOS) User-Agent string sent in the headers of the spider's HTTP requests.
const macAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36' +
' (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36';

/**
 * Task-driven crawler: for every task it searches weixin.sogou.com for the
 * task's account name, walks the candidate accounts, fetches their profile
 * page and article JSON from mp.weixin.qq.com and posts the result to the
 * backend.
 */
class Spider {
    /**
     * @param {Function} getfollowedAccountName - called as `(token, type)`; must return
     *     a promise resolving to an object whose `data` holds the tasks to crawl.
     * @param {Function} reportFailedAccount - called as `(token, tasks)`; must return a promise.
     */
    constructor(getfollowedAccountName, reportFailedAccount) {
        // Injected backend accessors
        this.getfollowedAccountName = getfollowedAccountName;
        this.reportFailedAccount = reportFailedAccount;
        // State
        this.tasks = []; // accounts still to be crawled
        this.startType = 'auto';  // 'auto' or 'manual'
        this.sogouFailsTime = 0;
        this.weixinFailsTime = 0;
        this.running = 0;
        this.stop = 1;
        this.allTasksNum = 0;
        this.getTasking = 0;
        this.failure = [];
        // Timing and paging control
        this.taskTime = 10000;
        this.checkTime = 10000;
        this.taskTimer = null; // handle of the pending getTasks() poll, so only one poll loop exists
        this.currentSogouPage = 1;
        this.sogouBeforePageAccount = '';
        this.sogouHeaders = {};
        this.sogouCookiesStr2 = '';
    }

    /**
     * Resolve with `res` after a randomised delay of `time` to `2 * time` ms.
     * @param {*} res - value to resolve with.
     * @param {number} [time=10000] - base delay in ms (a falsy value selects the default).
     * @returns {Promise<*>}
     */
    _retTimeout(res, time) {
        const base = time || 10000;
        const jitter = Math.floor(base * Math.random());
        return new Promise(resolve => {
            setTimeout(() => resolve(res), base + jitter);
        });
    }

    /**
     * Build the request headers for weixin.sogou.com using the current cookie string.
     * @returns {Object}
     */
    _getSogouHeaders() {
        // TODO: refreshing the cookie keeps the search working; try refreshing it through a webview
        return {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Cookie': `${this.sogouCookiesStr2}`,
            'Host': 'weixin.sogou.com',
            'Pragma': 'no-cache',
            'Upgrade-Insecure-Requests': 1,
            'User-Agent': macAgent,
        };
    }

    /**
     * Request options (User-Agent plus an empty cookie) used for every mp.weixin.qq.com request.
     * @returns {{headers: Object}}
     */
    _getWeixinRequestOptions() {
        return {
            headers: {
                'User-Agent': macAgent,
                'Cookie': '',
            }
        };
    }

    /**
     * Fetch the sogou home page and store the cookies it returns in `sogouCookiesStr2`.
     * A failed request keeps the previously stored cookie.
     * @returns {Promise<string>} the new `name=value; ...` cookie string, or '' when the request failed.
     */
    _updateSogouCookie() {
        return new Promise(resolve => {
            needle.get('http://weixin.sogou.com/', (error, response) => {
                if (error || !response) {
                    // Without this guard the callback threw on `response.cookies`
                    // and the promise never settled.
                    console.warn(error);
                    resolve('');
                    return;
                }
                const cookiesMap = response.cookies || {};
                const cookieStr = Object.keys(cookiesMap)
                    .map(key => `${key}=${cookiesMap[key]}`)
                    .join('; ');
                this.sogouCookiesStr2 = cookieStr;
                resolve(cookieStr);
            });
        });
    }

    /**
     * Request one page of sogou account-search results.
     * @param {string} kw - search keyword.
     * @param {number} [page] - result page; the `page` parameter is only sent for pages other than 1.
     * @returns {Promise<*>} the response of `Util.Http.get`, delivered after the default `_retTimeout` delay.
     */
    _getAccountPage(kw, page) {
        const _kw = encodeURIComponent(kw);
        let getListUrl =
        `http://weixin.sogou.com/weixin?type=1&query=${_kw}&ie=utf8&_sug_=y&_sug_type_=&w=&sut=2138&sst0=1481270747189&lkt=3%2C1481270745515%2C1481270745702`;
        if (page && page !== 1) {
            getListUrl += `&page=${page}`;
        }
        this.sogouHeaders = this._getSogouHeaders();
        return Util.Http.get(getListUrl, {
            headers: this.sogouHeaders
        }).then(res => {
            // TODO: inspect res to decide whether the cookie needs refreshing; for now always refresh.
            // Fire-and-forget: _updateSogouCookie never rejects.
            this._updateSogouCookie();
            return this._retTimeout(res);
        });
    }

    /**
     * Extract the key fields of every account on a search-result page.
     * @param {Function} $ - cheerio instance loaded with the result page.
     * @returns {Array} one `{account, url, articleNum?, readNum?}` per `.txt-box` element,
     *     or `['done']` when the first account equals the first account of the previously
     *     parsed page (used as the "already past the last page" signal).
     */
    _getAccountInfo($) {
        const info = [];
        $('.txt-box').each((i, item) => {
            const box = $(item);
            const infoDom = box.find('.info');
            // html() yields null when the node is missing; fall back to '' so split() is safe.
            const infoHtml = infoDom.html() || '';
            const entry = {
                // WeChat id
                account: infoDom.find('label').html(),
                // link to the account's WeChat page
                url: box.find('.tit a').attr('href'),
            };
            // articles per month
            const articleNum = parseInt(infoHtml.split('月发文')[1], 10);
            if (articleNum) {
                entry.articleNum = articleNum;
            }
            // average reads
            const readNum = parseInt(infoHtml.split('平均阅读')[1], 10);
            if (readNum) {
                entry.readNum = readNum;
            }
            info.push(entry);
        });
        // Detect whether we already passed the last page
        if (info.length) {
            if (info[0].account === this.sogouBeforePageAccount) {
                return ['done'];
            }
            this.sogouBeforePageAccount = info[0].account;
        }
        return info;
    }

    /**
     * Crawl one search-result page for a task.
     * @param {Object} _task - task whose `name` is used as the search keyword.
     * @param {number} [page=1] - result page to crawl.
     * @returns {Promise<Array>|boolean} the parsed account list (see `_getAccountInfo`),
     *     or `false` when `_task` is falsy.
     */
    crawlAccounts(_task, page) {
        if (!_task) {
            console.warn('任务列表中存在异常任务');
            return false;
        }
        const currentPage = page || 1;
        this.currentSogouPage = currentPage;
        if (currentPage === 1) {
            // A new search starts here: forget the first account of the previous search,
            // otherwise its stale value could make page 1 look like "past the last page".
            this.sogouBeforePageAccount = '';
        }
        return this._getAccountPage(_task.name, currentPage).then(res => {
            const $ = cheerio.load(res, { decodeEntities: false });
            return this._getAccountInfo($);
        });
    }

    /**
     * Fetch an account's profile page.
     * @param {string} url
     * @returns {Promise<*>} result of `Util.Http.get`.
     */
    _getArticlePage(url) {
        return Util.Http.get(url, this._getWeixinRequestOptions());
    }

    /**
     * Parse an account profile page.
     * @param {string} res - page HTML.
     * @param {Object} item - search-result entry; it is mutated with avatar, name, desc,
     *     wx_verify and (when present) company.
     * @returns {{info: Object, articles: Array, msgList: (Object|Array|undefined)}}
     *     `msgList` is only set when the profile fields could be read.
     * @throws when the embedded `var msgList =` payload cannot be extracted or parsed.
     */
    _getArticleContent(res, item) {
        const $ = cheerio.load(res, { decodeEntities: false });
        const msgListMatch = res.match(/var msgList =(.*)/);
        const msgList = msgListMatch ? JSON.parse(msgListMatch[1].trim().match(/({.+});/)[1]) : [];
        const content = {
            info: item,   // avatar name desc company
            articles: []
        };
        try {
            content.info.avatar = $('.radius_avatar img').attr('src');
            content.info.name = $('.profile_nickname').html().trim();
            content.info.desc = $('.profile_desc_value').eq(0).html();
            content.info.wx_verify = $('.icon_verify').length ? 1 : 0;
            const _html = $('.profile_desc_value').eq(1).html();
            const _company = _html.split('">')[1];
            if (_company) {
                content.info.company = _company;
            }
            content.msgList = msgList;
        } catch (e) {
            // The expected profile markup is missing; treated as a WeChat captcha page.
            console.warn('出现微信验证码了');
            // Was `self.weixinFailsTime ++` with no `self` declared in this method.
            this.weixinFailsTime += 1;
        }
        return content;
    }

    /**
     * Fetch and parse the profile page of one search-result entry.
     * @param {Object} item - entry produced by `_getAccountInfo`.
     * @returns {Promise<Object>} parsed content, delivered after a 5-10 s delay; a
     *     `{info, msgList: [], articles: []}` fallback when fetching or parsing fails.
     */
    crawlArticles(item) {
        // Entries without a url fall back to '' instead of throwing synchronously.
        const url = item && item.url ? item.url.replace(/&amp;/g, '&') : '';
        return this._getArticlePage(url)
            .then(res => this._retTimeout(this._getArticleContent(res, item), 5000))
            .catch(e => {
                // Fetching the profile failed or timed out
                console.warn(164);
                console.warn(e);
                return {
                    info: item,   // avatar name desc company
                    msgList: [],
                    articles: []
                };
            });
    }

    /**
     * Extract the `src`, `ver`, `timestamp` and `signature` values of `window.sg_data`.
     * @param {*} resHtml - article page HTML.
     * @returns {Object|null} the four values, or null when the page has no
     *     `window.sg_data` or any of the values is missing.
     */
    _parseSgData(resHtml) {
        if (typeof resHtml !== 'string' || !/window\.sg_data/.test(resHtml)) {
            return null;
        }
        const patterns = {
            src: /src:"(.*?)"/,
            ver: /ver:"(.*?)"/,
            timestamp: /timestamp:"(.*?)"/,
            signature: /signature:"(.*?)"/,
        };
        const sgData = {};
        const complete = Object.keys(patterns).every(key => {
            const found = resHtml.match(patterns[key]);
            if (!found) {
                return false;
            }
            sgData[key] = found[1];
            return true;
        });
        return complete ? sgData : null;
    }

    /**
     * Fetch the JSON of one article and, when the article page exposes `window.sg_data`,
     * add `like_num` / `read_num` taken from the getcomment endpoint.
     * @param {string} url - article URL ending in `&f=json`; a falsy url resolves to `{}`.
     * @returns {Promise<*>} the article JSON as returned by `Util.Http.get`.
     */
    _getArticleJson(url) {
        if (!url) {
            return Promise.resolve({});
        }
        const options = this._getWeixinRequestOptions();
        return Util.Http.get(url, options).then(resJson => {
            const htmlUrl = url.split('&f=json')[0];
            return Util.Http.get(htmlUrl, options).then(resHtml => {
                // TODO: when nothing matches, the url may report a banned account; add waiting / cookie checks
                const sgData = this._parseSgData(resHtml);
                if (!sgData) {
                    // No read / like counters available for this article
                    return resJson;
                }

                const commentUrl = `http://mp.weixin.qq.com/mp/getcomment?src=${sgData.src}&ver=${sgData.ver}&timestamp=${sgData.timestamp}&signature=${sgData.signature}&&uin=&key=&pass_ticket=&wxtoken=&devicetype=&clientversion=0&x5=0`;
                return Util.Http.get(commentUrl, options).then(resComment => {
                    if (resJson && typeof resJson === 'object' && resComment) {
                        resJson.like_num = resComment.like_num;
                        resJson.read_num = resComment.read_num;
                    }
                    return resJson;
                });
            });
        });
    }

    /**
     * Decode a base64 string, using `atob` where it exists and `Buffer` otherwise.
     * @param {string} value
     * @returns {string}
     */
    _decodeBase64(value) {
        if (typeof atob === 'function') {
            return atob(value);
        }
        return Buffer.from(value, 'base64').toString('binary');
    }

    /**
     * Fetch the JSON of up to 8 articles listed in `account.msgList`, one after another.
     * @param {Object} account - result of `crawlArticles`.
     * @param {Object} task - current task; `task.uin` is compared with the decoded `bizuin` of each article.
     * @returns {Promise<Array|{ret: string}>} the collected article objects, or `{ret: 'next'}`
     *     when an article has no `bizuin`, its `bizuin` does not match the task, or the
     *     last remaining request failed.
     */
    crawlEveryArticle(account, task) {
        const self = this;
        const resultAll = [];
        let queue = [];
        const toJsonUrl = url => {
            const temp = (url || '').replace(/&amp;/g, '&').replace(/#wechat_redirect/g, '');
            if (!temp) {
                return '';
            }
            if (!/http:\/\/mp\.weixin\.qq\.com/.test(temp)) {
                return `http://mp.weixin.qq.com${temp}&f=json`;
            }
            return `${temp}&f=json`;
        };
        const list = account && account.msgList && account.msgList.list;
        if (Array.isArray(list) && list.length) {
            list.forEach(msg => {
                const ext = msg && msg.app_msg_ext_info;
                if (!ext) {
                    // Message without article info: nothing to fetch
                    return;
                }
                queue.push(toJsonUrl(ext.content_url));
                (ext.multi_app_msg_item_list || []).forEach(msgItem => {
                    queue.push(toJsonUrl(msgItem && msgItem.content_url));
                });
            });
            queue = queue.slice(0, 8);
        }
        function fetchNext() {
            // Dequeue before requesting so a failing url can never be retried forever.
            const url = queue.shift();
            return Promise.resolve().then(() => self._getArticleJson(url)).then(res => {
                const article = res && typeof res === 'object' ? res : {};
                if (article.link) {
                    article.content_noencode = '';
                } else if (url) {
                    article.link = url.split('&f=json')[0];
                }
                resultAll.push(article);
                // Compare uin to decide whether to stop early
                if (!article.bizuin) {
                    return { ret: 'next' };
                }
                if (task.uin !== self._decodeBase64(article.bizuin)) {
                    return { ret: 'next' };
                }
                if (!queue.length) {
                    return resultAll;
                }
                // TODO: pause/retry strategy when crawling too fast; pausing between articles fetched even fewer
                return fetchNext();
            }).catch(e => {
                console.warn(236);
                console.warn(e);
                return queue.length ? fetchNext() : { ret: 'next' };
            });
        }
        return fetchNext();
    }

    /**
     * Normalise a task response into a value that can be concatenated onto `tasks`.
     * @param {Object} res - response of `getfollowedAccountName`.
     * @returns {*} `res.data`, or `[]` when the response or its data is missing.
     */
    _extractTasks(res) {
        const data = res ? res.data : [];
        // concat(undefined) would append an `undefined` task
        return data === undefined || data === null ? [] : data;
    }

    /**
     * Start polling for tasks every `taskTime` ms. Whenever the queue is empty and no
     * fetch is in flight, the recorded failures are reported and new tasks are fetched
     * ('self' tasks in manual mode, 'all' otherwise). Calling it again replaces the
     * pending poll instead of starting a second loop.
     * @param {*} token - passed through to the injected backend accessors.
     */
    getTasks(token) {
        const self = this;
        if (self.taskTimer) {
            clearTimeout(self.taskTimer);
        }
        function poll() {
            self.taskTimer = setTimeout(poll, self.taskTime);
            if (self.tasks.length || self.getTasking) {
                return;
            }
            // Ask the backend for tasks
            self.getTasking = 1;
            // Report the tasks that failed during the previous batch
            if (self.failure.length) {
                console.warn('::::::::::::::failure');
                console.warn(self.failure);
                const tasks = self.failure.map(t => t[0]);
                console.warn('failure tasks report start');
                self.reportFailedAccount(token, tasks).then(res => {
                    console.warn('failure tasks report end::');
                    console.warn(res);
                }).catch(e => {
                    console.warn('failure tasks report end::');
                    console.warn(e);
                });
            }
            console.warn('::::::::::::::getTasks');
            const _type = self.startType === 'manual' ? 'self' : 'all';
            self.getfollowedAccountName(token, _type).then(res => {
                self.tasks = self.tasks.concat(self._extractTasks(res)); // TODO: support dynamic js execution
                self.getTasking = 0;
                self.allTasksNum = self.tasks.length;
                console.warn(self.tasks);
            }).catch(e => {
                // Release the in-flight flag, otherwise no later poll would ever fetch again.
                console.warn(e);
                self.getTasking = 0;
            });
        }
        poll();
    }

    /**
     * Fetch the 'self' tasks once and append them to the queue.
     * @param {*} token
     * @returns {Promise<void>} rejects when `getfollowedAccountName` rejects.
     */
    getSelfTasks(token) {
        return this.getfollowedAccountName(token, 'self').then(res => {
            this.tasks = this.tasks.concat(this._extractTasks(res));
            this.getTasking = 0;
            this.allTasksNum = this.tasks.length;
            console.warn(this.tasks);
        });
    }

    /**
     * Post a crawl result to the backend.
     * @param {Array} result - `[task, account]`; only `account.articles` and `account.info` are read.
     * @returns {Promise<void>|boolean} `false` when there is no first article (nothing is
     *     posted and it is not treated as an error), otherwise a promise that always
     *     resolves; a failed post raises `taskTime` / `checkTime` to 100000 ms.
     */
    sendTaskResult(result) {
        // Save the task result
        console.warn('::::::::::::::saveTaskResult');
        console.warn(result);
        const articles = result[1].articles;
        const info = result[1].info;
        let rawId = '';
        if (articles[0]) {
            info.originId = articles[0].user_name;
            rawId = articles[0].user_name;
        }
        const d = {
            raw_id: rawId,
            articles,
            info
        };
        // Nothing was crawled: the search is exhausted. Not sent to the backend, not an error.
        if (!d.raw_id) {
            return false;
        }
        return Util.Http.post('http://api.wxb.com/client/saveback2', {
            t: 'following',
            d
        }).then(res => {
            console.warn('saved success');
            console.warn(res);
            console.warn(d);
        }).catch(e => {
            console.warn(e);
            // Was `self.taskTime` / `self.checkTime` with no `self` declared in this method.
            this.taskTime = 100000;
            this.checkTime = 100000;
        });
    }

    /**
     * Start the crawl loop. In manual mode the 'self' tasks are fetched once before the
     * loop starts; otherwise task polling (`getTasks`) is started alongside the loop.
     * The loop re-checks every `checkTime` ms, crawls one task at a time and stops
     * itself (via `stopAction('inner')`) once the queue is empty.
     * @param {*} token - passed through to the backend accessors.
     * @param {string} [startType] - 'manual' or 'auto'; when omitted the current start type is kept.
     */
    start(token, startType) {
        const self = this;
        if (startType) {
            self.startType = startType;
        }
        console.warn('::::::::::SogouSpider', self.startType);

        // Mark the spider idle and remove the current task from the queue.
        function finishCurrentTask() {
            self.running = 0;
            return self.tasks.shift();
        }
        // Same as finishCurrentTask, but also records the task as failed at `stage`.
        function failCurrentTask(stage) {
            self.running = 0;
            self.failure.push([self.tasks.shift(), stage]);
        }
        // The search returned nothing: drop the task and count a sogou failure.
        function dropTaskAfterEmptySearch() {
            finishCurrentTask();
            self.sogouFailsTime += 1;
        }

        function __checkInfo(info, task) {
            return self.crawlArticles(info[0]).then(account => {
                console.warn(':::::::::::::::account');
                console.warn(account);
                info.shift();
                // WeChat id does not match: go straight to the next candidate
                if (info.length && task.wx_alias && task.wx_alias !== account.info.account) {
                    return __checkInfo(info, task);
                }
                return self.crawlEveryArticle(account, task).then(articles => {
                    const wantsNext = Boolean(articles) && articles.ret === 'next';
                    // No match: after a pause, compare the articles of the next candidate.
                    // (The candidate used to be crawled immediately because the call was
                    // evaluated as the argument of _retTimeout.)
                    if (info.length && wantsNext) {
                        return self._retTimeout(null, 5000).then(() => __checkInfo(info, task));
                    }
                    // This page is exhausted: move on to the next page
                    if (wantsNext) {
                        return self.crawlAccounts(task, self.currentSogouPage + 1).then(_info => {
                            console.warn(':::::::::::::::_info');
                            console.warn(_info);
                            if (_info && _info.length) {
                                // 'done' marks the end of the results (as opposed to an empty sogou response)
                                if (_info[0] === 'done') {
                                    // Finish the task; previously `running` stayed 1 forever.
                                    finishCurrentTask();
                                    return undefined;
                                }
                                // Continue with the candidates of the new page.
                                return self._retTimeout(null, 5000).then(() => __checkInfo(_info, task));
                            }
                            dropTaskAfterEmptySearch();
                            return undefined;
                        }).catch(e => {
                            console.warn('::::::::crawlAccounts error');
                            console.warn(e);
                            failCurrentTask('crawlAccounts');
                        });
                    }
                    if (Array.isArray(articles)) {
                        articles.forEach(item => {
                            if (item.user_name) {
                                account.articles.push(item);
                            }
                        });
                    }
                    // Save the result to the backend
                    console.warn(':::::::::::::final');
                    console.warn(account);
                    // One task finished
                    const _task = finishCurrentTask();
                    self.sendTaskResult([_task, account]);
                    return undefined;
                }).catch(e => {
                    console.warn(':::::::::crawlEveryArticle error');
                    console.warn(e);
                    failCurrentTask('crawlEveryArticle');
                });
            }).catch(e => {
                console.warn(':::::::::crawlArticles error');
                console.warn(e);
                failCurrentTask('crawlArticles');
            });
        }

        function __action() {
            // When set, this tick only waits: starting a crawl would reset the
            // intervals to 10000 ms and cancel the back-off immediately.
            let backingOff = false;
            if (self.sogouFailsTime > 3) {
                self.checkTime = 1000000;
                self.taskTime = 1000000;
                self.sogouFailsTime = 0;
                self.running = 0;
                backingOff = true;
                console.warn('sogou wait');
            }
            if (self.weixinFailsTime > 3) {
                self.checkTime = 1000000;
                self.taskTime = 1000000;
                self.weixinFailsTime = 0;
                self.running = 0;
                backingOff = true;
                console.warn('weixin wait');
            }
            // The task queue is empty again
            if (!self.tasks.length) {
                console.warn('stopAction');
                self.stopAction('inner');
            }
            if (self.stop) {
                return;
            }
            if (!backingOff && !self.running) {
                self.taskTime = 10000;
                self.checkTime = 10000;
                console.warn('::::::::::::::start');
                console.warn(new Date());
                self.running = 1;
                console.warn(self.tasks);
                const task = self.tasks[0];
                console.warn(task);
                const pending = self.crawlAccounts(task);
                if (!pending) {
                    // crawlAccounts returns false for an invalid task: drop it and go on.
                    finishCurrentTask();
                } else {
                    pending.then(info => {
                        console.warn(':::::::::::::::info');
                        console.warn(info);
                        if (info && info.length) {
                            // 'done' marks the end of the results (as opposed to an empty sogou response)
                            if (info[0] === 'done') {
                                finishCurrentTask();
                                return undefined;
                            }
                            return __checkInfo(info, task);
                        }
                        dropTaskAfterEmptySearch();
                        return undefined;
                    }).catch(e => {
                        console.warn('::::::::crawlAccounts error');
                        console.warn(e);
                        failCurrentTask('crawlAccounts');
                    });
                }
            }
            setTimeout(__action, self.checkTime);
        }

        if (startType === 'manual') {
            self.getSelfTasks(token).then(() => {
                self.stop = 0;
                __action();
            }).catch(e => {
                console.warn(e);
            });
        } else {
            // Poll the backend for tasks periodically while the loop consumes them
            self.getTasks(token);
            self.stop = 0;
            __action();
        }
    }

    /**
     * @returns {Promise<Object>} snapshot of the spider's state fields
     *     (the `tasks` and `failure` arrays are the live ones, not copies).
     */
    getStatus() {
        return Promise.resolve({
            tasks: this.tasks,
            startType: this.startType,
            sogouFailsTime: this.sogouFailsTime,
            weixinFailsTime: this.weixinFailsTime,
            stop: this.stop,
            running: this.running,
            allTasksNum: this.allTasksNum,
            getTasking: this.getTasking,
            failure: this.failure,
        });
    }

    /**
     * Reset the state and raise the stop flag; the crawl loop ends at its next check.
     * @param {string} [tag] - 'inner' when called by the loop itself, in which case
     *     `allTasksNum` is kept.
     */
    stopAction(tag) {
        this.tasks = [];
        this.startType = 'auto';
        this.sogouFailsTime = 0;
        this.weixinFailsTime = 0;
        this.running = 0;
        this.stop = 1;
        // A non-zero allTasksNum means a run happened; it is only cleared on an outside stop
        if (tag !== 'inner') {
            this.allTasksNum = 0;
        }
        this.getTasking = 0;
        this.failure = [];
    }

    // Not implemented yet: simulated login (_login), fetching and sending the
    // captcha (sendSeccode), submitting the captcha and resuming (completeSeccode).
}

module.exports = Spider;
