﻿<%@ WebHandler Language="C#" Class="uploaddoc" %>

using System;
using System.Web;
using DianCMS.COM;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;
using D.BLL;
using D.Model;
using D.Common;

public class uploaddoc : IHttpHandler
{
    private B_Site bllSite = new B_Site();
    private M_Site modelSite = new M_Site();

    /// <summary>
    /// Handles the upload of a Word (.doc) file: stores it in the dated upload folder, converts
    /// it with <c>B_Word.WordFormat</c>, cleans the resulting markup, republishes the images
    /// the markup refers to and writes a JSON object with the fields <c>result</c>,
    /// <c>imgurl</c> (the cleaned HTML body) and <c>filename</c> to the response.
    /// </summary>
    /// <remarks>
    /// The <c>path</c> query-string value is split on '|' and its first element selects the
    /// sub-folder below the configured upload folder. Errors are reported through the
    /// <c>filename</c> field; <c>result</c> is always "200".
    /// </remarks>
    /// <param name="context">The HTTP context of the current request.</param>
    public void ProcessRequest(HttpContext context)
    {
        try
        {
            modelSite = bllSite.GetModel();

            string rawPath = context.Request.QueryString["path"];
            if (rawPath == null)
            {
                // Message: "the path parameter is missing".
                WriteResult(context, string.Empty, "缺少path参数");
                return;
            }

            string folder = rawPath.Split('|')[0];
            if (!IsSafeFolder(folder))
            {
                // Message: "the path parameter is invalid".
                WriteResult(context, string.Empty, "path参数不合法");
                return;
            }

            if (context.Request.Files.Count == 0)
            {
                // Message: "no file was uploaded".
                WriteResult(context, string.Empty, "没有上传文件");
                return;
            }

            HttpPostedFile MyFile = context.Request.Files[0];

            // The client-supplied name may carry a directory part; keep only the file name
            // so the upload cannot be written outside the target folder.
            string safeName = Path.GetFileName(MyFile.FileName);
            string filename = Path.GetFileNameWithoutExtension(safeName);
            string extension = Path.GetExtension(safeName);

            if (!string.Equals(extension, ".doc", StringComparison.OrdinalIgnoreCase))
            {
                WriteResult(context, string.Empty, "文件类型只能是.doc");
                return;
            }

            // The declared content type must be one of those registered for the extension.
            string fileExt = "," + Function.GetContentType(extension) + ",";
            if (fileExt.IndexOf("," + MyFile.ContentType + ",", StringComparison.OrdinalIgnoreCase) == -1)
            {
                WriteResult(context, string.Empty, "文件类型不匹配！");
                return;
            }

            // Read the clock once so the month and day parts cannot straddle midnight.
            DateTime now = DateTime.Now;
            string ymd = now.ToString("yyyyMM") + "/" + now.ToString("dd");

            string BaseUrl;
            string sServerDir;
            if (modelSite.UploadFolderToPath.IndexOf(":\\") != -1)
            {
                // The upload root is configured as an absolute drive path with its own domain.
                BaseUrl = modelSite.UploadFolderToDomain + "/" + modelSite.UploadFolder + "/" + folder + "/" + ymd + "/";
                sServerDir = modelSite.UploadFolderToPath + "\\" + modelSite.UploadFolder + "\\" + folder + "\\" + ymd.Replace("/", "\\") + "\\";
            }
            else
            {
                BaseUrl = Param.ApplicationRootPath + "/" + modelSite.UploadFolder + "/" + folder + "/" + ymd + "/";
                sServerDir = context.Server.MapPath(BaseUrl);
            }
            Directory.CreateDirectory(sServerDir);

            string docPath = Path.Combine(sServerDir, safeName);
            string htmlPath = Path.Combine(sServerDir, filename + ".html");
            string filesDir = Path.Combine(sServerDir, filename + ".files");

            string _html;
            try
            {
                MyFile.SaveAs(docPath);

                B_Word bllWord = new B_Word();
                bllWord.WordFormat(sServerDir, filename, "html");
                // NOTE(review): delay kept from the original; presumably it gives the
                // converter time to finish writing its output - confirm.
                System.Threading.Thread.Sleep(1000);

                _html = ReadTemplate(htmlPath);
                _html = ClearWord(_html, true, false, false);
                // Line breaks left in the markup are turned into <br>.
                _html = Regex.Replace(_html, @"\r\n|\r|\n", "<br>");
                // Drop declarations such as <!DOCTYPE ...>.
                _html = Regex.Replace(_html, @"<!.*?>", "", RegexOptions.IgnoreCase);
                // Keep only the content of <body>.
                _html = Regex.Match(_html, @"<body.*?>(.*?)</body>", RegexOptions.IgnoreCase).Groups[1].Value;

                _html = RelinkImages(_html, sServerDir, BaseUrl);
            }
            finally
            {
                // Remove the upload and the converter's intermediate output even when the
                // conversion failed, so failed uploads do not accumulate on disk.
                File.Delete(docPath);
                File.Delete(htmlPath);
                DeleteDirectory(filesDir);
            }

            // Double quotes are replaced by a placeholder character and the markup is
            // capped at 2047 characters, as before; shorter markup is returned whole
            // instead of making Substring throw.
            const int MaxHtmlLength = 2047;
            string payload = _html.Replace("\"", "☂");
            if (payload.Length > MaxHtmlLength)
            {
                payload = payload.Substring(0, MaxHtmlLength);
            }

            WriteResult(context, payload, ymd + "/" + safeName);
        }
        catch (Exception _e)
        {
            WriteResult(context, string.Empty, _e.Message);
        }
    }

    /// <summary>
    /// Writes the handler's JSON result object with both values escaped as JSON strings.
    /// </summary>
    private static void WriteResult(HttpContext context, string imgurl, string filename)
    {
        context.Response.Write("{\"result\":\"200\",\"imgurl\":\"" + JsonEscape(imgurl) + "\",\"filename\":\"" + JsonEscape(filename) + "\"}");
    }

    /// <summary>
    /// Escapes quotes, backslashes and control characters so that the value can be placed
    /// between double quotes in a JSON document.
    /// </summary>
    private static string JsonEscape(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return string.Empty;
        }

        StringBuilder sb = new StringBuilder(value.Length + 16);
        foreach (char c in value)
        {
            switch (c)
            {
                case '"': sb.Append("\\\""); break;
                case '\\': sb.Append("\\\\"); break;
                case '\b': sb.Append("\\b"); break;
                case '\f': sb.Append("\\f"); break;
                case '\n': sb.Append("\\n"); break;
                case '\r': sb.Append("\\r"); break;
                case '\t': sb.Append("\\t"); break;
                default:
                    if (c < ' ')
                    {
                        sb.Append("\\u").Append(((int)c).ToString("x4"));
                    }
                    else
                    {
                        sb.Append(c);
                    }
                    break;
            }
        }
        return sb.ToString();
    }

    /// <summary>
    /// Returns false for folder values that could leave the upload root: parent-directory
    /// sequences, drive or stream separators and characters that are invalid in a path.
    /// </summary>
    private static bool IsSafeFolder(string folder)
    {
        return folder.IndexOf("..", StringComparison.Ordinal) < 0
            && folder.IndexOf(':') < 0
            && folder.IndexOfAny(Path.GetInvalidPathChars()) < 0;
    }

    /// <summary>
    /// Copies every locally available image referenced by an &lt;img src&gt; in
    /// <paramref name="html"/> to a newly named file in <paramref name="serverDir"/> and
    /// rewrites the references to <paramref name="baseUrl"/> plus the new name.
    /// References that do not resolve to a file below <paramref name="serverDir"/> are left
    /// untouched instead of failing the whole upload.
    /// </summary>
    private static string RelinkImages(string html, string serverDir, string baseUrl)
    {
        string pattStr = @"<img( ||.*?)src=('|""|)([^""|^\']+)('|""|>| )";
        foreach (Match m in Regex.Matches(html, pattStr, RegexOptions.IgnoreCase))
        {
            string picName = m.Groups[3].Value;

            // A reference that occurs several times was already rewritten on its first hit.
            if (html.IndexOf(picName, StringComparison.OrdinalIgnoreCase) < 0)
            {
                continue;
            }

            string source = ResolveUnderDirectory(serverDir, picName);
            if (source == null)
            {
                // Second attempt in case the reference is percent-encoded (e.g. %20).
                source = ResolveUnderDirectory(serverDir, HttpUtility.UrlDecode(picName));
            }
            if (source == null)
            {
                continue;
            }

            string newpicName = Function.GetFileName() + Path.GetExtension(source);
            File.Copy(source, Path.Combine(serverDir, newpicName));

            // The image name is literal text, not a pattern, and '$' in the replacement
            // must not be read as a group reference.
            html = Regex.Replace(html, Regex.Escape(picName), (baseUrl + newpicName).Replace("$", "$$"), RegexOptions.IgnoreCase);

            // NOTE(review): delay kept from the original; presumably it keeps the names
            // returned by Function.GetFileName() distinct - confirm.
            System.Threading.Thread.Sleep(200);
        }
        return html;
    }

    /// <summary>
    /// Resolves <paramref name="relative"/> against <paramref name="baseDir"/> and returns
    /// the full path when it names an existing file inside that directory; otherwise null.
    /// </summary>
    private static string ResolveUnderDirectory(string baseDir, string relative)
    {
        try
        {
            string root = Path.GetFullPath(baseDir);
            if (!root.EndsWith(Path.DirectorySeparatorChar.ToString(), StringComparison.Ordinal))
            {
                root += Path.DirectorySeparatorChar;
            }

            string candidate = Path.GetFullPath(Path.Combine(root, relative));
            if (candidate.StartsWith(root, StringComparison.OrdinalIgnoreCase) && File.Exists(candidate))
            {
                return candidate;
            }
        }
        catch (ArgumentException)
        {
            // Not a usable file-system path (for example an absolute URL): treat as absent.
        }
        catch (NotSupportedException)
        {
            // Same as above: the reference contains a ':' outside a drive specifier.
        }
        catch (PathTooLongException)
        {
            // Same as above.
        }
        return null;
    }

    /// <summary>
    /// Inspects up to the first 1024 bytes of the uploaded file, decoded as ASCII, for HTML
    /// or script markers.
    /// </summary>
    /// <param name="file">The uploaded file to inspect.</param>
    /// <returns>
    /// <c>false</c> when one of the markers is found; otherwise <c>true</c>.
    /// </returns>
    private bool CheckNonHtmlFile(HttpPostedFile file)
    {
        Stream input = file.InputStream;
        long startPosition = input.CanSeek ? input.Position : -1;

        byte[] buffer = new byte[1024];
        int total = 0;
        try
        {
            // Read may return fewer bytes than requested, so keep reading until the buffer
            // is full or the stream ends, and decode only what was actually read.
            int read;
            while (total < buffer.Length && (read = input.Read(buffer, total, buffer.Length - total)) > 0)
            {
                total += read;
            }
        }
        finally
        {
            // Put the stream back where it was so later readers see the whole upload.
            if (startPosition >= 0)
            {
                input.Position = startPosition;
            }
        }

        string firstKB = Encoding.ASCII.GetString(buffer, 0, total);

        string[] markers =
        {
            // Document type declaration.
            @"<!DOCTYPE\W*X?HTML",
            // Common HTML elements.
            @"<(?:body|head|html|img|pre|script|table|title)",
            // type = javascript
            @"type\s*=\s*[\'""]?\s*(?:\w*/)?(?:ecma|java)",
            // href / src / data = javascript:
            @"(?:href|src|data)\s*=\s*[\'""]?\s*(?:ecma|java)script:",
            // url(javascript:
            @"url\s*\(\s*[\'""]?\s*(?:ecma|java)script:"
        };

        foreach (string marker in markers)
        {
            if (Regex.IsMatch(firstKB, marker, RegexOptions.IgnoreCase | RegexOptions.Singleline))
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Reads the whole file at the given path using the system default encoding.
    /// </summary>
    /// <param name="templatePath">Path of the file to read.</param>
    /// <returns>The file content, or an empty string when the file cannot be read.</returns>
    public string ReadTemplate(string templatePath)
    {
        StreamReader reader = null;
        try
        {
            reader = new StreamReader(templatePath, Encoding.Default);
            return reader.ReadToEnd();
        }
        catch
        {
            // Any failure to open or read the file is reported as empty content.
            return string.Empty;
        }
        finally
        {
            if (reader != null)
            {
                reader.Close();
            }
        }
    }

    /// <summary>
    /// Strips Word-specific markup (Office namespace tags, comments, class/lang attributes,
    /// selected inline styles, empty spans and similar) from HTML by applying a fixed
    /// sequence of regular-expression replacements.
    /// </summary>
    /// <param name="sourceText">The HTML to clean; null or empty yields an empty string.</param>
    /// <param name="bIgnoreFont">When true, <c>face</c> attributes are removed.</param>
    /// <param name="bRemoveStyles">When true, double-quoted <c>style</c> attributes are removed.</param>
    /// <param name="cleanWordKeepsStructure">
    /// When true, headings are kept as bare <c>&lt;hN&gt;</c> elements; when false each heading
    /// is rewritten to <c>&lt;div&gt;&lt;b&gt;&lt;font size="…"&gt;</c> with matching closing tags.
    /// </param>
    /// <returns>The cleaned HTML.</returns>
    public static string ClearWord(string sourceText, bool bIgnoreFont, bool bRemoveStyles, bool cleanWordKeepsStructure)
    {
        if (string.IsNullOrEmpty(sourceText))
        {
            return string.Empty;
        }

        sourceText = Regex.Replace(sourceText, @"<o:p>\s*<\/o:p>", "");
        sourceText = Regex.Replace(sourceText, @"<o:p>.*?<\/o:p>", " ");
        sourceText = Regex.Replace(sourceText, @"\r\n", " ");

        // Remove empty named anchors such as <a name="_Toc231123614"></a>.
        sourceText = Regex.Replace(sourceText, @"<a name=\s*[^>]*><\/a>", " ");

        // Remove margin styles.
        sourceText = Regex.Replace(sourceText, @"\s*MARGIN: 0cm 0cm 0pt\s*;", "", RegexOptions.IgnoreCase);
        sourceText = Regex.Replace(sourceText, @"\s*MARGIN: 0cm 0cm 0pt\s*""", "\"", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"\s*TEXT-INDENT: 0cm\s*;", "", RegexOptions.IgnoreCase);
        sourceText = Regex.Replace(sourceText, @"\s*TEXT-INDENT: 0cm\s*""", "\"", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"\s*TEXT-ALIGN: [^\s;]+;?""", "\"", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"\s*PAGE-BREAK-BEFORE: [^\s;]+;?""", "\"", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"\s*FONT-VARIANT: [^\s;]+;?""", "\"", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"\s*tab-stops:[^;""]*;?", "", RegexOptions.IgnoreCase);
        sourceText = Regex.Replace(sourceText, @"\s*tab-stops:[^""]*", "", RegexOptions.IgnoreCase);

        // Remove FONT face attributes.
        if (bIgnoreFont)
        {
            sourceText = Regex.Replace(sourceText, @"\s*face=""[^""]*""", "", RegexOptions.IgnoreCase);
            sourceText = Regex.Replace(sourceText, @"\s*face=[^ >]*", "", RegexOptions.IgnoreCase);
        }

        // Remove class attributes.
        sourceText = Regex.Replace(sourceText, @"<(\w[^>]*) class=([^ |>]*)([^>]*)", "<$1$3", RegexOptions.IgnoreCase);

        // Remove styles.
        if (bRemoveStyles)
        {
            sourceText = Regex.Replace(sourceText, @"<(\w[^>]*) style=""([^\""]*)""([^>]*)", "<$1$3", RegexOptions.IgnoreCase);
        }

        // Remove empty styles.
        sourceText = Regex.Replace(sourceText, @"\s*style=""\s*""", "", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"<SPAN\s*[^>]*>\s* \s*<\/SPAN>", " ", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"<SPAN\s*[^>]*><\/SPAN>", "", RegexOptions.IgnoreCase);

        // Remove lang attributes.
        sourceText = Regex.Replace(sourceText, @"<(\w[^>]*) lang=([^ |>]*)([^>]*)", "<$1$3", RegexOptions.IgnoreCase);

        // Unwrap attribute-less <span> and <font> elements.
        sourceText = Regex.Replace(sourceText, @"<SPAN\s*>(.*?)<\/SPAN>", "$1", RegexOptions.IgnoreCase);

        sourceText = Regex.Replace(sourceText, @"<FONT\s*>(.*?)<\/FONT>", "$1", RegexOptions.IgnoreCase);

        // Remove XML elements and declarations.
        sourceText = Regex.Replace(sourceText, @"<\\?\?xml[^>]*>", "", RegexOptions.IgnoreCase);

        // Remove tags with XML namespace prefixes, e.g. <o:p> and </o:p>.
        sourceText = Regex.Replace(sourceText, @"<\/?\w+:[^>]*>", "", RegexOptions.IgnoreCase);

        // Remove comments. The previous pattern ended in a stray '/', so it only matched a
        // comment that happened to be followed by a slash; [\s\S] also spans line breaks.
        sourceText = Regex.Replace(sourceText, @"<!--[\s\S]*?-->", "");

        sourceText = Regex.Replace(sourceText, @"<(U|I|STRIKE)> <\/\1>", " ");
        sourceText = Regex.Replace(sourceText, @"<H\d>\s*<\/H\d>", "", RegexOptions.IgnoreCase);

        // Remove "display:none" elements.
        sourceText = Regex.Replace(sourceText, @"<(\w+)[^>]*\sstyle=""[^""]*DISPLAY\s?:\s?none(.*?)<\/\1>", "", RegexOptions.IgnoreCase);

        // Remove language attributes.
        sourceText = Regex.Replace(sourceText, @"<(\w[^>]*) language=([^ |>]*)([^>]*)", "<$1$3", RegexOptions.IgnoreCase);

        // Remove onmouseover and onmouseout handlers.
        sourceText = Regex.Replace(sourceText, @"<(\w[^>]*) onmouseover=""([^\""]*)""([^>]*)", "<$1$3", RegexOptions.IgnoreCase);
        sourceText = Regex.Replace(sourceText, @"<(\w[^>]*) onmouseout=""([^\""]*)""([^>]*)", "<$1$3", RegexOptions.IgnoreCase);

        if (cleanWordKeepsStructure)
        {
            // Drop the attributes of heading start tags: <Hn style="..."> becomes <hn>.
            sourceText = Regex.Replace(sourceText, @"<H(\d)([^>]*)>", "<h$1>", RegexOptions.IgnoreCase);

            // Unwrap a <font> or <em> that spans a whole heading. A backslash in a .NET
            // replacement string is emitted literally, so the closing tag is "</$1>".
            sourceText = Regex.Replace(sourceText, @"<(H\d)><FONT[^>]*>(.*?)<\/FONT><\/\1>", "<$1>$2</$1>", RegexOptions.IgnoreCase);
            sourceText = Regex.Replace(sourceText, @"<(H\d)><EM>(.*?)<\/EM><\/\1>", "<$1>$2</$1>", RegexOptions.IgnoreCase);
        }
        else
        {
            // Rewrite <H1>..<H6> start tags to <div><b><font size="6">..<font size="1">,
            // keeping the heading's attributes on the <div>.
            for (int level = 1; level <= 6; level++)
            {
                sourceText = Regex.Replace(
                    sourceText,
                    "<H" + level + "([^>]*)>",
                    "<div$1><b><font size=\"" + (7 - level) + "\">",
                    RegexOptions.IgnoreCase);
            }

            // Close what the rewritten start tags opened; leaving </Hn> in place produced
            // unbalanced markup.
            sourceText = Regex.Replace(sourceText, @"<\/H\d>", "</font></b></div>", RegexOptions.IgnoreCase);
        }

        return sourceText;
    }

    /// <summary>
    /// Deletes a directory together with everything it contains. Does nothing when the
    /// directory does not exist.
    /// </summary>
    /// <param name="path">Path of the directory to delete.</param>
    public void DeleteDirectory(string path)
    {
        // A recursive delete already removes every sub-directory and file, so no separate
        // pass over the children is needed. Directory.Exists returns false for null.
        if (Directory.Exists(path))
        {
            Directory.Delete(path, true);
        }
    }

    /// <summary>
    /// Always false: this handler instance is not offered for reuse by another request.
    /// </summary>
    public bool IsReusable
    {
        get { return false; }
    }

}