using System;
using System.IO;
using System.Net;
using System.Security.Cryptography;
using System.Text;
using Microsoft.Office.Server.Search.Administration;
namespace CrawlLogExporter
{
    /// <summary>
    /// The world's smallest connector for pushing SharePoint crawl log data to Elasticsearch.
    /// </summary>
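    /// <example>
    /// Example invocation (the GUID and content source name are placeholders for your
    /// own Search Service Application id and content source):
    /// <code>CrawlLogExporter.exe 11111111-2222-3333-4444-555555555555 "Local SharePoint sites"</code>
    /// </example>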
class Program
{
static void Main(string[] args)
{
            // Expect two arguments: the Search Service Application GUID and a content source name.
            if (args.Length < 2)
            {
                Console.Error.WriteLine("Usage: CrawlLogExporter.exe <searchServiceApplicationId> <contentSourceName>");
                return;
            }
            var appId = new Guid(args[0]);
            var contentSourceName = args[1];
            SearchService searchService = SearchService.Service;
            SearchServiceApplication searchApp =
                searchService.SearchApplications.GetValue(appId);
            Console.WriteLine("Found Search Service Application");
            var viewer = new LogViewer(searchApp);
            viewer.MaxDaysCrawlLogged = 20;
            Console.WriteLine("Getting all status messages");
            var msgs = viewer.GetAllStatusMessages();
            Console.WriteLine("Got 'em all!");
            Content content = new Content(searchApp);
            var sources = content.ContentSources;
            // Resolve the content source name given on the command line to its numeric id.
            int sourceId = 0;
            foreach (var source in sources)
            {
                var cs = (ContentSource)source;
                Console.WriteLine(cs.Name + " Warnings: " + cs.WarningCount + ", Errors: " + cs.ErrorCount + ", Id: " + cs.Id);
                if (cs.Name.Equals(contentSourceName))
                {
                    sourceId = cs.Id;
                }
            }
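            // Each status message row pairs a crawl log message id with its text;
            // the loop below exports one filtered batch of URLs per message id.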
            foreach (var errmsg in msgs.Select())
            {
                int errorId = Convert.ToInt32(errmsg.ItemArray[0].ToString());
                // 0 = OK, 1 = deletes
                //if (errorId == 0 || errorId == 1) continue;
                string errorMessage = errmsg.ItemArray[1].ToString();
                Console.WriteLine("Working with errorId " + errorId + " (" + errorMessage + ")");
                var crawlFilters = new CrawlLogFilters();
                crawlFilters.AddFilter(CrawlLogFilterProperty.ContentSourceId, sourceId);
                crawlFilters.AddFilter(CrawlLogFilterProperty.MessageId, errorId);
                //crawlFilters.AddFilter(DateTime.Now.AddDays(-1), DateTime.Now);
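                // Page through the crawl log in batches: GetCurrentCrawlLogData returns
                // the offset of the next batch via nextStart, or -1 when no entries remain.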
                int nextStart = 0;
                int batchSize = 100000;
                while (nextStart != -1)
                {
                    crawlFilters.AddFilter(CrawlLogFilterProperty.StartAt, nextStart);
                    crawlFilters.AddFilter(CrawlLogFilterProperty.TotalEntries, batchSize);
                    Console.WriteLine(string.Format("Getting {0} entries starting from {1}", batchSize, nextStart));
                    var urls = viewer.GetCurrentCrawlLogData(crawlFilters, out nextStart);
foreach (var url in urls.Select())
{
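                        // Crawl log columns used here: 0 = URL, 2 = error message,
                        // 4 = error description, 6 = timestamp.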
var entry = new CrawlLogEntry
{
                            ContentSource = contentSourceName,
Url = url.ItemArray[0].ToString(),
ErrorId = errorId,
ErrorMessage = url.ItemArray[2].ToString(),
ErrorDescription = url.ItemArray[4].ToString(),
Date = ((DateTime) url.ItemArray[6]).ToString("yyyy-MM-dd HH:mm:ss")
};
//System.Console.Out.WriteLine("Working on: " + entry.Url);
var jsonSerializer = new System.Web.Script.Serialization.JavaScriptSerializer();
string json = jsonSerializer.Serialize(entry);
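                        // Indexing with a deterministic id (md5 of date + url) makes the export
                        // idempotent: re-running it overwrites existing documents instead of
                        // creating duplicates.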
                        var httpWebRequest =
                            (HttpWebRequest)WebRequest.Create("http://localhost:9200/crawllog/log/" + GetHashId(entry)); // hash = md5(date + url)
                        httpWebRequest.ContentType = "application/json";
                        httpWebRequest.Method = "PUT"; // PUT rather than POST, since we supply the document id explicitly
                        using (var streamWriter = new StreamWriter(httpWebRequest.GetRequestStream()))
                        {
                            streamWriter.Write(json);
                        }
                        using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                        using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
                        {
                            var result = streamReader.ReadToEnd();
                            //Console.WriteLine(httpResponse.StatusDescription);
                        }
}
}
}
}
        /// <summary>
        /// Builds a stable Elasticsearch document id by MD5-hashing the entry's date and URL.
        /// </summary>
        private static string GetHashId(CrawlLogEntry entry)
        {
            string source = entry.Date + entry.Url;
            byte[] sourceBytes = Encoding.ASCII.GetBytes(source);
            byte[] hash;
            using (var md5 = MD5.Create())
            {
                hash = md5.ComputeHash(sourceBytes);
            }
            // Two hex characters per hash byte.
            var output = new StringBuilder(hash.Length * 2);
            foreach (byte b in hash)
            {
                output.Append(b.ToString("X2"));
            }
            return output.ToString();
        }
}
    /// <summary>
    /// Simple DTO holding one crawl log entry before it is pushed to Elasticsearch.
    /// </summary>
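    /// <remarks>
    /// Serialized with JavaScriptSerializer, an entry comes out roughly like
    /// (field values here are illustrative):
    /// {"Date":"2014-05-01 12:00:00","ErrorId":404,"ErrorMessage":"Not found",
    ///  "ErrorDescription":"...","Url":"http://intranet/page.aspx","ContentSource":"Local SharePoint sites"}
    /// </remarks>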
public class CrawlLogEntry
{
public string Date { get; set; }
public int ErrorId { get; set; }
public string ErrorMessage { get; set; }
public string ErrorDescription { get; set; }
public string Url { get; set; }
public string ContentSource { get; set; }
}
}