package com.k_int.discover.util;

import java.text.DateFormatSymbols;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Best-effort parser that turns free-text temporal descriptions (single dates,
 * decades, centuries, explicit ranges, BCE/CE ranges) into a {@link DateRange}.
 *
 * <p>
 * Parsing is driven by the static list {@link #formats}, tried in order, plus a
 * number of regular-expression based special cases. An optional, caller supplied
 * {@code inputFormat} is always tried first.
 * </p>
 *
 * <p>
 * Notes for maintainers:
 * </p>
 * <ul>
 * <li>The {@code SimpleDateFormat} instances in {@link #formats} are shared by
 * all callers. {@code SimpleDateFormat} is not synchronized, so parsing from
 * several threads at once needs external synchronization.</li>
 * <li>Formats created without explicit symbols use the JVM default locale for
 * month and day names.</li>
 * </ul>
 */
public class DateParser
{

  private static final Log log = LogFactory.getLog(DateParser.class);

  /** Matches text holding at least one character other than digit, whitespace, '.' or '-'. */
  private static final String HAS_NON_NUMERIC_CHARS = ".*[^\\d\\s\\.\\-]+.*";

  /** A single character other than digit, whitespace, '.' or '-'. */
  private static final String NON_NUMERIC_CHAR = "[^\\d\\s\\.\\-]";

  /** Matches text ending in a two digit ordinal followed by "century", e.g. "early 19th century". */
  private static final String CENTURY = ".*\\d\\dth\\s+century";

  /** AD ranges that start or end after this year are rejected by {@link #parseDateRange(String)}. */
  private static final int LATEST_ACCEPTED_AD_YEAR = 2050;

  /** Optional pattern (SimpleDateFormat syntax) tried before the built-in formats; may be null. */
  private String inputFormat;

  /** Built-in formats, tried in list order; all are non-lenient. */
  protected static List<SimpleDateFormat> formats;

  /** Current year plus five, computed at class load time. Not consulted by the parsing code below. */
  protected static int maxYear;

  static
  {
    formats = new ArrayList<SimpleDateFormat>();

    // UK symbols, but with "Sept" as the abbreviation for September
    DateFormatSymbols s = new DateFormatSymbols(Locale.UK);
    String[] months = s.getShortMonths();
    months[8] = "Sept";
    s.setShortMonths(months);

    formats.add(new SimpleDateFormat("EEEE',' d MMMM',' yyyy"));
    formats.add(new SimpleDateFormat("dd MMMM',' yyyy"));
    formats.add(new SimpleDateFormat("dd MMM yy", s));
    formats.add(new SimpleDateFormat("dd MMM yy"));
    formats.add(new SimpleDateFormat("MMM dd yy", s));
    formats.add(new SimpleDateFormat("MMM dd yy"));
    formats.add(new SimpleDateFormat("MMM dd',' yy", s));
    formats.add(new SimpleDateFormat("MMM dd',' yy"));
    formats.add(new SimpleDateFormat("dd'/'MM'/'yy"));
    // e.g. 06/1946
    formats.add(new SimpleDateFormat("MM'/'yy"));
    formats.add(new SimpleDateFormat("dd'-'MM'-'yy"));
    formats.add(new SimpleDateFormat("dd'-'MMM'-'yy", s));
    formats.add(new SimpleDateFormat("dd'-'MMM'-'yy"));
    formats.add(new SimpleDateFormat("yyyy'-'MM'-'dd"));
    // e.g. 1936-10
    formats.add(new SimpleDateFormat("yy'-'MM"));
    formats.add(new SimpleDateFormat("dd'.'MM'.'yyyy"));
    // e.g. 1815.06.12 - needed by the "[1815.06.12] (source ...)" case in
    // parseDateRange; placed after dd.MM.yyyy so day-first input keeps its
    // existing interpretation
    formats.add(new SimpleDateFormat("yyyy'.'MM'.'dd"));
    // e.g. 6.1946
    formats.add(new SimpleDateFormat("MM'.'yyyy"));
    formats.add(new SimpleDateFormat("MMM yy", s));
    formats.add(new SimpleDateFormat("MMM yy"));
    formats.add(new SimpleDateFormat("yyyy"));

    // strict parsing, so that e.g. a "day" of 1815 is rejected instead of rolled over
    for (SimpleDateFormat f : formats)
      f.setLenient(false);

    Calendar c = new GregorianCalendar();
    // set maximum year: five years from now
    maxYear = c.get(Calendar.YEAR) + 5;
  }

  /**
   * Creates a parser that tries the given pattern before the built-in formats.
   *
   * @param inputFormat
   *          a SimpleDateFormat pattern, or null to use the built-in formats only
   */
  public DateParser(String inputFormat)
  {
    this.inputFormat = inputFormat;
  }

  /**
   * Creates a parser that uses the built-in formats only.
   */
  public DateParser()
  {

  }

  /**
   * Parses a free-text temporal description into a date range.
   *
   * <p>
   * The strategies are tried in this order: the configured input format, a whole
   * string match against the built-in formats, "start=..; end=..;" pairs,
   * "yyyy-MM-dd/yyyy-MM-dd", BCE/CE ranges, a bracketed dotted date followed by a
   * parenthesised note, each ';' or ',' separated token, and finally the whole
   * text as a single range.
   * </p>
   *
   * @param text
   *          the text to parse; may be null
   * @return an empty {@link DateRange} when {@code text} is null; the parsed range
   *         when one of the strategies succeeds (the BCE/CE and bracketed cases
   *         return a range even when its dates could not be parsed); otherwise
   *         null when the final attempt yields no start or end date, or an AD
   *         range beyond the year 2050
   */
  public DateRange parseDateRange(String text)
  {
    if (text == null)
      return new DateRange();
    DateRange dateRange = new DateRange();
    log.debug("Parsing text: " + text);

    // 1. caller supplied format
    if (inputFormat != null)
    {
      Calendar formatted = getCalendar(inputFormat, text);
      if (formatted != null)
      {
        dateRange.setStartDate(completeStartDate(formatted));
        dateRange.setEndDate(completeEndDate(formatted));
        return dateRange;
      }
    }

    // 2. the whole text is a single date in one of the built-in formats
    Calendar date = parseDate(text, inputFormat, false, new ParsePosition(0), true);
    if (date != null)
    {
      dateRange.setStartDate(completeStartDate(date));
      dateRange.setEndDate(completeEndDate(date));
      return dateRange;
    }

    // 3. e.g. start=1920; end=1930;
    if (text.matches("start=.*;\\s*end=.*;"))
    {
      // note: the rewritten text is also what the later steps operate on
      text = text.replaceAll("start=(.*);\\s*end=(.*);", "$1 - $2");
      dateRange = parseSingleDateRange(text);
      if (dateRange.getStartDate() != null && dateRange.getEndDate() != null)
        return dateRange;
    }

    // 4. e.g. 1969-01-01/1980-12-31
    if (text.matches("\\d+\\-\\d+\\-\\d+/\\d+\\-\\d+\\-\\d+"))
    {
      log.debug("matched test 1");
      String[] split = text.split("/");
      log.debug("split[0] = " + split[0]);
      log.debug("split[1] = " + split[1]);

      Calendar from = parseDate(split[0], this.inputFormat);
      Calendar to = parseDate(split[1], this.inputFormat);

      dateRange.setStartDate(completeStartDate(from));
      dateRange.setEndDate(completeEndDate(to));

      if (dateRange.getStartDate() != null && dateRange.getEndDate() != null)
        return dateRange;
    }

    // 5. e.g. 2650 - 2150 BCE
    // or 2650 BCE - 2150 BCE
    if (text.matches("\\d+\\s*BCE\\s*-\\s*\\d+\\s*BCE") || text.matches("\\d+\\s*-\\s*\\d+\\s*BCE"))
    {
      log.debug("########################## Matched something like xxxx - xxxx BCE or xxxx BCE - xxxx BCE!");

      String[] split = text.split("-");
      Calendar from = parseDate(split[0].trim(), this.inputFormat, GregorianCalendar.BC);
      Calendar to = parseDate(split[1].trim(), this.inputFormat, GregorianCalendar.BC);

      dateRange.setStartEra("BC");
      dateRange.setStartDate(completeStartDate(from));
      dateRange.setEndEra("BC");
      dateRange.setEndDate(completeEndDate(to));

      return dateRange;
    }

    // 6. e.g. 1000 BCE - 356 CE
    if (text.matches("\\d+\\s*BCE\\s*-\\s*\\d+\\s*CE"))
    {
      log.debug("########################## Matched xxxx BCE - xxxx CE!");

      String[] split = text.split("-");
      Calendar from = parseDate(split[0].trim(), this.inputFormat, GregorianCalendar.BC);
      Calendar to = parseDate(split[1].trim(), this.inputFormat, GregorianCalendar.AD);

      dateRange.setStartEra("BC");
      dateRange.setStartDate(completeStartDate(from));
      dateRange.setEndEra("AD");
      dateRange.setEndDate(completeEndDate(to));

      return dateRange;
    }

    // 7. e.g. [1815.06.12] (source: British Museum catalogue)
    if (text.matches("\\[?\\d+\\.\\d+\\.\\d+\\]?\\s*\\(.*\\)"))
    {
      log.debug("########################## Matched [xxxx.xx.xx] (*****)");
      String[] split = text.split("\\(");
      // lenient parsing strips the square brackets before the formats are tried
      Calendar from = parseDate(split[0].trim(), this.inputFormat, GregorianCalendar.AD);

      dateRange.setStartDate(completeStartDate(from));
      dateRange.setEndDate(completeEndDate(from));

      return dateRange;
    }

    // 8. several candidates separated by ';' or ',': the first complete one wins
    if (text.contains(";") || (text.contains(",")))
    {
      String[] tokens = text.split("(;|,)");
      for (int i = 0; i < tokens.length; i++)
      {
        dateRange = parseSingleDateRange(tokens[i]);
        if (dateRange.getStartDate() != null && dateRange.getEndDate() != null)
          return dateRange;
      }
    }

    // 9. last resort: the whole text as a single range
    dateRange = parseSingleDateRange(text);
    // If we have had issues working out the dates then clear it out
    if (dateRange.getStartDate() == null || dateRange.getEndDate() == null)
    {
      log.debug("Temporal data missing or not parsed! " + text);
      dateRange = null;
    } else if ("AD".equals(dateRange.getEndEra()) && yearOf(dateRange.getEndDate()) > LATEST_ACCEPTED_AD_YEAR)
    {
      log.debug("Temporal data parsed with an end date > 2050AD! Ignoring this data");
      dateRange = null;
    } else if ("AD".equals(dateRange.getStartEra()) && yearOf(dateRange.getStartDate()) > LATEST_ACCEPTED_AD_YEAR)
    {
      log.debug("Temporal data parsed with a start date > 2050 AD! Ignoring this data");
      dateRange = null;
    }
    return dateRange;
  }

  /**
   * Returns the calendar year of the given date in the default time zone.
   *
   * <p>
   * Used instead of the deprecated {@code Date.getYear()}, which returns the year
   * minus 1900 and therefore cannot be compared with a calendar year directly.
   * NOTE(review): assumes {@code DateRange} exposes its dates as
   * {@code java.util.Date} - confirm against DateRange.
   * </p>
   */
  private static int yearOf(Date date)
  {
    Calendar cal = new GregorianCalendar();
    cal.setTime(date);
    return cal.get(Calendar.YEAR);
  }

  /**
   * Parses one candidate fragment: a century, a decade, a decade range, an
   * explicit range containing '-', or a single date.
   *
   * <p>
   * The text is lower-cased and qualifiers such as "ca.", "circa", "approx",
   * "mid" and "?" are blanked out first. If nothing could be parsed and the text
   * still contains characters other than digits, whitespace, '.' and '-', those
   * characters are removed and the remainder is parsed once more.
   * </p>
   *
   * @param text
   *          the fragment to parse; must not be null
   * @return a range, never null; its start and/or end date is null when the
   *         fragment could not be understood
   */
  private DateRange parseSingleDateRange(String text)
  {
    DateRange dateRange = new DateRange();
    try
    {
      text = text.toLowerCase();
      String toBeOmitted = "((ca?\\.)|(circa)|(approx(\\.)?)|(mid(-)?)|(\\?))";
      text = text.replaceAll(toBeOmitted, " ").replaceAll("=", "-").replaceAll("--", "-").replaceAll(" to ", " - ")
          .trim();
      // 19th century, early 20th century
      if (text.matches(CENTURY))
      {
        // capture the two digits directly in front of the trailing "th century";
        // the text matches the whole pattern, so the replacement leaves only them
        int century = Integer.parseInt(text.replaceFirst(".*(\\d\\d)th\\s+century", "$1"));
        if (century > 0)
        {
          Calendar cal = new GregorianCalendar();
          cal.clear();
          // the 19th century runs from 1801 to 1900
          cal.set(Calendar.YEAR, (century - 1) * 100 + 1);
          dateRange.setStartDate(completeStartDate(cal));
          cal.set(Calendar.YEAR, century * 100);
          dateRange.setEndDate(completeEndDate(cal));
        }
      }
      // e.g. 1980s - 1990s
      else if (text.matches(".*\\d\\d\\d0s\\s*-.*\\d\\d\\d0s"))
      {
        String[] split = text.split("-");
        Calendar calS = getDecade(split[0].trim());
        Calendar calE = getDecade(split[1].trim());
        dateRange.setStartDate(completeStartDate(calS));
        if (calE != null)
        {
          int year = calE.get(Calendar.YEAR) + 9;
          calE.clear();
          calE.set(Calendar.YEAR, year);
          dateRange.setEndDate(completeEndDate(calE));
        }
      }
      // e.g. 1980s
      else if (text.matches(".*\\d\\d\\d0s"))
      {
        // getDecadeDateRange returns null for a non-positive decade (e.g. "0000s");
        // keep the empty range in that case so callers never see null
        DateRange decade = getDecadeDateRange(text);
        if (decade != null)
          dateRange = decade;
      } else if (text.contains("-"))
      {
        dateRange = test(text);
      }
      // single date parsing
      if (dateRange.getStartDate() == null)
      {
        Calendar date = parseDate(text, inputFormat);
        dateRange.setStartDate(completeStartDate(date));
        dateRange.setEndDate(completeEndDate(date));
      }
      if ((dateRange.getStartDate() == null || dateRange.getEndDate() == null) && text.matches(HAS_NON_NUMERIC_CHARS))
      {
        // removing any non-digit characters(except space) and trying to parse
        // it again; the stripped text cannot match again, so this recurses once
        return parseSingleDateRange(text.replaceAll(NON_NUMERIC_CHAR, "").trim());
      }

    } catch (Exception e)
    {
      // best effort: report the problem and return whatever has been parsed so far
      log.warn("Failed to parse date range from: " + text, e);
    }

    return dateRange;
  }

  /**
   * Splits a text containing a range separator into a start and an end part and
   * parses both with the built-in formats.
   *
   * @param text
   *          the range text, e.g. "jun-aug 1994", "5-6 july 1952", "1985-96",
   *          "january 1985 - march 1995" or "1920-06-14 - 1928-07-15"
   * @return a range with start date, end date and the matched source text set, or
   *         an empty range when the text matches none of the shapes, one side
   *         cannot be parsed, or the start year is after the end year
   */
  protected static DateRange test(String text)
  {
    DateRange dateRange = new DateRange();
    String original = null, start = null, end = null;
    // e.g. Jun-Aug 1994
    if (text.matches("[a-z|A-Z]{3,9}\\s*(\\-|to)\\s*[a-z|A-Z]{3,9}\\s+\\d{4}.*"))
    {
      start = text.replaceFirst("([a-z|A-Z]+)\\s*(\\-|to)\\s*([a-z|A-Z]+)(\\s+\\d\\d\\d\\d).*", "$1$4");
      end = text.replaceFirst("([a-z|A-Z]+)\\s*(\\-|to)\\s*([a-z|A-Z]+)(\\s+\\d\\d\\d\\d).*", "$3$4");
      original = text.replaceFirst("([a-z|A-Z]+\\s*)(\\-|to)(\\s*[a-z|A-Z]+)(\\s+\\d\\d\\d\\d).*", "$1$2$3$4");
    }
    // e.g. 5-6 July 1952
    else if (text.matches("\\d\\d?\\s*(\\-|to)\\s*\\d\\d?\\s+[a-z|A-Z]{3,9}\\s+\\d{4}.*"))
    {
      start = text.replaceFirst("(\\d\\d?)\\s*(\\-|to)\\s*(\\d\\d?)(\\s+[a-z|A-Z]+\\s+\\d\\d\\d\\d).*", "$1$4");
      end = text.replaceFirst("(\\d\\d?)\\s*(\\-|to)\\s*(\\d\\d?)(\\s+[a-z|A-Z]+\\s+\\d\\d\\d\\d).*", "$3$4");
      original = text.replaceFirst("(\\d\\d?\\s*)(\\-|to)(\\s*\\d\\d?)(\\s+[a-z|A-Z]+\\s+\\d\\d\\d\\d).*", "$1$2$3$4");
    }
    // e.g. 1985-96: the end year borrows the century of the start year
    else if (text.matches("\\d{4}-\\d{2}(|[^\\-\\d].*)"))
    {
      start = text.replaceFirst("(\\d\\d)(\\d\\d)\\-(\\d\\d).*", "$1$2");
      end = text.replaceFirst("(\\d\\d)(\\d\\d)\\-(\\d\\d).*", "$1$3");
      original = text.replaceFirst("(\\d\\d\\d\\d\\-\\d\\d).*", "$1");

    }
    // e.g. 1995-6: the end year borrows the century and decade of the start year
    else if (text.matches("\\d{4}-\\d(|[^\\-\\d].*)"))
    {
      start = text.replaceFirst("(\\d\\d\\d)(\\d)\\-(\\d).*", "$1$2");
      end = text.replaceFirst("(\\d\\d\\d)(\\d)\\-(\\d).*", "$1$3");
      original = text.replaceFirst("(\\d\\d\\d\\d\\-\\d).*", "$1");
    }
    // e.g. January 1985 - March 1995
    else if (text.matches("[^-]{4,}(\\-|\\sto\\s)[^-]{4,}.*"))
    {
      String[] split = text.split("-|\\sto\\s");
      start = split[0].trim();
      end = split[1].trim();
    }
    // e.g. 1920-06-14 - 1928-07-15
    else if (text.matches("(\\d{1,4}\\s*\\-){3,}.*"))
    {
      // the third '-' is the one that separates the two dates
      int middle = 0;
      for (int i = 0; i <= 2; i++)
      {
        middle = text.indexOf("-", middle + 1);
      }
      start = text.substring(0, middle).trim();
      end = text.substring(middle + 1).trim();

    }
    if (start != null && end != null)
    {
      // the start must be a date as a whole; the end may be followed by other text
      Calendar calS = parseDate(start, null, false, new ParsePosition(0), true);
      ParsePosition pos = new ParsePosition(0);
      Calendar calE = parseDate(end, null, false, pos, false);
      if (calS != null && calE != null)
      {
        dateRange.setStartDate(completeStartDate(calS));
        dateRange.setEndDate(completeEndDate(calE));
        if (original == null)
        {
          // keep the source text up to the last character consumed by the end date
          String endS = end.substring(0, pos.getIndex());
          original = text.substring(0, text.indexOf(endS) + endS.length());
        }
        dateRange.setText(original);
      }
      if (calS == null || calE == null || (calS.get(Calendar.YEAR) > calE.get(Calendar.YEAR)))
      {
        dateRange = new DateRange();
      }

    }
    return dateRange;
  }

  /**
   * Leniently parses a whole string as an AD date.
   *
   * @see #parseDate(String, String, boolean, ParsePosition, boolean, int)
   */
  private static Calendar parseDate(String s, String format)
  {
    return parseDate(s, format, GregorianCalendar.AD);
  }

  /**
   * Leniently parses a whole string as a date in the given era.
   *
   * @see #parseDate(String, String, boolean, ParsePosition, boolean, int)
   */
  private static Calendar parseDate(String s, String format, int era)
  {
    return parseDate(s, format, true, new ParsePosition(0), true, era);
  }

  /**
   * Parses a string as an AD date.
   *
   * @see #parseDate(String, String, boolean, ParsePosition, boolean, int)
   */
  private static Calendar parseDate(String s, String format, boolean lenient, ParsePosition pos, boolean wholeStringMatch)
  {
    return parseDate(s, format, lenient, pos, wholeStringMatch, GregorianCalendar.AD);
  }

  /**
   * Parses a single date, trying {@code format} first and then every entry of
   * {@link #formats} in order.
   *
   * @param s
   *          the text to parse; null yields null
   * @param format
   *          optional SimpleDateFormat pattern tried first; null or empty to skip
   * @param lenient
   *          when true and nothing matched, retry on the text with everything but
   *          digits, whitespace, '.' and '-' removed, or on an embedded four digit
   *          year
   * @param pos
   *          where parsing starts; after a successful match with a built-in format
   *          its index is the end of the consumed text, after a failed run of the
   *          built-in formats it is back at its initial value
   * @param wholeStringMatch
   *          when true a built-in format only counts if it consumes {@code s} up
   *          to its end
   * @param era
   *          {@code GregorianCalendar.AD} or {@code GregorianCalendar.BC}
   * @return the parsed calendar (fields absent from the matching built-in format
   *         are left unset), or null when nothing matched
   */
  private static Calendar parseDate(String s, String format, boolean lenient, ParsePosition pos, boolean wholeStringMatch, int era)
  {

    if (s == null)
      return null;
    Calendar cal = null;
    try
    {
      s = removeOrdinalSuffixes(s);
      if (format != null && !"".equals(format))
      {
        cal = getCalendar(format, s, era);
      }
      if (cal == null)
      {
        final int startIndex = pos.getIndex();
        for (SimpleDateFormat sdf : formats)
        {
          // every format has to start at the same place: a previous format may
          // have consumed a prefix without qualifying as a whole-string match
          pos.setIndex(startIndex);
          pos.setErrorIndex(-1);
          Date date = null;
          try
          {
            date = sdf.parse(s, pos);
          } catch (Exception e)
          {
            // a format that cannot cope with the input just does not match
            log.debug("Format " + sdf.toPattern() + " failed on: " + s, e);
          }
          if (date != null && (!wholeStringMatch || pos.getIndex() == s.length()))
          {
            cal = getCalendar(date, sdf.toPattern(), era);
            break;
          }
        }
        if (cal == null)
        {
          // nothing matched, so nothing was consumed
          pos.setIndex(startIndex);
        }
      }
      // after 1953 or post 1987
      if (lenient && cal == null && s.matches(HAS_NON_NUMERIC_CHARS))
      {
        // removing any non-digit characters(except space) and trying to parse
        // it again
        cal = parseDate(s.replaceAll(NON_NUMERIC_CHAR, "").trim(), null, era);
      } else if (lenient && cal == null && s.matches(".*\\D\\d\\d\\d\\d(\\D.*|)"))
      {
        // fall back to the last four digit group that follows a non-digit
        s = s.replaceAll(".*\\D(\\d\\d\\d\\d)(\\D.*|)", "$1");
        cal = parseDate(s, null, era);
      }
    } catch (Exception e)
    {
      // best effort: an unparseable date is reported as null
      log.debug("Failed to parse date from: " + s, e);
    }
    return cal;
  }

  /**
   * Removes English ordinal suffixes that follow a one or two digit number, e.g.
   * "1st March" becomes "1 March" and "29th" becomes "29".
   *
   * @param text
   *          the text to clean; must not be null
   * @return the text without the suffixes
   */
  protected static String removeOrdinalSuffixes(String text)
  {
    return text.replaceAll("([0-3]?[0-9])(st|nd|rd|th)", "$1");
  }

  /**
   * Builds a calendar from a date parsed with one of the built-in formats and
   * un-sets the fields the pattern did not supply, so that
   * {@link #completeStartDate(Calendar)} and {@link #completeEndDate(Calendar)}
   * can fill them in.
   *
   * @param date
   *          the parsed date
   * @param format
   *          the pattern that produced {@code date}
   * @param era
   *          {@code GregorianCalendar.AD} or {@code GregorianCalendar.BC}
   * @return the calendar, never null
   */
  protected static Calendar getCalendar(Date date, String format, int era)
  {
    Calendar cal = new GregorianCalendar();
    cal.setTime(date);
    // clearing fields, that are unset
    if (!format.contains("M"))
    {
      cal.clear(Calendar.MONTH);
      cal.clear(Calendar.DAY_OF_MONTH);

    }
    if (!format.contains("d"))
    {
      cal.clear(Calendar.DAY_OF_MONTH);
    }

    // Remember the era in the calendar (has to be done after the setTime method call
    // above as that clears it...
    cal.set(Calendar.ERA, era);

    return cal;
  }

  /**
   * Parses {@code s} with a caller supplied pattern, using UK symbols.
   *
   * @param format
   *          a SimpleDateFormat pattern
   * @param s
   *          the text to parse; only a leading part of it has to match
   * @param era
   *          the era to give the result, unless the pattern itself contains the
   *          era letter 'G'
   * @return the parsed calendar, or null when the pattern is invalid or the text
   *         does not match
   */
  private static Calendar getCalendar(String format, String s, int era)
  {

    try
    {
      SimpleDateFormat sdf = new SimpleDateFormat(format, Locale.UK);
      Date date = sdf.parse(s);
      Calendar cal = new GregorianCalendar();
      cal.setTime(date);
      // the era has to be applied after setTime, which recomputes every field;
      // a pattern with an era designator has already taken it from the text
      if (!format.contains("G"))
      {
        cal.set(Calendar.ERA, era);
      }
      return cal;
    } catch (Exception e)
    {
      // no match with the supplied format; the caller falls back to the built-in ones
      log.debug("Format " + format + " failed on: " + s, e);
    }
    return null;
  }

  /**
   * Parses {@code s} with a caller supplied pattern as an AD date.
   *
   * @see #getCalendar(String, String, int)
   */
  private static Calendar getCalendar(String format, String s)
  {

    return getCalendar(format, s, GregorianCalendar.AD);
  }

  /**
   * Turns a decade such as "1980s" into the range from its first to its last
   * year.
   *
   * @param text
   *          text ending in a decade, e.g. "1980s" or "late 1940s"
   * @return the range, or null when the text does not end in a decade or the
   *         decade is not positive
   */
  protected static DateRange getDecadeDateRange(String text)
  {
    Calendar cal = getDecade(text);
    DateRange dateRange = null;
    if (cal != null)
    {
      dateRange = new DateRange();
      dateRange.setStartDate(completeStartDate(cal));
      int year = cal.get(Calendar.YEAR) + 9;
      cal.clear();
      cal.set(Calendar.YEAR, year);
      dateRange.setEndDate(completeEndDate(cal));
    }

    return dateRange;
  }

  /**
   * Extracts the first year of the decade the text ends with.
   *
   * @param text
   *          text ending in four digits (the last one '0') followed by 's'
   * @return a calendar with only the year set, or null when the text has another
   *         shape or the year is not positive
   */
  private static Calendar getDecade(String text)
  {
    Calendar cal = null;
    if (text.matches(".*\\d\\d\\d0s"))
    {
      int index = text.lastIndexOf("0s");
      // the three digits in front of the final '0', plus that '0'
      int year = Integer.parseInt(text.substring(index - 3, index + 1));
      if (year > 0)
      {
        cal = new GregorianCalendar();
        cal.clear();
        cal.set(Calendar.YEAR, year);
      }
    }
    return cal;
  }

  /**
   * Returns the earliest instant covered by a possibly partial date: an unset
   * month becomes January, an unset day becomes the 1st, and the time of day
   * becomes 00:00:01.
   *
   * @param original
   *          the parsed calendar, left untouched; may be null
   * @return the start instant, or null when {@code original} is null
   */
  protected static Date completeStartDate(Calendar original)
  {
    if (original == null)
      return null;
    Calendar cal = (Calendar) original.clone();
    // has to be read before getTime(), which marks every field as set
    boolean setMonth = !cal.isSet(Calendar.MONTH);
    boolean setDayOfMonth = !cal.isSet(Calendar.DAY_OF_MONTH);
    cal.getTime(); // this line is important, because it force calendar to
    // recompute all fields
    if (setMonth)
      cal.set(Calendar.MONTH, 0);
    if (setDayOfMonth)
      cal.set(Calendar.DAY_OF_MONTH, 1);
    cal.set(Calendar.HOUR_OF_DAY, cal.getMinimum(Calendar.HOUR_OF_DAY));
    cal.set(Calendar.MINUTE, cal.getMinimum(Calendar.MINUTE));
    // the one second is add because of Solr date range search overlap both
    // start and end
    // so e.g. 1951-01-01T00:00:00Z would be count both in the year 1950 and
    // 1951
    cal.set(Calendar.SECOND, cal.getMinimum(Calendar.SECOND) + 1);
    return cal.getTime();
  }

  /**
   * Returns the latest instant covered by a possibly partial date: an unset
   * month becomes the last month of the year, an unset day becomes the last day
   * of the month, and the time of day becomes 23:59:59.
   *
   * @param original
   *          the parsed calendar, left untouched; may be null
   * @return the end instant, or null when {@code original} is null
   */
  protected static Date completeEndDate(Calendar original)
  {
    if (original == null)
      return null;

    Calendar cal = (Calendar) original.clone();

    // has to be read before getTime(), which marks every field as set
    boolean setMonth = !cal.isSet(Calendar.MONTH);
    boolean setDayOfMonth = !cal.isSet(Calendar.DAY_OF_MONTH);
    cal.getTime(); // this line is important, because it force calendar to
    // recompute all fields
    if (setMonth)
      cal.set(Calendar.MONTH, cal.getActualMaximum(Calendar.MONTH));
    if (setDayOfMonth)
      cal.set(Calendar.DAY_OF_MONTH, cal.getActualMaximum(Calendar.DAY_OF_MONTH));
    cal.set(Calendar.HOUR_OF_DAY, cal.getMaximum(Calendar.HOUR_OF_DAY));
    cal.set(Calendar.MINUTE, cal.getMaximum(Calendar.MINUTE));
    cal.set(Calendar.SECOND, cal.getMaximum(Calendar.SECOND));
    return cal.getTime();
  }

  /**
   * Manual smoke test: parses a list of sample inputs and prints the resulting
   * start date, end date and decades to standard output.
   */
  public static void main(String[] args)
  {
    DateParser p = new DateParser();
    String[] texts = new String[]
    {"Monday, 17 July, 2006", "January 1920", "1940s", "3/6/1922", "3/6/92", "24.5.1984", "15 January 1920", "19-02-2005", "2006-02-24",
        "19-sept-2008", "18-sep-2008", "9-11 May 1974", "17 May 1967-11 December 1984", "Jun-Aug 1994",
        "1st March 1999", "2nd April 1986", "3rd July 1987", "15th December 1986", "12 Sept 98", "11-02-1983",
        "1826 - 1830", "October 29th, 1880", "July 11 1834", "1920-5", "1945-58" };

    for (String text : texts)
    {
      DateRange d = p.parseDateRange(text);
      if (d != null)
      {
        SimpleDateFormat sdf = new SimpleDateFormat("dd MMMM yyyy HH':'mm':'ss");
        if (d.getStartDate() != null)
          System.out.println("Start date " + sdf.format(d.getStartDate()));
        if (d.getEndDate() != null)
          System.out.println("End date " + sdf.format(d.getEndDate()));
        HashSet<String> dec = d.getDecade();
        for (String s : dec)
          System.out.println("decade: " + s);
      }
      System.out.println("--------------------------");
    }

  }

}