package com.k_int.discover.service;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Set;

import javax.xml.transform.TransformerException;
import javax.xml.xpath.XPathExpressionException;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.xpath.XPathAPI;
import org.hibernate.HibernateException;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.context.ApplicationEvent;
import org.springframework.stereotype.Service;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.NodeIterator;

import com.k_int.aggregator.cache.CacheContentResult;
import com.k_int.aggregator.cache.impl.CacheContentImageThumbnail;
import com.k_int.aggregator.core.DepositResult;
import com.k_int.aggregator.plugin.XMLDocumentHandlerPlugin;
import com.k_int.aggregator.plugin.XMLProcessingResult;
import com.k_int.aggregator.repository.RepositoryService;
import com.k_int.aggregator.repository.RepositoryStoreOptions;
import com.k_int.aggregator.repository.RepositoryStoreResult;
import com.k_int.aggregator.util.SOLRHelper;
import com.k_int.commons.util.XMLUtil;
import com.k_int.discover.datamodel.OAIDCDocument;
import com.k_int.discover.datamodel.CultureGrid_ItemDocument;
import com.k_int.discover.util.ParseSpatialData;

@Service("XMLHandler-OAIDC")
public class OAIDCHandler implements XMLDocumentHandlerPlugin, ApplicationContextAware, org.springframework.context.ApplicationListener {

	// Logger shared by all instances of this handler
	private static Log log = LogFactory.getLog(OAIDCHandler.class);
	
	// Spring application context, assigned in setApplicationContext() and used in process()
	// to look up the "ContentStore" and "SolrHelper" beans
	protected ApplicationContext ctx = null;
	
	// Namespace declarations
	// All of these except XMLNS_XHTML are bound to prefixes on the namespace node built in
	// process(); XMLNS_XHTML is not referenced elsewhere in this class.
	public static String XMLNS_DC = "http://purl.org/dc/elements/1.1/";
	public static String XMLNS_DCTERMS = "http://purl.org/dc/terms/";
	public static String XMLNS_PNDSTERMS = "http://purl.org/mla/pnds/terms/";
	public static String XMLNS_XSI = "http://www.w3.org/2001/XMLSchema-instance";
	public static String XMLNS_PNDS_DC = "http://purl.org/mla/pnds/pndsdc/";
	public static String XMLNS_OAI_DC = "http://www.openarchives.org/OAI/2.0/oai_dc/";
	public static String XMLNS_XHTML = "http://www.w3.org/1999/xhtml";

	// Root namespaces this handler reports through getSupportedSchemas(); the first entry is
	// also passed to the content store when the original document is stored
	private static String[] SUPPORTED_SCHEMAS = new String[] { XMLNS_OAI_DC };
	
	/**
	 * Lists the root namespaces of the documents this handler can process.
	 *
	 * @return a fresh copy of the supported schema namespaces (only the OAI_DC namespace)
	 */
	public String[] getSupportedSchemas() {
		// Hand out a copy so that a caller cannot alter the handler's own static list
		return SUPPORTED_SCHEMAS.clone();
	}

	/**
	 * Processes a deposited document using the default store options: cache the thumbnail,
	 * calculate a checksum and store the original document.
	 *
	 * @deprecated Replaced by {@link #process(DepositResult, byte[], Document, String, String, String, Boolean, long)}
	 */
	@Deprecated
	public XMLProcessingResult process(DepositResult deposit_result,
			byte[] content, Document d, String depositor_id, String owner_id,
			String root_namespace, Boolean authoritative) {
		// The option flags applied when the caller does not supply any
		long defaultOptions = RepositoryStoreOptions.REPO_STORE_OPTIONS_CACHE_THUMBNAIL
				| RepositoryStoreOptions.REPO_STORE_OPTIONS_CALC_CHECKSUM
				| RepositoryStoreOptions.REPO_STORE_OPTIONS_STORE_ORIGINAL_DOC;

		return this.process(deposit_result, content, d, depositor_id, owner_id,
				root_namespace, authoritative, defaultOptions);
	}

	/**
	 * Processes a deposited OAI_DC document.
	 * <p>
	 * The values named in {@link #setupMappings()} are extracted from the document, adjusted for
	 * particular providers, normalised and then stored through the "ContentStore" bean as
	 * several representations: the original document (if requested), a generated oai_dc
	 * document, a CultureGrid_Item document, a cached thumbnail (if requested and available)
	 * and the solr document returned by the "SolrHelper" bean.
	 *
	 * @param deposit_result not used by this implementation
	 * @param content the raw bytes of the deposited document, stored as the "oai_dc_raw" version
	 * @param d the parsed document that the xpath expressions are applied to
	 * @param depositor_id identifier of the depositor, passed to the content store and the index
	 * @param owner_id identifier of the owner; also selects the provider specific handling and the collection
	 * @param root_namespace not used by this implementation
	 * @param authoritative passed through to the content store
	 * @param options bitmask of {@link RepositoryStoreOptions} flags
	 * @return a result holding the resource id on success, or only a message when the document
	 *         could not be parsed or lacks required data (including the identifier)
	 */
	public XMLProcessingResult process(DepositResult deposit_result,
			byte[] content, Document d, String depositor_id, String owner_id,
			String root_namespace, Boolean authoritative, long options) {

		// Set up the return value
		XMLProcessingResult returnValue = new XMLProcessingResult();

		// Set up a version of the options with store checksums set to false as used when
		// storing all versions of the document other than the original
		long noChecksumOptions = options & ~RepositoryStoreOptions.REPO_STORE_OPTIONS_CALC_CHECKSUM;
		boolean cacheThumbnail = (options & RepositoryStoreOptions.REPO_STORE_OPTIONS_CACHE_THUMBNAIL) == RepositoryStoreOptions.REPO_STORE_OPTIONS_CACHE_THUMBNAIL;
		boolean storeOriginal = (options & RepositoryStoreOptions.REPO_STORE_OPTIONS_STORE_ORIGINAL_DOC) == RepositoryStoreOptions.REPO_STORE_OPTIONS_STORE_ORIGINAL_DOC;

		// Set up the node carrying the namespace prefixes used by the xpath expressions
		Element new_namespace_node = d.createElement("NSNode");
		new_namespace_node.setAttribute("xmlns:dc", XMLNS_DC);
		new_namespace_node.setAttribute("xmlns:dcterms", XMLNS_DCTERMS);
		new_namespace_node.setAttribute("xmlns:xsi", XMLNS_XSI);
		new_namespace_node.setAttribute("xmlns:pn", XMLNS_PNDS_DC);
		new_namespace_node.setAttribute("xmlns:pndsterms", XMLNS_PNDSTERMS);
		new_namespace_node.setAttribute("xmlns:oai_dc", XMLNS_OAI_DC);

		try {

			// Get all of the data from the original file
			HashMap<String,NameXPathMapping> xpathMappings = this.setupMappings();
			for(NameXPathMapping mapping: xpathMappings.values()) {
				String[] newValues = getValues(d, mapping.getXpathExpression(), new_namespace_node);
				if ( newValues != null && newValues.length != 0 ) {
					mapping.setValues(newValues);
				}
			}

			// Everything below is keyed on the identifier, so a missing one is reported in the
			// same way as any other missing required field instead of failing with a runtime exception
			String[] identifiers = valuesOf(xpathMappings, "identifier");
			String doc_identifier = ( identifiers != null ) ? identifiers[0] : null;
			if ( doc_identifier == null ) {
				throw new XPathExpressionException("Required field: identifier does not have a value in the data");
			}
			log.debug("doc_identifier: " + doc_identifier);

			// Perform some modifications for the data as required by the different providers to
			// make their data correct

			// Change 1 - English heritage thumbnails
			// Fetch the referenced page and derive the link to the thumbnail so that it can be
			// cached later - only done if the thumbnail is going to be cached
			boolean englishHeritage = doc_identifier.startsWith("http://viewfinder.english-heritage.org.uk");
			if ( englishHeritage && cacheThumbnail ) {
				String imagePath = findEnglishHeritageImagePath(doc_identifier);
				NameXPathMapping thumbnailMapping = xpathMappings.get("thumbnail");
				if ( imagePath != null && thumbnailMapping != null ) {
					// Remember the path for later use
					thumbnailMapping.setValues(new String[] { imagePath });
				}
			}

			// Change 2 - Portable antiquities
			// Use the original type as the title (instead of the empty string) and set the
			// type to something meaningful too
			if ( "PortableAntiquities".equals(owner_id) ) {
				String[] originalTypes = valuesOf(xpathMappings, "type");
				String oldTypeValue = ( originalTypes != null ) ? originalTypes[0] : "Link";

				NameXPathMapping typeMapping = xpathMappings.get("type");
				if ( typeMapping != null ) {
					log.debug("Setting new type to Image");
					typeMapping.setValues(new String[] { "Image" });
				}

				NameXPathMapping titleMapping = xpathMappings.get("title");
				if ( titleMapping != null ) {
					log.debug("Setting new title to " + oldTypeValue);
					titleMapping.setValues(new String[] { oldTypeValue });
				}
			}

			// Modify and / or normalise the data as appropriate
			xpathMappings = this.normaliseData(xpathMappings);

			// Check that we have all of the required information
			this.checkForRequiredData(xpathMappings);

			// Work out whether we have a link to the resource and remember it
			if ( doc_identifier.startsWith("http://") || doc_identifier.startsWith("https://") ) {
				NameXPathMapping relatedLinkEntry = new NameXPathMapping("dc.related.link","", false);
				relatedLinkEntry.setValues(new String[] { doc_identifier });
				xpathMappings.put("relatedLink", relatedLinkEntry);
			}

			// Work out the list of collections for the data
			List<String> collectionList = new ArrayList<String>();
			if ( owner_id.equals("EnglishHeritage") || owner_id.equals("LambethLandmark")
					|| owner_id.equals("PortableAntiquities") || owner_id.equals("CollectionsBase") ) {
				collectionList.add(owner_id);
			} else {
				log.debug("Unable to determine the collection list based on the identifier of the data. Identifier: " + doc_identifier);
			}

			// Set up the objects needed to store the different documents
			RepositoryService content_store = (RepositoryService) ctx.getBean("ContentStore");
			Long resourceId = -1L;

			// Store the Original format (if specified in options)
			if ( storeOriginal ) {
				RepositoryStoreResult store_result = content_store.store(content,owner_id,depositor_id,doc_identifier,"oai_dc_raw","application/xml",
						SUPPORTED_SCHEMAS[0],authoritative, asArray(collectionList), options);
				resourceId = store_result.getDocId();
			} else {
				log.debug("Not storing the original document, as specified by the options");
			}

			// Create and store the OAI_DC version
			OAIDCDocument oaiDcDoc = buildOaiDcDocument(xpathMappings, owner_id);
			log.debug("Storing the generated oai_dc document in the content store");
			byte[] serialisedOaiDc = XMLUtil.serializeDocument(oaiDcDoc.toXML());
			if ( serialisedOaiDc != null ) {
				RepositoryStoreResult store_result = content_store.store(serialisedOaiDc,owner_id, depositor_id, doc_identifier, "oai_dc", "application/xml", "http://www.openarchives.org/OAI/2.0/oai_dc/", authoritative,
						asArray(collectionList), noChecksumOptions);

				if ( resourceId == -1L ) {
					// We didn't have a resource ID before, remember it now
					resourceId = store_result.getDocId();
				}
			}

			// Create and store the CultureGrid_Item version
			CultureGrid_ItemDocument cgItemDoc = buildCultureGridItem(xpathMappings, owner_id, collectionList);
			log.debug("Storing the generated pnds_dcap document in the content store");
			byte[] serialisedPndsDcapDoc = XMLUtil.serializeDocument(cgItemDoc.toXML());
			if ( serialisedPndsDcapDoc != null ) {
				content_store.store(serialisedPndsDcapDoc,owner_id, depositor_id, doc_identifier, "CultureGrid_Item", "application/xml", "http://www.peoplesnetwork.gov.uk/schema/CultureGrid_Item", authoritative,
						asArray(collectionList), noChecksumOptions); // TODO - what's the schema?
			}

			// Download and create a thumbnail, storing it in the db if appropriate (and if specified in options)
			String[] thumbnailValues = valuesOf(xpathMappings, "thumbnail");
			boolean haveThumbnail = true;
			String possiblePathToCachedImage = "/dpp/resource/" + resourceId + "/stream/thumbnail_image_jpeg";
			if ( thumbnailValues != null && thumbnailValues[0] != null ) {

				if ( cacheThumbnail ) {
					// Download the thumbnail, resizing it as appropriate, and store it in the database
					CacheContentImageThumbnail resizer = new CacheContentImageThumbnail();
					CacheContentResult cachedThumbnail = resizer.cacheObject(thumbnailValues[0]);

					if ( cachedThumbnail != null ) {
						content_store.store(cachedThumbnail.content,owner_id, depositor_id, doc_identifier, cachedThumbnail.type,
								cachedThumbnail.mimeType, cachedThumbnail.subType, authoritative, asArray(collectionList), noChecksumOptions);
					} else {
						haveThumbnail = false;
					}
				} else {
					// Look in the database to see if there's already a thumbnail for this record
					// and if there is use it instead
					Long thumbnailID = content_store.lookupResourceInstanceId(resourceId, "thumbnail_image_jpeg");
					if ( thumbnailID == null ) {
						haveThumbnail = false;
					} else {
						log.debug("Using previously cached thumbnail");
					}
				}

			} else if ( englishHeritage ) {
				// We're dealing with english heritage, but aren't caching a thumbnail - check to
				// see if we already have one in the database and use it if we do. This is needed
				// because when not caching the english heritage site is never asked for the URL
				Long thumbnailID = content_store.lookupResourceInstanceId(resourceId, "thumbnail_image_jpeg");
				if ( thumbnailID == null ) {
					haveThumbnail = false;
				} else {
					log.debug("Using previously cached thumbnail for English Heritage"); // TODO - any way to keep the original thumbnail path?
				}

			} else {
				haveThumbnail = false;
			}

			// Create a solr representation

			// Use all of the values that we've got from the file and modified as appropriate
			List<SOLRHelper.NVPair> index_properties = new ArrayList<SOLRHelper.NVPair>();
			for(String key: xpathMappings.keySet()) {
				if ( "isPartOf".equals(key) ) {
					continue;
				}
				NameXPathMapping thisEntry = xpathMappings.get(key);
				if ( "NO_INDEX".equals(thisEntry.getSolrName()) ) {
					continue;
				}
				String[] values = thisEntry.getValues();
				if ( values == null ) {
					continue;
				}
				if ( thisEntry.collectAll ) {
					// Store several values
					for(String aValue: values) {
						if ( aValue != null ) {
							index_properties.add(new SOLRHelper.NVPair(thisEntry.getSolrName(), aValue));
						}
					}
				} else if ( values.length > 0 && values[0] != null ) {
					// Store just one (if there is one); the length check guards against empty arrays
					index_properties.add(new SOLRHelper.NVPair(thisEntry.getSolrName(), values[0]));
				}
			}

			// Spatial values are indexed as extracted; the parsing of TGN spatial data that
			// used to follow here is currently disabled.

			// Now add the other values that don't come directly from the file
			if ( haveThumbnail ) {
				index_properties.add(new SOLRHelper.NVPair("cached_thumbnail", possiblePathToCachedImage));
			}
			index_properties.add(new SOLRHelper.NVPair("have_thumbnail", String.valueOf(haveThumbnail)));

			StringBuilder combinedSubject = new StringBuilder();
			String[] separateSubjects = {"subject.person", "subject.organisation", "subject.event"};
			for(String subject: separateSubjects) {
				String[] subjectValues = valuesOf(xpathMappings, subject);
				if ( subjectValues != null && subjectValues[0] != null ) {
					combinedSubject.append(" ").append(subjectValues[0]);
				}
			}
			index_properties.add(new SOLRHelper.NVPair("subject.combined", combinedSubject.toString().trim()));

			for(String thisCollection: collectionList ) {
				if (thisCollection != null && !"".equals(thisCollection) ) {
					index_properties.add(new SOLRHelper.NVPair("dcterms.isPartOf", thisCollection));
				}
			}

			index_properties.add(new SOLRHelper.NVPair("authority", owner_id));
			index_properties.add(new SOLRHelper.NVPair("depositor_id", depositor_id));
			index_properties.add(new SOLRHelper.NVPair("record_type", "item"));
			index_properties.add(new SOLRHelper.NVPair("restp","ServiceProvider"));
			index_properties.add(new SOLRHelper.NVPair("aggregator.internal.id",resourceId+""));

			String internalRecordTypeLink = "/dpp/resource/" + resourceId + "/stream/CultureGrid_Item";
			index_properties.add(new SOLRHelper.NVPair("aggregator.internal_record_link", internalRecordTypeLink));

			// Index the document with solr and store it in the database
			SOLRHelper solr_helper = (SOLRHelper) ctx.getBean("SolrHelper");
			Document solr_doc = solr_helper.postIndexEntries(index_properties, false);

			if (solr_doc != null) {
				content_store.store(XMLUtil.serializeDocument(solr_doc), owner_id, depositor_id, doc_identifier,
						"solr", "application/xml", "", authoritative, asArray(collectionList), noChecksumOptions);
			} else {
				log.debug("An error occurred when saving the document to solr - no document returned!");
				// TODO - should i change the return value?
			}

			returnValue.setDocId(resourceId);

		} catch (TransformerException te) {
			// Pass the exception itself to the logger so that the stack trace is kept
			log.error("Problem - transformer exception: " + te, te);
			returnValue.setMessage("TransformerException thrown when parsing the OAI_DC document");
		} catch (XPathExpressionException xe) {
			log.error("Problem when parsing the OAI_DC document. " + xe.getMessage(), xe);
			returnValue.setMessage(xe.getMessage());
		}

		return returnValue;
	}

	/**
	 * Returns the values held for the given key, or null when the key is unknown or its
	 * mapping holds no values (null or an empty array).
	 */
	private static String[] valuesOf(HashMap<String,NameXPathMapping> xpathMappings, String key) {
		NameXPathMapping mapping = xpathMappings.get(key);
		if ( mapping == null ) {
			return null;
		}
		String[] values = mapping.getValues();
		if ( values == null || values.length == 0 ) {
			return null;
		}
		return values;
	}

	/** Copies the given values, in order, into a new mutable list. */
	private static List<String> asList(String[] values) {
		List<String> copy = new ArrayList<String>(values.length);
		for(String value: values) {
			copy.add(value);
		}
		return copy;
	}

	/** Returns a new array holding the entries of the given list. */
	private static String[] asArray(List<String> values) {
		return values.toArray(new String[values.size()]);
	}

	/**
	 * Fetches the English Heritage page at the given address and derives the path of the image
	 * on the line marked with class='vfImage'.
	 *
	 * @return the image path, or null if the page could not be read or no path could be derived
	 */
	private String findEnglishHeritageImagePath(String pageUrl) {
		log.debug("About to go and get: " + pageUrl);

		String imageLine = null;
		BufferedReader html_reader = null;
		try {
			html_reader = new BufferedReader(new InputStreamReader(new URL(pageUrl).openStream()));
			String line;
			while ( imageLine == null && (line = html_reader.readLine()) != null ) {
				if ( line.contains("class='vfImage'") ) {
					// This is the line we care about..
					log.debug("image line found in English Heritage document");
					imageLine = line;
				}
			}
		} catch(IOException ioe) {
			log.error("IOException thrown when trying to open English Heritage page to derive thumbnail path. Error: " + ioe.getMessage());
			return null;
		} finally {
			// Always release the connection to the remote page
			if ( html_reader != null ) {
				try {
					html_reader.close();
				} catch(IOException ignored) {
					// Nothing useful can be done if closing fails; the page has already been read
				}
			}
		}

		return ( imageLine != null ) ? extractImageSource(imageLine) : null;
	}

	/**
	 * Pulls the value of the first single-quoted <img src='...'> attribute out of the given line.
	 *
	 * @return the source path, or null if it is empty or cannot be located
	 */
	private static String extractImageSource(String imageLine) {
		log.debug("We do have an image - processing out the bit we want");

		int start = imageLine.indexOf("<img src=");
		if ( start < 0 ) {
			log.debug("Unable to find '<img src=' in the image line... Line: " + imageLine);
			return null;
		}

		// Remove everything up to "<img src=", then the 10 characters of "<img src='" itself
		String srcPath = imageLine.substring(start);
		if ( srcPath.length() <= 10 ) {
			log.debug("Unable to remove the leading '<img src=\'' from the string since the string isn't long enough to contain it!");
			return null;
		}
		srcPath = srcPath.substring(10);

		// Everything before the first ' is the path to the image
		int end = srcPath.indexOf("'");
		if ( end < 0 ) {
			log.debug("Unable to actually extract the image source as can't find the closing ' character");
			return null;
		}
		srcPath = srcPath.substring(0, end);
		if ( "".equals(srcPath) ) {
			return null;
		}

		log.debug("Image path extracted: " + srcPath);
		return srcPath;
	}

	/** Builds the OAI_DC representation from the extracted values, with the owner as creator. */
	private OAIDCDocument buildOaiDcDocument(HashMap<String,NameXPathMapping> xpathMappings, String owner_id) {
		OAIDCDocument oaiDcDoc = new OAIDCDocument();

		String[] values = valuesOf(xpathMappings, "description");
		if ( values != null ) {
			oaiDcDoc.setDescription(values[0]);
		}
		values = valuesOf(xpathMappings, "identifier");
		if ( values != null ) {
			oaiDcDoc.setIdentifier(values[0]);
		}
		values = valuesOf(xpathMappings, "subject");
		if ( values != null ) {
			oaiDcDoc.setSubject(asList(values));
		}
		values = valuesOf(xpathMappings, "title");
		if ( values != null ) {
			oaiDcDoc.setTitle(values[0]);
		}
		values = valuesOf(xpathMappings, "type");
		if ( values != null ) {
			oaiDcDoc.setType(values[0]);
		}
		oaiDcDoc.setCreator(owner_id);

		return oaiDcDoc;
	}

	/** Builds the CultureGrid_Item representation from the extracted values and the collection list. */
	private CultureGrid_ItemDocument buildCultureGridItem(HashMap<String,NameXPathMapping> xpathMappings,
			String owner_id, List<String> collectionList) {
		CultureGrid_ItemDocument cgItemDoc = new CultureGrid_ItemDocument();
		cgItemDoc.setOwner(owner_id);

		String[] values = valuesOf(xpathMappings, "identifier");
		if ( values != null ) {
			cgItemDoc.setIdentifier(values[0]);
		}
		values = valuesOf(xpathMappings, "title");
		if ( values != null ) {
			cgItemDoc.setTitle(values[0]);
		}
		values = valuesOf(xpathMappings, "description");
		if ( values != null ) {
			cgItemDoc.setDescription(values[0]);
		}
		values = valuesOf(xpathMappings, "rightsHolder");
		if ( values != null ) {
			cgItemDoc.setRightsHolder(values[0]);
		}
		values = valuesOf(xpathMappings, "subject");
		if ( values != null ) {
			cgItemDoc.setSubject(asList(values));
		}
		values = valuesOf(xpathMappings, "type");
		if ( values != null ) {
			cgItemDoc.setType(asList(values));
		}
		values = valuesOf(xpathMappings, "spatial");
		if ( values != null ) {
			cgItemDoc.setSpatial(values[0]);
		}
		values = valuesOf(xpathMappings, "temporal");
		if ( values != null ) {
			cgItemDoc.setTemporal(values[0]);
		}
		values = valuesOf(xpathMappings, "thumbnail");
		if ( values != null ) {
			cgItemDoc.setThumbnail(values[0]);
		}
		values = valuesOf(xpathMappings, "relatedLink");
		if ( values != null ) {
			cgItemDoc.setRelatedLink(values[0]);
		}
		if ( collectionList != null ) {
			cgItemDoc.setIsPartOf(collectionList);
		}

		return cgItemDoc;
	}

	/**
	 * Logs every Spring application event delivered to this listener at info level.
	 *
	 * @param evt the event that was published
	 */
	public void onApplicationEvent(ApplicationEvent evt) {
		String message = "Spring event : " + evt;
		log.info(message);
	}

	/**
	 * Remembers the application context so that beans can be looked up from it later.
	 *
	 * @param ctx the context to keep
	 * @throws BeansException declared by the signature; never thrown by this implementation
	 */
	public void setApplicationContext(ApplicationContext ctx) throws BeansException {
		this.ctx = ctx;
	}
	
	/**
	 * Creates the table of fields to extract from an OAI_DC document. Each entry pairs a local
	 * key with the solr field name and the xpath expression used to find the value(s).
	 * <p>
	 * Other Dublin Core fields (coverage, creator, date, format, language, license and rights
	 * in its own right) are currently not mapped.
	 *
	 * @return a new, independent set of mappings with no values filled in by this method
	 */
	private HashMap<String,NameXPathMapping> setupMappings() {
		HashMap<String,NameXPathMapping> fieldMappings = new HashMap<String,NameXPathMapping>();

		addMapping(fieldMappings, "description", "dc.description", "/oai_dc:dc/dc:description");
		addMapping(fieldMappings, "identifier", "dc.identifier", "/oai_dc:dc/dc:identifier");
		// NOTE - this gets rights not rightsHolder!
		addMapping(fieldMappings, "rightsHolder", "dcterms.rightsHolder", "/oai_dc:dc/dc:rights");
		addMapping(fieldMappings, "spatial", "dcterms.spatial", "/oai_dc:dc/dcterms:spatial");
		// The encoding scheme of the spatial value is extracted but kept out of the index
		addMapping(fieldMappings, "spatial-format", "NO_INDEX", "/oai_dc:dc/dcterms:spatial/@encSchemeURI");
		// Subject is the only entry created with the extra fourth constructor argument set
		fieldMappings.put("subject", new NameXPathMapping("dc.subject", "/oai_dc:dc/dc:subject", false, true));
		addMapping(fieldMappings, "temporal", "dcterms.temporal", "/oai_dc:dc/dcterms:temporal");
		addMapping(fieldMappings, "thumbnail", "pndsterms.thumbnail", "/oai_dc:dc/pndsterms:thumbnail/@valueURI");
		addMapping(fieldMappings, "title", "dc.title", "/oai_dc:dc/dc:title");
		addMapping(fieldMappings, "type", "dcmi.type", "/oai_dc:dc/dc:type");

		return fieldMappings;
	}

	/**
	 * Registers a mapping built with the three argument constructor, passing false as the
	 * third argument (presumably the "required" flag - confirm against NameXPathMapping).
	 */
	private static void addMapping(HashMap<String,NameXPathMapping> target, String key, String solrName, String xpath) {
		target.put(key, new NameXPathMapping(solrName, xpath, false));
	}


	/**
	 * Verifies that every mapping flagged as required has been given values.
	 *
	 * @param xpathMappings the mappings, after the values have been read from the document
	 * @throws XPathExpressionException if a required mapping has a null set of values
	 */
	private void checkForRequiredData(HashMap<String,NameXPathMapping> xpathMappings) throws XPathExpressionException {
		for(String fieldName: xpathMappings.keySet()) {
			NameXPathMapping candidate = xpathMappings.get(fieldName);

			// Optional fields may be absent
			if ( !candidate.isRequired() ) {
				continue;
			}

			// A required field passes as soon as it holds a (non-null) set of values
			if ( candidate.getValues() != null ) {
				continue;
			}

			log.debug("The required field: " + fieldName + " doesn't have a value in the data file");
			throw new XPathExpressionException("Required field: " + fieldName + " does not have a value in the data");
		}
	}
	  
	/**
	 * Normalise the retrieved data where appropriate:
	 * <ul>
	 * <li>type - every value is passed through NormaliseTypes.normalise; a missing or empty
	 *     set of types is treated as the single type "Unknown"</li>
	 * <li>title - a missing first title becomes "Unknown" and any ^ characters are removed
	 *     from the first title</li>
	 * <li>subject - values are split on commas; each part is trimmed and empty parts are
	 *     dropped. If nothing is left the existing values are kept unchanged</li>
	 * </ul>
	 * The mapping objects are updated in place.
	 *
	 * @param xpathMappings The list of mappings and data to normalise
	 * @return The normalised list of mappings and data (the same map that was passed in)
	 */
	private HashMap<String,NameXPathMapping> normaliseData(HashMap<String,NameXPathMapping> xpathMappings) {

		log.debug("In normaliseData");
		log.debug("keySet size: " + xpathMappings.keySet().size());

		// Normalise the dcmi:type of the data
		NameXPathMapping typeMapping = xpathMappings.get("type");
		if ( typeMapping != null ) {
			String[] types = typeMapping.getValues();
			// An empty array is handled like a missing one (as for the title below), so that
			// the mapping never ends up holding an empty set of types because of it
			if ( types == null || types.length == 0 ) {
				types = new String[] { "Unknown" };
			}

			List<String> newTypes = new ArrayList<String>();
			for(String thisType: types) {
				for(String normalisedType: NormaliseTypes.normalise(thisType)) {
					newTypes.add(normalisedType);
				}
			}

			// Remember the normalised values
			typeMapping.setValues(newTypes.toArray(new String[newTypes.size()]));
		}

		// Normalise the title - remove ^ from the string
		NameXPathMapping titleMapping = xpathMappings.get("title");
		if ( titleMapping != null ) {
			String[] titles = titleMapping.getValues();
			if ( titles == null || titles.length == 0 || titles[0] == null ) {
				titles = new String[] { "Unknown" };
			}

			titles[0] = titles[0].replace("^", "");
			titleMapping.setValues(titles);
		}

		// Normalise the subjects - separate any values out by commas so that they
		// are indexed better
		NameXPathMapping subjectMapping = xpathMappings.get("subject");
		if ( subjectMapping != null ) {
			String[] subjects = subjectMapping.getValues();
			if ( subjects != null && subjects.length > 0 ) {
				List<String> parsedSubjectList = new ArrayList<String>();
				for(String thisSubject: subjects) {
					if ( thisSubject == null ) {
						continue;
					}
					// split() returns the whole value when there is no comma, so a single
					// code path covers both plain and comma separated values
					for(String fragment: thisSubject.split(",")) {
						// "a, b" must yield "b" rather than " b", and "a,,b" no empty subject
						String singleSubject = fragment.trim();
						if ( singleSubject.length() > 0 ) {
							parsedSubjectList.add(singleSubject);
						}
					}
				}

				if ( !parsedSubjectList.isEmpty() ) {
					subjectMapping.setValues(parsedSubjectList.toArray(new String[parsedSubjectList.size()]));
				}
			}
		}

		return xpathMappings;
	}


	/**
	 * Get the value of the element or attribute given by the specified xpath expression.
	 * <p>
	 * An expression whose final step is an attribute (".../@name", or a bare "@name") is split
	 * into the path of the owning nodes and the attribute name; expressions ending in "]" are
	 * always treated as selecting nodes. Each value is HTML-unescaped twice and has anything
	 * that looks like a tag removed.
	 * <p>
	 * For element expressions a node without text contributes a null entry. For attribute
	 * expressions, nodes that are not elements are skipped, and an element lacking the
	 * attribute contributes whatever {@link Element#getAttribute(String)} returns for it.
	 *
	 * @param metadata_record The document to apply the xpath to
	 * @param xpath The xpath expression to the element or attribute to consider
	 * @param namespace_node The set of namespaces used in the xpath and document
	 * @return The text contents of the elements or attributes as an array (never null)
	 * @throws javax.xml.transform.TransformerException thrown if there's an error with the XPath
	 */
	protected static String[] getValues(Node metadata_record, String xpath, Node namespace_node) throws javax.xml.transform.TransformerException {
		String node_xpath = xpath;
		String attribute_name = "";

		if ( !xpath.endsWith("]") ) {

			int indexOfLastSlash = xpath.lastIndexOf("/");
			int indexOfLastAt = xpath.lastIndexOf("@");

			// Only an '@' that starts the final step addresses an attribute. Requiring it to
			// directly follow the last '/' (or to start the expression) avoids cutting the
			// expression in the wrong place or at a negative index.
			if ( indexOfLastAt >= 0 && indexOfLastAt == indexOfLastSlash + 1 ) {
				attribute_name = xpath.substring(indexOfLastAt + 1);
				if ( indexOfLastSlash > 0 ) {
					node_xpath = xpath.substring(0, indexOfLastSlash);
				} else if ( indexOfLastSlash == 0 ) {
					// "/@name" - the owner is the root
					node_xpath = "/";
				} else {
					// "@name" - the owner is the context node itself
					node_xpath = ".";
				}
			}
		}
		boolean attributeWanted = !"".equals(attribute_name);

		// Go and get the node(s) we're interested in
		NodeIterator nodeList = XPathAPI.selectNodeIterator(metadata_record, node_xpath, namespace_node);
		List<String> collected = new ArrayList<String>();

		Node actualNode;
		while ((actualNode = nodeList.nextNode()) != null) {
			String value;
			if ( attributeWanted ) {
				// Only elements carry attributes; anything else is skipped rather than cast blindly
				if ( !(actualNode instanceof Element) ) {
					continue;
				}
				value = ((Element) actualNode).getAttribute(attribute_name);
			} else {
				value = extractText(actualNode);
			}

			// Unescape any html entities, etc. in the string (twice, since some providers
			// encode multiple times) and then remove the HTML
			if ( value != null ) {
				value = StringEscapeUtils.unescapeHtml(StringEscapeUtils.unescapeHtml(value));
				value = value.replaceAll("\\<.*?\\>", "");
			}

			collected.add(value);
		}

		return collected.toArray(new String[collected.size()]);
	}


	/**
	 * Returns the trimmed value of the first text node directly beneath the given node.
	 *
	 * @param n the node whose text is wanted
	 * @return the trimmed text, or null if the node has no text child, the text node has no
	 *         value, or the lookup failed (the failure is logged)
	 */
	protected static String extractText(Node n) {
		try {
			Node node = XPathAPI.selectSingleNode(n,"./text()");
			if ( node != null ) {
				node.normalize();
				String text = node.getNodeValue();
				if ( text != null ) {
					return text.trim();
				}
			}
		}
		catch ( javax.xml.transform.TransformerException te ) {
			// Report through the class logger (with the stack trace) rather than on stderr
			log.error("Unable to extract the text of a node: " + te.getMessage(), te);
		}
		return null;
	}

}
