
# metadata.py:  Imports folder metadata into the "folders" table of a MySQL database.
#
# Ben Legler
# 9/24/2010
#
#
# Windows command-line usage:
# C:\Python27\python.exe -W ignore::DeprecationWarning C:\PNWHerbaria\Scripts\metadata.py
#
#
# Requirements:
#  1) Python (tested with version 2.6.4 and 2.7.1)
#     (http://www.python.org/download/)
#  2) ExifTool, for extracting date taken from the RAW images EXIF data for insertion into MySQL database.
#     (http://www.sno.phy.queensu.ca/~phil/exiftool/)


# CONFIGURATION:

# Directory that is scanned for incoming metadata file(s):
metadataTempDir = "C:\\PNWHerbaria\\Dropbox\\Metadata\\"

# Directory into which metadata file(s) are moved for permanent storage:
metadataArchiveDir = "C:\\PNWHerbaria\\Metadata\\"

# Database and image metadata settings for each herbarium or collection,
# keyed by acronym. Add further entries as needed.
herbaria = {
    'WWB': {
        # Acronym of this herbarium/collection (must be identical to the acronym used for images)
        'acronym': 'WWB',
        # Flag indicating whether images should be linked to the database for this collection, if one exists
        'addToDB': True,
        # Database connection (URL or IP address)
        'MySQLServer': '',
        # Database user name
        'MySQLUser': '',
        # Database password
        'MySQLPassword': '',
        # Database name
        'MySQLDatabase': '',
        # Inserted into the image metadata text file used in the image viewer
        'copyright': '',
        # Inserted into the image metadata text file used in the image viewer
        'copyrightURL': '',
        # Number of degrees by which to rotate the image clockwise (enter 0 for no rotation)
        'rotate': 90,
        # Scale, in pixels per unit
        'imageScale': 320,
        # Scale unit, one of: "mm", "cm", "m", "km", "in", "ft", "mi" (or define others as desired)
        'imageScaleUnit': 'in',
    },
}

# END CONFIGURATION


import os, os.path, sys, re, shutil, _mysql
import sqlite3
import time
from time import strftime
from math import *
import warnings


# Metadata files end in ".db" (any letter case).
METADATA_FILE_RE = re.compile(r'\.(db)$', re.IGNORECASE)

# File names are of the form "HHHH-metadata-YYYYMMDD-HHMMSS.db" where HHHH = herbarium acronym.
ACRONYM_RE = re.compile(r'^([a-zA-Z]+)-')


def is_metadata_file(name):
    """Return True for ".db" files that do not begin with "._" (Mac junk)."""
    return bool(METADATA_FILE_RE.search(name)) and not name.startswith('._')


def acronym_from_filename(name):
    """Return the leading herbarium acronym of a metadata file name.

    Returns "" when the name does not start with letters followed by "-",
    so the caller can skip the file instead of crashing on it.
    """
    match = ACRONYM_RE.search(name)
    if match is None:
        return ""
    return match.group(1)


def sql_literal(mysqlconn, value, fix_hybrid_symbol=False):
    """Return value as a quoted, escaped SQL string literal, or NULL for None.

    mysqlconn         -- open _mysql connection; its escape_string() does the escaping.
    value             -- unicode text read from the SQLite metadata file, or None.
    fix_hybrid_symbol -- when True, replace the literal text "xc3" with the
                         hybrid symbol (u'\\xd7') before escaping.
    """
    if value is None:
        return "NULL"
    # Have to encode strings to deal with certain characters (e.g., u'\xd7' = hybrid symbol).
    text = value.encode("utf-8")
    if fix_hybrid_symbol:
        # NOTE(review): this replaces the three literal characters "xc3", not the
        # byte \xc3 -- kept exactly as the original did; confirm it is intentional.
        text = text.replace(u'xc3'.encode("utf-8"), u'\xd7'.encode("utf-8"))
    # Escape through the MySQL client library rather than by hand, so that
    # backslashes and control characters are handled as well as quotes.
    return "'%s'" % mysqlconn.escape_string(text)


def import_folder_metadata(metadataPath, settings):
    """Insert every row of one SQLite metadata file into the MySQL "folders" table.

    metadataPath -- path of the SQLite metadata (.db) file to read.
    settings     -- the herbaria[...] entry holding the MySQL connection settings.

    Returns the number of rows inserted. Both database connections are closed
    even if an insert raises.
    """
    inserted = 0
    previousMaxID = 0

    # Connect to the SQLite database file with folder metadata:
    sqliteconn = sqlite3.connect(metadataPath)
    try:
        # Connect to MySQL database:
        mysqlconn = _mysql.connect(settings['MySQLServer'], settings['MySQLUser'],
                                   settings['MySQLPassword'], settings['MySQLDatabase'])
        try:
            # Find the max ID from folders table. store_result() only returns a
            # result object; the value itself has to be fetched from it (it is
            # None when the table is empty).
            mysqlconn.query("SELECT MAX(ID) FROM folders")
            maxRows = mysqlconn.store_result().fetch_row()
            if maxRows and maxRows[0][0] is not None:
                previousMaxID = int(maxRows[0][0])

            # Loop through each row in the folder metadata file:
            cursor = sqliteconn.cursor()
            try:
                cursor.execute("SELECT * FROM imagemetadata ORDER BY Date, Time")
                for row in cursor:
                    # imagemetadata table fields: ID, ImagedBy, Acronym, Date, Time, Family, ScientificName[, FolderCode]
                    # folder table fields: ID, ModificationDateTime, Acronym, Family, FolderName, FolderCode, DateImaged, TimeImaged, ImagedBy, Notes
                    # Older files (WWB batch 1) have no FolderCode column; NULL is inserted for those.
                    if len(row) > 7:
                        folderCode = row[7]
                    else:
                        folderCode = None

                    values = (
                        sql_literal(mysqlconn, row[2]),                          # Acronym
                        sql_literal(mysqlconn, row[5]),                          # Family
                        sql_literal(mysqlconn, row[6], fix_hybrid_symbol=True),  # FolderName
                        sql_literal(mysqlconn, folderCode),                      # FolderCode
                        sql_literal(mysqlconn, row[3]),                          # DateImaged
                        sql_literal(mysqlconn, row[4]),                          # TimeImaged
                        sql_literal(mysqlconn, row[1]),                          # ImagedBy
                    )
                    mysqlconn.query("INSERT INTO folders (Acronym, Family, FolderName, FolderCode, DateImaged, TimeImaged, ImagedBy, Notes) VALUES (%s, %s, %s, %s, %s, %s, %s, NULL)" % values)
                    inserted += 1
            finally:
                cursor.close()

            # MANUAL STEP (still required): adjust folder times backwards by 5 seconds to
            # accommodate possible misalignments between folder timestamps and image timestamps:
            #   UPDATE folders SET TimeImaged=SUBTIME(TimeImaged, '00:00:05') WHERE ID > <ID printed below>
            # NOTE(review): the automatic version presumably failed because it interpolated the
            # store_result() object instead of the fetched ID -- confirm before enabling it here.
        finally:
            mysqlconn.close()
    finally:
        sqliteconn.close()

    print("Imported %i folder entries from %s (highest folders.ID before import: %i)."
          % (inserted, os.path.basename(metadataPath), previousMaxID))
    return inserted


def main():
    """Import every metadata file found under metadataTempDir, then archive it.

    Files whose name has no acronym prefix, or whose acronym has no entry in
    herbaria, are reported and left in place. Files for collections with
    addToDB switched off are archived without being imported.
    """
    print("\n-------------------------------------------")
    print("Metadata import started on %s" % strftime("%Y-%m-%d %H:%M:%S"))
    print("Metadata temp directory: %s" % metadataTempDir)
    print("Metadata archival directory: %s" % metadataArchiveDir)
    print("-------------------------------------------")

    processed = 0

    # Loop through metadata files in metadataTempDir:
    for root, dirs, files in os.walk(metadataTempDir):
        for name in files:
            # Only process metadata files (.db) that do not begin with "._" (Mac junk):
            if not is_metadata_file(name):
                continue

            # Extract herbarium acronym from metadata file name:
            acronym = acronym_from_filename(name)
            if acronym == "":
                print("Skipping %s: file name does not start with a herbarium acronym." % name)
                continue

            settings = herbaria.get(acronym)
            if settings is None:
                print("Skipping %s: no configuration found for herbarium '%s'." % (name, acronym))
                continue

            # Paths to temp and archival metadata files. os.walk() recurses, so
            # the source path must be built from root, not from metadataTempDir.
            metadataTempPath = os.path.join(root, name)
            archiveDir = os.path.join(metadataArchiveDir, acronym)
            metadataArchivePath = os.path.join(archiveDir, name)

            if settings['addToDB']:
                processed += import_folder_metadata(metadataTempPath, settings)

            # Move metadata file to archival directory, replacing any earlier copy.
            # shutil.move() also works when the archive is on a different drive.
            if not os.path.exists(archiveDir):
                os.makedirs(archiveDir)
            if os.path.exists(metadataArchivePath):
                os.remove(metadataArchivePath)
            shutil.move(metadataTempPath, metadataArchivePath)

    print("-------------------------------------------")
    print("Metadata import completed on %s. %i folder entries imported." % (strftime("%Y-%m-%d %H:%M:%S"), processed))


if __name__ == "__main__":
    main()
