Refactor date parsing, handle ISO 8601 dates

This commit is contained in:
Matthias Neeracher 2011-08-24 21:41:27 +02:00
parent ee5d7b5d6f
commit d438c0a961
3 changed files with 246 additions and 142 deletions

View File

@ -12,7 +12,7 @@
@implementation MAAddMediaSheet
static NSArray * sDateHypotheses;
static NSDictionary * sDateHypotheses;
@synthesize media, dateFormats, selectedDateFormat;
@ -21,12 +21,31 @@ static NSArray * sDateHypotheses;
self = [super initWithWindowNibName:@"MAAddMedia"];
if (!sDateHypotheses)
sDateHypotheses = [[NSArray alloc] initWithContentsOfURL:
sDateHypotheses = [[NSDictionary alloc] initWithContentsOfURL:
[[NSBundle mainBundle] URLForResource:@"DateHypotheses" withExtension:@"plist"]];
return self;
}
//
// Returns the last path component of url, without its extension, after applying
// every rewrite rule found under the "normalize" key of sDateHypotheses.
// Each rule is a two element array: a regular expression pattern, followed by the
// template that replaces every match of that pattern.
//
// Rules that are malformed or whose pattern does not compile are skipped, so a bad
// rule can never wipe out the name accumulated so far. Returns nil if url does not
// yield a file name.
//
- (NSString *)normalizedFileName:(NSURL *)url
{
    NSString * fileName = [[url lastPathComponent] stringByDeletingPathExtension];
    if (!fileName)
        return nil; // Nothing to normalize; never hand a nil string to NSRegularExpression
    for (NSArray * norm in [sDateHypotheses objectForKey:@"normalize"]) {
        if (![norm isKindOfClass:[NSArray class]] || [norm count] < 2)
            continue; // Malformed rule, objectAtIndex: would throw
        NSString * pattern = [norm objectAtIndex:0];
        NSString * templ   = [norm objectAtIndex:1];
        NSError  * error   = nil;
        NSRegularExpression * regexp =
            [NSRegularExpression regularExpressionWithPattern:pattern options:0 error:&error];
        if (!regexp) {
            // Messaging a nil regexp would return nil and discard fileName
            NSLog(@"Ignoring invalid normalize pattern %@: %@", pattern, error);
            continue;
        }
        fileName = [regexp stringByReplacingMatchesInString:fileName options:0
                                                      range:NSMakeRange(0, [fileName length])
                                               withTemplate:templ];
    }
    return fileName;
}
//
// Debug tracing switch for the date guessing code.
// Set LOG_DATEGUESS to 1 to make LOG_DG() an alias for NSLog. With 0, LOG_DG()
// expands to an empty statement and its arguments are discarded at compile time.
//
#define LOG_DATEGUESS 0
#if LOG_DATEGUESS
#define LOG_DG NSLog
#else
#define LOG_DG(...) do {} while(0)
#endif
- (void)guessDateFormats:(NSArray *)urls
{
[self window];
@ -41,22 +60,46 @@ static NSArray * sDateHypotheses;
NSMutableArray * formats = [NSMutableArray array];
NSString * fileName = [[[urls objectAtIndex:0] lastPathComponent] stringByDeletingPathExtension];
NSString * fileName = [self normalizedFileName:[urls objectAtIndex:0]];
LOG_DG(@"Representative File: %@\n", fileName);
NSRange nameRange= NSMakeRange(0, [fileName length]);
for (NSDictionary * hypo in sDateHypotheses) {
NSRegularExpression * regexp =
[NSRegularExpression regularExpressionWithPattern:[hypo objectForKey:@"regexp"] options:0 error:nil];
NSTextCheckingResult* match = [regexp firstMatchInString:fileName options:0 range:nameRange];
if (match && [match range].location != NSNotFound) {
for (NSString * format in [hypo objectForKey:@"formats"]) {
NSString * dateFormat = [regexp replacementStringForResult:match inString:fileName offset:0 template:format];
NSString * dateTitle = [NSString stringWithFormat:NSLocalizedString(@"File Name Date", @"File Name yyyy-mm-dd"), dateFormat];
NSMenuItem * item = [dateFormatMenu addItemWithTitle:dateTitle action:@selector(useFileNameDates:) keyEquivalent:@""];
[item setTag:[formats count]];
[formats addObject:[NSDictionary dictionaryWithObjectsAndKeys:
dateFormat, @"format",
regexp, @"regexp",
nil]];
//
// Since date and time formats are largely orthogonal, and since for each regular expression,
// there are a number of possible date format hypotheses, we end up with 4 nested loops:
// - Regular expressions matching date portion
// - Regular expressions matching time portion
// - Date format matching date portion
// - Date format matching time portion
//
for (NSDictionary * dateHypo in [sDateHypotheses objectForKey:@"date"]) {
NSString * filterString = [dateHypo objectForKey:@"regexp"];
NSRegularExpression * filter =
[NSRegularExpression regularExpressionWithPattern:filterString options:0 error:nil];
LOG_DG(@" Filtering %@\n", filterString);
if (![filter numberOfMatchesInString:fileName options:0 range:nameRange])
continue; // Not going to match this one, regardless of time format
for (NSDictionary * timeHypo in [sDateHypotheses objectForKey:@"time"]) {
NSString * regexpString =
[[dateHypo objectForKey:@"regexp"] stringByAppendingString:[timeHypo objectForKey:@"regexp"]];
LOG_DG(@" Trying %@\n", regexpString);
NSRegularExpression * regexp =
[NSRegularExpression regularExpressionWithPattern:regexpString options:0 error:nil];
NSTextCheckingResult* match = [regexp firstMatchInString:fileName options:0 range:nameRange];
if (match && [match range].location != NSNotFound) {
for (NSString * dateFmt in [dateHypo objectForKey:@"formats"]) {
for (NSString * timeFmt in [timeHypo objectForKey:@"formats"]) {
NSString * format = [dateFmt stringByAppendingString:timeFmt];
LOG_DG(@" Format %@\n", format);
NSString * dateFormat = [regexp replacementStringForResult:match inString:fileName offset:0 template:format];
NSString * dateTitle = [NSString stringWithFormat:NSLocalizedString(@"File Name Date", @"File Name yyyy-mm-dd"), dateFormat];
NSMenuItem * item = [dateFormatMenu addItemWithTitle:dateTitle action:@selector(useFileNameDates:) keyEquivalent:@""];
[item setTag:[formats count]];
[formats addObject:[NSDictionary dictionaryWithObjectsAndKeys:
dateFormat, @"format",
regexp, @"regexp",
nil]];
}
}
}
}
}
@ -112,8 +155,9 @@ static NSArray * sDateHypotheses;
[formatter setDateFormat:format];
[self willChangeValueForKey:@"media"];
for (NSMutableDictionary * m in media) {
NSString * name = [[[m objectForKey:@"url"] lastPathComponent] stringByDeletingPathExtension];
for (NSMutableDictionary * m in media) {
NSString * name = [self normalizedFileName:[m objectForKey:@"url"]];
LOG_DG(@"File %@\n", name);
NSTextCheckingResult * match= [regexp firstMatchInString:name options:0 range:NSMakeRange(0, [name length])];
NSDate * modificationDate = nil;
if (match && [match range].location != NSNotFound) {
@ -125,6 +169,7 @@ static NSArray * sDateHypotheses;
dateRange.length = [match rangeAtIndex:[match numberOfRanges]-1].location - dateRange.location;
NSString * dateString = [name substringWithRange:dateRange];
modificationDate = [formatter dateFromString:dateString];
LOG_DG(@" Date %@ %@\n", format, (modificationDate ? @"succeeded" : @"failed"));
}
if (!modificationDate)
if (![[m objectForKey:@"url"] getResourceValue:&modificationDate forKey:NSURLContentModificationDateKey error:nil])

View File

@ -1,126 +1,186 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<array>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d\d\d(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\5)\d\d(\s+)[APap][mM](\W.*|)$</string>
<key>comment</key>
<string>4 digit year, seconds, AM/PM</string>
<key>formats</key>
<dict>
<key>date</key>
<array>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d\d\d()\d\d(\2)\d\d</string>
<key>comment</key>
<string>4 digit year, no separators</string>
<key>formats</key>
<array>
<string>yyyyddMM</string>
<string>yyyyMMdd</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d\d\d(\D)\d\d?(\2)\d\d?</string>
<key>comment</key>
<string>4 digit year, separators</string>
<key>formats</key>
<array>
<string>yyyy$2dd$3MM</string>
<string>yyyy$2MM$3dd</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d()\d\d(\2)\d\d</string>
<key>comment</key>
<string>2 digit year, no separators</string>
<key>formats</key>
<array>
<string>MMddyy</string>
<string>ddMMyy</string>
<string>yyMMdd</string>
<string>yyddMM</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d?(\D)\d\d(\2)\d\d</string>
<key>comment</key>
<string>2 digit year (yy at end), separators</string>
<key>formats</key>
<array>
<string>MM$2dd$3yy</string>
<string>dd$2MM$3yy</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d(\D)\d\d(\2)\d\d?</string>
<key>comment</key>
<string>2 digit year (yy at beginning), separators</string>
<key>formats</key>
<array>
<string>yy$2dd$3MM</string>
<string>yy$2MM$3dd</string>
</array>
</dict>
</array>
<key>time</key>
<array>
<dict>
<key>regexp</key>
<string>(\D*\s\D*)\d\d?(\D?)\d\d(\5)\d\d(\s+)[APap][mM](\W.*|)$</string>
<key>comment</key>
<string>seconds, AM/PM</string>
<key>formats</key>
<array>
<string>$4hh$5mm$6ss$7a</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\D*\s\D*)\d\d?(\D?)\d\d(\s+)[APap][mM](\W.*|)$</string>
<key>comment</key>
<string>no seconds, AM/PM</string>
<key>formats</key>
<array>
<string>$4hh$5mm$6a</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\D*\s\D*)\d\d?(\D?)\d\d(\5)\d\d(\D.*|)$</string>
<key>comment</key>
<string>seconds, no AM/PM</string>
<key>formats</key>
<array>
<string>$4HH$5mm$6ss</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\D*\s\D*)\d\d?(\D?)\d\d(\D.*|)$</string>
<key>comment</key>
<string>no seconds, no AM/PM</string>
<key>formats</key>
<array>
<string>$4HH$5mm</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\D.*|)$</string>
<key>comment</key>
<string>date only</string>
<key>formats</key>
<array>
<string></string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\s*T\s*)\d\d?(\D?)\d\d(\5)\d\d(\s*Z?\D.*|)$</string>
<key>comment</key>
<string>seconds, ISO UTC</string>
<key>formats</key>
<array>
<string>&apos;$4&apos;HH$5mm$6ss</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\s+)\d\d?(\D?)\d\d(\5)\d\d(\s*Z\D.*|)$</string>
<key>comment</key>
<string>seconds, ISO UTC</string>
<key>formats</key>
<array>
<string>$4HH$5mm$6ss</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\s*(?:T|\s)\s*)\d\d?(\D?)\d\d(\5)\d\d(\s*)[-+]\d\d(\d\d|)(\s*\D.*|)$</string>
<key>comment</key>
<string>seconds, ISO time zone</string>
<key>formats</key>
<array>
<string>&apos;$4&apos;HH$5mm$6ss$7ZZZ</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\s*T\s*)\d\d?(\D?)\d\d(\s*Z?\D.*|)$</string>
<key>comment</key>
<string>no seconds, ISO UTC</string>
<key>formats</key>
<array>
<string>&apos;$4&apos;HH$5mm</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\s+)\d\d?(\D?)\d\d(\s*Z\D.*|)$</string>
<key>comment</key>
<string>no seconds, ISO UTC</string>
<key>formats</key>
<array>
<string>$4HH$5mm</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>(\s*(?:T|\s)\s*)\d\d?(\D?)\d\d(\s*)[-+]\d\d(\d\d|)(\s*\D.*|)$</string>
<key>comment</key>
<string>no seconds, ISO time zone</string>
<key>formats</key>
<array>
<string>&apos;$4&apos;HH$5mm$6ZZZ</string>
</array>
</dict>
</array>
<key>normalize</key>
<array>
<array>
<string>yyyy$2dd$3mm$4hh$5mm$6ss$7a</string>
<string>yyyy$2mm$3dd$4hh$5mm$6ss$7a</string>
<string>([-+]\d\d):(\d\d)</string>
<string>$1$2</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d\d\d(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\s+)[APap][mM](\W.*|)$</string>
<key>comment</key>
<string>4 digit year, no seconds, AM/PM</string>
<key>formats</key>
<array>
<string>yyyy$2dd$3mm$4hh$5mm$6a</string>
<string>yyyy$2mm$3dd$4hh$5mm$6a</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d\d\d(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\5)\d\d(\D.*|)$</string>
<key>comment</key>
<string>4 digit year, seconds, no AM/PM</string>
<key>formats</key>
<array>
<string>yyyy$2dd$3mm$4hh$5mm$6ss</string>
<string>yyyy$2mm$3dd$4hh$5mm$6ss</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d\d\d(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\D.*|)$</string>
<key>comment</key>
<string>4 digit year, no seconds, AM/PM</string>
<key>formats</key>
<array>
<string>yyyy$2dd$3mm$4hh$5mm</string>
<string>yyyy$2mm$3dd$4hh$5mm</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d\d\d(\D)\d\d(\2)\d\d(\D.*|)$</string>
<key>comment</key>
<string>4 digit year, date only</string>
<key>formats</key>
<array>
<string>yyyy$2dd$3mm</string>
<string>yyyy$2mm$3dd</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d?(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\5)\d\d(\s+)[APap][mM](\W.*|)$</string>
<key>comment</key>
<string>2 digit year, seconds, AM/PM</string>
<key>formats</key>
<array>
<string>yy$2dd$3mm$4hh$5mm$6ss$7a</string>
<string>yy$2mm$3dd$4hh$5mm$6ss$7a</string>
<string>mm$2dd$3yy$4hh$5mm$6ss$7a</string>
<string>dd$2mm$3yy$4hh$5mm$6ss$7a</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d?(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\s+)[APap][mM](\W.*|)$</string>
<key>comment</key>
<string>2 digit year, no seconds, AM/PM</string>
<key>formats</key>
<array>
<string>yy$2dd$3mm$4hh$5mm$6a</string>
<string>yy$2mm$3dd$4hh$5mm$6a</string>
<string>mm$2dd$3yy$4hh$5mm$6a</string>
<string>dd$2mm$3yy$4hh$5mm$6a</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d?(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\5)\d\d(\D.*|)$</string>
<key>comment</key>
<string>2 digit year, seconds, no AM/PM</string>
<key>formats</key>
<array>
<string>yy$2dd$3mm$4hh$5mm$6ss</string>
<string>yy$2mm$3dd$4hh$5mm$6ss</string>
<string>mm$2dd$3yy$4hh$5mm$6ss</string>
<string>dd$2mm$3yy$4hh$5mm$6ss</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d?(\D)\d\d?(\2)\d\d?(\D*\s\D*)\d\d?(\D?)\d\d(\D.*|)$</string>
<key>comment</key>
<string>2 digit year, no seconds, AM/PM</string>
<key>formats</key>
<array>
<string>yy$2dd$3mm$4hh$5mm</string>
<string>yy$2mm$3dd$4hh$5mm</string>
<string>mm$2dd$3yy$4hh$5mm</string>
<string>dd$2mm$3yy$4hh$5mm</string>
</array>
</dict>
<dict>
<key>regexp</key>
<string>^(.*\D|)\d\d?(\D)\d\d(\2)\d\d(\D.*|)$</string>
<key>comment</key>
<string>2 digit year, date only</string>
<key>formats</key>
<array>
<string>yy$2dd$3mm</string>
<string>yy$2mm$3dd</string>
<string>mm$2dd$3yy</string>
<string>dd$2mm$3yy</string>
</array>
</dict>
</array>
</array>
</dict>
</plist>

View File

@ -381,7 +381,7 @@
<string key="NSFrameSize">{798, 432}</string>
<reference key="NSSuperview" ref="43006809"/>
<reference key="NSWindow"/>
<reference key="NSNextKeyView" ref="233033100"/>
<reference key="NSNextKeyView" ref="687601302"/>
<string key="NSReuseIdentifierKey">_NS:1197</string>
<bool key="NSEnabled">YES</bool>
<object class="NSTableHeaderView" key="NSHeaderView" id="818285608">
@ -528,7 +528,6 @@
<string key="NSFrame">{{1, 434}, {798, 15}}</string>
<reference key="NSSuperview" ref="296418393"/>
<reference key="NSWindow"/>
<reference key="NSNextKeyView"/>
<string key="NSReuseIdentifierKey">_NS:1216</string>
<int key="NSsFlags">1</int>
<reference key="NSTarget" ref="296418393"/>
@ -555,7 +554,7 @@
<string key="NSFrameSize">{800, 450}</string>
<reference key="NSSuperview" ref="19986666"/>
<reference key="NSWindow"/>
<reference key="NSNextKeyView" ref="687601302"/>
<reference key="NSNextKeyView" ref="43006809"/>
<string key="NSReuseIdentifierKey">_NS:1193</string>
<int key="NSsFlags">133682</int>
<reference key="NSVScroller" ref="233033100"/>