Module opening_hours.patterns

Expand source code
from pyparsing import *

def caselessWord(some_str, **kwargs):
        """Build a ``Word`` over the lower- and upper-case forms of ``some_str``.

        The result matches any run of the listed characters (subject to
        ``kwargs`` such as ``max``); it does not match ``some_str`` as a
        literal word. Extra keyword arguments are forwarded to ``Word``.
        """
        charset = some_str.lower() + some_str.upper()
        return Word(charset, **kwargs)

def caselessChar(some_str):
        """Build a ``Char`` that accepts any one character of ``some_str``.

        Both the lower-case and the upper-case form of every character in
        ``some_str`` are accepted.
        """
        charset = some_str.lower() + some_str.upper()
        return Char(charset)

# Matches a run of one or more literal space characters.
# NOTE(review): pyparsing skips leading whitespace before most elements by
# default, so this may seldom match in practice — confirm.
space = Word(" ")

# Optional leading "Open" (any letter case); matched but dropped from the results.
useless_optional_prefixes = Optional(Or([
        CaselessLiteral("Open")
])).suppress()

# Words that may stand between the two ends of a range ("Mon to Fri",
# "9am until 5pm"), matched case-insensitively as whole literals.
#
# These must be CaselessLiteral rather than caselessWord(): caselessWord()
# builds a Word over the word's *character set*, so caselessWord("to") accepts
# any run of t/o/T/O and caselessWord("thru") any run of t/h/r/u. That made a
# lone "T" or "Th" a valid range word, so a day list such as "M T W" could be
# read as the range M-W instead of three separate days.
# "til" and "till" are listed explicitly because the old character-set form of
# "'til" happened to accept them as well.
# NOTE(review): CaselessLiteral reports the token in the spelling written here
# (e.g. "to" for input "TO") — confirm nothing downstream relies on the
# original letter case of the separator.
words_for_range = Or([
        CaselessLiteral("to"),
        CaselessLiteral("thru"),
        CaselessLiteral("through"),
        CaselessLiteral("until"),
        CaselessLiteral("'til"),
        CaselessLiteral("til"),
        CaselessLiteral("till")
])
# A range word with optional spaces on either side.
word_range_separators = Optional(space) + words_for_range + Optional(space)
# Either a single dash-like character or one of the range words above.
# The separator is deliberately not suppressed, so it stays in the parse results.
range_separator = Or([Char("–—‐-\u2013"), word_range_separators])

# The word "and" between list items, with optional spaces on either side.
# NOTE(review): oneOf() is case-sensitive unless told otherwise, unlike the
# CaselessLiteral used elsewhere in this module — confirm "And"/"AND" need not match.
word_list_separators = Optional(space) + oneOf("and") + Optional(space)
# Separator between listed days: a run of spaces, commas, plus signs or
# slashes, or the word form above.
list_separator = Or([Word(" ,+/"), word_list_separators])

# Optional filler between the day part and the time part (":", "from" in any
# letter case, or ","); matched but dropped from the results.
day_time_separators = Optional(Or([
        Literal(":"),
        CaselessLiteral("from"),
        Literal(",")
])).suppress()

# One or two characters drawn from s, S and the apostrophe, e.g. "s", "'s" or "s'".
# Because caselessWord() builds a character set, "ss" and "''" are accepted too.
plural = caselessWord("s'", max=2)
# An optional single "." after an abbreviation; dropped from the results.
possibly_dots = Optional(Char(".")).suppress()


#TODO: support multiple sections like M 8am-2pm, W 9am-2pm
# Optional "," between sections; not referenced by the other definitions in this module.
section_separator = Optional(",")
# Optional ":" between the hour and the minute of a clock time.
time_separator = Optional(":")

# A single weekday name, matched case-insensitively and combined into one token.
# Within each weekday the spellings are tried in the listed order (MatchFirst),
# so the longer spellings must stay ahead of their own prefixes. Across
# weekdays the outer Or takes whichever alternative matches the most text.
# NOTE(review): "Thu" and "Thur" are not listed for Thursday (only "Thursday",
# "Thurs", "Th" and "H") — confirm whether that is intentional.
# TODO: use CaselessCloseMatch here once implemented to handle typos, particularly for the longer names
day = Combine(Or([
        MatchFirst([
                CaselessLiteral("Monday") + Optional(plural),
                CaselessLiteral("Mon") + possibly_dots,
                CaselessLiteral("M") + possibly_dots
        ]),
        MatchFirst([
                CaselessLiteral("Tuesday") + Optional(plural),
                CaselessLiteral("Tues") + possibly_dots,
                CaselessLiteral("Tue") + possibly_dots,
                CaselessLiteral("T") + possibly_dots
        ]),
        MatchFirst([
                CaselessLiteral("Wednesday") + Optional(plural),
                CaselessLiteral("Wed") + possibly_dots,
                CaselessLiteral("W") + possibly_dots
        ]),
        MatchFirst([
                CaselessLiteral("Thursday") + Optional(plural),
                CaselessLiteral("Thurs") + possibly_dots,
                CaselessLiteral("Th") + possibly_dots,
                CaselessLiteral("H") + possibly_dots
        ]),
        MatchFirst([
                CaselessLiteral("Friday") + Optional(plural),
                CaselessLiteral("Fri") + possibly_dots,
                # A bare "F" must not be followed by "rom", so the word "from"
                # is not mistaken for Friday.
                CaselessLiteral("F") + possibly_dots + ~CaselessLiteral("rom"),
        ]),
        MatchFirst([
                CaselessLiteral("Saturday") + Optional(plural),
                CaselessLiteral("Sat") + possibly_dots,
                CaselessLiteral("Sa") + possibly_dots
        ]),
        MatchFirst([
                CaselessLiteral("Sunday") + Optional(plural),
                CaselessLiteral("Sun") + possibly_dots,
                CaselessLiteral("Su") + possibly_dots
        ]),
]))

# Phrases that stand for a whole set of days, combined into one token:
#   "Work Week" / "All Week", "5 days" / "7 days" (optionally "a week"),
#   "Every Day", "daily", "Weekday(s)" / "Weekend(s)", "Business Day(s)".
# adjacent=False lets the pieces be separated by whitespace in the input.
# TODO: use CaselessCloseMatch here once implemented to handle typos, particularly for the longer names
day_shortcuts = Combine(Or([
        Or([
                CaselessLiteral("Work"),
                CaselessLiteral("All")
        ]) + Optional(space) + CaselessLiteral("Week"),
        # Char("57") accepts exactly one character: "5" or "7".
        Char("57") + Optional(space) + CaselessLiteral("days") + Optional(space) + Optional(CaselessLiteral("a week")),
        CaselessLiteral("Every Day"),
        CaselessLiteral("daily"),
        CaselessLiteral("Week") + Optional(space) + Or([
                CaselessLiteral("day"),
                CaselessLiteral("end")
        ]) + Optional(plural),
        CaselessLiteral("Business Day") + Optional(plural),
]), adjacent=False)

# One or more weekday names in a row.
days = OneOrMore(day)

# Hour of day: two digits 00-19, two digits 20-23, or a single digit; converted
# to an int and stored under the results name "hour".
# the r is here because of https://stackoverflow.com/questions/50504500/deprecationwarning-invalid-escape-sequence-what-to-use-instead-of-d#50504635
hour = Regex(r'[01]\d|2[0-3]|\d').setParseAction(pyparsing_common.convertToInteger).setResultsName("hour")
# Minute: exactly two digits 00-59; converted to an int and stored under "minute".
minute = Regex(r'[0-5]\d').setParseAction(pyparsing_common.convertToInteger).setResultsName("minute")


# Optional AM/PM marker: "A" or "P" (any letter case), an optional dot, then an
# optional "M" with its own optional dot. Combined into one token named 'am_pm'.
am_or_pm = Optional(Combine(Or([CaselessLiteral("A"), CaselessLiteral("P")]) + possibly_dots + Optional(CaselessLiteral("M")  + possibly_dots)).setResultsName('am_pm'))

# "per" connective as in "hours per day": "/", "a" or "per"; dropped from the results.
per = Or([
        Literal("/"),
        CaselessLiteral("a"),
        CaselessLiteral("per")
]).suppress()


# Optional unit following "24": "hour(s)", "hr(s)" or "h", optionally followed
# by a "per" connective and/or the word "day". Spellings are tried in the
# listed order, so the longer ones come first.
hours_shortcuts = Optional(
        MatchFirst([
                CaselessLiteral("hour") + Optional(plural),
                CaselessLiteral("hr") + Optional(plural),
                CaselessLiteral("h")
        ]) + Optional(per) + Optional(CaselessLiteral("day"))
)

# Phrases that stand for a whole-day range: "24" with the optional unit above,
# or "All Day".
time_range_shortcuts = Or([
        Literal("24") + hours_shortcuts,
        CaselessLiteral("All Day")
])

# Words that stand in for a single time value: "noon", "midnight" or "CLOSED"
# (any letter case).
# TODO: use CaselessCloseMatch here once implemented to handle typos, particularly for the longer names
single_time_shortcut = Combine(Or([
        CaselessLiteral("noon"),
        CaselessLiteral("midnight"),
        CaselessLiteral("CLOSED")
]), adjacent=False)

# Any time shortcut, stored under the results name "time_shortcuts".
time_shortcuts = Or([single_time_shortcut, time_range_shortcuts]).setResultsName("time_shortcuts")

# A clock time: hour, optional ":", optional minute, optional AM/PM marker.
# adjacent=False lets the pieces be separated by whitespace in the input.
clocktime = Combine(hour + time_separator + Optional(minute) + am_or_pm, adjacent=False)

# A day shortcut phrase, collected under 'day_shortcuts', with an optional trailing list separator.
dateShortcuts = day_shortcuts.setResultsName('day_shortcuts', listAllMatches=True) + Optional(list_separator)

# One or more weekdays, each collected under 'day' and optionally followed by a list separator.
dateList = OneOrMore(day.setResultsName('day', listAllMatches=True) + Optional(list_separator))

# A weekday range: start day ('startday'), a range separator, end day ('endday').
daterange = day.setResultsName('startday', listAllMatches=True) + range_separator + day.setResultsName('endday', listAllMatches=True)

# Any way of naming days: a range, a list, or a shortcut phrase.
dates = Or([daterange, dateList, dateShortcuts])

# A single time value (clock time or shortcut), grouped into its own sub-result.
# NOTE(review): this name would shadow the standard-library `time` module in
# any namespace that imports both — confirm that is acceptable.
time = Group(Or([clocktime, time_shortcuts]))

# A start time ('starttime'), optionally followed by a range separator and an end time ('endtime').
timerange = time.setResultsName('starttime', listAllMatches=True) + Optional(range_separator + time.setResultsName('endtime', listAllMatches=True))

# A full opening-hours expression, after the optional "Open" prefix, in one of
# two orders:
#   1. repeated [days] [separator] time-range   (e.g. "Mon-Fri: 9am-5pm")
#   2. repeated time-range days                 (e.g. "9am-5pm Mon-Fri")
opening_hours_format = Or([
        useless_optional_prefixes + OneOrMore(Optional(dates) + day_time_separators + timerange),
        useless_optional_prefixes + OneOrMore(timerange + dates)
])      

# Optional free text made of runs of letters and spaces, grouped and collected
# under 'note'. Repetition stops where opening_hours_format would match.
note = Optional(Group(OneOrMore(caselessWord(alphas + " "), stopOn=opening_hours_format)).setResultsName('note', listAllMatches=True))

# Note before the opening hours; the lookahead below is currently disabled.
notes_start = note #+ FollowedBy(Or([dates, timerange]))
# Lookahead requiring an opening-hours expression at the current position, then an optional note.
notes_end = FollowedBy(opening_hours_format) + note

Functions

def caselessChar(some_str)
Expand source code
def caselessChar(some_str):
        """Build a ``Char`` that accepts any one character of ``some_str``.

        Both the lower-case and the upper-case form of every character in
        ``some_str`` are accepted.
        """
        charset = some_str.lower() + some_str.upper()
        return Char(charset)
def caselessWord(some_str, **kwargs)
Expand source code
def caselessWord(some_str, **kwargs):
        """Build a ``Word`` over the lower- and upper-case forms of ``some_str``.

        The result matches any run of the listed characters (subject to
        ``kwargs`` such as ``max``); it does not match ``some_str`` as a
        literal word. Extra keyword arguments are forwarded to ``Word``.
        """
        charset = some_str.lower() + some_str.upper()
        return Word(charset, **kwargs)