Skip to content

Commit

Permalink
Merge pull request #138 from metakgp/harsh/fixes
Browse files Browse the repository at this point in the history
Some Fixes and Refactoring
  • Loading branch information
proffapt authored Aug 1, 2024
2 parents 2f42edd + 7c26a32 commit f493c02
Showing 1 changed file with 61 additions and 47 deletions.
108 changes: 61 additions & 47 deletions timetable/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
with open("full_location.json") as data_file:
full_locations = json.load(data_file)

DAYS_MAP = {'Mon': "Monday", 'Tue': "Tuesday", 'Wed': "Wednesday", 'Thur': "Thursday", 'Fri': "Friday", 'Sat': "Saturday"}

@dataclass
class Course:
Expand Down Expand Up @@ -36,6 +37,25 @@ def title(self) -> str:
def end_time(self) -> int:
return self.duration + self.start_time

def create_timings(_table: Tag | NavigableString) -> list[int]:
r""" Creates a list of timings in 24 hours format - [8, ..., 12, 14, ..., 17]"""
headings = _table.find_all(
'td',
{
'class': 'tableheader',
'style': 'padding-top:5px;padding-bottom:5px;padding-left:7px;padding-right:7px',
'nowrap': ''
}
)

get_hour = lambda t: int(t.get_text().split(':')[0])

return [
get_hour(time) + 12 if
get_hour(time) < 6 else
get_hour(time) for
time in headings if time.get_text() != 'Day Name'
]

def build_courses(html: str, course_names: dict) -> list[Course]:
r"""
Expand All @@ -48,71 +68,65 @@ def build_courses(html: str, course_names: dict) -> list[Course]:
list of Course objects
"""
courses = []
days = {'Mon': "Monday", 'Tue': "Tuesday", 'Wed': "Wednesday", 'Thur': "Thursday", 'Fri': "Friday",
'Sat': "Saturday"}
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table', {'border': '1', 'cellpadding': '0', 'cellspacing': '0'})

def create_timings(_table: Tag | NavigableString) -> list[int]:
r""" Creates a list of timings in 24 hours format - [8, ..., 12, 14, ..., 17]"""
headings = _table.find_all('td', {'class': 'tableheader',
'style': 'padding-top:5px;padding-bottom:5px;padding-left:7px;padding-right'
':7px', 'nowrap': ''})
return [int(time.get_text().split(':')[0]) + 12 if int(time.get_text().split(':')[0]) < 6 else int(
time.get_text().split(':')[0]) for time in headings if time.get_text() != 'Day Name']

timings = create_timings(table)
rows = table.find_all('tr')
for row in rows:
day_cell: PageElement = row.find('td', {'valign': 'top'})
if not day_cell:
continue
day = days[day_cell.get_text()] if day_cell.get_text() in days.keys() else day_cell.get_text()
day = DAYS_MAP.get(day_cell.get_text(), day_cell.get_text())

# Merge timeslots occurring adjacent to each other and initialize course objects
# This is the previously parsed course/time slot. Used to merge timeslots occurring adjacent to each other and initialize course objects
prev: Course | None = None
course_duration: int = 0
cells = [cell for cell in row.find_all('td') if cell.attrs.get('valign') != 'top']
for index, cell in enumerate(cells):
code = cell.get_text()[:7].strip() if cell.get_text()[:7] != "CS10001" else "CS10003"
if not code: continue # continue if cell has no course in it
# CS10003 is the actual code, but it is written as CS10001 in the timetable
location = cell.get_text()[7:]
cell_duration = int(cell.attrs.get('colspan'))
# To reuse code, uses outer scope variables
def append_prev():
prev.duration = course_duration
courses.append(prev)

for index, cell in enumerate(cells):
# Empty cell means either previous class just ended or no class is scheduled
if cell.get_text() == ' ' or cell.get_text() == ' ':
if cell.get_text().strip() == "":
if prev:
append_prev()
course_duration = 0
prev = None
# Previous slot ended
courses.append(prev)
prev = None
continue

code = cell.get_text()[:7].strip()
if not code: continue # continue if cell has no course in it

# Special exception: CS10003 is the actual code, but it is written as CS10001 in the timetable
if code == "CS10001":
code = "CS10003"

location = cell.get_text()[7:]
cell_duration = int(cell.attrs.get('colspan'))

# Either new class started just after previous one or previous class is continued
if prev:
# If previous class is same as current class, add the duration to the previous class
if prev is not None:
if code == prev.code:
course_duration += cell_duration
# The previous class is different from the current one, meaning previous class ended. Clean up, add to
# list and initialize a new course object for the current class
# Previous class is same as current class, add the duration to the previous class
prev.duration += cell_duration
else:
append_prev()
prev = Course(code=code, name=course_names[code], day=day,
start_time=timings[index],
location=location)
course_duration = cell_duration
# New class started after break
else:
prev = Course(code=code, name=course_names[code], day=day,
start_time=timings[index],
location=location)
course_duration = cell_duration

# Day ended, add the last class
if index == len(cells) - 1:
append_prev()
# The previous class is different from the current one, meaning previous class ended. Clean up, add to
# list and initialize a new course object for the current class
courses.append(prev)
prev = None

# Either new class started after break or after a different previous course
if prev is None:
prev = Course(
code=code,
name=course_names[code],
day=day,
start_time=timings[index],
location=location,
duration=cell_duration
)

# Day ended, add the last class
if prev is not None:
courses.append(prev)
prev = None

return courses

0 comments on commit f493c02

Please sign in to comment.