Skip to content

Commit

Permalink
update library crawler to accommodate course_code
Browse files Browse the repository at this point in the history
Signed-off-by: Rajiv Harlalka <[email protected]>
  • Loading branch information
rajivharlalka authored and harshkhandeparkar committed Sep 16, 2024
1 parent 83fb18e commit d287b02
Showing 1 changed file with 28 additions and 14 deletions.
42 changes: 28 additions & 14 deletions crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ type QuestionPaper struct {
}

type qpRaw struct {
Filename string `json:"filename"`
Name string `json:"name"`
Year int `json:"year"`
ExamType string `json:"exam_type"`
Url string `json:"url"`
CourseCode string `json:"course_code"`
Filename string `json:"filename"`
Name string `json:"name"`
Year int `json:"year"`
ExamType string `json:"exam_type"`
Url string `json:"url"`
}

func downloadFile(new_qp qpRaw) {

res, err := http.Get(new_qp.Url)
if err != nil {
fmt.Println(err)
Expand Down Expand Up @@ -64,13 +64,12 @@ func sanitizeFilename(s string) string {
}

func main() {

c := colly.NewCollector(
colly.AllowedDomains("10.18.24.75"),
colly.MaxDepth(9),
)

res, err := http.Get("http://localhost:5000/library")
res, err := http.Get("https://iqps-server.metakgp.org/library")
if err != nil {
fmt.Println(err)
}
Expand Down Expand Up @@ -103,14 +102,25 @@ func main() {
} else {
exam_type = ""
}

// As of 16/09/2024, filenames in the library have the form course-code_course-name_extra-details.
// Extract course_code from the filename, since course_code is a mandatory field.
course_code := ""
name_split := strings.Split(name, "_")

if len(name_split[0]) == 7 {
course_code = name_split[0]
name_split = name_split[1:]
name = strings.Join(name_split, " ")
}

for i := range existing_qp {
if existing_qp[i].CourseName == name && existing_qp[i].Year == year && existing_qp[i].Exam == exam_type {
if existing_qp[i].CourseCode == course_code && existing_qp[i].Year == year && existing_qp[i].Exam == exam_type {
return
}
}

new_qp = append(new_qp, qpRaw{sanitizeFilename(strings.Join(temp[4:], "_")), name, year, exam_type, file_url})
new_qp = append(new_qp, qpRaw{course_code, sanitizeFilename(strings.Join(temp[4:], "_")), name, year, exam_type, file_url})
}

c.Visit(e.Request.AbsoluteURL(link))
Expand All @@ -129,7 +139,7 @@ func main() {
writer := csv.NewWriter(file)
defer writer.Flush()

header := []string{"course_name", "year", "exam", "filelink", "from_library"}
header := []string{"course_code", "course_name", "year", "exam", "filelink", "from_library", "approve_status"}
if err := writer.Write(header); err != nil {
fmt.Println("Error writing header to CSV:", err)
return
Expand All @@ -140,12 +150,16 @@ func main() {
if new_qp[i].ExamType != "" {
exam_type = new_qp[i].ExamType + "sem"
}
var row = []string{

row := []string{
new_qp[i].CourseCode,
strings.Trim(new_qp[i].Name, ".pdf"),
fmt.Sprint(new_qp[i].Year),
exam_type,
new_qp[i].Filename,
"true"}
"peqp/qp/" + new_qp[i].Filename,
"true",
"true",
}
if err := writer.Write(row); err != nil {
fmt.Println("Error writing row to CSV:", err)
return
Expand Down

0 comments on commit d287b02

Please sign in to comment.