summary refs log tree commit diff
diff options
context:
space:
mode:
author Runxi Yu <me@runxiyu.org> 2024-06-16 15:57:05 +0000
committer Runxi Yu <me@runxiyu.org> 2024-06-16 15:57:05 +0000
commit c587f0b66ae75a2a8f1c6cad30b492cbeb2dfc8e (patch)
tree 2dd4ac678b4c696553862cbbd95a971591ae3fb1
parent f92bab5c8e2fe3829f8f85bb9e039db2ec2b01de (diff)
download albertscripts-c587f0b66ae75a2a8f1c6cad30b492cbeb2dfc8e.tar.gz
albertscripts-c587f0b66ae75a2a8f1c6cad30b492cbeb2dfc8e.zip
Add everything related to past papers searching
-rw-r--r-- pdf_read_test.py 7
-rw-r--r-- read_past_papers.py 27
-rw-r--r-- search_past_papers.py 17
-rw-r--r-- search_past_papers_advanced.py 113
4 files changed, 164 insertions, 0 deletions
diff --git a/pdf_read_test.py b/pdf_read_test.py
new file mode 100644
index 0000000..d879362
--- /dev/null
+++ b/pdf_read_test.py
@@ -0,0 +1,7 @@
+import PyPDF2 as pdf
+
+# Smoke test: print the extracted text of every page of one sample PDF,
+# which is looked up in the current working directory.
+SAMPLE_PDF = "0460_s22_ms_11.pdf"
+
+# The reader is handed the open stream, so all extraction happens inside
+# the `with` block, which closes the file afterwards.
+with open(SAMPLE_PDF, "rb") as f:
+    r = pdf.PdfReader(f)
+    for page in r.pages:
+        # The extra "\n" argument leaves a blank line between pages.
+        print(page.extract_text(), "\n")
diff --git a/read_past_papers.py b/read_past_papers.py
new file mode 100644
index 0000000..b7870a3
--- /dev/null
+++ b/read_past_papers.py
@@ -0,0 +1,27 @@
+import os
+import re
+import json
+import PyPDF2 as pdf
+
+path = "/Users/albert/Desktop/YK Pao School/Y9/Others/IGCSE Past Papers"
+
+# Question-paper file names such as "0460_s22_qp_11.pdf". A raw string keeps
+# the backslashes intact for the regex engine, the dot before "pdf" is
+# escaped, and the pattern is applied with fullmatch() so that names with
+# trailing text (e.g. "....pdf.download") are not picked up.
+QUESTION_PAPER = re.compile(r"\d\d\d\d_\w\d\d_qp_\d\d?\.pdf")
+
+
+def is_plain_dir(parent, name):
+    """Return True if `name` contains no dot and is a directory in `parent`.
+
+    The dot test is the script's way of skipping ordinary files; the
+    isdir() test additionally skips dot-less files, which would otherwise
+    make os.listdir() raise NotADirectoryError.
+    """
+    return "." not in name and os.path.isdir(os.path.join(parent, name))
+
+
+# Layout walked below: <path>/<subject folder>/<year folder>/<pdf files>.
+# One "<subject_code>.json" is written to the current working directory
+# per subject, shaped as {year: {file name: {page number: page text}}}.
+for subject in sorted(os.listdir(path)):
+    if not is_plain_dir(path, subject):
+        continue
+    print(f"Starting to read {subject} ...")
+    # The subject code is the last word of the folder name without brackets.
+    subject_code = subject.split()[-1].replace("(", "").replace(")", "")
+    subject_dir = os.path.join(path, subject)
+    dic = {}
+    for year in sorted(os.listdir(subject_dir)):
+        if not is_plain_dir(subject_dir, year):
+            continue
+        dic[year] = {}
+        year_dir = os.path.join(subject_dir, year)
+        for file in sorted(os.listdir(year_dir)):
+            if not QUESTION_PAPER.fullmatch(file):
+                continue
+            print(f"Reading ./{subject}/{year}/{file} ...")
+            # Close each PDF as soon as its pages have been extracted.
+            with open(os.path.join(year_dir, file), "rb") as pdf_file:
+                r = pdf.PdfReader(pdf_file)
+                # Pages are keyed by their 1-based number, stored as a
+                # string; the text is lower-cased, put on one line and
+                # stripped of runs of four dots.
+                dic[year][file] = {
+                    str(number): (
+                        page.extract_text()
+                        .lower()
+                        .replace("\n", " ")
+                        .replace("....", "")
+                    )
+                    for number, page in enumerate(r.pages, start=1)
+                }
+    print(f"Creating {subject_code}.json ...")
+    with open(f"{subject_code}.json", "w") as f:
+        json.dump(dic, f, indent=4)
+    print(f"Created {subject_code}.json")
+print("Process completed")
diff --git a/search_past_papers.py b/search_past_papers.py
new file mode 100644
index 0000000..a053226
--- /dev/null
+++ b/search_past_papers.py
@@ -0,0 +1,17 @@
+import json
+
+# Ask for a subject, load its "<code>.json" index from the current working
+# directory and print every file/page whose text contains the search string.
+subject_code = input("Subject code: ")
+# The context manager closes the index file once it has been parsed.
+with open(f"{subject_code}.json") as index_file:
+    dic = json.load(index_file)
+# The query is lower-cased before it is compared with the stored page text.
+search_str = input("Word or words to be searched: ").lower()
+flag = True  # stays True until at least one page matches
+print("\nThe following are files that contain such string: ")
+
+# The index is nested as {year: {file name: {page number: page text}}}.
+for papers in dic.values():
+    for file, pages in papers.items():
+        for page, text in pages.items():
+            if search_str in text:
+                print(file, f"[Page {page}]")
+                flag = False
+
+if flag:
+    print("No file found")
diff --git a/search_past_papers_advanced.py b/search_past_papers_advanced.py
new file mode 100644
index 0000000..45deca0
--- /dev/null
+++ b/search_past_papers_advanced.py
@@ -0,0 +1,113 @@
+import json
+import os
+import subprocess
+
+# Session state shared by the command loop. The string "Empty" is the
+# sentinel the loop tests for to tell that a value has not been set yet.
+dic = {"Empty": True}  # index of the selected subject, replaced by 'set'
+path = r"/Users/albert/Documents/IGCSE_Past_Papers"  # root folder used by 'open'
+code = "Empty"  # currently selected subject code
+files = ["Empty"]  # files found by the last 'list'
+
+# Text printed by the 'help' command. It names the commands the loop
+# actually implements: set, list, open, help and quit.
+help_doc = '''Help on module 'search_past_papers_advanced':
+--- set
+ > set (int)code
+ Accepts a valid 4-digit integer subject code and loads corresponding json.
+ Returns a dictionary from the json file that can be later used in 'list'.
+--- list
+ > list *args
+ Accepts multiple arguments that will be concatenated into a string.
+ Returns all past papers that contain the string.
+--- open
+ > open (char)option
+ Accepts a character indicating which file type to be opened.
+ - q: question paper only
+ - m: corresponding mark scheme only
+ - b: both question paper and corresponding mark scheme
+ Opens all files of the indicated type returned from the last 'list'.
+--- help
+ > help
+ Returns a help document.
+--- quit
+ > quit
+ Leaves the program properly and directly. '''
+
+
+def subject(code):
+    """Load and return the index of one subject.
+
+    Args:
+        code: Subject code. The index is read from "<code>.json", resolved
+            against the current working directory.
+
+    Returns:
+        The parsed JSON content of that file.
+
+    Raises:
+        FileNotFoundError: If "<code>.json" does not exist.
+    """
+    # The context manager closes the file even when parsing fails.
+    with open(f"{code}.json") as index_file:
+        return json.load(index_file)
+
+
+def search(dic, string):
+    """Print every page that contains `string` and return the matching files.
+
+    Args:
+        dic: Index nested as {year: {file name: {page number: page text}}}.
+        string: Text to look for; it is compared as given, without any
+            case conversion.
+
+    Returns:
+        The names of the files with at least one matching page, in index
+        order and without repeats, so that each file is listed once no
+        matter how many of its pages match. When nothing matches, a
+        warning is printed and the sentinel list ["Empty"] is returned.
+    """
+    files = []
+    for papers in dic.values():
+        for file, pages in papers.items():
+            hits = [page for page, text in pages.items() if string in text]
+            for page in hits:
+                print(file, f"[Page {page}]")
+            # Record the file once, however many of its pages matched.
+            if hits and file not in files:
+                files.append(file)
+    if not files:
+        print("Warning: no file found, unable to use 'open' later")
+        files = ["Empty"]
+    return files
+
+
+def _open_paper(name):
+    """Open one paper of the selected subject with the system `open` command.
+
+    Args:
+        name: File name of the paper, e.g. "0460_s22_qp_11.pdf". The file is
+            looked up under <path>/<code>/20<yy>/, where <yy> is taken from
+            characters 6-7 of the name.
+    """
+    target = os.path.join(path, code, f"20{name[6:8]}", name)
+    try:
+        # An argument list (no shell) passes the path through verbatim,
+        # even if it contains spaces or shell metacharacters.
+        subprocess.run(["open", target])
+    except OSError as error:
+        print(f"Execution failed: unable to launch 'open' ({error})")
+
+
+# Interactive prompt: read one command per line until 'quit', Ctrl-C or
+# end of input. `dic`, `code` and `files` carry state between commands.
+try:
+    while True:
+        action = input(">>> ")
+        action_l = action.split()
+
+        if not action_l:
+            print("Invalid syntax: no command entered")
+
+        elif action_l[0] == "set":
+            if len(action_l) == 2:
+                if len(action_l[1]) == 4:
+                    try:
+                        # Load first, so that a failed 'set' leaves the
+                        # previously selected subject fully intact.
+                        dic = subject(action_l[1])
+                    except FileNotFoundError:
+                        print("Execution failed: subject not supported, try a different code")
+                    else:
+                        code = action_l[1]
+                        # Results of an earlier 'list' belong to the old
+                        # subject and must not be opened under the new code.
+                        files = ["Empty"]
+                else:
+                    print(f"Execution failed: 'set' does not accept argument '{action_l[1]}'")
+            else:
+                print(f"Execution failed: 'set' expects 1 argument, gets {len(action_l)-1}")
+
+        elif action_l[0] == "list":
+            if len(action_l) >= 2:
+                if "Empty" not in dic:
+                    # All arguments form one lower-cased search phrase.
+                    files = search(dic, " ".join(action_l[1:]).lower())
+                else:
+                    print("Execution failed: no subject selected, use 'set' first")
+            else:
+                print("Execution failed: 'list' expects at least 1 argument, gets 0")
+
+        elif action_l[0] == "help":
+            print(help_doc)
+
+        elif action_l[0] == "quit":
+            print("Process completed")
+            break
+
+        elif action_l[0] == "open":
+            if len(action_l) == 2:
+                option = action_l[1]
+                if code != "Empty" and "Empty" not in files:
+                    if option not in ("q", "m", "b"):
+                        print(f"Execution failed: 'open' does not accept argument '{option}'")
+                    else:
+                        if option in ("m", "b"):
+                            for file in files:
+                                # The mark scheme shares the question
+                                # paper's name with "qp" (characters 9-10)
+                                # replaced by "ms".
+                                _open_paper(f"{file[:9]}ms{file[11:]}")
+                        if option in ("q", "b"):
+                            for file in files:
+                                _open_paper(file)
+                elif code == "Empty":
+                    print("Execution failed: no subject selected, use 'set' first")
+                else:
+                    print("Execution failed: last 'list' finds no file")
+            else:
+                print(f"Execution failed: 'open' expects 1 argument, gets {len(action_l)-1}")
+
+        else:
+            print(f"Invalid syntax: command '{action_l[0]}' not found")
+
+# EOFError covers end of input (e.g. Ctrl-D or a finished pipe), which
+# input() reports by raising rather than returning.
+except (KeyboardInterrupt, EOFError):
+    print("Process exited")