diff --git a/.gitignore b/.gitignore index 0005a8d..e629645 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /venv/ -/Matthew Henry Commentary/xlm/ +/Matthew Henry Commentary OLD/xlm/* +/.idea +*/.obsidian \ No newline at end of file diff --git a/Matthew Henry Commentary/CHAP. I..md b/Matthew Henry Commentary OLD/CHAP. I..md similarity index 100% rename from Matthew Henry Commentary/CHAP. I..md rename to Matthew Henry Commentary OLD/CHAP. I..md diff --git a/Matthew Henry Commentary/CHAP. II..md b/Matthew Henry Commentary OLD/CHAP. II..md similarity index 100% rename from Matthew Henry Commentary/CHAP. II..md rename to Matthew Henry Commentary OLD/CHAP. II..md diff --git a/Matthew Henry Commentary/CHAP. III..md b/Matthew Henry Commentary OLD/CHAP. III..md similarity index 100% rename from Matthew Henry Commentary/CHAP. III..md rename to Matthew Henry Commentary OLD/CHAP. III..md diff --git a/Matthew Henry Commentary/Commentary on the Whole Bible Volume I (Ge - Matthew Henry.md b/Matthew Henry Commentary OLD/Commentary on the Whole Bible Volume I (Ge - Matthew Henry.md similarity index 100% rename from Matthew Henry Commentary/Commentary on the Whole Bible Volume I (Ge - Matthew Henry.md rename to Matthew Henry Commentary OLD/Commentary on the Whole Bible Volume I (Ge - Matthew Henry.md diff --git a/Matthew Henry Commentary/Genesis/Genesis 1.md b/Matthew Henry Commentary OLD/Genesis/Genesis 1.md similarity index 100% rename from Matthew Henry Commentary/Genesis/Genesis 1.md rename to Matthew Henry Commentary OLD/Genesis/Genesis 1.md diff --git a/Matthew Henry Commentary/Genesis/Genesis.md b/Matthew Henry Commentary OLD/Genesis/Genesis.md similarity index 100% rename from Matthew Henry Commentary/Genesis/Genesis.md rename to Matthew Henry Commentary OLD/Genesis/Genesis.md diff --git a/Matthew Henry Commentary/.obsidian/app.json b/Matthew Henry Commentary/.obsidian/app.json index c9391a4..9e26dfe 100644 --- a/Matthew Henry Commentary/.obsidian/app.json +++ b/Matthew Henry Commentary/.obsidian/app.json @@ -1,5 +1 @@ -{ - "readableLineLength": true, - "strictLineBreaks": false, - "showLineNumber": false -} \ No newline at end of file +{} \ No newline at end of file diff --git a/Matthew Henry Commentary/.obsidian/workspace.json b/Matthew Henry Commentary/.obsidian/workspace.json index cb4eb80..3044e4a 100644 --- a/Matthew Henry Commentary/.obsidian/workspace.json +++ b/Matthew Henry Commentary/.obsidian/workspace.json @@ -1,22 +1,18 @@ { "main": { - "id": "74aa2bd24def0dbb", + "id": "55636d74db5cd17b", "type": "split", "children": [ { - "id": "2d08192becdbbc46", + "id": "70dbebffc56f01b6", "type": "tabs", "children": [ { - "id": "26aa765acbe7529a", + "id": "f5dca16b5209f2c5", "type": "leaf", "state": { - "type": "markdown", - "state": { - "file": "xlm/Deuteronomy/Chapter I.md", - "mode": "source", - "source": false - } + "type": "empty", + "state": {} } } ] @@ -25,15 +21,15 @@ "direction": "vertical" }, "left": { - "id": "195826f1439b747b", + "id": "7f0c5c2cf0aa46bb", "type": "split", "children": [ { - "id": "f08655a065f50eac", + "id": "0c030a34bea5328c", "type": "tabs", "children": [ { - "id": "aa8f58742beaa66c", + "id": "1f642b2794afdb3c", "type": "leaf", "state": { "type": "file-explorer", @@ -43,7 +39,7 @@ } }, { - "id": "5e4fc3226ed10d77", + "id": "32a633c38dafb6cd", "type": "leaf", "state": { "type": "search", @@ -58,7 +54,7 @@ } }, { - "id": "51e400c71a311b28", + "id": "2ad3b96f7190f448", "type": "leaf", "state": { "type": "bookmarks", @@ -72,30 +68,19 @@ "width": 300 }, "right": { - "id": "a7de7eb166b864ba", + "id": "70ee2af17b487873", "type": "split", "children": [ { - "id": "fc018b89448f5dfc", + "id": "a572819b160e1d9f", "type": "tabs", "children": [ { - "id": "27c2bf6ccd240b82", - "type": "leaf", - "state": { - "type": "outline", - "state": { - "file": "xlm/Deuteronomy/Chapter I.md" - } - } - }, - { - "id": "13a00d44d2a7761f", + "id": "6aa66234614af992", "type": "leaf", "state": { "type": "backlink", "state": { - "file": "xlm/Deuteronomy/Chapter I.md", "collapseAll": false, "extraContext": false, "sortOrder": "alphabetical", @@ -107,19 +92,18 @@ } }, { - "id": "3886b56e0b09b278", + "id": "af8ee6cd6dfa886a", "type": "leaf", "state": { "type": "outgoing-link", "state": { - "file": "xlm/Deuteronomy/Chapter I.md", "linksCollapsed": false, "unlinkedCollapsed": true } } }, { - "id": "4eec6a6bf78181c7", + "id": "0b3c3c936f81d729", "type": "leaf", "state": { "type": "tag", @@ -128,12 +112,21 @@ "useHierarchy": true } } + }, + { + "id": "962eb91d668348c3", + "type": "leaf", + "state": { + "type": "outline", + "state": {} + } } ] } ], "direction": "horizontal", - "width": 300 + "width": 300, + "collapsed": true }, "left-ribbon": { "hiddenItems": { @@ -145,43 +138,6 @@ "command-palette:Open command palette": false } }, - "active": "26aa765acbe7529a", - "lastOpenFiles": [ - "xlm/Genesis/Chapter I.md", - "xlm/Indexes/Index of Pages of the Print Edition.md", - "xlm/Indexes/Index of Scripture Commentary.md", - "xlm/Indexes/Index of Scripture References.md", - "xlm/Indexes", - "xlm/Deuteronomy/Chapter XXXIV.md", - "xlm/Deuteronomy/Chapter XXXIII.md", - "xlm/Deuteronomy/Chapter XXXII.md", - "xlm/Deuteronomy/Chapter XXXI.md", - "xlm/Deuteronomy/Chapter XXX.md", - "xlm/Deuteronomy/Chapter XXIX.md", - "xlm/Deuteronomy/Chapter XXVIII.md", - "xlm/Deuteronomy/Chapter XXVII.md", - "xlm/Deuteronomy/Chapter XXVI.md", - "xlm/Deuteronomy/Chapter XXV.md", - "xlm/Deuteronomy/Chapter XXIV.md", - "xlm/Deuteronomy/Chapter XXIII.md", - "xlm/Deuteronomy/Chapter XXII.md", - "xlm/Deuteronomy/Chapter XXI.md", - "xlm/Deuteronomy/Chapter XX.md", - "xlm/Deuteronomy/Chapter XIX.md", - "xlm/Deuteronomy/Chapter XVIII.md", - "xlm/Deuteronomy/Chapter XVII.md", - "xlm/Deuteronomy/Chapter XVI.md", - "xlm/Deuteronomy/Chapter XV.md", - "xlm/Deuteronomy/Chapter XIV.md", - "xlm/Deuteronomy/Chapter XIII.md", - "xlm/Deuteronomy", - "xlm/Numbers", - "xlm/Leviticus", - "xlm/Exodus", - "xlm/Genesis/Chapter I.md~", - "xlm/Genesis", - "xlm", - "xlm/None", - "Commentary on the Whole Bible Volume I (Ge - Matthew Henry.txt" - ] + "active": "f5dca16b5209f2c5", + "lastOpenFiles": [] } \ No newline at end of file diff --git a/console.py b/console.py index a439707..41128fb 100644 --- a/console.py +++ b/console.py @@ -7,7 +7,7 @@ class MyHTMLParser(HTMLParser): file = None current_tag = None writeable = False - output_folder = Path('./Matthew Henry Commentary') + output_folder = Path('Matthew Henry Commentary OLD') section = [] @staticmethod diff --git a/html_parse.py b/html_parse.py index fe359c7..ba86dd3 100644 --- a/html_parse.py +++ b/html_parse.py @@ -115,7 +115,7 @@ if __name__ == '__main__': folder = Path('/home/bear/PycharmProjects/mark_henry_md/matthew_henry') htm_files = list(folder.glob('*.HTM')) - output_folder = Path('./Matthew Henry Commentary').absolute() + output_folder = Path('Matthew Henry Commentary OLD').absolute() # for file in tqdm(htm_files[7:8]): for file in htm_files[7:8]: soup = BeautifulSoup(file.read_text(), 'html.parser') diff --git a/main.py b/main.py index 36b3028..2a1756a 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,4 @@ +import itertools import re import string import sys @@ -9,10 +10,90 @@ from bs4 import BeautifulSoup from loguru import logger from tqdm import tqdm -base = Path('./xml/Matthew Henry Commentary').absolute() +base = Path('./xml/Matthew Henry Commentary 2').absolute() logger.remove(0) -logger.add('log.txt') + +from functools import partialmethod + +logger.__class__.foobar = partialmethod(logger.__class__.log, "foobar", level='WARNING') +logger.add('log.txt', level='WARNING') + +convert = dict([ +("Eccl", "Ecclesiastes"), +("Rom", "Romans"), +("2Tim", "2 Timothy"), +("2Pet", "2 Peter"), +("Gen", "Genesis"), +("John", "John"), +("1Cor", "1 Corinthians"), +("Ezek", "Ezekiel"), +("Heb", "Hebrews"), +("Rev", "Revelation"), +("Eph", "Ephesians"), +("Isa", "Isaiah"), +("Ps", "Psalms"), +("Acts", "Acts"), +("Deut", "Deuteronomy"), +("Neh", "Nehemiah"), +("1Chr", "1 Chronicles"), +("2Kgs", "2 Kings"), +("2Cor", "2 Corinthians"), +("Hos", "Hosea"), +("Exod", "Exodus"), +("Matt", "Matthew"), +("Job", "Job"), +("Prov", "Proverbs"), +("Col", "Colossians"), +("Jer", "Jeremiah"), +("1John", "1 John"), +("Jas", "James"), +("1Tim", "1 Timothy"), +("Amos", "Amos"), +("Mic", "Micah"), +("Dan", "Daniel"), +("Jonah", "Jonah"), +("Luke", "Luke"), +("Mal", "Malachi"), +("Zech", "Zechariah"), +("Phil", "Philippians"), +("Mark", "Mark"), +("2Thess", "2 Thessalonians"), +("Jude", "Jude"), +("1Sam", "1 Samuel"), +("Num", "Numbers"), +("Gal", "Galatians"), +("1Pet", "1 Peter"), +("Lev", "Leviticus"), +("2Chr", "2 Chronicles"), +("Lam", "Lamentations"), +("1Kgs", "1 Kings"), +("Ezra", "Ezra"), +("Obad", "Obadiah"), +("Hab", "Habakkuk"), +("2Sam", "2 Samuel"), +("Josh", "Joshua"), +("Judg", "Judges"), +("Ruth", "Ruth"), +("Esth", "Esther"), +("Phlm", "Philemon"), +("1Thess", "1 Thessalonians"), +("Joel", "Joel"), +("Titus", "Titus"), +("3John", "3 John"), +("Zeph", "Zephaniah"), +("Song", "Song of Songs"), +("Hag", "Haggai"), +("Wis", "Wisdom of Solomon"), +("2John", "2 John"), +("2Macc", "2 Maccabees"), +("Nah", "Nahum"), +("Sir", "Ecclesiasticus"), +("Psa", "Psalms"), +("1Macc", "1 Maccabees"), +("2Peter", "2 Peter"), +] +) class MyHTMLParser(HTMLParser): file = None @@ -152,15 +233,17 @@ class MyHTMLParser(HTMLParser): file.write(line) - @staticmethod - def normalize_osis_verses(verses): - """Takes this 'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Col.1.16 Bible:Heb.1.2 Bible:Acts.17.24-Acts.17.25' + def normalize_osis_verses(self, verses): + """Takes this + 'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Col.1.16 Bible:Heb.1.2 Bible:Acts.17.24-Acts.17.25 Bible:Exod.5.26 Bible:Gen.5' and turns it into somthing more readable""" normalized_refs = list() for chunks in verses.removeprefix("Bible:").split('Bible:'): spanned_verses = list() for ref in chunks.split('-'): verse = ref.replace('.', ' ', 1).replace('.', ':') + book = ref.split('.')[0] + verse.replace(book, convert[book]) spanned_verses.append( bible.format_scripture_references( bible.get_references(verse.strip()) @@ -174,16 +257,57 @@ class MyHTMLParser(HTMLParser): except: logger.warning(f"Error with: {verses=}: {spanned_verses=}") raise + + if normalized is False: + print(f"Error with: {verses=}: {spanned_verses=}") + sys.exit() + normalized_refs.append(normalized) return ';'.join(normalized_refs) + # @staticmethod + def old_normalize_osis_verses(self, verses): + """Takes this + 'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Acts.17.24-Acts.17.25 Bible:Exod.5.26 Bible:Gen.5' + and turns it into somthing more readable""" + normalized_refs = list() + for chunks in verses.removeprefix("Bible:").split('Bible:'): + spanned_verses = chunks.split('-') + if len(spanned_verses) < 2: + spanned_verses = list(itertools.repeat(spanned_verses[0], 2)) + + book, start_chapter, start_verse = spanned_verses[0].split('.') + _, end_chapter, end_verse = spanned_verses[1].split('.') + book = bible.Book[convert[book].upper()] + + normalized = bible.NormalizedReference( + book=book, + start_chapter=start_chapter, + start_verse=start_verse, + end_chapter=end_chapter, + end_verse=end_verse, + ) + + if normalized is False: + logger.foobar(f'{self.args=}') + + normalized_refs.append( + bible.format_scripture_references([normalized]).strip() + ) + + return ';'.join(normalized_refs) + @staticmethod + @logger.catch(reraise=False) def obsidian_links(verses): """Take `Book Chapter:Ver-Ver and turn it into a obsidian bible link""" links = [] for verse in verses.split(';'): - links.append(f"[[{verse.replace(':', '#').replace('-', '..')}]]") + verse = verse.replace(':', '#').replace('-', '..') + if verse[0] in string.digits: + verse.replace(' ', '', 1) + links.append(f"[[{verse}]]") return '; '.join(links) @@ -237,9 +361,10 @@ class MyHTMLParser(HTMLParser): self.attrs['osisref'] ) else: - verses = self.normalize_osis_verses( - self.convert_passage(self.attrs['passage']) - ) + # verses = self.normalize_osis_verses( + # self.convert_passage(self.attrs['passage']) + # ) + raise ValueError('No `osisref` in tag attrs') self.write_to_file(self.obsidian_links(verses)) @@ -326,18 +451,23 @@ if __name__ == '__main__': # Get each book in the volume # This will be the main folder for all the book's chapters for book in soup.find_all('div1'): - book_name = book['title'].replace('First', '1').replace('Second', '2').replace('Third', '3') + book_name = (book['title']. + replace('First ', '1'). + replace('Second ', '2'). + replace('Third ', '3') + ) logger.info(book_name) # These are the chapters/files for each book folder for chapter in book.find_all('div2'): - filename = chapter['title'] + # For introduction files + filename = f'MH {book_name} {chapter['title']}' # Remove Roman Numerals from file name if 'CHAPTER' in filename.upper(): - _, roman_num = chapter['title'].split(' ') - filename = f'Chapter {parser.roman_to_int(roman_num)}' + *_, roman_num = chapter['title'].split(' ') + filename = f'MH {book_name} {parser.roman_to_int(roman_num)}' logger.info(filename) parser.create_md_file(base, book_name, filename) diff --git a/vol_1.xml b/v/vol_1.xml similarity index 100% rename from vol_1.xml rename to v/vol_1.xml diff --git a/vol_2.xml b/v/vol_2.xml similarity index 100% rename from vol_2.xml rename to v/vol_2.xml diff --git a/vol_4.xml b/v/vol_4.xml similarity index 100% rename from vol_4.xml rename to v/vol_4.xml diff --git a/vol_5.xml b/v/vol_5.xml similarity index 100% rename from vol_5.xml rename to v/vol_5.xml diff --git a/vol_6.xml b/v/vol_6.xml similarity index 100% rename from vol_6.xml rename to v/vol_6.xml diff --git a/vol_3.xml b/vol_3.xml index 5210685..4a8fa19 100644 --- a/vol_3.xml +++ b/vol_3.xml @@ -73481,7 +73481,7 @@ highest.

PSALM CXIX.

-

This is a psalm by itself, like none of the rest; +

I. This is a psalm by itself, like none of the rest; it excels them all, and shines brightest in this constellation. It is much longer than any of them more than twice as long as any of them. It is not making long prayers that Christ censurers, but @@ -73510,8 +73510,8 @@ any flaw throughout the whole psalm. Archbishop Tillotson says, It seems to have more of poetical skill and number in it than we at this distance can easily understand. Some have called it the saints' alphabet; and it were to be wished we had it as ready in -our memories as the very letters of our alphabet, as ready as our A -B C. Perhaps the penman found it of use to himself to observe this +our memories as the very letters of our alphabet, as ready as our 'A B C'. +Perhaps the penman found it of use to himself to observe this method, as it obliged him to seek for thoughts, and search for them, that he might fill up the quota of every part; and the letter he was to begin with might lead him to a word which might suggest a diff --git a/xml_parse.py b/xml_parse.py index 5d01109..9beb50a 100644 --- a/xml_parse.py +++ b/xml_parse.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup from loguru import logger from tqdm import tqdm -base = Path('./Matthew Henry Commentary/xml').absolute() +base = Path('Matthew Henry Commentary OLD/xml').absolute() logger.remove(0) logger.add('log.txt')