This commit is contained in:
boose_magoose 2023-12-07 06:56:01 -05:00
parent 6f00db705e
commit 9997abcbe6
19 changed files with 180 additions and 96 deletions

4
.gitignore vendored
View File

@ -1,2 +1,4 @@
/venv/ /venv/
/Matthew Henry Commentary/xlm/ /Matthew Henry Commentary OLD/xlm/*
/.idea
*/.obsidian

View File

@ -1,5 +1 @@
{ {}
"readableLineLength": true,
"strictLineBreaks": false,
"showLineNumber": false
}

View File

@ -1,22 +1,18 @@
{ {
"main": { "main": {
"id": "74aa2bd24def0dbb", "id": "55636d74db5cd17b",
"type": "split", "type": "split",
"children": [ "children": [
{ {
"id": "2d08192becdbbc46", "id": "70dbebffc56f01b6",
"type": "tabs", "type": "tabs",
"children": [ "children": [
{ {
"id": "26aa765acbe7529a", "id": "f5dca16b5209f2c5",
"type": "leaf", "type": "leaf",
"state": { "state": {
"type": "markdown", "type": "empty",
"state": { "state": {}
"file": "xlm/Deuteronomy/Chapter I.md",
"mode": "source",
"source": false
}
} }
} }
] ]
@ -25,15 +21,15 @@
"direction": "vertical" "direction": "vertical"
}, },
"left": { "left": {
"id": "195826f1439b747b", "id": "7f0c5c2cf0aa46bb",
"type": "split", "type": "split",
"children": [ "children": [
{ {
"id": "f08655a065f50eac", "id": "0c030a34bea5328c",
"type": "tabs", "type": "tabs",
"children": [ "children": [
{ {
"id": "aa8f58742beaa66c", "id": "1f642b2794afdb3c",
"type": "leaf", "type": "leaf",
"state": { "state": {
"type": "file-explorer", "type": "file-explorer",
@ -43,7 +39,7 @@
} }
}, },
{ {
"id": "5e4fc3226ed10d77", "id": "32a633c38dafb6cd",
"type": "leaf", "type": "leaf",
"state": { "state": {
"type": "search", "type": "search",
@ -58,7 +54,7 @@
} }
}, },
{ {
"id": "51e400c71a311b28", "id": "2ad3b96f7190f448",
"type": "leaf", "type": "leaf",
"state": { "state": {
"type": "bookmarks", "type": "bookmarks",
@ -72,30 +68,19 @@
"width": 300 "width": 300
}, },
"right": { "right": {
"id": "a7de7eb166b864ba", "id": "70ee2af17b487873",
"type": "split", "type": "split",
"children": [ "children": [
{ {
"id": "fc018b89448f5dfc", "id": "a572819b160e1d9f",
"type": "tabs", "type": "tabs",
"children": [ "children": [
{ {
"id": "27c2bf6ccd240b82", "id": "6aa66234614af992",
"type": "leaf",
"state": {
"type": "outline",
"state": {
"file": "xlm/Deuteronomy/Chapter I.md"
}
}
},
{
"id": "13a00d44d2a7761f",
"type": "leaf", "type": "leaf",
"state": { "state": {
"type": "backlink", "type": "backlink",
"state": { "state": {
"file": "xlm/Deuteronomy/Chapter I.md",
"collapseAll": false, "collapseAll": false,
"extraContext": false, "extraContext": false,
"sortOrder": "alphabetical", "sortOrder": "alphabetical",
@ -107,19 +92,18 @@
} }
}, },
{ {
"id": "3886b56e0b09b278", "id": "af8ee6cd6dfa886a",
"type": "leaf", "type": "leaf",
"state": { "state": {
"type": "outgoing-link", "type": "outgoing-link",
"state": { "state": {
"file": "xlm/Deuteronomy/Chapter I.md",
"linksCollapsed": false, "linksCollapsed": false,
"unlinkedCollapsed": true "unlinkedCollapsed": true
} }
} }
}, },
{ {
"id": "4eec6a6bf78181c7", "id": "0b3c3c936f81d729",
"type": "leaf", "type": "leaf",
"state": { "state": {
"type": "tag", "type": "tag",
@ -128,12 +112,21 @@
"useHierarchy": true "useHierarchy": true
} }
} }
},
{
"id": "962eb91d668348c3",
"type": "leaf",
"state": {
"type": "outline",
"state": {}
}
} }
] ]
} }
], ],
"direction": "horizontal", "direction": "horizontal",
"width": 300 "width": 300,
"collapsed": true
}, },
"left-ribbon": { "left-ribbon": {
"hiddenItems": { "hiddenItems": {
@ -145,43 +138,6 @@
"command-palette:Open command palette": false "command-palette:Open command palette": false
} }
}, },
"active": "26aa765acbe7529a", "active": "f5dca16b5209f2c5",
"lastOpenFiles": [ "lastOpenFiles": []
"xlm/Genesis/Chapter I.md",
"xlm/Indexes/Index of Pages of the Print Edition.md",
"xlm/Indexes/Index of Scripture Commentary.md",
"xlm/Indexes/Index of Scripture References.md",
"xlm/Indexes",
"xlm/Deuteronomy/Chapter XXXIV.md",
"xlm/Deuteronomy/Chapter XXXIII.md",
"xlm/Deuteronomy/Chapter XXXII.md",
"xlm/Deuteronomy/Chapter XXXI.md",
"xlm/Deuteronomy/Chapter XXX.md",
"xlm/Deuteronomy/Chapter XXIX.md",
"xlm/Deuteronomy/Chapter XXVIII.md",
"xlm/Deuteronomy/Chapter XXVII.md",
"xlm/Deuteronomy/Chapter XXVI.md",
"xlm/Deuteronomy/Chapter XXV.md",
"xlm/Deuteronomy/Chapter XXIV.md",
"xlm/Deuteronomy/Chapter XXIII.md",
"xlm/Deuteronomy/Chapter XXII.md",
"xlm/Deuteronomy/Chapter XXI.md",
"xlm/Deuteronomy/Chapter XX.md",
"xlm/Deuteronomy/Chapter XIX.md",
"xlm/Deuteronomy/Chapter XVIII.md",
"xlm/Deuteronomy/Chapter XVII.md",
"xlm/Deuteronomy/Chapter XVI.md",
"xlm/Deuteronomy/Chapter XV.md",
"xlm/Deuteronomy/Chapter XIV.md",
"xlm/Deuteronomy/Chapter XIII.md",
"xlm/Deuteronomy",
"xlm/Numbers",
"xlm/Leviticus",
"xlm/Exodus",
"xlm/Genesis/Chapter I.md~",
"xlm/Genesis",
"xlm",
"xlm/None",
"Commentary on the Whole Bible Volume I (Ge - Matthew Henry.txt"
]
} }

View File

@ -7,7 +7,7 @@ class MyHTMLParser(HTMLParser):
file = None file = None
current_tag = None current_tag = None
writeable = False writeable = False
output_folder = Path('./Matthew Henry Commentary') output_folder = Path('Matthew Henry Commentary OLD')
section = [] section = []
@staticmethod @staticmethod

View File

@ -115,7 +115,7 @@ if __name__ == '__main__':
folder = Path('/home/bear/PycharmProjects/mark_henry_md/matthew_henry') folder = Path('/home/bear/PycharmProjects/mark_henry_md/matthew_henry')
htm_files = list(folder.glob('*.HTM')) htm_files = list(folder.glob('*.HTM'))
output_folder = Path('./Matthew Henry Commentary').absolute() output_folder = Path('Matthew Henry Commentary OLD').absolute()
# for file in tqdm(htm_files[7:8]): # for file in tqdm(htm_files[7:8]):
for file in htm_files[7:8]: for file in htm_files[7:8]:
soup = BeautifulSoup(file.read_text(), 'html.parser') soup = BeautifulSoup(file.read_text(), 'html.parser')

156
main.py
View File

@ -1,3 +1,4 @@
import itertools
import re import re
import string import string
import sys import sys
@ -9,10 +10,90 @@ from bs4 import BeautifulSoup
from loguru import logger from loguru import logger
from tqdm import tqdm from tqdm import tqdm
base = Path('./xml/Matthew Henry Commentary').absolute() base = Path('./xml/Matthew Henry Commentary 2').absolute()
logger.remove(0) logger.remove(0)
logger.add('log.txt')
from functools import partialmethod
logger.__class__.foobar = partialmethod(logger.__class__.log, "foobar", level='WARNING')
logger.add('log.txt', level='WARNING')
# Maps the book abbreviation used in OSIS references (the part before the
# first '.', e.g. "Gen" in "Bible:Gen.1.1") to the full English book name.
# Some books appear under more than one abbreviation ("Ps"/"Psa",
# "2Pet"/"2Peter"), so both spellings are listed.
convert = {
    "Eccl": "Ecclesiastes",
    "Rom": "Romans",
    "2Tim": "2 Timothy",
    "2Pet": "2 Peter",
    "Gen": "Genesis",
    "John": "John",
    "1Cor": "1 Corinthians",
    "Ezek": "Ezekiel",
    "Heb": "Hebrews",
    "Rev": "Revelation",
    "Eph": "Ephesians",
    "Isa": "Isaiah",
    "Ps": "Psalms",
    "Acts": "Acts",
    "Deut": "Deuteronomy",
    "Neh": "Nehemiah",
    "1Chr": "1 Chronicles",
    "2Kgs": "2 Kings",
    "2Cor": "2 Corinthians",
    "Hos": "Hosea",
    "Exod": "Exodus",
    "Matt": "Matthew",
    "Job": "Job",
    "Prov": "Proverbs",
    "Col": "Colossians",
    "Jer": "Jeremiah",
    "1John": "1 John",
    "Jas": "James",
    "1Tim": "1 Timothy",
    "Amos": "Amos",
    "Mic": "Micah",
    "Dan": "Daniel",
    "Jonah": "Jonah",
    "Luke": "Luke",
    "Mal": "Malachi",
    "Zech": "Zechariah",
    "Phil": "Philippians",
    "Mark": "Mark",
    "2Thess": "2 Thessalonians",
    "Jude": "Jude",
    "1Sam": "1 Samuel",
    "Num": "Numbers",
    "Gal": "Galatians",
    "1Pet": "1 Peter",
    "Lev": "Leviticus",
    "2Chr": "2 Chronicles",
    "Lam": "Lamentations",
    "1Kgs": "1 Kings",
    "Ezra": "Ezra",
    "Obad": "Obadiah",
    "Hab": "Habakkuk",
    "2Sam": "2 Samuel",
    "Josh": "Joshua",
    "Judg": "Judges",
    "Ruth": "Ruth",
    "Esth": "Esther",
    "Phlm": "Philemon",
    "1Thess": "1 Thessalonians",
    "Joel": "Joel",
    "Titus": "Titus",
    "3John": "3 John",
    "Zeph": "Zephaniah",
    "Song": "Song of Songs",
    "Hag": "Haggai",
    "Wis": "Wisdom of Solomon",
    "2John": "2 John",
    "2Macc": "2 Maccabees",
    "Nah": "Nahum",
    "Sir": "Ecclesiasticus",
    "Psa": "Psalms",
    "1Macc": "1 Maccabees",
    "2Peter": "2 Peter",
}
class MyHTMLParser(HTMLParser): class MyHTMLParser(HTMLParser):
file = None file = None
@ -152,15 +233,17 @@ class MyHTMLParser(HTMLParser):
file.write(line) file.write(line)
@staticmethod def normalize_osis_verses(self, verses):
def normalize_osis_verses(verses): """Takes this
"""Takes this 'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Col.1.16 Bible:Heb.1.2 Bible:Acts.17.24-Acts.17.25' 'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Col.1.16 Bible:Heb.1.2 Bible:Acts.17.24-Acts.17.25 Bible:Exod.5.26 Bible:Gen.5'
and turns it into somthing more readable""" and turns it into somthing more readable"""
normalized_refs = list() normalized_refs = list()
for chunks in verses.removeprefix("Bible:").split('Bible:'): for chunks in verses.removeprefix("Bible:").split('Bible:'):
spanned_verses = list() spanned_verses = list()
for ref in chunks.split('-'): for ref in chunks.split('-'):
verse = ref.replace('.', ' ', 1).replace('.', ':') verse = ref.replace('.', ' ', 1).replace('.', ':')
book = ref.split('.')[0]
verse.replace(book, convert[book])
spanned_verses.append( spanned_verses.append(
bible.format_scripture_references( bible.format_scripture_references(
bible.get_references(verse.strip()) bible.get_references(verse.strip())
@ -174,16 +257,57 @@ class MyHTMLParser(HTMLParser):
except: except:
logger.warning(f"Error with: {verses=}: {spanned_verses=}") logger.warning(f"Error with: {verses=}: {spanned_verses=}")
raise raise
if normalized is False:
print(f"Error with: {verses=}: {spanned_verses=}")
sys.exit()
normalized_refs.append(normalized) normalized_refs.append(normalized)
return ';'.join(normalized_refs) return ';'.join(normalized_refs)
# @staticmethod
def old_normalize_osis_verses(self, verses):
    """Turn a string of OSIS references into readable scripture references.

    Takes something like
    'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Acts.17.24-Acts.17.25'
    and turns it into something more readable.

    Each reference must have the form ``Book.chapter.verse``; a span is two
    such references joined by '-'. A single reference is treated as a span
    that starts and ends on the same verse.

    :param verses: 'Bible:'-prefixed OSIS references separated by whitespace.
    :return: the formatted references joined with ';'.
    :raises ValueError: if a reference is not ``Book.chapter.verse`` (this
        includes chapter-only references such as 'Bible:Gen.5') or if a
        chapter/verse is not a number.
    :raises KeyError: if the book abbreviation is missing from ``convert`` or
        the resulting name is not a ``bible.Book`` member.
    """
    normalized_refs = []
    for chunk in verses.removeprefix("Bible:").split('Bible:'):
        span = chunk.strip().split('-')
        start_ref = span[0]
        # A lone reference is its own end point.
        end_ref = span[1] if len(span) > 1 else start_ref

        start_parts = start_ref.split('.')
        end_parts = end_ref.split('.')
        if len(start_parts) != 3 or len(end_parts) != 3:
            raise ValueError(
                f"Expected 'Book.chapter.verse' references, got {chunk!r}"
            )
        book_abbr, start_chapter, start_verse = start_parts
        _, end_chapter, end_verse = end_parts

        # NOTE(review): convert[...] yields names such as "1 Corinthians" or
        # "Song of Songs"; confirm that bible.Book really has members under
        # the upper-cased form, otherwise this raises KeyError for numbered
        # and multi-word books.
        book = bible.Book[convert[book_abbr].upper()]

        # The pieces come out of str.split() as text; pass numbers instead.
        # NOTE(review): assumes NormalizedReference expects integer chapter
        # and verse values - confirm against the library documentation.
        normalized = bible.NormalizedReference(
            book=book,
            start_chapter=int(start_chapter),
            start_verse=int(start_verse),
            end_chapter=int(end_chapter),
            end_verse=int(end_verse),
        )
        normalized_refs.append(
            bible.format_scripture_references([normalized]).strip()
        )
    return ';'.join(normalized_refs)
@staticmethod
@logger.catch(reraise=False)
def obsidian_links(verses):
    """Turn 'Book Chapter:Verse-Verse' references into Obsidian bible links.

    ``verses`` is a ';'-separated string of references. In each reference
    ':' becomes '#' and '-' becomes '..', and the result is wrapped in
    ``[[...]]``. For a reference that starts with a digit (e.g.
    '1 John 2:3') the first space is removed, giving '[[1John 2#3]]'.

    :param verses: ';'-separated references.
    :return: the links joined with '; '.

    Because of the ``logger.catch(reraise=False)`` decorator, an exception
    raised in here (for instance ``IndexError`` on an empty reference) is
    handed to the logger instead of propagating to the caller.
    """
    links = []
    for verse in verses.split(';'):
        verse = verse.replace(':', '#').replace('-', '..')
        if verse[0] in string.digits:
            # str.replace returns a new string, so the result has to be
            # re-bound; calling it bare left the space in place.
            # Presumably this matches note names like '1John ...' - verify
            # against the generated file names.
            verse = verse.replace(' ', '', 1)
        links.append(f"[[{verse}]]")
    return '; '.join(links)
@ -237,9 +361,10 @@ class MyHTMLParser(HTMLParser):
self.attrs['osisref'] self.attrs['osisref']
) )
else: else:
verses = self.normalize_osis_verses( # verses = self.normalize_osis_verses(
self.convert_passage(self.attrs['passage']) # self.convert_passage(self.attrs['passage'])
) # )
raise ValueError('No `osisref` in tag attrs')
self.write_to_file(self.obsidian_links(verses)) self.write_to_file(self.obsidian_links(verses))
@ -326,18 +451,23 @@ if __name__ == '__main__':
# Get each book in the volume # Get each book in the volume
# This will be the main folder for all the book's chapters # This will be the main folder for all the book's chapters
for book in soup.find_all('div1'): for book in soup.find_all('div1'):
book_name = book['title'].replace('First', '1').replace('Second', '2').replace('Third', '3') book_name = (book['title'].
replace('First ', '1').
replace('Second ', '2').
replace('Third ', '3')
)
logger.info(book_name) logger.info(book_name)
# These are the chapters/files for each book folder # These are the chapters/files for each book folder
for chapter in book.find_all('div2'): for chapter in book.find_all('div2'):
filename = chapter['title'] # For introduction files
filename = f'MH {book_name} {chapter['title']}'
# Remove Roman Numerals from file name # Remove Roman Numerals from file name
if 'CHAPTER' in filename.upper(): if 'CHAPTER' in filename.upper():
_, roman_num = chapter['title'].split(' ') *_, roman_num = chapter['title'].split(' ')
filename = f'Chapter {parser.roman_to_int(roman_num)}' filename = f'MH {book_name} {parser.roman_to_int(roman_num)}'
logger.info(filename) logger.info(filename)
parser.create_md_file(base, book_name, filename) parser.create_md_file(base, book_name, filename)

View File

@ -73481,7 +73481,7 @@ highest.</i></p>
<h3 id="Ps.cxx-p0.2">PSALM CXIX.</h3> <h3 id="Ps.cxx-p0.2">PSALM CXIX.</h3>
<p class="intro" id="Ps.cxx-p1">This is a psalm by itself, like none of the rest; <p class="intro" id="Ps.cxx-p1">I. This is a psalm by itself, like none of the rest;
it excels them all, and shines brightest in this constellation. It it excels them all, and shines brightest in this constellation. It
is much longer than any of them more than twice as long as any of is much longer than any of them more than twice as long as any of
them. It is not making long prayers that Christ censurers, but them. It is not making long prayers that Christ censurers, but
@ -73510,8 +73510,8 @@ any flaw throughout the whole psalm. Archbishop Tillotson says, It
seems to have more of poetical skill and number in it than we at seems to have more of poetical skill and number in it than we at
this distance can easily understand. Some have called it the this distance can easily understand. Some have called it the
saints' alphabet; and it were to be wished we had it as ready in saints' alphabet; and it were to be wished we had it as ready in
our memories as the very letters of our alphabet, as ready as our A our memories as the very letters of our alphabet, as ready as our 'A B C'.
B C. Perhaps the penman found it of use to himself to observe this Perhaps the penman found it of use to himself to observe this
method, as it obliged him to seek for thoughts, and search for method, as it obliged him to seek for thoughts, and search for
them, that he might fill up the quota of every part; and the letter them, that he might fill up the quota of every part; and the letter
he was to begin with might lead him to a word which might suggest a he was to begin with might lead him to a word which might suggest a

View File

@ -9,7 +9,7 @@ from bs4 import BeautifulSoup
from loguru import logger from loguru import logger
from tqdm import tqdm from tqdm import tqdm
base = Path('./Matthew Henry Commentary/xml').absolute() base = Path('Matthew Henry Commentary OLD/xml').absolute()
logger.remove(0) logger.remove(0)
logger.add('log.txt') logger.add('log.txt')