current
This commit is contained in:
parent
6f00db705e
commit
9997abcbe6
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
/venv/
|
||||
/Matthew Henry Commentary/xlm/
|
||||
/Matthew Henry Commentary OLD/xlm/*
|
||||
/.idea
|
||||
*/.obsidian
|
6
Matthew Henry Commentary/.obsidian/app.json
vendored
6
Matthew Henry Commentary/.obsidian/app.json
vendored
@ -1,5 +1 @@
|
||||
{
|
||||
"readableLineLength": true,
|
||||
"strictLineBreaks": false,
|
||||
"showLineNumber": false
|
||||
}
|
||||
{}
|
@ -1,22 +1,18 @@
|
||||
{
|
||||
"main": {
|
||||
"id": "74aa2bd24def0dbb",
|
||||
"id": "55636d74db5cd17b",
|
||||
"type": "split",
|
||||
"children": [
|
||||
{
|
||||
"id": "2d08192becdbbc46",
|
||||
"id": "70dbebffc56f01b6",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "26aa765acbe7529a",
|
||||
"id": "f5dca16b5209f2c5",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "markdown",
|
||||
"state": {
|
||||
"file": "xlm/Deuteronomy/Chapter I.md",
|
||||
"mode": "source",
|
||||
"source": false
|
||||
}
|
||||
"type": "empty",
|
||||
"state": {}
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -25,15 +21,15 @@
|
||||
"direction": "vertical"
|
||||
},
|
||||
"left": {
|
||||
"id": "195826f1439b747b",
|
||||
"id": "7f0c5c2cf0aa46bb",
|
||||
"type": "split",
|
||||
"children": [
|
||||
{
|
||||
"id": "f08655a065f50eac",
|
||||
"id": "0c030a34bea5328c",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "aa8f58742beaa66c",
|
||||
"id": "1f642b2794afdb3c",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "file-explorer",
|
||||
@ -43,7 +39,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "5e4fc3226ed10d77",
|
||||
"id": "32a633c38dafb6cd",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "search",
|
||||
@ -58,7 +54,7 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "51e400c71a311b28",
|
||||
"id": "2ad3b96f7190f448",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "bookmarks",
|
||||
@ -72,30 +68,19 @@
|
||||
"width": 300
|
||||
},
|
||||
"right": {
|
||||
"id": "a7de7eb166b864ba",
|
||||
"id": "70ee2af17b487873",
|
||||
"type": "split",
|
||||
"children": [
|
||||
{
|
||||
"id": "fc018b89448f5dfc",
|
||||
"id": "a572819b160e1d9f",
|
||||
"type": "tabs",
|
||||
"children": [
|
||||
{
|
||||
"id": "27c2bf6ccd240b82",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "outline",
|
||||
"state": {
|
||||
"file": "xlm/Deuteronomy/Chapter I.md"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "13a00d44d2a7761f",
|
||||
"id": "6aa66234614af992",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "backlink",
|
||||
"state": {
|
||||
"file": "xlm/Deuteronomy/Chapter I.md",
|
||||
"collapseAll": false,
|
||||
"extraContext": false,
|
||||
"sortOrder": "alphabetical",
|
||||
@ -107,19 +92,18 @@
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "3886b56e0b09b278",
|
||||
"id": "af8ee6cd6dfa886a",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "outgoing-link",
|
||||
"state": {
|
||||
"file": "xlm/Deuteronomy/Chapter I.md",
|
||||
"linksCollapsed": false,
|
||||
"unlinkedCollapsed": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "4eec6a6bf78181c7",
|
||||
"id": "0b3c3c936f81d729",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "tag",
|
||||
@ -128,12 +112,21 @@
|
||||
"useHierarchy": true
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "962eb91d668348c3",
|
||||
"type": "leaf",
|
||||
"state": {
|
||||
"type": "outline",
|
||||
"state": {}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"direction": "horizontal",
|
||||
"width": 300
|
||||
"width": 300,
|
||||
"collapsed": true
|
||||
},
|
||||
"left-ribbon": {
|
||||
"hiddenItems": {
|
||||
@ -145,43 +138,6 @@
|
||||
"command-palette:Open command palette": false
|
||||
}
|
||||
},
|
||||
"active": "26aa765acbe7529a",
|
||||
"lastOpenFiles": [
|
||||
"xlm/Genesis/Chapter I.md",
|
||||
"xlm/Indexes/Index of Pages of the Print Edition.md",
|
||||
"xlm/Indexes/Index of Scripture Commentary.md",
|
||||
"xlm/Indexes/Index of Scripture References.md",
|
||||
"xlm/Indexes",
|
||||
"xlm/Deuteronomy/Chapter XXXIV.md",
|
||||
"xlm/Deuteronomy/Chapter XXXIII.md",
|
||||
"xlm/Deuteronomy/Chapter XXXII.md",
|
||||
"xlm/Deuteronomy/Chapter XXXI.md",
|
||||
"xlm/Deuteronomy/Chapter XXX.md",
|
||||
"xlm/Deuteronomy/Chapter XXIX.md",
|
||||
"xlm/Deuteronomy/Chapter XXVIII.md",
|
||||
"xlm/Deuteronomy/Chapter XXVII.md",
|
||||
"xlm/Deuteronomy/Chapter XXVI.md",
|
||||
"xlm/Deuteronomy/Chapter XXV.md",
|
||||
"xlm/Deuteronomy/Chapter XXIV.md",
|
||||
"xlm/Deuteronomy/Chapter XXIII.md",
|
||||
"xlm/Deuteronomy/Chapter XXII.md",
|
||||
"xlm/Deuteronomy/Chapter XXI.md",
|
||||
"xlm/Deuteronomy/Chapter XX.md",
|
||||
"xlm/Deuteronomy/Chapter XIX.md",
|
||||
"xlm/Deuteronomy/Chapter XVIII.md",
|
||||
"xlm/Deuteronomy/Chapter XVII.md",
|
||||
"xlm/Deuteronomy/Chapter XVI.md",
|
||||
"xlm/Deuteronomy/Chapter XV.md",
|
||||
"xlm/Deuteronomy/Chapter XIV.md",
|
||||
"xlm/Deuteronomy/Chapter XIII.md",
|
||||
"xlm/Deuteronomy",
|
||||
"xlm/Numbers",
|
||||
"xlm/Leviticus",
|
||||
"xlm/Exodus",
|
||||
"xlm/Genesis/Chapter I.md~",
|
||||
"xlm/Genesis",
|
||||
"xlm",
|
||||
"xlm/None",
|
||||
"Commentary on the Whole Bible Volume I (Ge - Matthew Henry.txt"
|
||||
]
|
||||
"active": "f5dca16b5209f2c5",
|
||||
"lastOpenFiles": []
|
||||
}
|
@ -7,7 +7,7 @@ class MyHTMLParser(HTMLParser):
|
||||
file = None
|
||||
current_tag = None
|
||||
writeable = False
|
||||
output_folder = Path('./Matthew Henry Commentary')
|
||||
output_folder = Path('Matthew Henry Commentary OLD')
|
||||
section = []
|
||||
|
||||
@staticmethod
|
||||
|
@ -115,7 +115,7 @@ if __name__ == '__main__':
|
||||
folder = Path('/home/bear/PycharmProjects/mark_henry_md/matthew_henry')
|
||||
|
||||
htm_files = list(folder.glob('*.HTM'))
|
||||
output_folder = Path('./Matthew Henry Commentary').absolute()
|
||||
output_folder = Path('Matthew Henry Commentary OLD').absolute()
|
||||
# for file in tqdm(htm_files[7:8]):
|
||||
for file in htm_files[7:8]:
|
||||
soup = BeautifulSoup(file.read_text(), 'html.parser')
|
||||
|
156
main.py
156
main.py
@ -1,3 +1,4 @@
|
||||
import itertools
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
@ -9,10 +10,90 @@ from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
from tqdm import tqdm
|
||||
|
||||
base = Path('./xml/Matthew Henry Commentary').absolute()
|
||||
base = Path('./xml/Matthew Henry Commentary 2').absolute()
|
||||
|
||||
logger.remove(0)
|
||||
logger.add('log.txt')
|
||||
|
||||
from functools import partialmethod
|
||||
|
||||
logger.__class__.foobar = partialmethod(logger.__class__.log, "foobar", level='WARNING')
|
||||
logger.add('log.txt', level='WARNING')
|
||||
|
||||
convert = dict([
|
||||
("Eccl", "Ecclesiastes"),
|
||||
("Rom", "Romans"),
|
||||
("2Tim", "2 Timothy"),
|
||||
("2Pet", "2 Peter"),
|
||||
("Gen", "Genesis"),
|
||||
("John", "John"),
|
||||
("1Cor", "1 Corinthians"),
|
||||
("Ezek", "Ezekiel"),
|
||||
("Heb", "Hebrews"),
|
||||
("Rev", "Revelation"),
|
||||
("Eph", "Ephesians"),
|
||||
("Isa", "Isaiah"),
|
||||
("Ps", "Psalms"),
|
||||
("Acts", "Acts"),
|
||||
("Deut", "Deuteronomy"),
|
||||
("Neh", "Nehemiah"),
|
||||
("1Chr", "1 Chronicles"),
|
||||
("2Kgs", "2 Kings"),
|
||||
("2Cor", "2 Corinthians"),
|
||||
("Hos", "Hosea"),
|
||||
("Exod", "Exodus"),
|
||||
("Matt", "Matthew"),
|
||||
("Job", "Job"),
|
||||
("Prov", "Proverbs"),
|
||||
("Col", "Colossians"),
|
||||
("Jer", "Jeremiah"),
|
||||
("1John", "1 John"),
|
||||
("Jas", "James"),
|
||||
("1Tim", "1 Timothy"),
|
||||
("Amos", "Amos"),
|
||||
("Mic", "Micah"),
|
||||
("Dan", "Daniel"),
|
||||
("Jonah", "Jonah"),
|
||||
("Luke", "Luke"),
|
||||
("Mal", "Malachi"),
|
||||
("Zech", "Zechariah"),
|
||||
("Phil", "Philippians"),
|
||||
("Mark", "Mark"),
|
||||
("2Thess", "2 Thessalonians"),
|
||||
("Jude", "Jude"),
|
||||
("1Sam", "1 Samuel"),
|
||||
("Num", "Numbers"),
|
||||
("Gal", "Galatians"),
|
||||
("1Pet", "1 Peter"),
|
||||
("Lev", "Leviticus"),
|
||||
("2Chr", "2 Chronicles"),
|
||||
("Lam", "Lamentations"),
|
||||
("1Kgs", "1 Kings"),
|
||||
("Ezra", "Ezra"),
|
||||
("Obad", "Obadiah"),
|
||||
("Hab", "Habakkuk"),
|
||||
("2Sam", "2 Samuel"),
|
||||
("Josh", "Joshua"),
|
||||
("Judg", "Judges"),
|
||||
("Ruth", "Ruth"),
|
||||
("Esth", "Esther"),
|
||||
("Phlm", "Philemon"),
|
||||
("1Thess", "1 Thessalonians"),
|
||||
("Joel", "Joel"),
|
||||
("Titus", "Titus"),
|
||||
("3John", "3 John"),
|
||||
("Zeph", "Zephaniah"),
|
||||
("Song", "Song of Songs"),
|
||||
("Hag", "Haggai"),
|
||||
("Wis", "Wisdom of Solomon"),
|
||||
("2John", "2 John"),
|
||||
("2Macc", "2 Maccabees"),
|
||||
("Nah", "Nahum"),
|
||||
("Sir", "Ecclesiasticus"),
|
||||
("Psa", "Psalms"),
|
||||
("1Macc", "1 Maccabees"),
|
||||
("2Peter", "2 Peter"),
|
||||
]
|
||||
)
|
||||
|
||||
class MyHTMLParser(HTMLParser):
|
||||
file = None
|
||||
@ -152,15 +233,17 @@ class MyHTMLParser(HTMLParser):
|
||||
|
||||
file.write(line)
|
||||
|
||||
@staticmethod
|
||||
def normalize_osis_verses(verses):
|
||||
"""Takes this 'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Col.1.16 Bible:Heb.1.2 Bible:Acts.17.24-Acts.17.25'
|
||||
def normalize_osis_verses(self, verses):
|
||||
"""Takes this
|
||||
'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Col.1.16 Bible:Heb.1.2 Bible:Acts.17.24-Acts.17.25 Bible:Exod.5.26 Bible:Gen.5'
|
||||
and turns it into somthing more readable"""
|
||||
normalized_refs = list()
|
||||
for chunks in verses.removeprefix("Bible:").split('Bible:'):
|
||||
spanned_verses = list()
|
||||
for ref in chunks.split('-'):
|
||||
verse = ref.replace('.', ' ', 1).replace('.', ':')
|
||||
book = ref.split('.')[0]
|
||||
verse.replace(book, convert[book])
|
||||
spanned_verses.append(
|
||||
bible.format_scripture_references(
|
||||
bible.get_references(verse.strip())
|
||||
@ -174,16 +257,57 @@ class MyHTMLParser(HTMLParser):
|
||||
except:
|
||||
logger.warning(f"Error with: {verses=}: {spanned_verses=}")
|
||||
raise
|
||||
|
||||
if normalized is False:
|
||||
print(f"Error with: {verses=}: {spanned_verses=}")
|
||||
sys.exit()
|
||||
|
||||
normalized_refs.append(normalized)
|
||||
|
||||
return ';'.join(normalized_refs)
|
||||
|
||||
# @staticmethod
|
||||
def old_normalize_osis_verses(self, verses):
|
||||
"""Takes this
|
||||
'Bible:John.1.3 Bible:John.1.10 Bible:Eph.3.9 Bible:Acts.17.24-Acts.17.25 Bible:Exod.5.26 Bible:Gen.5'
|
||||
and turns it into somthing more readable"""
|
||||
normalized_refs = list()
|
||||
for chunks in verses.removeprefix("Bible:").split('Bible:'):
|
||||
spanned_verses = chunks.split('-')
|
||||
if len(spanned_verses) < 2:
|
||||
spanned_verses = list(itertools.repeat(spanned_verses[0], 2))
|
||||
|
||||
book, start_chapter, start_verse = spanned_verses[0].split('.')
|
||||
_, end_chapter, end_verse = spanned_verses[1].split('.')
|
||||
book = bible.Book[convert[book].upper()]
|
||||
|
||||
normalized = bible.NormalizedReference(
|
||||
book=book,
|
||||
start_chapter=start_chapter,
|
||||
start_verse=start_verse,
|
||||
end_chapter=end_chapter,
|
||||
end_verse=end_verse,
|
||||
)
|
||||
|
||||
if normalized is False:
|
||||
logger.foobar(f'{self.args=}')
|
||||
|
||||
normalized_refs.append(
|
||||
bible.format_scripture_references([normalized]).strip()
|
||||
)
|
||||
|
||||
return ';'.join(normalized_refs)
|
||||
|
||||
@staticmethod
|
||||
@logger.catch(reraise=False)
|
||||
def obsidian_links(verses):
|
||||
"""Take `Book Chapter:Ver-Ver and turn it into a obsidian bible link"""
|
||||
links = []
|
||||
for verse in verses.split(';'):
|
||||
links.append(f"[[{verse.replace(':', '#').replace('-', '..')}]]")
|
||||
verse = verse.replace(':', '#').replace('-', '..')
|
||||
if verse[0] in string.digits:
|
||||
verse.replace(' ', '', 1)
|
||||
links.append(f"[[{verse}]]")
|
||||
|
||||
return '; '.join(links)
|
||||
|
||||
@ -237,9 +361,10 @@ class MyHTMLParser(HTMLParser):
|
||||
self.attrs['osisref']
|
||||
)
|
||||
else:
|
||||
verses = self.normalize_osis_verses(
|
||||
self.convert_passage(self.attrs['passage'])
|
||||
)
|
||||
# verses = self.normalize_osis_verses(
|
||||
# self.convert_passage(self.attrs['passage'])
|
||||
# )
|
||||
raise ValueError('No `osisref` in tag attrs')
|
||||
|
||||
self.write_to_file(self.obsidian_links(verses))
|
||||
|
||||
@ -326,18 +451,23 @@ if __name__ == '__main__':
|
||||
# Get each book in the volume
|
||||
# This will be the main folder for all the book's chapters
|
||||
for book in soup.find_all('div1'):
|
||||
book_name = book['title'].replace('First', '1').replace('Second', '2').replace('Third', '3')
|
||||
book_name = (book['title'].
|
||||
replace('First ', '1').
|
||||
replace('Second ', '2').
|
||||
replace('Third ', '3')
|
||||
)
|
||||
logger.info(book_name)
|
||||
|
||||
# These are the chapters/files for each book folder
|
||||
for chapter in book.find_all('div2'):
|
||||
|
||||
filename = chapter['title']
|
||||
# For introduction files
|
||||
filename = f'MH {book_name} {chapter['title']}'
|
||||
|
||||
# Remove Roman Numerals from file name
|
||||
if 'CHAPTER' in filename.upper():
|
||||
_, roman_num = chapter['title'].split(' ')
|
||||
filename = f'Chapter {parser.roman_to_int(roman_num)}'
|
||||
*_, roman_num = chapter['title'].split(' ')
|
||||
filename = f'MH {book_name} {parser.roman_to_int(roman_num)}'
|
||||
|
||||
logger.info(filename)
|
||||
parser.create_md_file(base, book_name, filename)
|
||||
|
@ -73481,7 +73481,7 @@ highest.</i></p>
|
||||
<h3 id="Ps.cxx-p0.2">PSALM CXIX.</h3>
|
||||
|
||||
|
||||
<p class="intro" id="Ps.cxx-p1">This is a psalm by itself, like none of the rest;
|
||||
<p class="intro" id="Ps.cxx-p1">I. This is a psalm by itself, like none of the rest;
|
||||
it excels them all, and shines brightest in this constellation. It
|
||||
is much longer than any of them more than twice as long as any of
|
||||
them. It is not making long prayers that Christ censurers, but
|
||||
@ -73510,8 +73510,8 @@ any flaw throughout the whole psalm. Archbishop Tillotson says, It
|
||||
seems to have more of poetical skill and number in it than we at
|
||||
this distance can easily understand. Some have called it the
|
||||
saints' alphabet; and it were to be wished we had it as ready in
|
||||
our memories as the very letters of our alphabet, as ready as our A
|
||||
B C. Perhaps the penman found it of use to himself to observe this
|
||||
our memories as the very letters of our alphabet, as ready as our 'A B C'.
|
||||
Perhaps the penman found it of use to himself to observe this
|
||||
method, as it obliged him to seek for thoughts, and search for
|
||||
them, that he might fill up the quota of every part; and the letter
|
||||
he was to begin with might lead him to a word which might suggest a
|
||||
|
@ -9,7 +9,7 @@ from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
from tqdm import tqdm
|
||||
|
||||
base = Path('./Matthew Henry Commentary/xml').absolute()
|
||||
base = Path('Matthew Henry Commentary OLD/xml').absolute()
|
||||
|
||||
logger.remove(0)
|
||||
logger.add('log.txt')
|
||||
|
Loading…
Reference in New Issue
Block a user