This commit is contained in:
boose_magoose 2023-12-19 06:02:07 -05:00
parent 8f46274857
commit 9753f095f8
9 changed files with 803167 additions and 1 deletions

6
.gitignore vendored
View File

@ -1,4 +1,8 @@
/venv/ /venv/
/Matthew Henry Commentary OLD/xlm/* /Matthew Henry Commentary OLD/xlm/*
/.idea /.idea
*/.obsidian */.obsidian
/Matthew Henry Commentary/
/Matthew Henry Commentary OLD/
/matthew_henry/
/xml/

37
template.html Normal file
View File

@ -0,0 +1,37 @@
<!-- The volume prefaces also appear at this level. They will be handled in a second pass. -->
<div1 title="Book Name">
<!-- Books may also have an introduction section (title="Introduction") -->
<div2 title="Book Chapter">
<!-- This is the intro to the chapter -->
<p class="intro"></p>
<p class="intro"></p>
<!-- Chapter passage -->
<!-- Should Skip -->
<scripCom type="Commentary" osisRef="Bible:Gen.1"/>
<!-- Section passage -->
<scripCom type="Commentary" osisRef="Bible:Gen.1.1-Gen.1.2"/>
<div class="Commentary">
<h4>Verse Section Heading</h4>
<p class="passage">Scripture</p>
<p class="indent">Commentary</p>
<p class="indent">Commentary</p>
</div>
<!-- Section passage -->
<scripCom type="Commentary" passage="Ge 1:1-2" id="Gen.ii-p2.12" parsed="|Gen|1|1|1|2" osisRef="Bible:Gen.1.1-Gen.1.2"/>
<div class="Commentary">
<h4>Verse Section Heading</h4>
<p class="passage">Scripture</p>
<p class="indent">Commentary</p>
<p class="indent">Commentary</p>
</div>
</div2>
</div1>

101991
vol_1.xml Normal file

File diff suppressed because it is too large Load Diff

126303
vol_2.xml Normal file

File diff suppressed because it is too large Load Diff

132796
vol_3.xml Normal file

File diff suppressed because it is too large Load Diff

167879
vol_4.xml Normal file

File diff suppressed because it is too large Load Diff

142852
vol_5.xml Normal file

File diff suppressed because it is too large Load Diff

131252
vol_6.xml Normal file

File diff suppressed because it is too large Load Diff

52
vol_splitter.py Normal file
View File

@ -0,0 +1,52 @@
from pathlib import Path
from bs4 import BeautifulSoup
import sys
def roman_to_int(number: str) -> int:
    """Convert a Roman numeral string to its integer value.

    Args:
        number: The Roman numeral, e.g. ``'XIV'``. Case is ignored, and
            surrounding whitespace and '.' characters are stripped.
            ``None`` is also accepted.

    Returns:
        The integer value of the numeral, or 0 when *number* is ``None``
        or empty after stripping.

    Raises:
        KeyError: If a character is not one of the symbols I, V, X, L, C,
            D, M.
    """
    if number is None:
        return 0
    # Sometimes the Roman numeral comes with a '.'. Stripping it (and any
    # surrounding whitespace) so it does not cause issues. Upper-casing
    # lets mixed-case input such as 'xiv' resolve instead of raising.
    number = number.strip('. \t\r\n').upper()
    roman = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
    total = 0
    # Walk the numeral right to left. A symbol whose value is small
    # relative to the running total (3 * value < total) is in subtractive
    # position (the I in IV, the X in XL); otherwise it is added.
    for symbol in reversed(number):
        num = roman[symbol]
        if 3 * num < total:
            total -= num
        else:
            total += num
    return total
# Split every volume XML file in the current directory into one file per
# <div2> element, written to
#   ./vol_split/<book number> - <book name>/<chapter title>.xml
folder = Path('./vol_split').absolute()
# Running counter used as the numeric prefix of each book folder. It keeps
# counting across volumes, so volumes must be visited in a stable order.
book_num = 1
for vol in sorted(Path('./').glob('*.xml')):
    print(vol)
    # Explicit encoding so the result does not depend on the platform's
    # default locale. NOTE(review): assumes the volume files are UTF-8.
    soup = BeautifulSoup(vol.read_text(encoding='utf-8'), 'xml')
    for book in soup.find_all('div1'):
        book_name = book['title'].replace('First ', '1').replace('Second ', '2').replace('Third ', '3')
        # Skip <div1> elements whose title marks them as title pages,
        # prefaces or indexes; they do not consume a book number.
        if any(i in book_name for i in ['Title', 'Preface', 'Indexes']):
            continue
        book_name = f'{book_num} - {book_name}'
        print('\t', book_name)
        for chapter in book.find_all('div2'):
            # Read the attribute once; this also avoids nesting the same
            # quote character inside the f-string below, which is a
            # SyntaxError on Python versions before 3.12.
            title = chapter['title']
            if 'CHAPTER' in title.upper():
                # Expects exactly two words, e.g. "Chapter IV"; any other
                # word count raises ValueError. Splitting on arbitrary
                # whitespace tolerates stray or repeated spaces.
                _, roman_num = title.split()
                chapter_num = roman_to_int(roman_num)
                chapter_title = f'Chapter {chapter_num}'
            else:
                # Any other <div2> gets a "0 - " prefix on its title.
                chapter_title = f'0 - {title}'
            chapter_file = folder / book_name / f'{chapter_title}.xml'
            chapter_file.parent.mkdir(parents=True, exist_ok=True)
            chapter_file.write_text(str(chapter), encoding='utf-8')
        # Only books that were actually written reach this point.
        book_num += 1