current
This commit is contained in:
parent
8f46274857
commit
9753f095f8
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,4 +1,8 @@
|
||||
/venv/
|
||||
/Matthew Henry Commentary OLD/xlm/*
|
||||
/.idea
|
||||
*/.obsidian
|
||||
*/.obsidian
|
||||
/Matthew Henry Commentary/
|
||||
/Matthew Henry Commentary OLD/
|
||||
/matthew_henry/
|
||||
/xml/
|
||||
|
37
template.html
Normal file
37
template.html
Normal file
@ -0,0 +1,37 @@
|
||||
<!-- The volume prefaces also appear here. They will be handled on a second pass. -->
|
||||
<div1 title="Book Name">
|
||||
|
||||
<!-- Books also have introductions, marked with title="Introduction" -->
|
||||
<div2 title="Book Chapter">
|
||||
<!-- This is the intro to the chapter -->
|
||||
<p class="intro"></p>
|
||||
<p class="intro"></p>
|
||||
|
||||
<!-- Chapter passage -->
|
||||
<!-- Should Skip -->
|
||||
<scripCom type="Commentary" osisRef="Bible:Gen.1"/>
|
||||
|
||||
<!-- Section passage -->
|
||||
<scripCom type="Commentary" osisRef="Bible:Gen.1.1-Gen.1.2"/>
|
||||
|
||||
<div class="Commentary">
|
||||
<h4>Verse Section Heading</h4>
|
||||
<p class="passage">Scripture</p>
|
||||
<p class="indent">Commentary</p>
|
||||
<p class="indent">Commentary</p>
|
||||
</div>
|
||||
|
||||
<!-- Section passage -->
|
||||
<scripCom type="Commentary" passage="Ge 1:1-2" id="Gen.ii-p2.12" parsed="|Gen|1|1|1|2" osisRef="Bible:Gen.1.1-Gen.1.2"/>
|
||||
|
||||
<div class="Commentary">
|
||||
<h4>Verse Section Heading</h4>
|
||||
<p class="passage">Scripture</p>
|
||||
<p class="indent">Commentary</p>
|
||||
<p class="indent">Commentary</p>
|
||||
</div>
|
||||
|
||||
</div2>
|
||||
|
||||
|
||||
</div1>
|
52
vol_splitter.py
Normal file
52
vol_splitter.py
Normal file
@ -0,0 +1,52 @@
|
||||
from pathlib import Path
|
||||
from bs4 import BeautifulSoup
|
||||
import sys
|
||||
|
||||
|
||||
def roman_to_int(number: str) -> int:
    """Convert a Roman numeral string to its integer value.

    Upper- and lower-case numerals are accepted, and surrounding
    whitespace and periods are ignored (e.g. ``"IV."`` or ``"iv"``).

    Args:
        number: The Roman numeral to convert, or ``None``.

    Returns:
        The integer value of the numeral; ``0`` when ``number`` is ``None``
        or contains no symbols after stripping.

    Raises:
        KeyError: If ``number`` contains a character that is not one of the
            Roman numeral symbols ``I V X L C D M``.
    """
    if number is None:
        return 0

    # Sometimes the Roman numeral comes with a '.'. Stripping it (together
    # with stray whitespace) and normalising the case so the symbol lookup
    # below does not fail.
    number = number.strip(' .\t\r\n').upper()

    roman = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
    total = 0
    # Walk from right to left: a symbol whose value is small compared with
    # the running total stands in front of a larger symbol (the I in IV,
    # the X in XL) and is therefore subtracted instead of added.
    for symbol in reversed(number):
        value = roman[symbol]
        if 3 * value < total:
            total -= value
        else:
            total += value

    return total
|
||||
|
||||
# Destination root: one sub-folder per book, one XML file per chapter.
folder = Path('./vol_split').absolute()

# Running counter used as a numeric prefix on each book's folder name.
# It only advances for books that are actually written out.
book_num = 1
for vol in sorted(Path('./').glob('*.xml')):
    print(vol)
    # Hand the parser raw bytes so it determines the encoding from the
    # document itself rather than from the platform's default text encoding.
    soup = BeautifulSoup(vol.read_bytes(), 'xml')
    for book in soup.find_all('div1'):
        book_name = (
            book['title']
            .replace('First ', '1')
            .replace('Second ', '2')
            .replace('Third ', '3')
        )
        # Title pages, prefaces and indexes are not books; skip them
        # without consuming a book number.
        if any(skipped in book_name for skipped in ('Title', 'Preface', 'Indexes')):
            continue

        book_name = f'{book_num} - {book_name}'
        print('\t', book_name)

        for chapter in book.find_all('div2'):
            title = chapter['title']
            if 'CHAPTER' in title.upper():
                # The numeral is the last whitespace-separated token of
                # the title; this also tolerates repeated spaces.
                roman_num = title.split()[-1]
                chapter_num = roman_to_int(roman_num)
                chapter_title = f'Chapter {chapter_num}'
            else:
                # Non-chapter sections get a "0 - " prefix in the file name.
                chapter_title = f'0 - {title}'

            chapter_file = folder / book_name / f'{chapter_title}.xml'
            chapter_file.parent.mkdir(parents=True, exist_ok=True)

            # Write UTF-8 explicitly so the output does not depend on the
            # platform's default encoding.
            chapter_file.write_text(str(chapter), encoding='utf-8')

        book_num += 1
|
||||
|
Loading…
Reference in New Issue
Block a user