current
This commit is contained in:
parent
8f46274857
commit
9753f095f8
4
.gitignore
vendored
4
.gitignore
vendored
@ -2,3 +2,7 @@
|
|||||||
/Matthew Henry Commentary OLD/xlm/*
|
/Matthew Henry Commentary OLD/xlm/*
|
||||||
/.idea
|
/.idea
|
||||||
*/.obsidian
|
*/.obsidian
|
||||||
|
/Matthew Henry Commentary/
|
||||||
|
/Matthew Henry Commentary OLD/
|
||||||
|
/matthew_henry/
|
||||||
|
/xml/
|
||||||
|
37
template.html
Normal file
37
template.html
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
<!-- There are also the volume prefaces here. Will get those on a second passthrough -->
|
||||||
|
<div1 title="Book Name">
|
||||||
|
|
||||||
|
<!-- There are also book introductions, which use title="Introduction" -->
|
||||||
|
<div2 title="Book Chapter">
|
||||||
|
<!-- This is the intro to the chapter -->
|
||||||
|
<p class="intro"></p>
|
||||||
|
<p class="intro"></p>
|
||||||
|
|
||||||
|
<!-- Chapter passage -->
|
||||||
|
<!-- Should Skip -->
|
||||||
|
<scripCom type="Commentary" osisRef="Bible:Gen.1"/>
|
||||||
|
|
||||||
|
<!-- Section passage -->
|
||||||
|
<scripCom type="Commentary" osisRef="Bible:Gen.1.1-Gen.1.2"/>
|
||||||
|
|
||||||
|
<div class="Commentary">
|
||||||
|
<h4>Verse Section Heading</h4>
|
||||||
|
<p class="passage">Scripture</p>
|
||||||
|
<p class="indent">Commentary</p>
|
||||||
|
<p class="indent">Commentary</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Section passage -->
|
||||||
|
<scripCom type="Commentary" passage="Ge 1:1-2" id="Gen.ii-p2.12" parsed="|Gen|1|1|1|2" osisRef="Bible:Gen.1.1-Gen.1.2"/>
|
||||||
|
|
||||||
|
<div class="Commentary">
|
||||||
|
<h4>Verse Section Heading</h4>
|
||||||
|
<p class="passage">Scripture</p>
|
||||||
|
<p class="indent">Commentary</p>
|
||||||
|
<p class="indent">Commentary</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div2>
|
||||||
|
|
||||||
|
|
||||||
|
</div1>
|
52
vol_splitter.py
Normal file
52
vol_splitter.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def roman_to_int(number: str) -> int:
    """Convert a Roman numeral string to its integer value.

    Args:
        number: Roman numeral such as ``'XIV'``, ``'xiv'`` or ``'XIV.'``.
            Letter case is ignored, as are surrounding whitespace and
            periods. ``None`` is accepted and treated as "no numeral".

    Returns:
        The integer value, or ``0`` when ``number`` is ``None`` or empty.

    Raises:
        KeyError: If ``number`` contains a character that is not one of
            the Roman numeral letters I, V, X, L, C, D or M.
    """
    if number is None:
        return 0

    # Sometimes the Roman numeral comes with a '.', stray whitespace or in
    # lower case. Normalising so the symbol lookup below does not fail.
    number = number.strip(' \t\r\n.').upper()

    roman = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
    total = 0
    # Walk from the least significant symbol. A symbol whose value is well
    # below the running total sits in front of a larger symbol and is
    # therefore subtractive (the I in IV); otherwise it is additive.
    for symbol in reversed(number):
        value = roman[symbol]
        if 3 * value < total:
            total -= value
        else:
            total += value

    return total
|
||||||
|
|
||||||
|
# Split every volume XML file in the current directory into one file per
# chapter, written to ./vol_split/<book number> - <book name>/<chapter>.xml.

# Root directory that receives one sub-folder per book.
folder = Path('./vol_split').absolute()

# Running counter used as a prefix for the book folders; it only advances
# for books that are actually written out.
book_num = 1
for vol in sorted(Path('./').glob('*.xml')):
    print(vol)
    # Hand the raw bytes to the parser so the encoding is taken from the
    # document itself instead of the platform's locale default.
    soup = BeautifulSoup(vol.read_bytes(), 'xml')
    for book in soup.find_all('div1'):
        book_name = book['title'].replace('First ', '1').replace('Second ', '2').replace('Third ', '3')
        # Skip every div1 whose title contains one of these markers.
        if any(i in book_name for i in ['Title', 'Preface', 'Indexes']):
            continue

        book_name = f'{book_num} - {book_name}'

        print('\t', book_name)
        for chapter in book.find_all('div2'):
            title = chapter['title']
            if 'CHAPTER' in title.upper():
                # The numeral is the last whitespace-separated token;
                # split() without arguments tolerates repeated spaces.
                roman_num = title.split()[-1]
                chapter_num = roman_to_int(roman_num)
                chapter_title = f'Chapter {chapter_num}'
            else:
                # Non-chapter sections get a '0 - ' prefix. The title is
                # bound to a local first because reusing the same quote
                # character inside an f-string needs Python 3.12+.
                chapter_title = f'0 - {title}'

            chapter_file = folder / book_name / f'{chapter_title}.xml'
            chapter_file.parent.mkdir(parents=True, exist_ok=True)

            # Write UTF-8 explicitly so non-ASCII text does not depend on
            # the locale encoding.
            chapter_file.write_text(str(chapter), encoding='utf-8')
        book_num += 1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user