Integrate subby
This commit is contained in:
parent
0c92977f5a
commit
616ab317c1
135
scripts/subby/.gitignore
vendored
Normal file
135
scripts/subby/.gitignore
vendored
Normal file
@@ -0,0 +1,135 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
stubs/
|
||||
mypy.ini
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# Ruff
|
||||
.ruff_cache/
|
||||
.ruff.toml
|
||||
2
scripts/subby/.pylintrc
Normal file
2
scripts/subby/.pylintrc
Normal file
@@ -0,0 +1,2 @@
|
||||
[FORMAT]
|
||||
max-line-length=120
|
||||
674
scripts/subby/LICENSE
Normal file
674
scripts/subby/LICENSE
Normal file
@@ -0,0 +1,674 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 3, 29 June 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU General Public License is a free, copyleft license for
|
||||
software and other kinds of works.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
the GNU General Public License is intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users. We, the Free Software Foundation, use the
|
||||
GNU General Public License for most of our software; it applies also to
|
||||
any other work released this way by its authors. You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to prevent others from denying you
|
||||
these rights or asking you to surrender the rights. Therefore, you have
|
||||
certain responsibilities if you distribute copies of the software, or if
|
||||
you modify it: responsibilities to respect the freedom of others.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must pass on to the recipients the same
|
||||
freedoms that you received. You must make sure that they, too, receive
|
||||
or can get the source code. And you must show them these terms so they
|
||||
know their rights.
|
||||
|
||||
Developers that use the GNU GPL protect your rights with two steps:
|
||||
(1) assert copyright on the software, and (2) offer you this License
|
||||
giving you legal permission to copy, distribute and/or modify it.
|
||||
|
||||
For the developers' and authors' protection, the GPL clearly explains
|
||||
that there is no warranty for this free software. For both users' and
|
||||
authors' sake, the GPL requires that modified versions be marked as
|
||||
changed, so that their problems will not be attributed erroneously to
|
||||
authors of previous versions.
|
||||
|
||||
Some devices are designed to deny users access to install or run
|
||||
modified versions of the software inside them, although the manufacturer
|
||||
can do so. This is fundamentally incompatible with the aim of
|
||||
protecting users' freedom to change the software. The systematic
|
||||
pattern of such abuse occurs in the area of products for individuals to
|
||||
use, which is precisely where it is most unacceptable. Therefore, we
|
||||
have designed this version of the GPL to prohibit the practice for those
|
||||
products. If such problems arise substantially in other domains, we
|
||||
stand ready to extend this provision to those domains in future versions
|
||||
of the GPL, as needed to protect the freedom of users.
|
||||
|
||||
Finally, every program is threatened constantly by software patents.
|
||||
States should not allow patents to restrict development and use of
|
||||
software on general-purpose computers, but in those that do, we wish to
|
||||
avoid the special danger that patents applied to a free program could
|
||||
make it effectively proprietary. To prevent this, the GPL assures that
|
||||
patents cannot be used to render the program non-free.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Use with the GNU Affero General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU Affero General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the special requirements of the GNU Affero General Public License,
|
||||
section 13, concerning interaction through a network will apply to the
|
||||
combination as such.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program does terminal interaction, make it output a short
|
||||
notice like this when it starts in an interactive mode:
|
||||
|
||||
<program> Copyright (C) <year> <name of author>
|
||||
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, your program's commands
|
||||
might be different; for a GUI interface, you would use an "about box".
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU GPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
|
||||
The GNU General Public License does not permit incorporating your program
|
||||
into proprietary programs. If your program is a subroutine library, you
|
||||
may consider it more useful to permit linking proprietary applications with
|
||||
the library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License. But first, please read
|
||||
<https://www.gnu.org/licenses/why-not-lgpl.html>.
|
||||
137
scripts/subby/README.md
Normal file
137
scripts/subby/README.md
Normal file
@ -0,0 +1,137 @@
|
||||
# Subby
|
||||
Advanced subtitle converter and processor.
|
||||
|
||||
# Supported formats
|
||||
WebVTT, DFXP/TTML/TTML2/SMPTE, SAMI, WVTT (WebVTT in MP4), STPP/ISMT (DFXP in MP4), JSON (Bilibili)
|
||||
|
||||
# Functionality
|
||||
- converts supported input format to SRT
|
||||
- retains select formatting tags (italics, basic \an8 positioning)
|
||||
- corrects commonly found flaws in subtitles
|
||||
- opinionated timing and formatting improvements
|
||||
|
||||
# Installation
|
||||
```
|
||||
git clone https://github.com/vevv/subby
|
||||
cd subby
|
||||
pip install .
|
||||
```
|
||||
|
||||
# Usage notes
|
||||
`CommonIssuesFixer` should be run both after conversion and after SDH stripping,
|
||||
as it's designed to fix source issues, including ones which can cause playback problems.
|
||||
|
||||
`CommonIssuesFixer` removes short gaps (2 frames) by default.
|
||||
This can be disabled by setting `CommonIssuesFixer.remove_gaps` to `False` before running.
|
||||
|
||||
`subby.SubRipFile` provides methods similar to those of `pysrt.SubRipFile`, but isn't a fully compatible replacement.
|
||||
Only `from_string`, `clean_indexes`, `export`, `save` are guaranteed to work.
|
||||
|
||||
This object is otherwise just a list storing `srt.Subtitle` elements.
|
||||
|
||||
## Language specific fixing
|
||||
|
||||
As of 0.3.6, both `CommonIssuesFixer` and `SDHStripper` support a language parameter,
|
||||
which accepts a BCP47 language code.
|
||||
|
||||
This is currently used only for RTL tagging in CommonIssuesFixer.
|
||||
|
||||
**It is highly recommended for every script to pass it for future use.**
|
||||
|
||||
# Command line usage
|
||||
```
|
||||
Usage: subby [OPTIONS] COMMAND [ARGS]...
|
||||
|
||||
Subby—Advanced Subtitle Converter and Processor.
|
||||
|
||||
Options:
|
||||
-d, --debug Enable DEBUG level logs.
|
||||
--help Show this message and exit.
|
||||
|
||||
Commands:
|
||||
convert Convert a Subtitle to SubRip (SRT).
|
||||
process SubRip (SRT) post-processing.
|
||||
version Print version information.
|
||||
```
|
||||
Example
|
||||
|
||||
```
|
||||
subby process /path/to/subs/subs.srt strip-sdh
|
||||
```
|
||||
|
||||
# Library usage
|
||||
## Converter
|
||||
```py
|
||||
from subby import WebVTTConverter
|
||||
from pathlib import Path
|
||||
|
||||
converter = WebVTTConverter()
|
||||
file = Path('test.vtt')
|
||||
|
||||
# All statements below are equivalent
|
||||
srt = converter.from_file(file)
|
||||
srt = converter.from_string(file.read_text())
|
||||
srt = converter.from_bytes(file.read_bytes())
|
||||
|
||||
# srt is subby.SubRipFile
|
||||
|
||||
output = Path('file.srt')
|
||||
srt.save(output)
|
||||
# saved to file.srt
|
||||
```
|
||||
|
||||
## Processor
|
||||
Processor methods return a `(srt, status)` tuple: the processed `subby.SubRipFile` and a bool indicating whether any changes were made, which is useful for determining if SDH subtitles should be saved.
|
||||
|
||||
```py
|
||||
from subby import CommonIssuesFixer
|
||||
from pathlib import Path
|
||||
|
||||
processor = CommonIssuesFixer()
|
||||
file = Path('test.srt')
|
||||
|
||||
# All statements below are equivalent
|
||||
srt, status = processor.from_file(file)
|
||||
srt, status = processor.from_string(file.read_text())
|
||||
srt, status = processor.from_bytes(file.read_bytes())
|
||||
|
||||
# srt is subby.SubRipFile, status is bool
|
||||
|
||||
output = Path('test_fixed.srt')
|
||||
srt.save(output)
|
||||
# saved to test_fixed.srt
|
||||
```
|
||||
|
||||
## Chaining
|
||||
The following example will convert a VTT file, fix common issues and save the result, then attempt to strip SDH and save the stripped version.
|
||||
|
||||
```py
|
||||
from subby import WebVTTConverter, CommonIssuesFixer, SDHStripper
|
||||
from pathlib import Path
|
||||
|
||||
converter = WebVTTConverter()
|
||||
fixer = CommonIssuesFixer()
|
||||
stripper = SDHStripper()
|
||||
|
||||
file = Path('file.vtt')
|
||||
file_sdh = Path('file_sdh.srt')
|
||||
file_stripped = Path('file_stripped.srt')
|
||||
srt, _ = fixer.from_srt(converter.from_file(file))
|
||||
|
||||
srt.save(file_sdh)
|
||||
# saved to file_sdh.srt
|
||||
|
||||
stripped, status = stripper.from_srt(srt)
|
||||
if status is True:
|
||||
    print('stripping successful')
|
||||
stripped.save(file_stripped)
|
||||
# saved to file_stripped.srt
|
||||
```
|
||||
|
||||
## Tests
|
||||
To run tests, go to the "tests" directory and run `pytest`.
|
||||
|
||||
## Contributors
|
||||
|
||||
<a href="https://github.com/vevv"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/68520787?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
|
||||
<a href="https://github.com/rlaphoenix"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/17136956?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt=""/></a>
|
||||
551
scripts/subby/poetry.lock
generated
Normal file
551
scripts/subby/poetry.lock
generated
Normal file
@ -0,0 +1,551 @@
|
||||
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.13.3"
|
||||
description = "Screen-scraping library"
|
||||
optional = false
|
||||
python-versions = ">=3.7.0"
|
||||
files = [
|
||||
{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
|
||||
{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
soupsieve = ">1.2"
|
||||
typing-extensions = ">=4.0.0"
|
||||
|
||||
[package.extras]
|
||||
cchardet = ["cchardet"]
|
||||
chardet = ["chardet"]
|
||||
charset-normalizer = ["charset-normalizer"]
|
||||
html5lib = ["html5lib"]
|
||||
lxml = ["lxml"]
|
||||
|
||||
[[package]]
|
||||
name = "click"
|
||||
version = "8.1.8"
|
||||
description = "Composable command line interface toolkit"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
|
||||
{file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
description = "Cross-platform colored terminal text."
|
||||
optional = false
|
||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
||||
files = [
|
||||
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "construct"
|
||||
version = "2.8.8"
|
||||
description = "A powerful declarative parser/builder for binary data"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "construct-2.8.8.tar.gz", hash = "sha256:1b84b8147f6fd15bcf64b737c3e8ac5100811ad80c830cb4b2545140511c4157"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.2.2"
|
||||
description = "Backport of PEP 654 (exception groups)"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
|
||||
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["pytest (>=6)"]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.0.0"
|
||||
description = "brain-dead simple config-ini parsing"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
||||
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langcodes"
|
||||
version = "3.4.1"
|
||||
description = "Tools for labeling human languages with IETF language tags"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "langcodes-3.4.1-py3-none-any.whl", hash = "sha256:68f686fc3d358f222674ecf697ddcee3ace3c2fe325083ecad2543fd28a20e77"},
|
||||
{file = "langcodes-3.4.1.tar.gz", hash = "sha256:a24879fed238013ac3af2424b9d1124e38b4a38b2044fd297c8ff38e5912e718"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
language-data = ">=1.2"
|
||||
|
||||
[package.extras]
|
||||
build = ["build", "twine"]
|
||||
test = ["pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "language-data"
|
||||
version = "1.3.0"
|
||||
description = "Supplementary data about languages used by the langcodes module"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "language_data-1.3.0-py3-none-any.whl", hash = "sha256:e2ee943551b5ae5f89cd0e801d1fc3835bb0ef5b7e9c3a4e8e17b2b214548fbf"},
|
||||
{file = "language_data-1.3.0.tar.gz", hash = "sha256:7600ef8aa39555145d06c89f0c324bf7dab834ea0b0a439d8243762e3ebad7ec"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
marisa-trie = ">=1.1.0"
|
||||
|
||||
[package.extras]
|
||||
build = ["build", "twine"]
|
||||
test = ["pytest", "pytest-cov"]
|
||||
|
||||
[[package]]
|
||||
name = "lxml"
|
||||
version = "5.3.1"
|
||||
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:528f3a0498a8edc69af0559bdcf8a9f5a8bf7c00051a6ef3141fdcf27017bbf5"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4743e30d6f5f92b6d2b7c86b3ad250e0bad8dee4b7ad8a0c44bfb276af89a3"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b5d7f8acf809465086d498d62a981fa6a56d2718135bb0e4aa48c502055f5c"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:928e75a7200a4c09e6efc7482a1337919cc61fe1ba289f297827a5b76d8969c2"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a997b784a639e05b9d4053ef3b20c7e447ea80814a762f25b8ed5a89d261eac"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7b82e67c5feb682dbb559c3e6b78355f234943053af61606af126df2183b9ef9"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:f1de541a9893cf8a1b1db9bf0bf670a2decab42e3e82233d36a74eda7822b4c9"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:de1fc314c3ad6bc2f6bd5b5a5b9357b8c6896333d27fdbb7049aea8bd5af2d79"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7c0536bd9178f754b277a3e53f90f9c9454a3bd108b1531ffff720e082d824f2"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68018c4c67d7e89951a91fbd371e2e34cd8cfc71f0bb43b5332db38497025d51"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa826340a609d0c954ba52fd831f0fba2a4165659ab0ee1a15e4aac21f302406"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:796520afa499732191e39fc95b56a3b07f95256f2d22b1c26e217fb69a9db5b5"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3effe081b3135237da6e4c4530ff2a868d3f80be0bda027e118a5971285d42d0"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-win32.whl", hash = "sha256:a22f66270bd6d0804b02cd49dae2b33d4341015545d17f8426f2c4e22f557a23"},
|
||||
{file = "lxml-5.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:0bcfadea3cdc68e678d2b20cb16a16716887dd00a881e16f7d806c2138b8ff0c"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e220f7b3e8656ab063d2eb0cd536fafef396829cafe04cb314e734f87649058f"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f2cfae0688fd01f7056a17367e3b84f37c545fb447d7282cf2c242b16262607"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67d2f8ad9dcc3a9e826bdc7802ed541a44e124c29b7d95a679eeb58c1c14ade8"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db0c742aad702fd5d0c6611a73f9602f20aec2007c102630c06d7633d9c8f09a"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:198bb4b4dd888e8390afa4f170d4fa28467a7eaf857f1952589f16cfbb67af27"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2a3e412ce1849be34b45922bfef03df32d1410a06d1cdeb793a343c2f1fd666"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b8969dbc8d09d9cd2ae06362c3bad27d03f433252601ef658a49bd9f2b22d79"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5be8f5e4044146a69c96077c7e08f0709c13a314aa5315981185c1f00235fe65"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:133f3493253a00db2c870d3740bc458ebb7d937bd0a6a4f9328373e0db305709"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:52d82b0d436edd6a1d22d94a344b9a58abd6c68c357ed44f22d4ba8179b37629"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b6f92e35e2658a5ed51c6634ceb5ddae32053182851d8cad2a5bc102a359b33"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:203b1d3eaebd34277be06a3eb880050f18a4e4d60861efba4fb946e31071a295"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:155e1a5693cf4b55af652f5c0f78ef36596c7f680ff3ec6eb4d7d85367259b2c"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22ec2b3c191f43ed21f9545e9df94c37c6b49a5af0a874008ddc9132d49a2d9c"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7eda194dd46e40ec745bf76795a7cccb02a6a41f445ad49d3cf66518b0bd9cff"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-win32.whl", hash = "sha256:fb7c61d4be18e930f75948705e9718618862e6fc2ed0d7159b2262be73f167a2"},
|
||||
{file = "lxml-5.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c809eef167bf4a57af4b03007004896f5c60bd38dc3852fcd97a26eae3d4c9e6"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5"},
|
||||
{file = "lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c093c7088b40d8266f57ed71d93112bd64c6724d31f0794c1e52cc4857c28e0e"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b0884e3f22d87c30694e625b1e62e6f30d39782c806287450d9dc2fdf07692fd"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1637fa31ec682cd5760092adfabe86d9b718a75d43e65e211d5931809bc111e7"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a364e8e944d92dcbf33b6b494d4e0fb3499dcc3bd9485beb701aa4b4201fa414"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:779e851fd0e19795ccc8a9bb4d705d6baa0ef475329fe44a13cf1e962f18ff1e"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c4393600915c308e546dc7003d74371744234e8444a28622d76fe19b98fa59d1"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:673b9d8e780f455091200bba8534d5f4f465944cbdd61f31dc832d70e29064a5"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2e4a570f6a99e96c457f7bec5ad459c9c420ee80b99eb04cbfcfe3fc18ec6423"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:71f31eda4e370f46af42fc9f264fafa1b09f46ba07bdbee98f25689a04b81c20"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:42978a68d3825eaac55399eb37a4d52012a205c0c6262199b8b44fcc6fd686e8"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8b1942b3e4ed9ed551ed3083a2e6e0772de1e5e3aca872d955e2e86385fb7ff9"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:85c4f11be9cf08917ac2a5a8b6e1ef63b2f8e3799cec194417e76826e5f1de9c"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:231cf4d140b22a923b1d0a0a4e0b4f972e5893efcdec188934cc65888fd0227b"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5865b270b420eda7b68928d70bb517ccbe045e53b1a428129bb44372bf3d7dd5"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbf7bebc2275016cddf3c997bf8a0f7044160714c64a9b83975670a04e6d2252"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-win32.whl", hash = "sha256:d0751528b97d2b19a388b302be2a0ee05817097bab46ff0ed76feeec24951f78"},
|
||||
{file = "lxml-5.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:91fb6a43d72b4f8863d21f347a9163eecbf36e76e2f51068d59cd004c506f332"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:016b96c58e9a4528219bb563acf1aaaa8bc5452e7651004894a973f03b84ba81"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82a4bb10b0beef1434fb23a09f001ab5ca87895596b4581fd53f1e5145a8934a"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d68eeef7b4d08a25e51897dac29bcb62aba830e9ac6c4e3297ee7c6a0cf6439"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:f12582b8d3b4c6be1d298c49cb7ae64a3a73efaf4c2ab4e37db182e3545815ac"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2df7ed5edeb6bd5590914cd61df76eb6cce9d590ed04ec7c183cf5509f73530d"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:585c4dc429deebc4307187d2b71ebe914843185ae16a4d582ee030e6cfbb4d8a"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:06a20d607a86fccab2fc15a77aa445f2bdef7b49ec0520a842c5c5afd8381576"},
|
||||
{file = "lxml-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:057e30d0012439bc54ca427a83d458752ccda725c1c161cc283db07bcad43cf9"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4867361c049761a56bd21de507cab2c2a608c55102311d142ade7dab67b34f32"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dddf0fb832486cc1ea71d189cb92eb887826e8deebe128884e15020bb6e3f61"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bcc211542f7af6f2dfb705f5f8b74e865592778e6cafdfd19c792c244ccce19"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaca5a812f050ab55426c32177091130b1e49329b3f002a32934cd0245571307"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:236610b77589faf462337b3305a1be91756c8abc5a45ff7ca8f245a71c5dab70"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:aed57b541b589fa05ac248f4cb1c46cbb432ab82cbd467d1c4f6a2bdc18aecf9"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:75fa3d6946d317ffc7016a6fcc44f42db6d514b7fdb8b4b28cbe058303cb6e53"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:96eef5b9f336f623ffc555ab47a775495e7e8846dde88de5f941e2906453a1ce"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:ef45f31aec9be01379fc6c10f1d9c677f032f2bac9383c827d44f620e8a88407"},
|
||||
{file = "lxml-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0611da6b07dd3720f492db1b463a4d1175b096b49438761cc9f35f0d9eaaef5"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2aca14c235c7a08558fe0a4786a1a05873a01e86b474dfa8f6df49101853a4e"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82fce1d964f065c32c9517309f0c7be588772352d2f40b1574a214bd6e6098"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7aae7a3d63b935babfdc6864b31196afd5145878ddd22f5200729006366bc4d5"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8e0d177b1fe251c3b1b914ab64135475c5273c8cfd2857964b2e3bb0fe196a7"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:6c4dd3bfd0c82400060896717dd261137398edb7e524527438c54a8c34f736bf"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f1208c1c67ec9e151d78aa3435aa9b08a488b53d9cfac9b699f15255a3461ef2"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c6aacf00d05b38a5069826e50ae72751cb5bc27bdc4d5746203988e429b385bb"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5881aaa4bf3a2d086c5f20371d3a5856199a0d8ac72dd8d0dbd7a2ecfc26ab73"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-win32.whl", hash = "sha256:45fbb70ccbc8683f2fb58bea89498a7274af1d9ec7995e9f4af5604e028233fc"},
|
||||
{file = "lxml-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:7512b4d0fc5339d5abbb14d1843f70499cab90d0b864f790e73f780f041615d7"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5885bc586f1edb48e5d68e7a4b4757b5feb2a496b64f462b4d65950f5af3364f"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1b92fe86e04f680b848fff594a908edfa72b31bfc3499ef7433790c11d4c8cd8"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a091026c3bf7519ab1e64655a3f52a59ad4a4e019a6f830c24d6430695b1cf6a"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ffb141361108e864ab5f1813f66e4e1164181227f9b1f105b042729b6c15125"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3715cdf0dd31b836433af9ee9197af10e3df41d273c19bb249230043667a5dfd"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88b72eb7222d918c967202024812c2bfb4048deeb69ca328363fb8e15254c549"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa59974880ab5ad8ef3afaa26f9bda148c5f39e06b11a8ada4660ecc9fb2feb3"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3bb8149840daf2c3f97cebf00e4ed4a65a0baff888bf2605a8d0135ff5cf764e"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:0d6b2fa86becfa81f0a0271ccb9eb127ad45fb597733a77b92e8a35e53414914"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:136bf638d92848a939fd8f0e06fcf92d9f2e4b57969d94faae27c55f3d85c05b"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:89934f9f791566e54c1d92cdc8f8fd0009447a5ecdb1ec6b810d5f8c4955f6be"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8ade0363f776f87f982572c2860cc43c65ace208db49c76df0a21dde4ddd16e"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bfbbab9316330cf81656fed435311386610f78b6c93cc5db4bebbce8dd146675"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:172d65f7c72a35a6879217bcdb4bb11bc88d55fb4879e7569f55616062d387c2"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e3c623923967f3e5961d272718655946e5322b8d058e094764180cdee7bab1af"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ce0930a963ff593e8bb6fda49a503911accc67dee7e5445eec972668e672a0f0"},
|
||||
{file = "lxml-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:f7b64fcd670bca8800bc10ced36620c6bbb321e7bc1214b9c0c0df269c1dddc2"},
|
||||
{file = "lxml-5.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:afa578b6524ff85fb365f454cf61683771d0170470c48ad9d170c48075f86725"},
|
||||
{file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f5e80adf0aafc7b5454f2c1cb0cde920c9b1f2cbd0485f07cc1d0497c35c5d"},
|
||||
{file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd0b80ac2d8f13ffc906123a6f20b459cb50a99222d0da492360512f3e50f84"},
|
||||
{file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:422c179022ecdedbe58b0e242607198580804253da220e9454ffe848daa1cfd2"},
|
||||
{file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:524ccfded8989a6595dbdda80d779fb977dbc9a7bc458864fc9a0c2fc15dc877"},
|
||||
{file = "lxml-5.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:48fd46bf7155def2e15287c6f2b133a2f78e2d22cdf55647269977b873c65499"},
|
||||
{file = "lxml-5.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:05123fad495a429f123307ac6d8fd6f977b71e9a0b6d9aeeb8f80c017cb17131"},
|
||||
{file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a243132767150a44e6a93cd1dde41010036e1cbc63cc3e9fe1712b277d926ce3"},
|
||||
{file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92ea6d9dd84a750b2bae72ff5e8cf5fdd13e58dda79c33e057862c29a8d5b50"},
|
||||
{file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2f1be45d4c15f237209bbf123a0e05b5d630c8717c42f59f31ea9eae2ad89394"},
|
||||
{file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a83d3adea1e0ee36dac34627f78ddd7f093bb9cfc0a8e97f1572a949b695cb98"},
|
||||
{file = "lxml-5.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3edbb9c9130bac05d8c3fe150c51c337a471cc7fdb6d2a0a7d3a88e88a829314"},
|
||||
{file = "lxml-5.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2f23cf50eccb3255b6e913188291af0150d89dab44137a69e14e4dcb7be981f1"},
|
||||
{file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7e5edac4778127f2bf452e0721a58a1cfa4d1d9eac63bdd650535eb8543615"},
|
||||
{file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:094b28ed8a8a072b9e9e2113a81fda668d2053f2ca9f2d202c2c8c7c2d6516b1"},
|
||||
{file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:514fe78fc4b87e7a7601c92492210b20a1b0c6ab20e71e81307d9c2e377c64de"},
|
||||
{file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8fffc08de02071c37865a155e5ea5fce0282e1546fd5bde7f6149fcaa32558ac"},
|
||||
{file = "lxml-5.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4b0d5cdba1b655d5b18042ac9c9ff50bda33568eb80feaaca4fc237b9c4fbfde"},
|
||||
{file = "lxml-5.3.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3031e4c16b59424e8d78522c69b062d301d951dc55ad8685736c3335a97fc270"},
|
||||
{file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb659702a45136c743bc130760c6f137870d4df3a9e14386478b8a0511abcfca"},
|
||||
{file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a11b16a33656ffc43c92a5343a28dc71eefe460bcc2a4923a96f292692709f6"},
|
||||
{file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5ae125276f254b01daa73e2c103363d3e99e3e10505686ac7d9d2442dd4627a"},
|
||||
{file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76722b5ed4a31ba103e0dc77ab869222ec36efe1a614e42e9bcea88a36186fe"},
|
||||
{file = "lxml-5.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:33e06717c00c788ab4e79bc4726ecc50c54b9bfb55355eae21473c145d83c2d2"},
|
||||
{file = "lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
cssselect = ["cssselect (>=0.7)"]
|
||||
html-clean = ["lxml_html_clean"]
|
||||
html5 = ["html5lib"]
|
||||
htmlsoup = ["BeautifulSoup4"]
|
||||
source = ["Cython (>=3.0.11,<3.1.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "lxml-stubs"
|
||||
version = "0.4.0"
|
||||
description = "Type annotations for the lxml package"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "lxml-stubs-0.4.0.tar.gz", hash = "sha256:184877b42127256abc2b932ba8bd0ab5ea80bd0b0fee618d16daa40e0b71abee"},
|
||||
{file = "lxml_stubs-0.4.0-py3-none-any.whl", hash = "sha256:3b381e9e82397c64ea3cc4d6f79d1255d015f7b114806d4826218805c10ec003"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1.9.3)"]
|
||||
|
||||
[[package]]
|
||||
name = "marisa-trie"
|
||||
version = "1.2.1"
|
||||
description = "Static memory-efficient and fast Trie-like structures for Python."
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2eb41d2f9114d8b7bd66772c237111e00d2bae2260824560eaa0a1e291ce9e8"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e956e6a46f604b17d570901e66f5214fb6f658c21e5e7665deace236793cef6"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd45142501300e7538b2e544905580918b67b1c82abed1275fe4c682c95635fa"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8443d116c612cfd1961fbf76769faf0561a46d8e317315dd13f9d9639ad500c"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:875a6248e60fbb48d947b574ffa4170f34981f9e579bde960d0f9a49ea393ecc"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:746a7c60a17fccd3cfcfd4326926f02ea4fcdfc25d513411a0c4fc8e4a1ca51f"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e70869737cc0e5bd903f620667da6c330d6737048d1f44db792a6af68a1d35be"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06b099dd743676dbcd8abd8465ceac8f6d97d8bfaabe2c83b965495523b4cef2"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d2a82eb21afdaf22b50d9b996472305c05ca67fc4ff5a026a220320c9c961db6"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-win32.whl", hash = "sha256:8951e7ce5d3167fbd085703b4cbb3f47948ed66826bef9a2173c379508776cf5"},
|
||||
{file = "marisa_trie-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:5685a14b3099b1422c4f59fa38b0bf4b5342ee6cc38ae57df9666a0b28eeaad3"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed3fb4ed7f2084597e862bcd56c56c5529e773729a426c083238682dba540e98"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe69fb9ffb2767746181f7b3b29bbd3454d1d24717b5958e030494f3d3cddf3"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4728ed3ae372d1ea2cdbd5eaa27b8f20a10e415d1f9d153314831e67d963f281"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cf4f25cf895692b232f49aa5397af6aba78bb679fb917a05fce8d3cb1ee446d"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cca7f96236ffdbf49be4b2e42c132e3df05968ac424544034767650913524de"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7eb20bf0e8b55a58d2a9b518aabc4c18278787bdba476c551dd1c1ed109e509"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b1ec93f0d1ee6d7ab680a6d8ea1a08bf264636358e92692072170032dda652ba"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e2699255d7ac610dee26d4ae7bda5951d05c7d9123a22e1f7c6a6f1964e0a4e4"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c484410911182457a8a1a0249d0c09c01e2071b78a0a8538cd5f7fa45589b13a"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-win32.whl", hash = "sha256:ad548117744b2bcf0e3d97374608be0a92d18c2af13d98b728d37cd06248e571"},
|
||||
{file = "marisa_trie-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:436f62d27714970b9cdd3b3c41bdad046f260e62ebb0daa38125ef70536fc73b"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:638506eacf20ca503fff72221a7e66a6eadbf28d6a4a6f949fcf5b1701bb05ec"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de1665eaafefa48a308e4753786519888021740501a15461c77bdfd57638e6b4"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2a7d00f53f4945320b551bccb826b3fb26948bde1a10d50bb9802fabb611b10"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98042040d1d6085792e8d0f74004fc0f5f9ca6091c298f593dd81a22a4643854"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6532615111eec2c79e711965ece0bc95adac1ff547a7fff5ffca525463116deb"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:20948e40ab2038e62b7000ca6b4a913bc16c91a2c2e6da501bd1f917eeb28d51"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66b23e5b35dd547f85bf98db7c749bc0ffc57916ade2534a6bbc32db9a4abc44"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6704adf0247d2dda42e876b793be40775dff46624309ad99bc7537098bee106d"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-win32.whl", hash = "sha256:3ad356442c2fea4c2a6f514738ddf213d23930f942299a2b2c05df464a00848a"},
|
||||
{file = "marisa_trie-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:f2806f75817392cedcacb24ac5d80b0350dde8d3861d67d045c1d9b109764114"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b5ea16e69bfda0ac028c921b58de1a4aaf83d43934892977368579cd3c0a2554"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9f627f4e41be710b6cb6ed54b0128b229ac9d50e2054d9cde3af0fef277c23cf"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5e649f3dc8ab5476732094f2828cc90cac3be7c79bc0c8318b6fda0c1d248db4"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46e528ee71808c961baf8c3ce1c46a8337ec7a96cc55389d11baafe5b632f8e9"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36aa4401a1180615f74d575571a6550081d84fc6461e9aefc0bb7b2427af098e"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce59bcd2cda9bb52b0e90cc7f36413cd86c3d0ce7224143447424aafb9f4aa48"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f4cd800704a5fc57e53c39c3a6b0c9b1519ebdbcb644ede3ee67a06eb542697d"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2428b495003c189695fb91ceeb499f9fcced3a2dce853e17fa475519433c67ff"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:735c363d9aaac82eaf516a28f7c6b95084c2e176d8231c87328dc80e112a9afa"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-win32.whl", hash = "sha256:eba6ca45500ca1a042466a0684aacc9838e7f20fe2605521ee19f2853062798f"},
|
||||
{file = "marisa_trie-1.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:aa7cd17e1c690ce96c538b2f4aae003d9a498e65067dd433c52dd069009951d4"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5e43891a37b0d7f618819fea14bd951289a0a8e3dd0da50c596139ca83ebb9b1"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6946100a43f933fad6bc458c502a59926d80b321d5ac1ed2ff9c56605360496f"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4177dc0bd1374e82be9b2ba4d0c2733b0a85b9d154ceeea83a5bee8c1e62fbf"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f35c2603a6be168088ed1db6ad1704b078aa8f39974c60888fbbced95dcadad4"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d659fda873d8dcb2c14c2c331de1dee21f5a902d7f2de7978b62c6431a8850ef"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:b0ef26733d3c836be79e812071e1a431ce1f807955a27a981ebb7993d95f842b"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:536ea19ce6a2ce61c57fed4123ecd10d18d77a0db45cd2741afff2b8b68f15b3"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-win32.whl", hash = "sha256:0ee6cf6a16d9c3d1c94e21c8e63c93d8b34bede170ca4e937e16e1c0700d399f"},
|
||||
{file = "marisa_trie-1.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7e7b1786e852e014d03e5f32dbd991f9a9eb223dd3fa9a2564108b807e4b7e1c"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:952af3a5859c3b20b15a00748c36e9eb8316eb2c70bd353ae1646da216322908"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24a81aa7566e4ec96fc4d934581fe26d62eac47fc02b35fa443a0bb718b471e8"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9c9b32b14651a6dcf9e8857d2df5d29d322a1ea8c0be5c8ffb88f9841c4ec62b"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ac170d20b97beb75059ba65d1ccad6b434d777c8992ab41ffabdade3b06dd74"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da4e4facb79614cc4653cfd859f398e4db4ca9ab26270ff12610e50ed7f1f6c6"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25688f34cac3bec01b4f655ffdd6c599a01f0bd596b4a79cf56c6f01a7df3560"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1db3213b451bf058d558f6e619bceff09d1d130214448a207c55e1526e2773a1"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:d5648c6dcc5dc9200297fb779b1663b8a4467bda034a3c69bd9c32d8afb33b1d"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5bd39a4e1cc839a88acca2889d17ebc3f202a5039cd6059a13148ce75c8a6244"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-win32.whl", hash = "sha256:594f98491a96c7f1ffe13ce292cef1b4e63c028f0707effdea0f113364c1ae6c"},
|
||||
{file = "marisa_trie-1.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:5fe5a286f997848a410eebe1c28657506adaeb405220ee1e16cfcfd10deb37f2"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c0fe2ace0cb1806badbd1c551a8ec2f8d4cf97bf044313c082ef1acfe631ddca"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67f0c2ec82c20a02c16fc9ba81dee2586ef20270127c470cb1054767aa8ba310"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a3c98613180cf1730e221933ff74b454008161b1a82597e41054127719964188"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:429858a0452a7bedcf67bc7bb34383d00f666c980cb75a31bcd31285fbdd4403"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2eacb84446543082ec50f2fb563f1a94c96804d4057b7da8ed815958d0cdfbe"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:852d7bcf14b0c63404de26e7c4c8d5d65ecaeca935e93794331bc4e2f213660b"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e58788004adda24c401d1751331618ed20c507ffc23bfd28d7c0661a1cf0ad16"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aefe0973cc4698e0907289dc0517ab0c7cdb13d588201932ff567d08a50b0e2e"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6c50c861faad0a5c091bd763e0729f958c316e678dfa065d3984fbb9e4eacbcd"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-win32.whl", hash = "sha256:b1ce340da608530500ab4f963f12d6bfc8d8680900919a60dbdc9b78c02060a4"},
|
||||
{file = "marisa_trie-1.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:ce37d8ca462bb64cc13f529b9ed92f7b21fe8d1f1679b62e29f9cb7d0e888b49"},
|
||||
{file = "marisa_trie-1.2.1.tar.gz", hash = "sha256:3a27c408e2aefc03e0f1d25b2ff2afb85aac3568f6fa2ae2a53b57a2e87ce29d"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
setuptools = "*"
|
||||
|
||||
[package.extras]
|
||||
test = ["hypothesis", "pytest", "readme-renderer"]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "24.2"
|
||||
description = "Core utilities for Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
|
||||
{file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.5.0"
|
||||
description = "plugin and hook calling mechanisms for python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
|
||||
{file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
dev = ["pre-commit", "tox"]
|
||||
testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "pymp4"
|
||||
version = "1.4.0"
|
||||
description = "Python parser for MP4 boxes"
|
||||
optional = false
|
||||
python-versions = ">=3.7,<4.0"
|
||||
files = [
|
||||
{file = "pymp4-1.4.0-py3-none-any.whl", hash = "sha256:3401666c1e2a97ac94dffb18c5a5dcbd46d0a436da5272d378a6f9f6506dd12d"},
|
||||
{file = "pymp4-1.4.0.tar.gz", hash = "sha256:bc9e77732a8a143d34c38aa862a54180716246938e4bf3e07585d19252b77bb5"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
construct = "2.8.8"
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "7.4.4"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
|
||||
{file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
|
||||
iniconfig = "*"
|
||||
packaging = "*"
|
||||
pluggy = ">=0.12,<2.0"
|
||||
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "75.3.0"
|
||||
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"},
|
||||
{file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"]
|
||||
core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"]
|
||||
cover = ["pytest-cov"]
|
||||
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
|
||||
enabler = ["pytest-enabler (>=2.2)"]
|
||||
test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
|
||||
type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.6"
|
||||
description = "A modern CSS selector implementation for Beautiful Soup."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
|
||||
{file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "srt"
|
||||
version = "3.5.3"
|
||||
description = "A tiny library for parsing, modifying, and composing SRT files."
|
||||
optional = false
|
||||
python-versions = ">=2.7"
|
||||
files = [
|
||||
{file = "srt-3.5.3.tar.gz", hash = "sha256:4884315043a4f0740fd1f878ed6caa376ac06d70e135f306a6dc44632eed0cc0"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinycss"
|
||||
version = "0.4"
|
||||
description = "tinycss is a complete yet simple CSS parser for Python."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "tinycss-0.4.tar.gz", hash = "sha256:12306fb50e5e9e7eaeef84b802ed877488ba80e35c672867f548c0924a76716e"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
test = ["pytest-cov", "pytest-flake8", "pytest-isort", "pytest-runner"]
|
||||
|
||||
[[package]]
|
||||
name = "tomli"
|
||||
version = "2.2.1"
|
||||
description = "A lil' TOML parser"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"},
|
||||
{file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"},
|
||||
{file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"},
|
||||
{file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"},
|
||||
{file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
|
||||
{file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.12.2"
|
||||
description = "Backported and Experimental Type Hints for Python 3.8+"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
|
||||
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
|
||||
]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "d1dc09a1492c59373465886d52b572d761f7e370239ae93c63a7c88f4a69c47f"
|
||||
33
scripts/subby/pyproject.toml
Normal file
33
scripts/subby/pyproject.toml
Normal file
@ -0,0 +1,33 @@
|
||||
[tool.poetry]
|
||||
name = "subby"
|
||||
version = "0.3.19"
|
||||
description = "Advanced subtitle converter and processor"
|
||||
authors = ["vevv"]
|
||||
license = "GPL-3.0-or-later"
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/vevv/subby"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8"
|
||||
pymp4 = "~1.4.0"
|
||||
beautifulsoup4 = "^4.11.2"
|
||||
tinycss = "^0.4"
|
||||
click = "^8.1.3"
|
||||
srt = "^3.5.3"
|
||||
lxml = "^5.3.0"
|
||||
langcodes = "^3.4.0"
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "^7.4.3"
|
||||
lxml-stubs = "^0.4.0"
|
||||
|
||||
|
||||
[tool.poetry.scripts]
|
||||
subby = "subby.cli:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
4
scripts/subby/setup.py
Normal file
4
scripts/subby/setup.py
Normal file
@ -0,0 +1,4 @@
|
||||
import setuptools
|
||||
|
||||
# Entry-point guard: setuptools is only invoked when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    # setup() is called without arguments, so no metadata is supplied here.
    setuptools.setup()
|
||||
25
scripts/subby/subby/__init__.py
Normal file
25
scripts/subby/subby/__init__.py
Normal file
@ -0,0 +1,25 @@
|
||||
from subby.converters.bilibili_json import BilibiliJSONConverter
|
||||
from subby.converters.mp4 import ISMTConverter, WVTTConverter
|
||||
from subby.converters.sami import SAMIConverter
|
||||
from subby.converters.smpte import SMPTEConverter
|
||||
from subby.converters.webvtt import WebVTTConverter
|
||||
from subby.processors.common_issues import CommonIssuesFixer
|
||||
from subby.processors.sdh import SDHStripper
|
||||
from subby.subripfile import SubRipFile
|
||||
|
||||
__version__ = '0.3.19'
|
||||
|
||||
__all__ = [
|
||||
# Converters
|
||||
'SAMIConverter',
|
||||
'SMPTEConverter', 'ISMTConverter',
|
||||
'WebVTTConverter', 'WVTTConverter',
|
||||
'BilibiliJSONConverter',
|
||||
# Processors
|
||||
'CommonIssuesFixer',
|
||||
'SDHStripper',
|
||||
# Utility
|
||||
'SubRipFile',
|
||||
# Version
|
||||
'__version__'
|
||||
]
|
||||
217
scripts/subby/subby/cli.py
Normal file
217
scripts/subby/subby/cli.py
Normal file
@ -0,0 +1,217 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
from subby import (BilibiliJSONConverter, CommonIssuesFixer, ISMTConverter,
|
||||
SAMIConverter, SDHStripper, SMPTEConverter, WebVTTConverter,
|
||||
WVTTConverter, __version__)
|
||||
|
||||
|
||||
@click.group()
@click.option("-d", "--debug", is_flag=True, default=False, help="Enable debug level logs.")
def main(debug: bool) -> None:
    """Subby—Advanced Subtitle Converter and Processor."""
    # Pick both log levels up front: the root logger and the `srt` logger.
    if debug:
        root_level, srt_level = logging.DEBUG, logging.DEBUG
    else:
        # Outside debug mode the `srt` logger only lets CRITICAL records through.
        root_level, srt_level = logging.INFO, logging.CRITICAL

    logging.basicConfig(level=root_level)
    logging.getLogger('srt').setLevel(srt_level)
|
||||
|
||||
|
||||
@main.command()
def version():
    """Print version information."""
    log = logging.getLogger(__name__)

    first_year = 2023
    this_year = datetime.now().year
    # A single year while still in the first year, otherwise a "first-current" range.
    years = first_year if first_year == this_year else f"{first_year}-{this_year}"

    log.info("Subby version %s Copyright (c) %s vevv", __version__, years)
    log.info("https://github.com/vevv/subby")
|
||||
|
||||
|
||||
@main.command()
@click.argument("file", type=Path)
@click.option("-o", "--out", type=Path, default=None, help="Output path.")
@click.option(
    "-l",
    "--language",
    type=str,
    default=None,
    help="Subtitle language (used for language specific processing)"
)
@click.option(
    "-e",
    "--encoding",
    type=str,
    default="utf-8",
    help="Character encoding (default: utf-8)."
)
@click.option(
    "-n",
    "--no-post-processing",
    is_flag=True,
    default=False,
    help="Disable post-processing after conversion."
)
@click.option(
    "-g",
    "--keep-short-gaps",
    is_flag=True,
    help="Keep short gaps between lines (< 85 ms; only with post-processing enabled)"
)
def convert(
    file: Path,
    out: Path | None,
    language: str | None,
    encoding: str,
    no_post_processing: bool,
    keep_short_gaps: bool
):
    """Convert a Subtitle to SubRip (SRT)."""
    # file:               input subtitle; its format is sniffed from the raw bytes.
    # out:                output path; defaults to the input path with a ".srt" suffix.
    # language:           forwarded to CommonIssuesFixer (may be None).
    # encoding:           character encoding used when saving the result.
    # no_post_processing: skip the CommonIssuesFixer pass.
    # keep_short_gaps:    sets `remove_gaps` to False on the fixer.
    if not isinstance(file, Path):
        raise click.ClickException(f"Expected file to be a {Path} not {file!r}")
    if out and not isinstance(out, Path):
        raise click.ClickException(f"Expected out to be a {Path} not {out!r}")

    if not out:
        out = file.with_suffix(".srt")

    log = logging.getLogger("convert")

    # Read the file exactly once: the same bytes are used for format detection
    # and for the conversion itself.
    data = file.read_bytes()
    converter = None

    if b"mdat" in data and b"moof" in data:
        # Fragmented MP4 container: decide by what the payload looks like.
        if b"</tt>" in data:
            log.info("Subtitle format: ISMT (DFXP in MP4)")
            converter = ISMTConverter()
        elif b"vttc" in data:
            log.info("Subtitle format: WVTT (WebVTT in MP4)")
            converter = WVTTConverter()
    elif b"<SAMI>" in data:
        log.info("Subtitle format: SAMI")
        converter = SAMIConverter()
    elif b"</tt>" in data or b"</tt:tt>" in data:
        log.info("Subtitle format: DFXP/TTML/TTML2")
        converter = SMPTEConverter()
    elif b"WEBVTT" in data:
        log.info("Subtitle format: WebVTT")
        converter = WebVTTConverter()
    elif data.startswith(b'{') and b'"Stroke"' in data and b'"background_color"' in data:
        log.info("Subtitle format: JSON (Bilibili)")
        converter = BilibiliJSONConverter()

    if not converter:
        log.error("Subtitle format was unrecognized...")
        return

    # Convert the bytes already in memory instead of re-opening the file.
    srt = converter.from_bytes(data)
    log.info("Converted subtitle to SubRip (SRT)")

    if not no_post_processing:
        processor = CommonIssuesFixer()
        processor.remove_gaps = not keep_short_gaps
        srt, status = processor.from_srt(srt, language=language)
        log.info(f"Processed subtitle {['but no issues were found...', 'and repaired some issues!'][status]}")

    srt.save(out, encoding=encoding)
    log.info(f"Saved to: {out}")
    log.debug(f"Used character encoding {encoding}")
|
||||
|
||||
|
||||
@main.group()
@click.argument("file", type=Path)
@click.option("-o", "--out", type=Path, default=None, help="Output path.")
@click.option(
    "-l",
    "--language",
    type=str,
    default=None,
    help="Subtitle language (used for language specific processing)"
)
@click.option(
    "-e",
    "--encoding",
    type=str,
    default="utf-8",
    help="Character encoding (default: utf-8)."
)
@click.option(
    "-n",
    "--no-post-processing",
    is_flag=True,
    default=False,
    help="Disable post-processing after SDH stripping."
)
@click.option(
    "-g",
    "--keep-short-gaps",
    is_flag=True,
    help="Keep short gaps between lines (< 85 ms)"
)
def process(file: Path, out: Path | None, **__):
    """SubRip (SRT) post-processing."""
    # The group itself only validates the two path arguments; every other
    # option is accepted and ignored here.
    # `file` must always be a Path; `out` is only checked when it is truthy.
    for label, value, always in (("file", file, True), ("out", out, False)):
        if (always or value) and not isinstance(value, Path):
            raise click.ClickException(f"Expected {label} to be a {Path} not {value!r}")
|
||||
|
||||
|
||||
@process.command()
@click.pass_context
def mend(ctx: click.Context):
    """Repair or Mend common issues in a Subtitle."""
    # Options live on the parent `process` group; this is the same dict object,
    # so the default written to "out" below is stored on the parent context.
    params = ctx.parent.params
    file = params["file"]

    if not params["out"]:
        # Path.with_stem() only exists on Python 3.9+, while the project
        # declares support for 3.8; with_name() builds the same path.
        params["out"] = file.with_name(f"{file.stem}_mend{file.suffix}")

    log = logging.getLogger("process.mend")

    processor = CommonIssuesFixer()
    processor.remove_gaps = not params["keep_short_gaps"]
    processed_srt, status = processor.from_file(file, language=params["language"])
    log.info(f"Processed subtitle {['but no issues were found...', 'and repaired some issues!'][status]}")

    # Returned as (subtitle, changed) for the group's result callback.
    return processed_srt, status
|
||||
|
||||
|
||||
@process.command("strip-sdh")
@click.pass_context
def strip_sdh(ctx: click.Context):
    """Remove Hard-of-hearing descriptions from Subtitles."""
    # Options live on the parent `process` group; this is the same dict object,
    # so the default written to "out" below is stored on the parent context.
    params = ctx.parent.params
    file = params["file"]
    language = params["language"]

    if not params["out"]:
        # Path.with_stem() only exists on Python 3.9+, while the project
        # declares support for 3.8; with_name() builds the same path.
        params["out"] = file.with_name(f"{file.stem}_sdh_stripped{file.suffix}")

    log = logging.getLogger("process.strip_sdh")

    processor = SDHStripper()
    processed_srt, status = processor.from_file(file, language=language)
    log.info(f"Processed subtitle {['but no SDH descriptions were found...', 'and removed SDH!'][status]}")

    if not params["no_post_processing"]:
        fixer = CommonIssuesFixer()
        fixer.remove_gaps = not params["keep_short_gaps"]
        # Keep the fixer's own result separate: the message below must describe
        # the post-processing pass, not the SDH stripping pass.
        processed_srt, repaired = fixer.from_srt(processed_srt, language=language)
        log.info(
            "Processed stripped subtitle "
            + ('and repaired some issues!' if repaired else 'but no issues were found...')
        )

    # The returned status is still the SDH-stripping result, as before.
    return processed_srt, status
|
||||
|
||||
|
||||
@process.result_callback()
def process_result(result, out, encoding, *_, **__):
    """Save the subtitle returned by a `process` subcommand when it changed."""
    processed_srt, status = result
    # Nothing was modified by the subcommand: leave the filesystem untouched.
    if not status:
        return

    processed_srt.save(out, encoding=encoding)

    log = logging.getLogger("process")
    log.info(f"Saved to: {out}")
    log.debug(f"Used character encoding {encoding}")
|
||||
0
scripts/subby/subby/converters/__init__.py
Normal file
0
scripts/subby/subby/converters/__init__.py
Normal file
27
scripts/subby/subby/converters/base.py
Normal file
27
scripts/subby/subby/converters/base.py
Normal file
@ -0,0 +1,27 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import BinaryIO
|
||||
|
||||
from subby.subripfile import SubRipFile
|
||||
|
||||
|
||||
class BaseConverter(ABC):
    """Abstract base for subtitle converters.

    Subclasses implement :meth:`parse`; the ``from_*`` helpers wrap their
    input in a binary stream and hand it to ``parse``.
    """

    def from_file(self, file: Path) -> SubRipFile:
        """Open ``file`` in binary mode and convert its contents to srt"""
        with file.open(mode='rb') as handle:
            converted = self.parse(handle)
        return converted

    def from_string(self, data: str) -> SubRipFile:
        """Encode ``data`` as UTF-8 and convert it to srt"""
        buffer = BytesIO(data.encode('utf-8'))
        return self.parse(buffer)

    def from_bytes(self, data: bytes) -> SubRipFile:
        """Wrap raw ``data`` in an in-memory stream and convert it to srt"""
        buffer = BytesIO(data)
        return self.parse(buffer)

    @abstractmethod
    def parse(self, stream: BinaryIO) -> SubRipFile:
        """Convert the subtitle data read from ``stream`` to srt"""
|
||||
27
scripts/subby/subby/converters/bilibili_json.py
Normal file
27
scripts/subby/subby/converters/bilibili_json.py
Normal file
@ -0,0 +1,27 @@
|
||||
import datetime
|
||||
import json
|
||||
|
||||
from srt import Subtitle
|
||||
|
||||
from subby.converters.base import BaseConverter
|
||||
from subby.subripfile import SubRipFile
|
||||
|
||||
|
||||
class BilibiliJSONConverter(BaseConverter):
    """Bilibili JSON subtitle converter"""

    def parse(self, stream):
        """Build a SubRipFile from the entries under the JSON ``body`` key."""
        payload = json.load(stream)
        result = SubRipFile()

        for idx, entry in enumerate(payload['body']):
            location = entry['location']
            text = entry['content']
            # Location 2 is left untagged; any other value becomes an {\anN} prefix.
            if location != 2:
                text = ('{\\an%s}' % location) + text

            cue = Subtitle(
                index=idx,
                start=datetime.timedelta(seconds=entry['from']),
                end=datetime.timedelta(seconds=entry['to']),
                content=text
            )
            result.append(cue)

        return result
|
||||
111
scripts/subby/subby/converters/mp4.py
Normal file
111
scripts/subby/subby/converters/mp4.py
Normal file
@ -0,0 +1,111 @@
|
||||
from collections import deque
|
||||
|
||||
from pymp4.parser import MP4
|
||||
from pymp4.util import BoxUtil
|
||||
|
||||
from subby.converters.base import BaseConverter
|
||||
from subby.converters.smpte import SMPTEConverter
|
||||
from subby.converters.webvtt import WebVTTConverter
|
||||
from subby.subripfile import SubRipFile
|
||||
from subby.utils.time import timestamp_from_ms
|
||||
|
||||
|
||||
class ISMTConverter(BaseConverter):
    """ISMT (DFXP in MP4) subtitle converter"""

    def parse(self, stream):
        """Convert every ``mdat`` box with SMPTEConverter and join the results."""
        merged = SubRipFile([])
        mdat_boxes = (b for b in MP4.parse(stream.read()) if b.type == b'mdat')

        for mdat in mdat_boxes:
            fragment = SMPTEConverter().from_bytes(mdat.data)

            # Offset timecodes if necessary: a fragment starting before the
            # last cue collected so far is shifted by that cue's end time.
            # https://github.com/SubtitleEdit/subtitleedit/blob/abd36e5/src/libse/SubtitleFormats/IsmtDfxp.cs#L85-L90
            if merged and fragment and merged[-1].start > fragment[0].start:
                fragment.offset(merged[-1].end)

            merged.extend(fragment)

        return merged
|
||||
|
||||
|
||||
class WVTTConverter(BaseConverter):
    """WVTT (WebVTT in MP4) subtitle converter"""

    def parse(self, stream):  # pylint: disable=too-many-locals, too-many-branches
        """
        Rebuilds a WebVTT document from the MP4 boxes in ``stream`` and
        passes it to WebVTTConverter.

        ``moov`` supplies the timescale and the header text, ``moof`` supplies
        sample timings (queued in order) and ``mdat`` supplies the cue boxes,
        each consuming one queued timing.
        """
        sample_durations = deque()
        vtt_lines = []
        # NOTE(review): stays 0 when no mdhd box was seen before a moof box,
        # in which case the divisions below raise ZeroDivisionError.
        timescale = 0

        for box in MP4.parse(stream.read()):
            if box.type == b'moov':
                # Only the first mdhd box is used
                for mdhd in BoxUtil.find(box, b'mdhd'):
                    timescale = mdhd.timescale
                    break

                # Only the first stsd box is used; the config of its first
                # vttC child becomes the header of the rebuilt document
                for stsd in BoxUtil.find(box, b'stsd'):
                    wvtt = stsd.entries[0]
                    header = [box.config for box in wvtt.children
                              if box.type == b'vttC'][0]
                    vtt_lines.append(f'{header}\n\n')
                    break

            if box.type == b'moof':
                start_offset = 0
                duration = 0
                # Base time of this fragment (first tfdt box only)
                for tfdt in BoxUtil.find(box, b'tfdt'):
                    start_offset = tfdt.baseMediaDecodeTime
                    break

                for trun in BoxUtil.find(box, b'trun'):
                    for sample in trun.sample_info:
                        # Both values accumulate across the samples of the fragment
                        start_offset += sample.sample_composition_time_offsets or 0
                        duration += sample.sample_duration or 0
                        sample_durations.append({
                            'start_ms': (start_offset / timescale) * 1000,
                            'end_ms': ((start_offset + duration) / timescale) * 1000
                        })

            if box.type == b'mdat':
                vtt_boxes = MP4.parse(box.data)
                new_start = None
                for vtt_box in vtt_boxes:
                    settings = None
                    for sttg in BoxUtil.find(vtt_box, b'sttg'):
                        settings = sttg.settings
                        break

                    cue_text = None
                    for payl in BoxUtil.find(vtt_box, b'payl'):
                        cue_text = payl.cue_text
                        break

                    # Each box consumes the oldest queued timing
                    try:
                        sample_duration = sample_durations.popleft()
                    except IndexError:  # broken line, no durations found
                        continue

                    if vtt_box.type == b'vttc':
                        # Start where the previously handled box ended; on the
                        # very first box end_ms is not bound yet, which raises
                        # UnboundLocalError and is handled below
                        try:
                            start_ms = end_ms
                        except UnboundLocalError:
                            end_ms = sample_duration['end_ms']
                            start_ms = end_ms
                    else:
                        start_ms = sample_duration['start_ms']

                    end_ms = sample_duration['end_ms']

                    # A vtte box emits nothing; its end time is remembered as
                    # the start of the next emitted cue
                    if vtt_box.type == b'vtte':
                        new_start = end_ms
                        continue

                    if new_start:
                        start_ms = new_start
                        new_start = None

                    # settings and cue_text are interpolated as-is, so a
                    # missing value ends up as the text "None"
                    vtt_lines.append((f'{timestamp_from_ms(start_ms)} --> '
                                      f'{timestamp_from_ms(end_ms)} '
                                      f'{settings}\n{cue_text}\n\n'))

        return WebVTTConverter().from_string(''.join(vtt_lines))
|
||||
90
scripts/subby/subby/converters/sami.py
Normal file
90
scripts/subby/subby/converters/sami.py
Normal file
@ -0,0 +1,90 @@
|
||||
from html.parser import HTMLParser
|
||||
|
||||
from srt import Subtitle
|
||||
|
||||
from subby.converters.base import BaseConverter
|
||||
from subby.subripfile import SubRipFile
|
||||
from subby.utils.time import timedelta_from_ms
|
||||
|
||||
|
||||
class SAMIConverter(BaseConverter):
    """SAMI subtitle converter"""

    def parse(self, stream):
        """Decode the stream as UTF-8 (dropping a BOM) and convert it to srt."""
        markup = stream.read().decode('utf-8-sig')
        internal = _SAMIConverter(markup)
        return internal.srt
|
||||
|
||||
|
||||
# Internal converter class as we inherit from HTMLParser
|
||||
class _SAMIConverter(HTMLParser):
|
||||
def __init__(self, subtitle):
|
||||
super().__init__()
|
||||
self.lines = []
|
||||
self.tags = []
|
||||
|
||||
self.srt = SubRipFile([])
|
||||
self.line_list = []
|
||||
|
||||
self.feed(self._correct_tags(subtitle))
|
||||
self._convert()
|
||||
|
||||
def handle_starttag(self, tag, attrs_org):
|
||||
attrs = {}
|
||||
for attr, val in attrs_org:
|
||||
attrs[attr] = val
|
||||
|
||||
if tag == 'sync':
|
||||
data = {'text': ''}
|
||||
data.update(attrs)
|
||||
self.lines.append(data)
|
||||
|
||||
self.tags.append({'name': tag, 'attrs': attrs})
|
||||
|
||||
def handle_data(self, data):
|
||||
last_tag = self.tags[-1]['name']
|
||||
if last_tag == 'br':
|
||||
self.lines[-1]['text'] += '\n'
|
||||
return
|
||||
|
||||
if last_tag == 'i' and data.strip():
|
||||
self.lines[-1]['text'] += f'<i>{data}</i>'
|
||||
return
|
||||
|
||||
if last_tag != 'sync' and self.lines:
|
||||
self.lines[-1]['text'] += data
|
||||
|
||||
def _convert(self):
|
||||
for num, line in enumerate(self.lines):
|
||||
# Use empty lines as the end of previous line
|
||||
if not line.get('text', '').strip():
|
||||
end_time = float(line['start'])
|
||||
self.line_list[-1]['end'] = end_time
|
||||
continue
|
||||
|
||||
if not line.get('end'):
|
||||
# Arbitrarily set duration to 4s if end time not present
|
||||
line['end'] = float(line['start']) + 4000
|
||||
|
||||
srt_line = {
|
||||
'start': float(line['start']),
|
||||
'end': float(line['end']),
|
||||
'content': line['text'].strip()
|
||||
}
|
||||
self.line_list.append(srt_line)
|
||||
|
||||
for num, line in enumerate(self.line_list):
|
||||
srt_line = Subtitle(
|
||||
index=num,
|
||||
start=timedelta_from_ms(line['start']),
|
||||
end=timedelta_from_ms(line['end']),
|
||||
content=line['content']
|
||||
)
|
||||
self.srt.append(srt_line)
|
||||
|
||||
@staticmethod
|
||||
def _correct_tags(data):
|
||||
data = data.replace('<i/>', '<i>')
|
||||
data = data.replace(';>', '>')
|
||||
data = data.replace('<br>', '\n')
|
||||
data = data.replace('<br/>', '\n')
|
||||
data = data.replace('<br >', '\n')
|
||||
return data
|
||||
168
scripts/subby/subby/converters/smpte.py
Normal file
168
scripts/subby/subby/converters/smpte.py
Normal file
@ -0,0 +1,168 @@
|
||||
import html
|
||||
import logging
|
||||
import re
|
||||
|
||||
import bs4
|
||||
from srt import Subtitle
|
||||
|
||||
from subby.converters.base import BaseConverter
|
||||
from subby.subripfile import SubRipFile
|
||||
from subby.utils.time import timedelta_from_timestamp, timestamp_from_ms
|
||||
|
||||
|
||||
class SMPTEConverter(BaseConverter):
    """DFXP/TTML/TTML2 subtitle converter"""

    def parse(self, stream):
        """Convert one or several concatenated XML documents to srt."""
        data = stream.read().decode('utf-8-sig')

        # Exactly one closing tag: the data is handed over as a single document
        if data.count('</tt>') == 1:
            return _SMPTEConverter(data).srt

        # Support for multiple XML documents in a single file: split on the
        # closing tag and restore it on every non-empty piece
        documents = [piece + '</tt>' for piece in data.strip().split('</tt>') if piece]

        merged = SubRipFile([])
        for document in documents:
            converted = _SMPTEConverter(document)
            merged.extend(converted.srt)

        return merged
|
||||
|
||||
|
||||
# Internal converter class as we need to handle multiple subs in one stream
|
||||
class _SMPTEConverter:
|
||||
def __init__(self, data):
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.root = bs4.BeautifulSoup(data, 'lxml-xml')
|
||||
# Unescape only if necessary (parsing fails)
|
||||
if not self.root:
|
||||
self.root = bs4.BeautifulSoup(html.unescape(data), 'lxml-xml')
|
||||
|
||||
self.srt = SubRipFile([])
|
||||
|
||||
self.tickrate = int(self.root.tt.get('ttp:tickRate', 0))
|
||||
self.frame_duration = 1
|
||||
if (rate := self.root.tt.get('ttp:frameRate')) is not None:
|
||||
num, denom = map(int, self.root.tt.get('ttp:frameRateMultiplier', '1 1').split())
|
||||
framerate = (int(rate) * num) / denom
|
||||
self.frame_duration = (1 / framerate) * 1000 # ms
|
||||
|
||||
self.italics = {}
|
||||
self.an8 = {}
|
||||
self.all_span_italics = '<span tts:fontStyle="italic">' not in data
|
||||
|
||||
self._parse_styles()
|
||||
self._convert()
|
||||
|
||||
def _convert(self):
|
||||
try:
|
||||
assert self.root.tt.body.div is not None
|
||||
except (AttributeError, AssertionError):
|
||||
return
|
||||
|
||||
for num, line in enumerate(self.root.tt.body.div.find_all('p'), 1):
|
||||
line_text = ''
|
||||
|
||||
try:
|
||||
for time in ('begin', 'end'):
|
||||
if line[time].endswith('t'):
|
||||
line[time] = self._convert_ticks(line[time])
|
||||
elif line[time].endswith('ms'):
|
||||
line[time] = timestamp_from_ms(line[time][:-2])
|
||||
else:
|
||||
line[time] = self._parse_timestamp(line[time])
|
||||
except (AttributeError, KeyError):
|
||||
self.logger.warning(
|
||||
'Could not parse %s timestamp for line %02d, skipping',
|
||||
time, num
|
||||
)
|
||||
continue
|
||||
|
||||
srt_line = Subtitle(
|
||||
index=num,
|
||||
start=timedelta_from_timestamp(line['begin']),
|
||||
end=timedelta_from_timestamp(line['end']),
|
||||
content=''
|
||||
)
|
||||
|
||||
for element in line:
|
||||
line_text += self._parse_element(element)
|
||||
|
||||
if self._is_italic(line) and line_text.strip():
|
||||
line_text = line_text.replace('<i>', '')
|
||||
line_text = line_text.replace('</i>', '')
|
||||
line_text = '<i>%s</i>' % line_text.strip()
|
||||
|
||||
if self._is_an8(line) and line_text.strip():
|
||||
line_text = '{\\an8}%s' % line_text.strip()
|
||||
|
||||
srt_line.content = line_text.strip().strip('\n')
|
||||
if srt_line.content:
|
||||
self.srt.append(srt_line)
|
||||
|
||||
def _parse_styles(self):
|
||||
for style in self.root.find_all('style'):
|
||||
if style.get('xml:id'):
|
||||
self.italics[style['xml:id']] = self._is_italic(style)
|
||||
for region in self.root.find_all('region'):
|
||||
if region.get('xml:id'):
|
||||
self.an8[region['xml:id']] = self._is_an8(region)
|
||||
|
||||
def _parse_element(self, element):
|
||||
element_text = ''
|
||||
if isinstance(element, bs4.element.NavigableString):
|
||||
element_text += element
|
||||
elif isinstance(element, bs4.element.Tag):
|
||||
subelement_text = ''
|
||||
for subelement in element:
|
||||
subelement_text += self._parse_element(subelement)
|
||||
element_text += subelement_text
|
||||
if element.name == 'br':
|
||||
element_text += '\n'
|
||||
|
||||
if self._is_italic(element) and element_text.strip():
|
||||
element_text = element_text.replace('<i>', '')
|
||||
element_text = element_text.replace('</i>', '')
|
||||
element_text = '<i>%s</i>' % element_text
|
||||
|
||||
if self._is_an8(element) and element_text.strip():
|
||||
element_text = '{\\an8}%s' % element_text
|
||||
|
||||
return element_text
|
||||
|
||||
def _is_italic(self, element):
|
||||
if element.get('tts:fontStyle'):
|
||||
return element.get('tts:fontStyle') == 'italic'
|
||||
elif element.get('style'):
|
||||
return self.italics.get(element['style'])
|
||||
elif element.name == 'span' and not element.attrs and self.all_span_italics:
|
||||
return not self._is_italic(element.parent)
|
||||
|
||||
return False
|
||||
|
||||
def _is_an8(self, element):
|
||||
if element.get('tts:displayAlign'):
|
||||
return element.get('tts:displayAlign') == 'before'
|
||||
elif element.get('region'):
|
||||
return self.an8.get(element['region'])
|
||||
|
||||
return False
|
||||
|
||||
def _convert_ticks(self, ticks):
|
||||
ticks = int(ticks[:-1])
|
||||
offset = 1.0 / self.tickrate
|
||||
seconds = (offset * ticks) * 1000
|
||||
|
||||
return timestamp_from_ms(seconds)
|
||||
|
||||
def _parse_timestamp(self, timestamp):
|
||||
regex = r'([0-9]{2}):([0-9]{2}):([0-9]{2})[:\.,]?([0-9]{0,3})?'
|
||||
parsed = re.search(regex, timestamp)
|
||||
hours = int(parsed.group(1))
|
||||
minutes = int(parsed.group(2))
|
||||
seconds = int(parsed.group(3))
|
||||
miliseconds = 0
|
||||
if frames := parsed.group(4):
|
||||
miliseconds = self.frame_duration * int(frames)
|
||||
|
||||
return "%02d:%02d:%02d.%03d" % (hours, minutes, seconds, miliseconds)
|
||||
162
scripts/subby/subby/converters/webvtt.py
Normal file
162
scripts/subby/subby/converters/webvtt.py
Normal file
@ -0,0 +1,162 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
import tinycss
|
||||
from srt import Subtitle
|
||||
|
||||
from subby.converters.base import BaseConverter
|
||||
from subby.subripfile import SubRipFile
|
||||
from subby.utils.time import timedelta_from_timestamp
|
||||
|
||||
# Any opening or closing tag, except those whose name starts with "i"
# (negative lookahead), so italics tags are not matched
HTML_TAG = re.compile(r'</?(?!/?i)[^>\s]+>')
# "<c.NAME>" at the start of the text followed by text without a "<"
STYLE_TAG_OPEN = re.compile(r'^<c.([a-zA-Z0-9]+)>([^<]+)')
# Complete "<c.NAME>text</c>" span: group 1 is the name, group 2 the text
STYLE_TAG = re.compile(r'<c.([a-zA-Z0-9]+)>([^<]+)<\/c>')
# "</c>" at the very end of the text
STYLE_TAG_CLOSE = re.compile(r'<\/c>$')
# Prefixes of lines that are not processed at all
SKIP_WORDS = ('WEBVTT', 'NOTE', '/*', 'X-TIMESTAMP-MAP')
SPEAKER_TAG = re.compile(r'<v\s+[^>]+>')  # Matches opening <v Name> tags, closing tags handled by STYLE_TAG_CLOSE
|
||||
|
||||
|
||||
class WebVTTConverter(BaseConverter):
    """WebVTT subtitle converter"""

    def parse(self, stream):
        """
        Reads WebVTT line by line from a binary stream and converts it to srt

        STYLE blocks are collected and parsed with tinycss so that
        ``<c.NAME>`` spans whose style is italic can be turned into ``<i>``
        tags; all other non-italic tags are stripped from the cue text.
        """
        srt = SubRipFile()
        looking_for_text = False
        looking_for_style = False
        text = []
        position = None
        line_number = 1
        styles = {}
        current_style = []

        css_parser = tinycss.make_parser('page3')

        for line in stream:
            # As our stream is bytes we have to deal with line breaks here
            line = line.decode('utf-8').replace('\r\n', '\n').replace('\r', '\n').strip()

            # Skip processing any unnecessary lines
            if any(line.startswith(word) for word in SKIP_WORDS):
                continue

            # Empty line separates cues
            if line == '':
                # Parse current style
                if looking_for_style:
                    stylesheet = css_parser.parse_stylesheet('\n'.join(current_style))
                    for rule in stylesheet.rules:
                        # The class name is the identifier inside the
                        # selector's function token
                        ft = next((e for e in rule.selector if e.type == 'FUNCTION'), None)
                        if not ft:
                            continue
                        name = next((t for t in ft.content if t.type == 'IDENT'), None)
                        if not name:
                            continue
                        styles[name.value] = {}
                        for dec in rule.declarations:
                            styles[name.value][dec.name] = dec.value.as_css()

                    looking_for_style = False

                # Keep looking for text if last line has none
                # this will only happen if there's an unexpected line break
                if not text:
                    continue

                srt[-1].content = '\n'.join(text)
                text = []
                looking_for_text = False

            # Check for style start. Only outside of cue text and only at the
            # start of the line, otherwise cue text that merely contains
            # "STYLE" would switch to style mode and swallow the next lines.
            elif not looking_for_text and line.startswith('STYLE'):
                looking_for_style = True

            # Check for style content
            elif looking_for_style:
                current_style.append(line)

            # Check for time line
            elif ' --> ' in line:
                parts = line.strip().split()
                position = self._get_position([p for p in parts[3:] if ':' in p])

                start, _, end, *_ = parts
                # Fix short timecodes (no hour)
                if start.count(':') == 1:
                    start = f'00:{start}'
                if end.count(':') == 1:
                    end = f'00:{end}'

                srt.append(Subtitle(
                    index=line_number,
                    start=timedelta_from_timestamp(start),
                    end=timedelta_from_timestamp(end),
                    content=''
                ))
                looking_for_text = True
                line_number += 1

            # Append text if we're inside a line
            elif looking_for_text:
                # Unescape html entities
                line = html.unescape(line)

                # Remove speaker tags here
                line = re.sub(SPEAKER_TAG, '', line)

                # Set \an8 tag if position is below 25
                # (value taken from SubtitleEdit)
                if position is not None and position < 25:
                    line = '{\\an8}' + line
                    position = None

                text.append(line.strip())

        # Add any leftover text to the last line
        if text:
            srt[-1].content += '\n'.join(text)

        for line in srt:
            # Replace styles with italics tag when appropriate
            # (replace instead of match, to handle nested)
            line.content = re.sub(
                STYLE_TAG,
                partial(self._replace_italics, styles=styles),
                line.content
            )

            # Strip non-italic tags
            line.content = re.sub(HTML_TAG, '', line.content)

        return srt

    @staticmethod
    def _get_position(cue_settings: list[str]) -> Optional[float]:
        """
        Parses list of cue settings and extracts position offset as a float
        Line number based offset and alignment strings are ignored

        https://www.w3.org/TR/webvtt1/#webvtt-line-cue-setting
        """
        if not cue_settings or cue_settings == ['None']:
            return None

        for setting in cue_settings:
            # partition() tolerates values that contain further colons
            key, _, val = setting.partition(':')
            if key != 'line' or not val:
                continue

            # Drop the optional alignment part ("line:10%,start")
            val = val.split(',')[0]
            if val.endswith('%'):
                return float(val[:-1])
            if val == '0':
                return 0

        return None

    @staticmethod
    def _replace_italics(match: re.Match, styles: dict[str, dict[str, str]]) -> str:
        """Turn a matched ``<c.NAME>`` span into ``<i>`` if its style is italic."""
        if (s := styles.get(match[1])) and s.get('font-style') == 'italic':
            return f'<i>{match[2]}</i>'
        return match[0]
|
||||
0
scripts/subby/subby/processors/__init__.py
Normal file
0
scripts/subby/subby/processors/__init__.py
Normal file
30
scripts/subby/subby/processors/base.py
Normal file
30
scripts/subby/subby/processors/base.py
Normal file
@ -0,0 +1,30 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
from subby.subripfile import SubRipFile
|
||||
|
||||
|
||||
class BaseProcessor(ABC):
    """Abstract base for subtitle processors.

    Subclasses implement :meth:`process`; the ``from_*`` helpers load the
    subtitle from other sources and delegate to it.
    """

    def from_srt(self, srt: SubRipFile, language: str | None = None) -> tuple[SubRipFile, bool]:
        """Process an already loaded SubRipFile"""
        return self.process(srt, language)

    def from_file(self, file: Path, language: str | None = None) -> tuple[SubRipFile, bool]:
        """Read ``file`` as UTF-8 text and process it as srt"""
        contents = file.read_text(encoding='utf-8')
        return self.from_string(contents, language)

    def from_string(self, data: str, language: str | None = None) -> tuple[SubRipFile, bool]:
        """Parse ``data`` as srt subtitles and process the result"""
        parsed = SubRipFile.from_string(data)
        return self.process(parsed, language)

    @abstractmethod
    def process(self, srt: SubRipFile, language: str | None = None) -> tuple[SubRipFile, bool]:
        """
        Processes given SubRipFile
        :return: Processed SubRipFile, success (whether any changes were made)
        """
|
||||
278
scripts/subby/subby/processors/common_issues.py
Normal file
278
scripts/subby/subby/processors/common_issues.py
Normal file
@ -0,0 +1,278 @@
|
||||
import copy
|
||||
import datetime
|
||||
import html
|
||||
import re
|
||||
import unicodedata
|
||||
from datetime import timedelta
|
||||
|
||||
import langcodes
|
||||
|
||||
from subby import regex as Regex
|
||||
from subby.processors.base import BaseProcessor
|
||||
from subby.processors.rtl import RTL_LANGUAGES, RTLFixer
|
||||
from subby.subripfile import SubRipFile
|
||||
from subby.utils.time import line_duration
|
||||
|
||||
|
||||
class CommonIssuesFixer(BaseProcessor):
    """Processor fixing common issues found in subtitles"""

    # When true, _remove_gaps runs after lines are combined, and
    # _combine_timecodes additionally merges near-adjacent lines whose text
    # continues the previous one.
    remove_gaps = True

    def process(self, srt, language=None):
        """
        Fixes timecodes and text of given SubRipFile

        Works on a deep copy, so the input is left untouched. The RTL fixer
        is run additionally when the primary language subtag of ``language``
        is listed in RTL_LANGUAGES.

        :return: Processed SubRipFile, success (whether any changes were made)
        """
        fixed = self._fix_time_codes(copy.deepcopy(srt))
        corrected = self._correct_subtitles(fixed)

        if language and langcodes.get(language).language in RTL_LANGUAGES:
            corrected, _ = RTLFixer().process(corrected, language=language)

        return corrected, corrected != srt

    def _correct_subtitles(self, srt: SubRipFile) -> SubRipFile:
        """
        Cleans up the text of every line (in place), then combines lines

        :return: Combined SubRipFile, with short gaps removed if remove_gaps is set
        """
        def _fix_line(line):
            # [GENERAL] - Affects other regexes
            # Remove more than one space
            line = re.sub(r' {2,}', ' ', line)
            # Correct lines starting with space
            line = re.sub(r'^\s*', '', line)
            line = re.sub(r'\n\s*', '\n', line)
            #
            # [ENCODING FIXES, CHARACTER REPLACEMENTS]
            # Fix musical notes garbled by encoding
            # has to happen before normalization as that replaces the TM char
            # (the search string is the UTF-8 bytes of the note read as cp1252;
            # written with escapes so it cannot be "repaired" into the note itself,
            # which would make this replacement a no-op)
            line = line.replace('\u00e2\u2122\u00aa', '♪')
            # Normalize unicode characters
            line = unicodedata.normalize('NFKC', line)
            # Replace short hyphen with regular size
            line = line.replace(r'‐', r'-')
            # Replace double note with single note
            line = line.replace(r'♫', r'♪')
            # Replace hashes, asterisks at the start of a line with a musical note
            line = re.sub(
                r'^((?:{\\an8})?(?:<i>)?)(- ?)?[#\*]{1,}(?=\s+)',
                r'\1\2♪',
                line,
                flags=re.M
            )
            # Replace hashes, asterisks at the end of a line with a musical note
            line = re.sub(
                r'(?<=\s)(?<![#\*])(?:[#\*]{1,3}|[#\*]{1,3})(?![0-9A-Z])(</i>$|$)',
                r'♪\1',
                line,
                flags=re.M
            )
            line = re.sub(r'^[#\*]+$', r'♪', line, flags=re.M)
            # Move notes into italics, if rest of the line is
            line = re.sub(r'♪ <i>(.*)', r'<i>♪ \1', line)
            line = re.sub(r'(♪.*)</i>\s*♪', r'\1 ♪</i>', line)
            # Replace some pound signs with notes (Binge...)
            # (Matches only start/end of a line with a space
            # to avoid false positives)
            line = re.sub(r'^£ ', r'♪ ', line)
            line = re.sub(r' £$', r' ♪', line)
            # Duplicated notes
            line = re.sub(r'♪{1,}', r'♪', line)
            # Add spaces between notes and text
            line = re.sub(r'^♪([A-Za-z])', r'♪ \1', line)
            line = re.sub(r'([A-Za-z])♪', r'\1 ♪', line)
            # Replace \h (non-breaking space in ASS) with a regular space
            # (result of ffmpeg extraction of mp4-embedded subtitles)
            line = re.sub(r'(\\h)+', ' ', line).strip()
            # Fix leftover amps (html unescape fixes those, but not when they're duped)
            line = re.sub(r'&(amp;){1,}', r'&', line)
            # Fix "it'`s" -> "it's"
            line = re.sub(r"'[`’]", r"'", line)

            # [TAG STRIPPING AND CORRECTING]
            #
            # Replace ASS positioning tags with top only
            line = re.sub(r'(\{\\an[0-9]\}){1,}', r'{\\an8}', line)
            # Remove space after ASS positioning tags
            line = re.sub(r'(\{\\an[0-9]\}) +(?=[A-Za-z-])', r'{\\an8}', line)
            # Fix hanging tags
            line = re.sub(r'^(<[a-z]>)\n', r'\1', line)
            line = re.sub(r'</([a-z])>$\n<([a-z])>', r'\n', line, flags=re.M)
            # Remove duplicated tags
            line = re.sub(r'(<[a-z]>){1,}', r'\1', line)
            line = re.sub(r'(</[a-z]>){1,}', r'\1', line)
            # Remove an unnecessary space after italic tag open
            line = re.sub(r'^(<[a-z]>) {1,}', r'\1', line)
            line = re.sub(r'^ {1,}', '', line)
            # Remove non-italic tags
            line = re.sub(r'</?(?!i>)[a-z]+>', '', line)
            # Remove spaces between tags
            line = re.sub(r'(<[a-z]>|\{\\an8\}) (<[a-z]>|\{\\an8\})', r'\1\2', line)
            # Move hanging opening tags onto separate lines
            line = re.sub(r'(<[a-z]>)\n', r'\n\1', line)
            # Move hanging closing tags onto separate lines
            line = re.sub(r'\n(</[a-z]>)', r'\1\n', line)
            # Move spaces outside italic tags
            line = re.sub(r'(<[a-z]>) ', r' \1', line)
            line = re.sub(r' (</[a-z]>)', r'\1 ', line)
            # Remove needless spaces inside italic tags
            line = re.sub(r'^(<[a-z]>) ', r'\1', line)
            # Fix "</tag>space<tag>"
            line = re.sub(r'(?:</[a-z]>)(\s*)(?:<[a-z]>)', r'\1', line, flags=re.M)
            # Remove empty tags
            line = re.sub(r'<[a-z]>\s*</[a-z]>', r'', line)
            # Move "{\an8}" to the rest of the text if it's on a new line
            line = re.sub(r'({\\an8\})\n', r'\1', line)

            # [REFORMATTING]
            #
            # Remove spaces inside brackets ("( TEXT )" -> "(TEXT)")
            line = re.sub(r'\( (.*) \)', r'(\1)', line)
            # Remove ">> " before text
            line = re.sub(r'(^|\n)(</?[a-z]>|\{\\an8\})?>> ', r'\1\2', line)
            # Remove lines consisting only of ">>"
            line = re.sub(r'(^|\n)(</?[a-z]>|\{\\an8\})?>>($|\n)', r'', line)
            # Replace any leftover <br> tags with a proper line break
            line = re.sub(r'<br ?\/?>', '\n', line)
            # Remove empty lines
            line = re.sub(r'^\.?\s*$', '', line, flags=re.M)
            line = re.sub(r'^-?\s*$', '', line, flags=re.M)
            line = re.sub(r'^(</?i>|\{\\an8\})?\s*$', '', line, flags=re.M)
            # Remove lines consisting only of a single character or digit
            # (the opening bracket must not be escaped, otherwise the pattern
            # only matches the literal text "[A-Za-z0-9]"; no re.M here, so
            # this only fires when the whole content is that one character)
            line = re.sub(r'^[A-Za-z0-9]$', '', line)
            # Adds missing spaces after "...", commas, and tags
            line = re.sub(r'([a-z])(\.\.\.)([a-zA-Z][^.])', r'\1\2 \3', line)
            line = re.sub(r'(</[a-z]>)(\w)', r'\1 \2', line)
            line = re.sub(r'([a-z]),([a-zA-Z])', r'\1, \2', line)
            line = re.sub(r',\n([a-z]+[\.\?])\s*$', r', \1', line)
            # Correct front and end ellipses
            line = re.sub(
                rf'({Regex.FRONT_OPTIONAL_TAGS_WITH_HYPHEN})' r'\.{1,}',
                r'\1...',
                line, flags=re.M
            )
            line = re.sub(r'\.{2,}' rf'({Regex.TAGS})?' r'\s*$', r'...\1', line, flags=re.M)
            # Add space after frontal speaker hyphen
            line = re.sub(r"^(<i>|\{\\an8\})?-+(?='?[\w\"\[\(\<\{\.\$♪¿¡])", r'\1- ', line, flags=re.M)
            # Remove unnecessary space before "--"
            line = re.sub(r'\s*--(\s*)', r'--\1', line, flags=re.M)
            # Move notes inside tags (</i> ♪ -> ♪</i>)
            line = re.sub(r'(</[a-z]>)(\s*♪{1,})$', r'\2\1', line, flags=re.M)
            # Remove trailing spaces
            line = re.sub(r' +$', r'', line, flags=re.M).strip()

            # [LINE SPLITS AND LINE BREAKS]
            #
            # Adds missing line splits (primarily present in Amazon subtitles)
            line = re.sub(r'(.*)([^.][\]\)])([A-Z][^.])', r'\1\2\n\3', line)
            line = re.sub(
                r'(.*)([^\.\sA-Z][!\.;:?])(?<!(?:Mr|Ms)\.)(?<!Mrs\.)([A-Z][^.])',
                r'- \1\2\n- \3',
                line
            )
            # Fix weird linebreaks (caused by stripping SDH or not)
            line = re.sub(r'(^<[a-z]>|\n<[a-z]>)(\w+)\n', r'\1\2 ', line)
            # Add missing hyphens
            line = re.sub(r'^\s*(?!-)(.*)\n- ([A-Z][a-z]+)$', r'- \1\n- \2', line)
            # Remove linebreaks inside lines
            line = re.sub(r'\r\n{1,}', r'\r\n', line).strip()
            line = re.sub(r'\n{1,}', r'\n', line).strip()
            # Remove duplicate spaces around italics
            line = re.sub(r' +</i> +', r'</i> ', line).strip()
            # Remove italics from hyphen, when content immediately following is not italics
            line = re.sub(r'<i>-</i>([^<]+)', r'-\1', line).strip()

            return line

        for line in srt:
            # Unescape html entities (twice, because yes, double encoding happens...)
            for _ in range(2):
                line.content = html.unescape(line.content)

            # Run fix_line twice, as some of the fixes can introduce issues, e.g. double spaces
            for _ in range(2):
                line.content = _fix_line(line.content)
                line.content = line.content.strip()

            # Remove remaining linebreaks
            line.content = line.content.strip('\n')

        # Remove italics if every line is italicized, as this is almost certainly a mistake
        # (using slices should be more performant than regex or startswith/endswith)
        if len(srt) > 10 \
                and all(line.content[:3] == '<i>' and line.content[-4:] == '</i>' for line in srt):
            for line in srt:
                line.content = line.content[3:-4]

        combined = self._combine_timecodes(srt)
        if self.remove_gaps:
            return self._remove_gaps(combined)

        return combined

    def _combine_timecodes(self, srt: SubRipFile) -> SubRipFile:
        """
        Combines lines with same timecodes and/or same content

        Falls back to the input when nothing was collected (empty input).

        :return: Reindexed SubRipFile holding the combined lines
        """
        subs_copy = SubRipFile([])
        for line in srt:
            if len(subs_copy) == 0:
                subs_copy.append(line)
                continue
            if line_duration(subs_copy[-1]) == line_duration(line) \
                    and subs_copy[-1].start == line.start \
                    and subs_copy[-1].end == line.end:
                # Same timing: join differing text, silently drop exact duplicates
                if subs_copy[-1].content != line.content:
                    subs_copy[-1].content += '\n' + line.content
            # Merge lines with the same text within 10 ms
            elif self._subtract_ts(line.start, subs_copy[-1].end) < 10 \
                    and line.content == subs_copy[-1].content:
                subs_copy[-1].end = line.end
            # Merge lines with less than 2 frames of gap and same text
            # to avoid duplicating lines as we remove gaps later
            elif 0 < self._subtract_ts(line.start, subs_copy[-1].end) <= 85 \
                    and line.content.startswith(subs_copy[-1].content) \
                    and self.remove_gaps:
                subs_copy[-1].end = line.end
                subs_copy[-1].content = line.content
            # Fix overlapping times
            elif self._subtract_ts(line.start, subs_copy[-1].end) == 0:
                subs_copy[-1].end -= timedelta(milliseconds=1)
                subs_copy.append(line)
            elif line.content.strip():
                subs_copy.append(line)

        subs_copy = subs_copy or srt
        subs_copy.clean_indexes()
        return subs_copy

    def _remove_gaps(self, srt: SubRipFile) -> SubRipFile:
        """
        Remove short gaps between lines

        Falls back to the input when nothing was collected (empty input).

        :return: Reindexed SubRipFile with gaps of 2-85 ms closed
        """
        subs_copy = SubRipFile([])
        for line in srt:
            if len(subs_copy) == 0:
                subs_copy.append(line)
                continue
            # Remove 2-frame or smaller gaps (2 frames/83ms@24 is Netflix standard)
            elif 1 < self._subtract_ts(line.start, subs_copy[-1].end) <= 85:
                # Pull the new line back to the previous end, then shorten the
                # previous line by 1 ms so the two do not touch
                line.start = subs_copy[-1].end
                subs_copy[-1].end -= timedelta(milliseconds=1)
                subs_copy.append(line)
            elif line.content.strip():
                subs_copy.append(line)

        subs_copy = subs_copy or srt
        subs_copy.clean_indexes()
        return subs_copy

    @staticmethod
    def _fix_time_codes(srt: SubRipFile) -> SubRipFile:
        """
        Fixes timecodes over 23:59, often present in live content

        The whole-hour part of the first start time above 23 hours becomes
        the offset subtracted from that line and every line after it.
        Lines are modified in place.

        :return: The same SubRipFile that was passed in
        """
        offset = 0
        for line in srt:
            hours, _ = divmod(line.start.seconds, 3600)
            hours += line.start.days * 24

            if not offset and hours > 23:
                offset = hours
            if offset:
                line.start -= datetime.timedelta(hours=offset)
                line.end -= datetime.timedelta(hours=offset)
        return srt

    @staticmethod
    def _subtract_ts(ts1: datetime.timedelta, ts2: datetime.timedelta) -> int:
        """Subtracts two timestamps and returns the difference as int of milliseconds"""
        return round((ts1 - ts2).total_seconds() * 1000)
|
||||
34
scripts/subby/subby/processors/rtl.py
Normal file
34
scripts/subby/subby/processors/rtl.py
Normal file
@ -0,0 +1,34 @@
|
||||
import logging
|
||||
|
||||
import langcodes
|
||||
|
||||
from subby.processors.base import BaseProcessor
|
||||
|
||||
# Language subtags handled as right-to-left
RTL_LANGUAGES = ('ar', 'fa', 'he', 'ps', 'syc', 'ug', 'ur')
# Unicode directional control characters, in order:
# LRM, RLM, LRE, RLE, PDF, LRO, RLO
RTL_CONTROL_CHARS = ('\u200e', '\u200f', '\u202a', '\u202b', '\u202c', '\u202d', '\u202e')
# U+202B RIGHT-TO-LEFT EMBEDDING (RLE)
RTL_CHAR = '\u202b'
|
||||
|
||||
|
||||
class RTLFixer(BaseProcessor):
    """Processor fixing right-to-left language tagging"""

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def process(self, srt, language=None):
        """
        Re-tags every line of given subtitles as right-to-left

        Works on a deep copy: correcting the input in place and then
        comparing it with itself would always report "no changes" and
        would modify the caller's object as a side effect.

        A warning is logged when ``language`` is given and its primary
        language subtag is not listed in RTL_LANGUAGES; processing still runs.

        :return: Processed copy, success (whether any changes were made)
        """
        # Imported locally, as this module does not import copy at the top
        import copy

        if language and langcodes.get(language).language not in RTL_LANGUAGES:
            self.logger.warning('RTL tagger running on an unexpected language (%s)', language)

        corrected = self._correct_subtitles(copy.deepcopy(srt))
        return corrected, corrected != srt

    def _correct_subtitles(self, srt):
        """
        Strips directional control characters and prefixes lines with RTL_CHAR

        Lines are modified in place.

        :return: The same object that was passed in
        """
        for line in srt:
            # Remove previous RTL-related formatting
            for char in RTL_CONTROL_CHARS:
                line.content = line.content.replace(char, '')

            # Add RTL_CHAR at the start of the content and after every line break
            line.content = RTL_CHAR + line.content.replace("\n", f"\n{RTL_CHAR}")

        return srt
|
||||
109
scripts/subby/subby/processors/sdh.py
Normal file
109
scripts/subby/subby/processors/sdh.py
Normal file
@ -0,0 +1,109 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import re
|
||||
|
||||
from subby import regex as Regex
|
||||
from subby.processors.base import BaseProcessor
|
||||
from subby.subripfile import SubRipFile
|
||||
|
||||
|
||||
class SDHStripper(BaseProcessor):
    """Processor removing hard-of-hearing descriptions from subtitles"""

    def __init__(self, extra_regexes: list[str] | None = None):
        """
        :param extra_regexes: Optional user patterns, compiled with re.MULTILINE
        """
        patterns = extra_regexes or []
        self.extra_regexes = [re.compile(pattern, re.MULTILINE) for pattern in patterns]

    def process(self, srt, language=None):
        """
        Strips SDH content from a deep copy of given subtitles

        The cleaning steps are generators chained one after another; lines
        left with no content at the end are dropped.

        :return: Processed SubRipFile, success (whether any changes were made)
        """
        stages = (
            self._clean_full_line_descriptions,
            self._clean_new_line_descriptions,
            self._clean_inline_descriptions,
            self._clean_speaker_names,
            self._strip_notes,
            self._remove_extra_hyphens,
            self._run_extra_regexes,
        )
        lines = list(copy.deepcopy(srt))
        for stage in stages:
            lines = stage(lines)

        result = SubRipFile([entry for entry in lines if entry.content])
        result.clean_indexes()

        return result, result != srt

    def _clean_full_line_descriptions(self, srt):
        """Drops lines whose text, tags aside, is nothing but a description"""
        patterns = (Regex.FULL_LINE_DESCIRPTION_BRACKET, Regex.FULL_LINE_DESCIRPTION_PARENTHESES)
        for line in srt:
            remainder = self._strip_tags(line.content)
            for pattern in patterns:
                remainder = re.sub(pattern, r'', remainder, flags=re.S).strip()

            # Only lines with something left besides the description are kept
            if remainder:
                yield line

    def _clean_new_line_descriptions(self, srt):
        """Removes descriptions taking up an entire line break"""
        patterns = (Regex.NEW_LINE_DESCRIPTION_BRACKET, Regex.NEW_LINE_DESCRIPTION_PARENTHESES)
        for line in srt:
            # Remember a leading position tag before anything is removed
            position = re.match(Regex.POSITION_TAGS, line.content.strip())
            for pattern in patterns:
                line.content = re.sub(pattern, r'', line.content, flags=re.M).strip()

            # Restore position, if it has been removed with the description
            if position is not None and position.group(0) not in line.content:
                line.content = position.group(0) + line.content

            yield line

    def _clean_inline_descriptions(self, srt):
        """Removes descriptions at the front, at the end and inside of lines"""
        trailing_and_inline = (
            Regex.END_DESCRIPTION_BRACKET,
            Regex.END_DESCRIPTION_PARENTHESES,
            Regex.INLINE_DESCRIPTION,
        )
        for line in srt:
            text = line.content
            # Group 10 is the front tags/hyphen group of the bracket pattern
            # (the optional speaker alternatives before it take groups 1-9)
            text = re.sub(Regex.FRONT_DESCRIPTION_BRACKET, r'\10', text, flags=re.M)
            text = re.sub(Regex.FRONT_DESCRIPTION_PARENTHESES, r'\1', text, flags=re.M)
            for pattern in trailing_and_inline:
                text = re.sub(pattern, r'', text, flags=re.M)
            line.content = text.strip()
            yield line

    def _clean_speaker_names(self, srt):
        """Removes speaker names"""
        for line in srt:
            # Retain frontal tags/hyphens
            for pattern in (Regex.SPEAKER_PARENTHESES, Regex.SPEAKER):
                line.content = re.sub(pattern, r'\2\3', line.content, flags=re.M).strip()
            yield line

    def _strip_notes(self, srt):
        """Removes lines with just musical notes"""
        for line in srt:
            untagged = self._strip_tags(line.content).strip()
            compact = re.sub(r'\s*', r'', untagged)
            if not re.match(r'^♪+$', compact):
                yield line

    def _run_extra_regexes(self, srt):
        """Runs extra regexes provided by user"""
        for line in srt:
            for pattern in self.extra_regexes:
                line.content = re.sub(pattern, r'', line.content)
            yield line

    def _remove_extra_hyphens(self, srt):
        """Remove speaker hyphens if there's only one line"""
        hyphen = r'^(<i>|\{\\an8\})?-\s*'
        for line in srt:
            hyphen_count = len(re.findall(hyphen, line.content, flags=re.M))
            if hyphen_count == 1:
                line.content = re.sub(hyphen, r'\1', line.content.strip())

            yield line

    @staticmethod
    def _strip_tags(text: str) -> str:
        """Returns text with everything matching Regex.TAGS removed"""
        return re.sub(Regex.TAGS, r'', text)
|
||||
22
scripts/subby/subby/regex.py
Normal file
22
scripts/subby/subby/regex.py
Normal file
@ -0,0 +1,22 @@
|
||||
# Single formatting tag in angle or curly brackets, e.g. <i>, </i>, {\an8}
TAGS = r'[<{][/\\]?[a-z0-9.]+[}>]'
# Position tag ({\anN}) at the very start of the text
POSITION_TAGS = r'^{\\an[0-9]}'
# Optional tag, optional hyphen, optional tag at the start of a line
# (contains three capturing groups: tag, hyphen, tag)
FRONT_OPTIONAL_TAGS_WITH_HYPHEN = rf'^\s*({TAGS})?\s*(-)?\s*({TAGS})?\s*'
# Rejects a colon directly followed by two digits
TIME_LOOKAHEAD = r'(?![0-9]{2})'

SPEAKER = rf'({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})\s*(Mc[A-Z][a-zA-Z]+|[A-Z0-9\&\[\]\.#\' ]+\s*|[A-Z][a-z]+):{TIME_LOOKAHEAD} ?'
SPEAKER_PARENTHESES = rf'({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})\s*(?:[A-Z0-9\&\[\]\.#\' ]+\s*|[A-Z][a-z]+)(?: \([a-zA-Z ]+\)): ?'

FRONT_NOTES = r'(?:♪+\s+)'
BACK_NOTES = r'(?:\s+♪+)'

DESCRIPTION_BRACKET = r'\[(?:[^\]]|\s)*\]'
DESCRIPTION_PARENTHESES = r'\((?:[^\)]|\s)*\)'
FULL_LINE_DESCIRPTION_BRACKET = rf'^-?\s*{FRONT_NOTES}?\[[^\]]+\]{BACK_NOTES}?$'
NEW_LINE_DESCRIPTION_BRACKET = rf'^(?:{TAGS})?-?\s*{FRONT_NOTES}?{DESCRIPTION_BRACKET}(?:{TAGS})?{BACK_NOTES}?$'
FRONT_DESCRIPTION_BRACKET = rf'^(?:{SPEAKER}|{SPEAKER_PARENTHESES})?({FRONT_OPTIONAL_TAGS_WITH_HYPHEN}){DESCRIPTION_BRACKET}:?'
END_DESCRIPTION_BRACKET = rf'\s*{DESCRIPTION_BRACKET}\s*$'
FULL_LINE_DESCIRPTION_PARENTHESES = rf'^-?\s*{FRONT_NOTES}?\([^\)]+\){BACK_NOTES}?$'
NEW_LINE_DESCRIPTION_PARENTHESES = rf'^(?:{TAGS})?-?\s*{FRONT_NOTES}?{DESCRIPTION_PARENTHESES}{BACK_NOTES}?(?:{TAGS})?$'
FRONT_DESCRIPTION_PARENTHESES = rf'^({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})(?:{SPEAKER}|{SPEAKER_PARENTHESES})?{DESCRIPTION_PARENTHESES}:?'
END_DESCRIPTION_PARENTHESES = rf'\s*{DESCRIPTION_PARENTHESES}:?\s*$'
INLINE_DESCRIPTION = r'(?:<[a-z]+>)?[\[(][A-Za-z]+[)\]](?:</[a-z]+>)?'

# Correctly spelled aliases, in line with the other *_DESCRIPTION_* names.
# The misspelled FULL_LINE_DESCIRPTION_* names above are kept, as they are
# the ones referenced by existing code.
FULL_LINE_DESCRIPTION_BRACKET = FULL_LINE_DESCIRPTION_BRACKET
FULL_LINE_DESCRIPTION_PARENTHESES = FULL_LINE_DESCIRPTION_PARENTHESES
|
||||
38
scripts/subby/subby/subripfile.py
Normal file
38
scripts/subby/subby/subripfile.py
Normal file
@ -0,0 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import UserList
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import srt
|
||||
|
||||
|
||||
class SubRipFile(UserList):
    """List of srt.Subtitle lines with parsing, export and comparison helpers"""

    def __init__(self, data: list[srt.Subtitle] | None = None):
        """
        :param data: Subtitle lines; a non-empty list is stored as is (not
            copied), while None or an empty list results in a new empty list
        """
        self.data: list[srt.Subtitle] = data or []

    @classmethod
    def from_string(cls, source: str):
        """Creates an instance from text in srt format (parsed with ignore_errors=True)"""
        return cls(list(srt.parse(source, ignore_errors=True)))

    def clean_indexes(self):
        """Replaces the lines with the result of srt.sort_and_reindex"""
        self.data = list(srt.sort_and_reindex(self.data))

    def offset(self, offset: timedelta):
        """Shifts start and end of every line by given offset, in place"""
        for line in self.data:
            line.start += offset
            line.end += offset

    def export(self, eol: str | None = None) -> str:
        """Exports subtitle as text"""
        return srt.compose(self.data, eol=eol)

    def save(self, path: Path, encoding: str = 'utf-8-sig', eol: str | None = None):
        """Saves subtitle as text to given path, encoded with given encoding"""
        with path.open(mode='wb') as fp:
            fp.write(self.export(eol=eol).encode(encoding))

    def __eq__(self, other):
        """
        Compares the exported text (with "\\n" line endings) of two instances

        Returns NotImplemented for any other type, so that comparisons such
        as ``subs == None`` or ``subs != 5`` evaluate normally instead of
        raising.
        """
        if not isinstance(other, SubRipFile):
            return NotImplemented
        return self.export(eol='\n') == other.export(eol='\n')
|
||||
44
scripts/subby/subby/utils/time.py
Normal file
44
scripts/subby/subby/utils/time.py
Normal file
@ -0,0 +1,44 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from srt import Subtitle
|
||||
|
||||
|
||||
def timestamp_from_ms(duration: float | int) -> str:
    """
    Formats a duration given in milliseconds as HH:MM:SS.mmm

    Fractions of a millisecond are cut off, not rounded.
    """
    total_seconds, ms_part = divmod(float(duration), 1000)
    total_minutes, sec_part = divmod(total_seconds, 60)
    hour_part, min_part = divmod(total_minutes, 60)
    return "{:02d}:{:02d}:{:02d}.{:03d}".format(
        int(hour_part), int(min_part), int(sec_part), int(ms_part)
    )
|
||||
|
||||
|
||||
def timestamp_from_seconds(duration: float | int) -> str:
    """Formats a duration given in seconds, using timestamp_from_ms"""
    as_ms = duration * 1000
    return timestamp_from_ms(as_ms)
|
||||
|
||||
|
||||
def ms_from_timestamp(timestamp: str) -> int:
    """
    Converts a four-field timestamp into milliseconds

    Any "T:" is removed and ";", "." and "," are treated like ":" before the
    text is split into hours, minutes, seconds and a last field, which is
    added to the total as is.
    """
    normalized = re.sub(r'[;\.\,]', r':', timestamp.replace('T:', ''))
    hours, minutes, seconds, last_field = (int(part) for part in normalized.split(':'))
    return last_field + hours * 3600000 + minutes * 60000 + seconds * 1000
|
||||
|
||||
|
||||
def timedelta_from_timestamp(timestamp: str) -> datetime.timedelta:
    """Converts a timestamp into a timedelta, using ms_from_timestamp"""
    total_ms = ms_from_timestamp(timestamp)
    return datetime.timedelta(seconds=total_ms / 1000)
|
||||
|
||||
|
||||
def timedelta_from_ms(duration: float | int) -> datetime.timedelta:
    """Converts a duration given in milliseconds into a timedelta"""
    as_seconds = duration / 1000
    return datetime.timedelta(seconds=as_seconds)
|
||||
|
||||
|
||||
def line_duration(line: Subtitle):
    """Returns the absolute difference between end and start of a srt.Subtitle line"""
    span = line.end - line.start
    return abs(span)
|
||||
Loading…
Reference in New Issue
Block a user