diff --git a/scripts/subby/.gitignore b/scripts/subby/.gitignore
new file mode 100644
index 0000000..dec16f1
--- /dev/null
+++ b/scripts/subby/.gitignore
@@ -0,0 +1,135 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, to inject the date and other information into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+stubs/
+mypy.ini
+
+# Pyre type checker
+.pyre/
+
+# Ruff
+.ruff_cache/
+.ruff.toml
diff --git a/scripts/subby/.pylintrc b/scripts/subby/.pylintrc
new file mode 100644
index 0000000..7615b86
--- /dev/null
+++ b/scripts/subby/.pylintrc
@@ -0,0 +1,2 @@
+[FORMAT]
+max-line-length=120
diff --git a/scripts/subby/LICENSE b/scripts/subby/LICENSE
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/scripts/subby/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/scripts/subby/README.md b/scripts/subby/README.md
new file mode 100644
index 0000000..1bb6fcb
--- /dev/null
+++ b/scripts/subby/README.md
@@ -0,0 +1,137 @@
+# Subby
+Advanced subtitle converter and processor.
+
+# Supported formats
+WebVTT, DFXP/TTML/TTML2/SMPTE, SAMI, WVTT (WebVTT in MP4), STPP/ISMT (DFXP in MP4), JSON (Bilibili)
+
+# Functionality
+- converts supported input format to SRT
+- retains select formatting tags (italics, basic \an8 positioning)
+- corrects commonly found flaws in subtitles
+- opinionated timing and formatting improvements
+
+# Installation
+```
+git clone https://github.com/vevv/subby
+cd subby
+pip install .
+```
+
+# Usage notes
+`CommonIssuesFixer` should be run both after conversion and after SDH stripping,
+as it's designed to fix source issues, including ones which can cause playback problems.
+
+`CommonIssuesFixer` removes short gaps (2 frames) by default.
+This can be disabled by setting `CommonIssuesFixer.remove_gaps` to `False` before running.
+
+`subby.SubRipFile` provides methods similar to those of `pysrt.SubRipFile`, but isn't a fully compatible replacement.
+Only `from_string`, `clean_indexes`, `export`, and `save` are guaranteed to work.
+
+This object is otherwise just a list storing `srt.Subtitle` elements.
+
+## Language specific fixing
+
+As of 0.3.6, both `CommonIssuesFixer` and `SDHStripper` support a language parameter,
+which accepts a BCP47 language code.
+
+This is currently used only for RTL tagging in CommonIssuesFixer.
+
+**It is highly recommended for every script to pass it for future use.**
+
+# Command line usage
+```
+Usage: subby [OPTIONS] COMMAND [ARGS]...
+
+ Subby—Advanced Subtitle Converter and Processor.
+
+Options:
+ -d, --debug Enable DEBUG level logs.
+ --help Show this message and exit.
+
+Commands:
+ convert Convert a Subtitle to SubRip (SRT).
+ process SubRip (SRT) post-processing.
+ version Print version information.
+```
+Example
+
+```
+subby process /path/to/subs/subs.srt strip-sdh
+```
+
+# Library usage
+## Converter
+```py
+from subby import WebVTTConverter
+from pathlib import Path
+
+converter = WebVTTConverter()
+file = Path('test.vtt')
+
+# All statements below are equivalent
+srt = converter.from_file(file)
+srt = converter.from_string(file.read_text())
+srt = converter.from_bytes(file.read_bytes())
+
+# srt is subby.SubRipFile
+
+output = Path('file.srt')
+srt.save(output)
+# saved to file.srt
+```
+
+## Processor
+A processor returns the processed subtitles together with a bool indicating whether any changes were made, which is useful for determining if stripped SDH subtitles should be saved separately.
+
+```py
+from subby import CommonIssuesFixer
+from pathlib import Path
+
+processor = CommonIssuesFixer()
+file = Path('test.srt')
+
+# All statements below are equivalent
+srt, status = processor.from_file(file)
+srt, status = processor.from_string(file.read_text())
+srt, status = processor.from_bytes(file.read_bytes())
+
+# srt is subby.SubRipFile, status is bool
+
+output = Path('test_fixed.srt')
+srt.save(output)
+# saved to test_fixed.srt
+```
+
+## Chaining
+The following example will convert a VTT file, attempt to strip SDH, and then save the result.
+
+```py
+from subby import WebVTTConverter, CommonIssuesFixer, SDHStripper
+from pathlib import Path
+
+converter = WebVTTConverter()
+fixer = CommonIssuesFixer()
+stripper = SDHStripper()
+
+file = Path('file.vtt')
+file_sdh = Path('file_sdh.srt')
+file_stripped = Path('file_stripped.srt')
+srt, _ = fixer.from_srt(converter.from_file(file))
+
+srt.save(file_sdh)
+# saved to file_sdh.srt
+
+stripped, status = stripper.from_srt(srt)
+if status is True:
+ print('stripping successful')
+ stripped.save(file_stripped)
+ # saved to file_stripped.srt
+```
+
+## Tests
+To run tests, go to the "tests" directory and run `pytest`.
+
+## Contributors
+
+
+
diff --git a/scripts/subby/poetry.lock b/scripts/subby/poetry.lock
new file mode 100644
index 0000000..241fc18
--- /dev/null
+++ b/scripts/subby/poetry.lock
@@ -0,0 +1,551 @@
+# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
+
+[[package]]
+name = "beautifulsoup4"
+version = "4.13.3"
+description = "Screen-scraping library"
+optional = false
+python-versions = ">=3.7.0"
+files = [
+ {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
+ {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
+]
+
+[package.dependencies]
+soupsieve = ">1.2"
+typing-extensions = ">=4.0.0"
+
+[package.extras]
+cchardet = ["cchardet"]
+chardet = ["chardet"]
+charset-normalizer = ["charset-normalizer"]
+html5lib = ["html5lib"]
+lxml = ["lxml"]
+
+[[package]]
+name = "click"
+version = "8.1.8"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
+ {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+
+[[package]]
+name = "construct"
+version = "2.8.8"
+description = "A powerful declarative parser/builder for binary data"
+optional = false
+python-versions = "*"
+files = [
+ {file = "construct-2.8.8.tar.gz", hash = "sha256:1b84b8147f6fd15bcf64b737c3e8ac5100811ad80c830cb4b2545140511c4157"},
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.2.2"
+description = "Backport of PEP 654 (exception groups)"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
+ {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
+]
+
+[package.extras]
+test = ["pytest (>=6)"]
+
+[[package]]
+name = "iniconfig"
+version = "2.0.0"
+description = "brain-dead simple config-ini parsing"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
+ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
+]
+
+[[package]]
+name = "langcodes"
+version = "3.4.1"
+description = "Tools for labeling human languages with IETF language tags"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "langcodes-3.4.1-py3-none-any.whl", hash = "sha256:68f686fc3d358f222674ecf697ddcee3ace3c2fe325083ecad2543fd28a20e77"},
+ {file = "langcodes-3.4.1.tar.gz", hash = "sha256:a24879fed238013ac3af2424b9d1124e38b4a38b2044fd297c8ff38e5912e718"},
+]
+
+[package.dependencies]
+language-data = ">=1.2"
+
+[package.extras]
+build = ["build", "twine"]
+test = ["pytest", "pytest-cov"]
+
+[[package]]
+name = "language-data"
+version = "1.3.0"
+description = "Supplementary data about languages used by the langcodes module"
+optional = false
+python-versions = "*"
+files = [
+ {file = "language_data-1.3.0-py3-none-any.whl", hash = "sha256:e2ee943551b5ae5f89cd0e801d1fc3835bb0ef5b7e9c3a4e8e17b2b214548fbf"},
+ {file = "language_data-1.3.0.tar.gz", hash = "sha256:7600ef8aa39555145d06c89f0c324bf7dab834ea0b0a439d8243762e3ebad7ec"},
+]
+
+[package.dependencies]
+marisa-trie = ">=1.1.0"
+
+[package.extras]
+build = ["build", "twine"]
+test = ["pytest", "pytest-cov"]
+
+[[package]]
+name = "lxml"
+version = "5.3.1"
+description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
+optional = false
+python-versions = ">=3.6"
+files = [
+ {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"},
+ {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:528f3a0498a8edc69af0559bdcf8a9f5a8bf7c00051a6ef3141fdcf27017bbf5"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4743e30d6f5f92b6d2b7c86b3ad250e0bad8dee4b7ad8a0c44bfb276af89a3"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b5d7f8acf809465086d498d62a981fa6a56d2718135bb0e4aa48c502055f5c"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:928e75a7200a4c09e6efc7482a1337919cc61fe1ba289f297827a5b76d8969c2"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a997b784a639e05b9d4053ef3b20c7e447ea80814a762f25b8ed5a89d261eac"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7b82e67c5feb682dbb559c3e6b78355f234943053af61606af126df2183b9ef9"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:f1de541a9893cf8a1b1db9bf0bf670a2decab42e3e82233d36a74eda7822b4c9"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:de1fc314c3ad6bc2f6bd5b5a5b9357b8c6896333d27fdbb7049aea8bd5af2d79"},
+ {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7c0536bd9178f754b277a3e53f90f9c9454a3bd108b1531ffff720e082d824f2"},
+ {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68018c4c67d7e89951a91fbd371e2e34cd8cfc71f0bb43b5332db38497025d51"},
+ {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa826340a609d0c954ba52fd831f0fba2a4165659ab0ee1a15e4aac21f302406"},
+ {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:796520afa499732191e39fc95b56a3b07f95256f2d22b1c26e217fb69a9db5b5"},
+ {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3effe081b3135237da6e4c4530ff2a868d3f80be0bda027e118a5971285d42d0"},
+ {file = "lxml-5.3.1-cp310-cp310-win32.whl", hash = "sha256:a22f66270bd6d0804b02cd49dae2b33d4341015545d17f8426f2c4e22f557a23"},
+ {file = "lxml-5.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:0bcfadea3cdc68e678d2b20cb16a16716887dd00a881e16f7d806c2138b8ff0c"},
+ {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e220f7b3e8656ab063d2eb0cd536fafef396829cafe04cb314e734f87649058f"},
+ {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f2cfae0688fd01f7056a17367e3b84f37c545fb447d7282cf2c242b16262607"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:67d2f8ad9dcc3a9e826bdc7802ed541a44e124c29b7d95a679eeb58c1c14ade8"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db0c742aad702fd5d0c6611a73f9602f20aec2007c102630c06d7633d9c8f09a"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:198bb4b4dd888e8390afa4f170d4fa28467a7eaf857f1952589f16cfbb67af27"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2a3e412ce1849be34b45922bfef03df32d1410a06d1cdeb793a343c2f1fd666"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b8969dbc8d09d9cd2ae06362c3bad27d03f433252601ef658a49bd9f2b22d79"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5be8f5e4044146a69c96077c7e08f0709c13a314aa5315981185c1f00235fe65"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:133f3493253a00db2c870d3740bc458ebb7d937bd0a6a4f9328373e0db305709"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:52d82b0d436edd6a1d22d94a344b9a58abd6c68c357ed44f22d4ba8179b37629"},
+ {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b6f92e35e2658a5ed51c6634ceb5ddae32053182851d8cad2a5bc102a359b33"},
+ {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:203b1d3eaebd34277be06a3eb880050f18a4e4d60861efba4fb946e31071a295"},
+ {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:155e1a5693cf4b55af652f5c0f78ef36596c7f680ff3ec6eb4d7d85367259b2c"},
+ {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22ec2b3c191f43ed21f9545e9df94c37c6b49a5af0a874008ddc9132d49a2d9c"},
+ {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7eda194dd46e40ec745bf76795a7cccb02a6a41f445ad49d3cf66518b0bd9cff"},
+ {file = "lxml-5.3.1-cp311-cp311-win32.whl", hash = "sha256:fb7c61d4be18e930f75948705e9718618862e6fc2ed0d7159b2262be73f167a2"},
+ {file = "lxml-5.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c809eef167bf4a57af4b03007004896f5c60bd38dc3852fcd97a26eae3d4c9e6"},
+ {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c"},
+ {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468"},
+ {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367"},
+ {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd"},
+ {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c"},
+ {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f"},
+ {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645"},
+ {file = "lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5"},
+ {file = "lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf"},
+ {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c093c7088b40d8266f57ed71d93112bd64c6724d31f0794c1e52cc4857c28e0e"},
+ {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b0884e3f22d87c30694e625b1e62e6f30d39782c806287450d9dc2fdf07692fd"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1637fa31ec682cd5760092adfabe86d9b718a75d43e65e211d5931809bc111e7"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a364e8e944d92dcbf33b6b494d4e0fb3499dcc3bd9485beb701aa4b4201fa414"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:779e851fd0e19795ccc8a9bb4d705d6baa0ef475329fe44a13cf1e962f18ff1e"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c4393600915c308e546dc7003d74371744234e8444a28622d76fe19b98fa59d1"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:673b9d8e780f455091200bba8534d5f4f465944cbdd61f31dc832d70e29064a5"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2e4a570f6a99e96c457f7bec5ad459c9c420ee80b99eb04cbfcfe3fc18ec6423"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:71f31eda4e370f46af42fc9f264fafa1b09f46ba07bdbee98f25689a04b81c20"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:42978a68d3825eaac55399eb37a4d52012a205c0c6262199b8b44fcc6fd686e8"},
+ {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8b1942b3e4ed9ed551ed3083a2e6e0772de1e5e3aca872d955e2e86385fb7ff9"},
+ {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:85c4f11be9cf08917ac2a5a8b6e1ef63b2f8e3799cec194417e76826e5f1de9c"},
+ {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:231cf4d140b22a923b1d0a0a4e0b4f972e5893efcdec188934cc65888fd0227b"},
+ {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5865b270b420eda7b68928d70bb517ccbe045e53b1a428129bb44372bf3d7dd5"},
+ {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbf7bebc2275016cddf3c997bf8a0f7044160714c64a9b83975670a04e6d2252"},
+ {file = "lxml-5.3.1-cp313-cp313-win32.whl", hash = "sha256:d0751528b97d2b19a388b302be2a0ee05817097bab46ff0ed76feeec24951f78"},
+ {file = "lxml-5.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:91fb6a43d72b4f8863d21f347a9163eecbf36e76e2f51068d59cd004c506f332"},
+ {file = "lxml-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:016b96c58e9a4528219bb563acf1aaaa8bc5452e7651004894a973f03b84ba81"},
+ {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82a4bb10b0beef1434fb23a09f001ab5ca87895596b4581fd53f1e5145a8934a"},
+ {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d68eeef7b4d08a25e51897dac29bcb62aba830e9ac6c4e3297ee7c6a0cf6439"},
+ {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:f12582b8d3b4c6be1d298c49cb7ae64a3a73efaf4c2ab4e37db182e3545815ac"},
+ {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2df7ed5edeb6bd5590914cd61df76eb6cce9d590ed04ec7c183cf5509f73530d"},
+ {file = "lxml-5.3.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:585c4dc429deebc4307187d2b71ebe914843185ae16a4d582ee030e6cfbb4d8a"},
+ {file = "lxml-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:06a20d607a86fccab2fc15a77aa445f2bdef7b49ec0520a842c5c5afd8381576"},
+ {file = "lxml-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:057e30d0012439bc54ca427a83d458752ccda725c1c161cc283db07bcad43cf9"},
+ {file = "lxml-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4867361c049761a56bd21de507cab2c2a608c55102311d142ade7dab67b34f32"},
+ {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dddf0fb832486cc1ea71d189cb92eb887826e8deebe128884e15020bb6e3f61"},
+ {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bcc211542f7af6f2dfb705f5f8b74e865592778e6cafdfd19c792c244ccce19"},
+ {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaca5a812f050ab55426c32177091130b1e49329b3f002a32934cd0245571307"},
+ {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:236610b77589faf462337b3305a1be91756c8abc5a45ff7ca8f245a71c5dab70"},
+ {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:aed57b541b589fa05ac248f4cb1c46cbb432ab82cbd467d1c4f6a2bdc18aecf9"},
+ {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:75fa3d6946d317ffc7016a6fcc44f42db6d514b7fdb8b4b28cbe058303cb6e53"},
+ {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:96eef5b9f336f623ffc555ab47a775495e7e8846dde88de5f941e2906453a1ce"},
+ {file = "lxml-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:ef45f31aec9be01379fc6c10f1d9c677f032f2bac9383c827d44f620e8a88407"},
+ {file = "lxml-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0611da6b07dd3720f492db1b463a4d1175b096b49438761cc9f35f0d9eaaef5"},
+ {file = "lxml-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2aca14c235c7a08558fe0a4786a1a05873a01e86b474dfa8f6df49101853a4e"},
+ {file = "lxml-5.3.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82fce1d964f065c32c9517309f0c7be588772352d2f40b1574a214bd6e6098"},
+ {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7aae7a3d63b935babfdc6864b31196afd5145878ddd22f5200729006366bc4d5"},
+ {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8e0d177b1fe251c3b1b914ab64135475c5273c8cfd2857964b2e3bb0fe196a7"},
+ {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:6c4dd3bfd0c82400060896717dd261137398edb7e524527438c54a8c34f736bf"},
+ {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f1208c1c67ec9e151d78aa3435aa9b08a488b53d9cfac9b699f15255a3461ef2"},
+ {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c6aacf00d05b38a5069826e50ae72751cb5bc27bdc4d5746203988e429b385bb"},
+ {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5881aaa4bf3a2d086c5f20371d3a5856199a0d8ac72dd8d0dbd7a2ecfc26ab73"},
+ {file = "lxml-5.3.1-cp38-cp38-win32.whl", hash = "sha256:45fbb70ccbc8683f2fb58bea89498a7274af1d9ec7995e9f4af5604e028233fc"},
+ {file = "lxml-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:7512b4d0fc5339d5abbb14d1843f70499cab90d0b864f790e73f780f041615d7"},
+ {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5885bc586f1edb48e5d68e7a4b4757b5feb2a496b64f462b4d65950f5af3364f"},
+ {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1b92fe86e04f680b848fff594a908edfa72b31bfc3499ef7433790c11d4c8cd8"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a091026c3bf7519ab1e64655a3f52a59ad4a4e019a6f830c24d6430695b1cf6a"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ffb141361108e864ab5f1813f66e4e1164181227f9b1f105b042729b6c15125"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3715cdf0dd31b836433af9ee9197af10e3df41d273c19bb249230043667a5dfd"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88b72eb7222d918c967202024812c2bfb4048deeb69ca328363fb8e15254c549"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa59974880ab5ad8ef3afaa26f9bda148c5f39e06b11a8ada4660ecc9fb2feb3"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3bb8149840daf2c3f97cebf00e4ed4a65a0baff888bf2605a8d0135ff5cf764e"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:0d6b2fa86becfa81f0a0271ccb9eb127ad45fb597733a77b92e8a35e53414914"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:136bf638d92848a939fd8f0e06fcf92d9f2e4b57969d94faae27c55f3d85c05b"},
+ {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:89934f9f791566e54c1d92cdc8f8fd0009447a5ecdb1ec6b810d5f8c4955f6be"},
+ {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8ade0363f776f87f982572c2860cc43c65ace208db49c76df0a21dde4ddd16e"},
+ {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bfbbab9316330cf81656fed435311386610f78b6c93cc5db4bebbce8dd146675"},
+ {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:172d65f7c72a35a6879217bcdb4bb11bc88d55fb4879e7569f55616062d387c2"},
+ {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e3c623923967f3e5961d272718655946e5322b8d058e094764180cdee7bab1af"},
+ {file = "lxml-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ce0930a963ff593e8bb6fda49a503911accc67dee7e5445eec972668e672a0f0"},
+ {file = "lxml-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:f7b64fcd670bca8800bc10ced36620c6bbb321e7bc1214b9c0c0df269c1dddc2"},
+ {file = "lxml-5.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:afa578b6524ff85fb365f454cf61683771d0170470c48ad9d170c48075f86725"},
+ {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f5e80adf0aafc7b5454f2c1cb0cde920c9b1f2cbd0485f07cc1d0497c35c5d"},
+ {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd0b80ac2d8f13ffc906123a6f20b459cb50a99222d0da492360512f3e50f84"},
+ {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:422c179022ecdedbe58b0e242607198580804253da220e9454ffe848daa1cfd2"},
+ {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:524ccfded8989a6595dbdda80d779fb977dbc9a7bc458864fc9a0c2fc15dc877"},
+ {file = "lxml-5.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:48fd46bf7155def2e15287c6f2b133a2f78e2d22cdf55647269977b873c65499"},
+ {file = "lxml-5.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:05123fad495a429f123307ac6d8fd6f977b71e9a0b6d9aeeb8f80c017cb17131"},
+ {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a243132767150a44e6a93cd1dde41010036e1cbc63cc3e9fe1712b277d926ce3"},
+ {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92ea6d9dd84a750b2bae72ff5e8cf5fdd13e58dda79c33e057862c29a8d5b50"},
+ {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2f1be45d4c15f237209bbf123a0e05b5d630c8717c42f59f31ea9eae2ad89394"},
+ {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a83d3adea1e0ee36dac34627f78ddd7f093bb9cfc0a8e97f1572a949b695cb98"},
+ {file = "lxml-5.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3edbb9c9130bac05d8c3fe150c51c337a471cc7fdb6d2a0a7d3a88e88a829314"},
+ {file = "lxml-5.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2f23cf50eccb3255b6e913188291af0150d89dab44137a69e14e4dcb7be981f1"},
+ {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7e5edac4778127f2bf452e0721a58a1cfa4d1d9eac63bdd650535eb8543615"},
+ {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:094b28ed8a8a072b9e9e2113a81fda668d2053f2ca9f2d202c2c8c7c2d6516b1"},
+ {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:514fe78fc4b87e7a7601c92492210b20a1b0c6ab20e71e81307d9c2e377c64de"},
+ {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8fffc08de02071c37865a155e5ea5fce0282e1546fd5bde7f6149fcaa32558ac"},
+ {file = "lxml-5.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4b0d5cdba1b655d5b18042ac9c9ff50bda33568eb80feaaca4fc237b9c4fbfde"},
+ {file = "lxml-5.3.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3031e4c16b59424e8d78522c69b062d301d951dc55ad8685736c3335a97fc270"},
+ {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb659702a45136c743bc130760c6f137870d4df3a9e14386478b8a0511abcfca"},
+ {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a11b16a33656ffc43c92a5343a28dc71eefe460bcc2a4923a96f292692709f6"},
+ {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5ae125276f254b01daa73e2c103363d3e99e3e10505686ac7d9d2442dd4627a"},
+ {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76722b5ed4a31ba103e0dc77ab869222ec36efe1a614e42e9bcea88a36186fe"},
+ {file = "lxml-5.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:33e06717c00c788ab4e79bc4726ecc50c54b9bfb55355eae21473c145d83c2d2"},
+ {file = "lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8"},
+]
+
+[package.extras]
+cssselect = ["cssselect (>=0.7)"]
+html-clean = ["lxml_html_clean"]
+html5 = ["html5lib"]
+htmlsoup = ["BeautifulSoup4"]
+source = ["Cython (>=3.0.11,<3.1.0)"]
+
+[[package]]
+name = "lxml-stubs"
+version = "0.4.0"
+description = "Type annotations for the lxml package"
+optional = false
+python-versions = "*"
+files = [
+ {file = "lxml-stubs-0.4.0.tar.gz", hash = "sha256:184877b42127256abc2b932ba8bd0ab5ea80bd0b0fee618d16daa40e0b71abee"},
+ {file = "lxml_stubs-0.4.0-py3-none-any.whl", hash = "sha256:3b381e9e82397c64ea3cc4d6f79d1255d015f7b114806d4826218805c10ec003"},
+]
+
+[package.extras]
+test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1.9.3)"]
+
+[[package]]
+name = "marisa-trie"
+version = "1.2.1"
+description = "Static memory-efficient and fast Trie-like structures for Python."
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "marisa_trie-1.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2eb41d2f9114d8b7bd66772c237111e00d2bae2260824560eaa0a1e291ce9e8"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e956e6a46f604b17d570901e66f5214fb6f658c21e5e7665deace236793cef6"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd45142501300e7538b2e544905580918b67b1c82abed1275fe4c682c95635fa"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8443d116c612cfd1961fbf76769faf0561a46d8e317315dd13f9d9639ad500c"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:875a6248e60fbb48d947b574ffa4170f34981f9e579bde960d0f9a49ea393ecc"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:746a7c60a17fccd3cfcfd4326926f02ea4fcdfc25d513411a0c4fc8e4a1ca51f"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e70869737cc0e5bd903f620667da6c330d6737048d1f44db792a6af68a1d35be"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06b099dd743676dbcd8abd8465ceac8f6d97d8bfaabe2c83b965495523b4cef2"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d2a82eb21afdaf22b50d9b996472305c05ca67fc4ff5a026a220320c9c961db6"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-win32.whl", hash = "sha256:8951e7ce5d3167fbd085703b4cbb3f47948ed66826bef9a2173c379508776cf5"},
+ {file = "marisa_trie-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:5685a14b3099b1422c4f59fa38b0bf4b5342ee6cc38ae57df9666a0b28eeaad3"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed3fb4ed7f2084597e862bcd56c56c5529e773729a426c083238682dba540e98"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe69fb9ffb2767746181f7b3b29bbd3454d1d24717b5958e030494f3d3cddf3"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4728ed3ae372d1ea2cdbd5eaa27b8f20a10e415d1f9d153314831e67d963f281"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cf4f25cf895692b232f49aa5397af6aba78bb679fb917a05fce8d3cb1ee446d"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cca7f96236ffdbf49be4b2e42c132e3df05968ac424544034767650913524de"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7eb20bf0e8b55a58d2a9b518aabc4c18278787bdba476c551dd1c1ed109e509"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b1ec93f0d1ee6d7ab680a6d8ea1a08bf264636358e92692072170032dda652ba"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e2699255d7ac610dee26d4ae7bda5951d05c7d9123a22e1f7c6a6f1964e0a4e4"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c484410911182457a8a1a0249d0c09c01e2071b78a0a8538cd5f7fa45589b13a"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-win32.whl", hash = "sha256:ad548117744b2bcf0e3d97374608be0a92d18c2af13d98b728d37cd06248e571"},
+ {file = "marisa_trie-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:436f62d27714970b9cdd3b3c41bdad046f260e62ebb0daa38125ef70536fc73b"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:638506eacf20ca503fff72221a7e66a6eadbf28d6a4a6f949fcf5b1701bb05ec"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de1665eaafefa48a308e4753786519888021740501a15461c77bdfd57638e6b4"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2a7d00f53f4945320b551bccb826b3fb26948bde1a10d50bb9802fabb611b10"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98042040d1d6085792e8d0f74004fc0f5f9ca6091c298f593dd81a22a4643854"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6532615111eec2c79e711965ece0bc95adac1ff547a7fff5ffca525463116deb"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:20948e40ab2038e62b7000ca6b4a913bc16c91a2c2e6da501bd1f917eeb28d51"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66b23e5b35dd547f85bf98db7c749bc0ffc57916ade2534a6bbc32db9a4abc44"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6704adf0247d2dda42e876b793be40775dff46624309ad99bc7537098bee106d"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-win32.whl", hash = "sha256:3ad356442c2fea4c2a6f514738ddf213d23930f942299a2b2c05df464a00848a"},
+ {file = "marisa_trie-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:f2806f75817392cedcacb24ac5d80b0350dde8d3861d67d045c1d9b109764114"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b5ea16e69bfda0ac028c921b58de1a4aaf83d43934892977368579cd3c0a2554"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9f627f4e41be710b6cb6ed54b0128b229ac9d50e2054d9cde3af0fef277c23cf"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5e649f3dc8ab5476732094f2828cc90cac3be7c79bc0c8318b6fda0c1d248db4"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46e528ee71808c961baf8c3ce1c46a8337ec7a96cc55389d11baafe5b632f8e9"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36aa4401a1180615f74d575571a6550081d84fc6461e9aefc0bb7b2427af098e"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce59bcd2cda9bb52b0e90cc7f36413cd86c3d0ce7224143447424aafb9f4aa48"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f4cd800704a5fc57e53c39c3a6b0c9b1519ebdbcb644ede3ee67a06eb542697d"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2428b495003c189695fb91ceeb499f9fcced3a2dce853e17fa475519433c67ff"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:735c363d9aaac82eaf516a28f7c6b95084c2e176d8231c87328dc80e112a9afa"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-win32.whl", hash = "sha256:eba6ca45500ca1a042466a0684aacc9838e7f20fe2605521ee19f2853062798f"},
+ {file = "marisa_trie-1.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:aa7cd17e1c690ce96c538b2f4aae003d9a498e65067dd433c52dd069009951d4"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5e43891a37b0d7f618819fea14bd951289a0a8e3dd0da50c596139ca83ebb9b1"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6946100a43f933fad6bc458c502a59926d80b321d5ac1ed2ff9c56605360496f"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4177dc0bd1374e82be9b2ba4d0c2733b0a85b9d154ceeea83a5bee8c1e62fbf"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f35c2603a6be168088ed1db6ad1704b078aa8f39974c60888fbbced95dcadad4"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d659fda873d8dcb2c14c2c331de1dee21f5a902d7f2de7978b62c6431a8850ef"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:b0ef26733d3c836be79e812071e1a431ce1f807955a27a981ebb7993d95f842b"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:536ea19ce6a2ce61c57fed4123ecd10d18d77a0db45cd2741afff2b8b68f15b3"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-win32.whl", hash = "sha256:0ee6cf6a16d9c3d1c94e21c8e63c93d8b34bede170ca4e937e16e1c0700d399f"},
+ {file = "marisa_trie-1.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7e7b1786e852e014d03e5f32dbd991f9a9eb223dd3fa9a2564108b807e4b7e1c"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:952af3a5859c3b20b15a00748c36e9eb8316eb2c70bd353ae1646da216322908"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24a81aa7566e4ec96fc4d934581fe26d62eac47fc02b35fa443a0bb718b471e8"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9c9b32b14651a6dcf9e8857d2df5d29d322a1ea8c0be5c8ffb88f9841c4ec62b"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ac170d20b97beb75059ba65d1ccad6b434d777c8992ab41ffabdade3b06dd74"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da4e4facb79614cc4653cfd859f398e4db4ca9ab26270ff12610e50ed7f1f6c6"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25688f34cac3bec01b4f655ffdd6c599a01f0bd596b4a79cf56c6f01a7df3560"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1db3213b451bf058d558f6e619bceff09d1d130214448a207c55e1526e2773a1"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:d5648c6dcc5dc9200297fb779b1663b8a4467bda034a3c69bd9c32d8afb33b1d"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5bd39a4e1cc839a88acca2889d17ebc3f202a5039cd6059a13148ce75c8a6244"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-win32.whl", hash = "sha256:594f98491a96c7f1ffe13ce292cef1b4e63c028f0707effdea0f113364c1ae6c"},
+ {file = "marisa_trie-1.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:5fe5a286f997848a410eebe1c28657506adaeb405220ee1e16cfcfd10deb37f2"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c0fe2ace0cb1806badbd1c551a8ec2f8d4cf97bf044313c082ef1acfe631ddca"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67f0c2ec82c20a02c16fc9ba81dee2586ef20270127c470cb1054767aa8ba310"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a3c98613180cf1730e221933ff74b454008161b1a82597e41054127719964188"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:429858a0452a7bedcf67bc7bb34383d00f666c980cb75a31bcd31285fbdd4403"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2eacb84446543082ec50f2fb563f1a94c96804d4057b7da8ed815958d0cdfbe"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:852d7bcf14b0c63404de26e7c4c8d5d65ecaeca935e93794331bc4e2f213660b"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e58788004adda24c401d1751331618ed20c507ffc23bfd28d7c0661a1cf0ad16"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aefe0973cc4698e0907289dc0517ab0c7cdb13d588201932ff567d08a50b0e2e"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6c50c861faad0a5c091bd763e0729f958c316e678dfa065d3984fbb9e4eacbcd"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-win32.whl", hash = "sha256:b1ce340da608530500ab4f963f12d6bfc8d8680900919a60dbdc9b78c02060a4"},
+ {file = "marisa_trie-1.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:ce37d8ca462bb64cc13f529b9ed92f7b21fe8d1f1679b62e29f9cb7d0e888b49"},
+ {file = "marisa_trie-1.2.1.tar.gz", hash = "sha256:3a27c408e2aefc03e0f1d25b2ff2afb85aac3568f6fa2ae2a53b57a2e87ce29d"},
+]
+
+[package.dependencies]
+setuptools = "*"
+
+[package.extras]
+test = ["hypothesis", "pytest", "readme-renderer"]
+
+[[package]]
+name = "packaging"
+version = "24.2"
+description = "Core utilities for Python packages"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
+ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
+]
+
+[[package]]
+name = "pluggy"
+version = "1.5.0"
+description = "plugin and hook calling mechanisms for python"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
+ {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
+]
+
+[package.extras]
+dev = ["pre-commit", "tox"]
+testing = ["pytest", "pytest-benchmark"]
+
+[[package]]
+name = "pymp4"
+version = "1.4.0"
+description = "Python parser for MP4 boxes"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+ {file = "pymp4-1.4.0-py3-none-any.whl", hash = "sha256:3401666c1e2a97ac94dffb18c5a5dcbd46d0a436da5272d378a6f9f6506dd12d"},
+ {file = "pymp4-1.4.0.tar.gz", hash = "sha256:bc9e77732a8a143d34c38aa862a54180716246938e4bf3e07585d19252b77bb5"},
+]
+
+[package.dependencies]
+construct = "2.8.8"
+
+[[package]]
+name = "pytest"
+version = "7.4.4"
+description = "pytest: simple powerful testing with Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
+ {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
+iniconfig = "*"
+packaging = "*"
+pluggy = ">=0.12,<2.0"
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
+
+[package.extras]
+testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
+
+[[package]]
+name = "setuptools"
+version = "75.3.0"
+description = "Easily download, build, install, upgrade, and uninstall Python packages"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"},
+ {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"},
+]
+
+[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"]
+core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
+type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"]
+
+[[package]]
+name = "soupsieve"
+version = "2.6"
+description = "A modern CSS selector implementation for Beautiful Soup."
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"},
+ {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"},
+]
+
+[[package]]
+name = "srt"
+version = "3.5.3"
+description = "A tiny library for parsing, modifying, and composing SRT files."
+optional = false
+python-versions = ">=2.7"
+files = [
+ {file = "srt-3.5.3.tar.gz", hash = "sha256:4884315043a4f0740fd1f878ed6caa376ac06d70e135f306a6dc44632eed0cc0"},
+]
+
+[[package]]
+name = "tinycss"
+version = "0.4"
+description = "tinycss is a complete yet simple CSS parser for Python."
+optional = false
+python-versions = "*"
+files = [
+ {file = "tinycss-0.4.tar.gz", hash = "sha256:12306fb50e5e9e7eaeef84b802ed877488ba80e35c672867f548c0924a76716e"},
+]
+
+[package.extras]
+test = ["pytest-cov", "pytest-flake8", "pytest-isort", "pytest-runner"]
+
+[[package]]
+name = "tomli"
+version = "2.2.1"
+description = "A lil' TOML parser"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
+ {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
+ {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"},
+ {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"},
+ {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"},
+ {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"},
+ {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"},
+ {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"},
+ {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"},
+ {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"},
+ {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"},
+ {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"},
+ {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"},
+ {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"},
+ {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"},
+ {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"},
+ {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"},
+ {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"},
+ {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"},
+ {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"},
+ {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"},
+ {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"},
+ {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"},
+ {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"},
+ {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"},
+ {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"},
+ {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"},
+ {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"},
+ {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"},
+ {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"},
+ {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
+ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
+]
+
+[[package]]
+name = "typing-extensions"
+version = "4.12.2"
+description = "Backported and Experimental Type Hints for Python 3.8+"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
+ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
+]
+
+[metadata]
+lock-version = "2.0"
+python-versions = "^3.8"
+content-hash = "d1dc09a1492c59373465886d52b572d761f7e370239ae93c63a7c88f4a69c47f"
diff --git a/scripts/subby/pyproject.toml b/scripts/subby/pyproject.toml
new file mode 100644
index 0000000..28db507
--- /dev/null
+++ b/scripts/subby/pyproject.toml
@@ -0,0 +1,33 @@
+[tool.poetry]
+name = "subby"
+version = "0.3.19"
+description = "Advanced subtitle converter and processor"
+authors = ["vevv"]
+license = "GPL-3.0-or-later"
+readme = "README.md"
+repository = "https://github.com/vevv/subby"
+
+[tool.poetry.dependencies]
+python = "^3.8"
+pymp4 = "~1.4.0"
+beautifulsoup4 = "^4.11.2"
+tinycss = "^0.4"
+click = "^8.1.3"
+srt = "^3.5.3"
+lxml = "^5.3.0"
+langcodes = "^3.4.0"
+
+[tool.poetry.group.dev]
+optional = true
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.3"
+lxml-stubs = "^0.4.0"
+
+
+[tool.poetry.scripts]
+subby = "subby.cli:main"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/scripts/subby/setup.py b/scripts/subby/setup.py
new file mode 100644
index 0000000..1abbd06
--- /dev/null
+++ b/scripts/subby/setup.py
@@ -0,0 +1,4 @@
+# Minimal setuptools entry point. setup() is called with no arguments, so no
+# package metadata is declared in this file.
+# NOTE(review): presumably kept for tooling that still expects a setup.py; the
+# build backend declared in pyproject.toml is poetry-core - confirm it is needed.
+import setuptools
+
+if __name__ == "__main__":
+ setuptools.setup()
diff --git a/scripts/subby/subby/__init__.py b/scripts/subby/subby/__init__.py
new file mode 100644
index 0000000..df019f9
--- /dev/null
+++ b/scripts/subby/subby/__init__.py
@@ -0,0 +1,25 @@
+from subby.converters.bilibili_json import BilibiliJSONConverter
+from subby.converters.mp4 import ISMTConverter, WVTTConverter
+from subby.converters.sami import SAMIConverter
+from subby.converters.smpte import SMPTEConverter
+from subby.converters.webvtt import WebVTTConverter
+from subby.processors.common_issues import CommonIssuesFixer
+from subby.processors.sdh import SDHStripper
+from subby.subripfile import SubRipFile
+
+# Package version string; pyproject.toml declares the same number under
+# [tool.poetry] `version`.
+__version__ = '0.3.19'
+
+# Names exported by `from subby import *`, grouped by role.
+__all__ = [
+ # Converters
+ 'SAMIConverter',
+ 'SMPTEConverter', 'ISMTConverter',
+ 'WebVTTConverter', 'WVTTConverter',
+ 'BilibiliJSONConverter',
+ # Processors
+ 'CommonIssuesFixer',
+ 'SDHStripper',
+ # Utility
+ 'SubRipFile',
+ # Version
+ '__version__'
+]
diff --git a/scripts/subby/subby/cli.py b/scripts/subby/subby/cli.py
new file mode 100644
index 0000000..b70de80
--- /dev/null
+++ b/scripts/subby/subby/cli.py
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+import logging
+from datetime import datetime
+from pathlib import Path
+
+import click
+
+from subby import (BilibiliJSONConverter, CommonIssuesFixer, ISMTConverter,
+ SAMIConverter, SDHStripper, SMPTEConverter, WebVTTConverter,
+ WVTTConverter, __version__)
+
+
+@click.group()
+@click.option("-d", "--debug", is_flag=True, default=False, help="Enable debug level logs.")
+def main(debug: bool) -> None:
+    """Subby—Advanced Subtitle Converter and Processor."""
+    # The docstring above doubles as the help text click prints for the group.
+    # With --debug both the root logger and the 'srt' logger run at DEBUG;
+    # otherwise the root logger uses INFO and 'srt' is raised to CRITICAL.
+    if debug:
+        root_level, srt_level = logging.DEBUG, logging.DEBUG
+    else:
+        root_level, srt_level = logging.INFO, logging.CRITICAL
+
+    logging.basicConfig(level=root_level)
+    logging.getLogger('srt').setLevel(srt_level)
+
+
+@main.command()
+def version():
+    """Print version information."""
+    log = logging.getLogger(__name__)
+
+    # A single year is shown while the current year equals the first
+    # copyright year; afterwards a "first-current" range is shown.
+    first_year = 2023
+    this_year = datetime.now().year
+    years = first_year if first_year == this_year else f"{first_year}-{this_year}"
+
+    log.info("Subby version %s Copyright (c) %s vevv", __version__, years)
+    log.info("https://github.com/vevv/subby")
+
+
+@main.command()
+@click.argument("file", type=Path)
+@click.option("-o", "--out", type=Path, default=None, help="Output path.")
+@click.option(
+    "-l",
+    "--language",
+    type=str,
+    default=None,
+    help="Subtitle language (used for language specific processing)"
+)
+@click.option(
+    "-e",
+    "--encoding",
+    type=str,
+    default="utf-8",
+    help="Character encoding (default: utf-8)."
+)
+@click.option(
+    "-n",
+    "--no-post-processing",
+    is_flag=True,
+    default=False,
+    help="Disable post-processing after conversion."
+)
+@click.option(
+    "-g",
+    "--keep-short-gaps",
+    is_flag=True,
+    help="Keep short gaps between lines (< 85 ms; only with post-processing enabled)"
+)
+def convert(
+    file: Path,
+    out: Path | None,
+    language: str | None,
+    encoding: str,
+    no_post_processing: bool,
+    keep_short_gaps: bool
+):
+    """Convert a Subtitle to SubRip (SRT)."""
+    # Flow: sniff the input format from its raw bytes, convert it with the
+    # matching converter, optionally run CommonIssuesFixer, then save the
+    # result to `out` (default: the input path with a ".srt" suffix).
+    # An unrecognized format is logged as an error and nothing is written.
+    if not isinstance(file, Path):
+        raise click.ClickException(f"Expected file to be a {Path} not {file!r}")
+    if out and not isinstance(out, Path):
+        raise click.ClickException(f"Expected out to be a {Path} not {out!r}")
+
+    if not out:
+        out = file.with_suffix(".srt")
+
+    log = logging.getLogger("convert")
+
+    data = file.read_bytes()
+    converter = None
+
+    # NOTE(review): three of the markers below were empty byte strings in the
+    # patch. `b"" in data` is always True, so every fragmented MP4 was treated
+    # as ISMT (the WVTT branch was unreachable) and every other input as SAMI.
+    # The tag markers here are reconstructed from the formats named in the log
+    # messages - confirm them against upstream subby.
+    if b"mdat" in data and b"moof" in data:
+        # Fragmented MP4 container: decide by what the samples carry.
+        if b"</tt>" in data:
+            log.info("Subtitle format: ISMT (DFXP in MP4)")
+            converter = ISMTConverter()
+        elif b"vttc" in data:
+            log.info("Subtitle format: WVTT (WebVTT in MP4)")
+            converter = WVTTConverter()
+    elif b"<SAMI>" in data:
+        log.info("Subtitle format: SAMI")
+        converter = SAMIConverter()
+    elif b"<tt " in data or b"</tt>" in data:
+        log.info("Subtitle format: DFXP/TTML/TTML2")
+        converter = SMPTEConverter()
+    elif b"WEBVTT" in data:
+        log.info("Subtitle format: WebVTT")
+        converter = WebVTTConverter()
+    elif data.startswith(b'{') and b'"Stroke"' in data and b'"background_color"' in data:
+        log.info("Subtitle format: JSON (Bilibili)")
+        converter = BilibiliJSONConverter()
+
+    if not converter:
+        log.error("Subtitle format was unrecognized...")
+        return
+
+    srt = converter.from_file(file)
+    log.info("Converted subtitle to SubRip (SRT)")
+
+    if not no_post_processing:
+        processor = CommonIssuesFixer()
+        processor.remove_gaps = not keep_short_gaps
+        srt, status = processor.from_srt(srt, language=language)
+        # `status` is used as a 0/1 index to pick the message.
+        log.info(f"Processed subtitle {['but no issues were found...', 'and repaired some issues!'][status]}")
+
+    srt.save(out, encoding=encoding)
+    log.info(f"Saved to: {out}")
+    log.debug(f"Used character encoding {encoding}")
+
+
+@main.group()
+@click.argument("file", type=Path)
+@click.option("-o", "--out", type=Path, default=None, help="Output path.")
+@click.option(
+    "-l",
+    "--language",
+    type=str,
+    default=None,
+    help="Subtitle language (used for language specific processing)"
+)
+@click.option(
+    "-e",
+    "--encoding",
+    type=str,
+    default="utf-8",
+    help="Character encoding (default: utf-8)."
+)
+@click.option(
+    "-n",
+    "--no-post-processing",
+    is_flag=True,
+    default=False,
+    help="Disable post-processing after SDH stripping."
+)
+@click.option(
+    "-g",
+    "--keep-short-gaps",
+    is_flag=True,
+    help="Keep short gaps between lines (< 85 ms)"
+)
+def process(file: Path, out: Path | None, **__):
+    """SubRip (SRT) post-processing."""
+    # Only the two path arguments are validated here. The remaining options
+    # are swallowed by **__; the subcommands read them from ctx.parent.params.
+    file_is_path = isinstance(file, Path)
+    if not file_is_path:
+        raise click.ClickException(f"Expected file to be a {Path} not {file!r}")
+
+    # `out` is optional: a falsy value is accepted as "not given".
+    out_is_acceptable = not out or isinstance(out, Path)
+    if not out_is_acceptable:
+        raise click.ClickException(f"Expected out to be a {Path} not {out!r}")
+
+
+@process.command()
+@click.pass_context
+def mend(ctx: click.Context):
+    """Repair or Mend common issues in a Subtitle."""
+    # Options are declared on the parent `process` group, so they are read
+    # from (and the default output path is written back to) its params dict.
+    params = ctx.parent.params
+    file = params["file"]
+
+    if not params["out"]:
+        # Path.with_stem() only exists on Python 3.9+, while pyproject.toml
+        # declares python ^3.8; with_name() builds the same "<stem>_mend<suffix>"
+        # path on every supported version.
+        params["out"] = file.with_name(f"{file.stem}_mend{file.suffix}")
+
+    log = logging.getLogger("process.mend")
+
+    processor = CommonIssuesFixer()
+    processor.remove_gaps = not params["keep_short_gaps"]
+    processed_srt, status = processor.from_file(file, language=params["language"])
+    # `status` is used as a 0/1 index to pick the message.
+    log.info(f"Processed subtitle {['but no issues were found...', 'and repaired some issues!'][status]}")
+
+    # Returned to the group's result callback, which decides whether to save.
+    return processed_srt, status
+
+
+@process.command("strip-sdh")
+@click.pass_context
+def strip_sdh(ctx: click.Context):
+    """Remove Hard-of-hearing descriptions from Subtitles."""
+    # Options are declared on the parent `process` group, so they are read
+    # from (and the default output path is written back to) its params dict.
+    params = ctx.parent.params
+    file = params["file"]
+
+    if not params["out"]:
+        # Path.with_stem() only exists on Python 3.9+, while pyproject.toml
+        # declares python ^3.8; with_name() builds the same path everywhere.
+        params["out"] = file.with_name(f"{file.stem}_sdh_stripped{file.suffix}")
+
+    log = logging.getLogger("process.strip_sdh")
+
+    processor = SDHStripper()
+    processed_srt, status = processor.from_file(file, language=params["language"])
+    log.info(f"Processed subtitle {['but no SDH descriptions were found...', 'and removed SDH!'][status]}")
+
+    if not params["no_post_processing"]:
+        fixer = CommonIssuesFixer()
+        fixer.remove_gaps = not params["keep_short_gaps"]
+        # Keep the fixer's own status: the message below must describe what
+        # the fixer did, not whether SDH descriptions were found earlier.
+        processed_srt, fix_status = fixer.from_srt(processed_srt, language=params["language"])
+        log.info(
+            "Processed stripped subtitle "
+            + ['but no issues were found...', 'and repaired some issues!'][fix_status]
+        )
+
+    # The returned status is still the SDH-stripping status, as before; the
+    # group's result callback only saves when it is truthy.
+    return processed_srt, status
+
+
+@process.result_callback()
+def process_result(result, out, encoding, *_, **__):
+    # Receives the (subtitle, status) pair returned by the invoked subcommand
+    # and writes the subtitle to `out` only when the status is truthy.
+    log = logging.getLogger("process")
+    processed_srt, status = result
+    if not status:
+        return
+
+    processed_srt.save(out, encoding=encoding)
+    log.info(f"Saved to: {out}")
+    log.debug(f"Used character encoding {encoding}")
diff --git a/scripts/subby/subby/converters/__init__.py b/scripts/subby/subby/converters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/subby/subby/converters/base.py b/scripts/subby/subby/converters/base.py
new file mode 100644
index 0000000..4464f9c
--- /dev/null
+++ b/scripts/subby/subby/converters/base.py
@@ -0,0 +1,27 @@
+from abc import ABC, abstractmethod
+from io import BytesIO
+from pathlib import Path
+from typing import BinaryIO
+
+from subby.subripfile import SubRipFile
+
+
+class BaseConverter(ABC):
+ """Base subtitle converter class; subclasses implement parse() and inherit the from_* helpers"""
+
+ def from_file(self, file: Path) -> SubRipFile:
+ """Opens the given file in binary mode and converts its contents to srt"""
+ with file.open(mode='rb') as stream:
+ return self.parse(stream)
+
+ def from_string(self, data: str) -> SubRipFile:
+ """Encodes the given string as UTF-8 and converts it to srt"""
+ return self.parse(BytesIO(data.encode('utf-8')))
+
+ def from_bytes(self, data: bytes) -> SubRipFile:
+ """Wraps the given bytes in an in-memory stream and converts them to srt"""
+ return self.parse(BytesIO(data))
+
+ @abstractmethod
+ def parse(self, stream: BinaryIO) -> SubRipFile:
+ """Parses data from a given binary stream and converts it to srt"""
diff --git a/scripts/subby/subby/converters/bilibili_json.py b/scripts/subby/subby/converters/bilibili_json.py
new file mode 100644
index 0000000..2559cc8
--- /dev/null
+++ b/scripts/subby/subby/converters/bilibili_json.py
@@ -0,0 +1,27 @@
+import datetime
+import json
+
+from srt import Subtitle
+
+from subby.converters.base import BaseConverter
+from subby.subripfile import SubRipFile
+
+
+class BilibiliJSONConverter(BaseConverter):
+    """Bilibili JSON subtitle converter"""
+
+    def parse(self, stream):
+        """Builds a SubRipFile from the cues listed under the 'body' key of a Bilibili JSON document"""
+        srt = SubRipFile()
+        for i, line in enumerate(json.load(stream)['body']):
+            content, location = line['content'], line.get('location', 2)  # a cue without a location is treated as 2
+            if location != 2:  # any other location is carried over as an ASS {\anN} alignment tag
+                content = ('{\\an%s}' % location) + content
+            srt.append(Subtitle(
+                index=i,
+                start=datetime.timedelta(seconds=line['from']),
+                end=datetime.timedelta(seconds=line['to']),
+                content=content
+            ))
+
+        return srt
diff --git a/scripts/subby/subby/converters/mp4.py b/scripts/subby/subby/converters/mp4.py
new file mode 100644
index 0000000..05fd110
--- /dev/null
+++ b/scripts/subby/subby/converters/mp4.py
@@ -0,0 +1,111 @@
+from collections import deque
+
+from pymp4.parser import MP4
+from pymp4.util import BoxUtil
+
+from subby.converters.base import BaseConverter
+from subby.converters.smpte import SMPTEConverter
+from subby.converters.webvtt import WebVTTConverter
+from subby.subripfile import SubRipFile
+from subby.utils.time import timestamp_from_ms
+
+
+class ISMTConverter(BaseConverter):
+ """ISMT (DFXP in MP4) subtitle converter"""
+
+ def parse(self, stream):  # converts every mdat box with SMPTEConverter and concatenates the results
+ srt = SubRipFile([])
+ for box in MP4.parse(stream.read()):
+ if box.type == b'mdat':
+ new = SMPTEConverter().from_bytes(box.data)
+
+ # Offset timecodes if necessary, i.e. when the new fragment starts before the last collected line
+ # https://github.com/SubtitleEdit/subtitleedit/blob/abd36e5/src/libse/SubtitleFormats/IsmtDfxp.cs#L85-L90
+ if srt and new and srt[-1].start > new[0].start:
+ new.offset(srt[-1].end)
+
+ srt.extend(new)
+
+ return srt
+
+
+class WVTTConverter(BaseConverter):
+ """WVTT (WebVTT in MP4) subtitle converter"""
+
+ def parse(self, stream): # pylint: disable=too-many-locals, too-many-branches
+ sample_durations = deque()  # {'start_ms', 'end_ms'} per trun sample, consumed in order by the mdat boxes
+ vtt_lines = []  # pieces of a WebVTT document, joined and handed to WebVTTConverter at the end
+ timescale = 0  # NOTE(review): stays 0 unless a moov/mdhd box was seen; the divisions below need it set
+
+ for box in MP4.parse(stream.read()):
+ if box.type == b'moov':
+ for mdhd in BoxUtil.find(box, b'mdhd'):
+ timescale = mdhd.timescale
+ break
+
+ for stsd in BoxUtil.find(box, b'stsd'):
+ wvtt = stsd.entries[0]
+ header = [box.config for box in wvtt.children
+ if box.type == b'vttC'][0]
+ vtt_lines.append(f'{header}\n\n')
+ break
+
+ if box.type == b'moof':
+ start_offset = 0
+ duration = 0
+ for tfdt in BoxUtil.find(box, b'tfdt'):
+ start_offset = tfdt.baseMediaDecodeTime
+ break
+
+ for trun in BoxUtil.find(box, b'trun'):
+ for sample in trun.sample_info:
+ start_offset += sample.sample_composition_time_offsets or 0
+ duration += sample.sample_duration or 0
+ sample_durations.append({
+ 'start_ms': (start_offset / timescale) * 1000,
+ 'end_ms': ((start_offset + duration) / timescale) * 1000
+ })
+
+ if box.type == b'mdat':
+ vtt_boxes = MP4.parse(box.data)
+ new_start = None
+ for vtt_box in vtt_boxes:
+ settings = None
+ for sttg in BoxUtil.find(vtt_box, b'sttg'):
+ settings = sttg.settings
+ break
+
+ cue_text = None
+ for payl in BoxUtil.find(vtt_box, b'payl'):
+ cue_text = payl.cue_text
+ break
+
+ try:
+ sample_duration = sample_durations.popleft()
+ except IndexError: # broken line, no durations found
+ continue
+
+ if vtt_box.type == b'vttc':
+ try:
+ start_ms = end_ms  # continue from the end of the previously handled box
+ except UnboundLocalError:  # very first box: end_ms has not been assigned yet
+ end_ms = sample_duration['end_ms']
+ start_ms = end_ms
+ else:
+ start_ms = sample_duration['start_ms']
+
+ end_ms = sample_duration['end_ms']
+
+ if vtt_box.type == b'vtte':  # no cue is emitted for this box; its end becomes the next cue's start
+ new_start = end_ms
+ continue
+
+ if new_start:
+ start_ms = new_start
+ new_start = None
+
+ vtt_lines.append((f'{timestamp_from_ms(start_ms)} --> '
+ f'{timestamp_from_ms(end_ms)} '
+ f'{settings}\n{cue_text}\n\n'))
+
+ return WebVTTConverter().from_string(''.join(vtt_lines))
diff --git a/scripts/subby/subby/converters/sami.py b/scripts/subby/subby/converters/sami.py
new file mode 100644
index 0000000..ea261e4
--- /dev/null
+++ b/scripts/subby/subby/converters/sami.py
@@ -0,0 +1,90 @@
+from html.parser import HTMLParser
+
+from srt import Subtitle
+
+from subby.converters.base import BaseConverter
+from subby.subripfile import SubRipFile
+from subby.utils.time import timedelta_from_ms
+
+
+class SAMIConverter(BaseConverter):
+ """SAMI subtitle converter"""
+
+ def parse(self, stream):  # decodes as UTF-8 (BOM tolerated) and delegates to the HTMLParser-based helper
+ return _SAMIConverter(stream.read().decode('utf-8-sig')).srt
+
+
+# Internal converter class as we inherit from HTMLParser
+class _SAMIConverter(HTMLParser):
+ def __init__(self, subtitle):  # parses and converts immediately; the result is available as self.srt
+ super().__init__()
+ self.lines = []  # one dict per <sync> tag: its attributes plus the accumulated 'text'
+ self.tags = []  # every start tag seen so far, in document order
+
+ self.srt = SubRipFile([])
+ self.line_list = []  # intermediate {'start', 'end', 'content'} dicts built by _convert
+
+ self.feed(self._correct_tags(subtitle))
+ self._convert()
+
+ def handle_starttag(self, tag, attrs_org):  # HTMLParser hook, called for every opening tag
+ attrs = {}
+ for attr, val in attrs_org:
+ attrs[attr] = val
+
+ if tag == 'sync':  # each <sync> starts a new subtitle line seeded with the tag's attributes
+ data = {'text': ''}
+ data.update(attrs)
+ self.lines.append(data)
+
+ self.tags.append({'name': tag, 'attrs': attrs})
+
+    def handle_data(self, data):
+        """Appends text to the current <sync> line, depending on the most recently opened tag"""
+        if not self.tags or not self.lines:
+            return  # text before the first tag / first <sync> has no line to attach to (was an IndexError)
+
+        last_tag = self.tags[-1]['name']
+        if last_tag == 'br':
+            self.lines[-1]['text'] += '\n'  # only the break is recorded, the text itself is dropped
+        elif last_tag == 'i' and data.strip():
+            self.lines[-1]['text'] += f'{data}'
+        elif last_tag != 'sync':
+            self.lines[-1]['text'] += data
+
+    def _convert(self):
+        """Turns the collected <sync> entries into Subtitle objects appended to self.srt"""
+        for line in self.lines:
+            # Use empty lines as the end of previous line
+            if not line.get('text', '').strip():
+                if self.line_list:  # a leading empty <sync> has no previous line to close (was an IndexError)
+                    self.line_list[-1]['end'] = float(line['start'])
+                continue
+
+            if not line.get('end'):
+                # Arbitrarily set duration to 4s if end time not present
+                line['end'] = float(line['start']) + 4000
+
+            self.line_list.append({
+                'start': float(line['start']),
+                'end': float(line['end']),
+                'content': line['text'].strip()
+            })
+
+        # Second pass: end times are only final once every <sync> has been looked at
+        for num, line in enumerate(self.line_list):
+            self.srt.append(Subtitle(
+                index=num,
+                start=timedelta_from_ms(line['start']),
+                end=timedelta_from_ms(line['end']),
+                content=line['content']
+            ))
+
+ @staticmethod
+ def _correct_tags(data):  # string-level clean-up applied to the raw document before it is fed to the parser
+ data = data.replace('', '')
+ data = data.replace(';>', '>')  # drop a stray semicolon directly in front of a closing angle bracket
+ data = data.replace('
', '\n')
+ data = data.replace('
', '\n')
+ data = data.replace('
', '\n')
+ return data
diff --git a/scripts/subby/subby/converters/smpte.py b/scripts/subby/subby/converters/smpte.py
new file mode 100644
index 0000000..e5d7f38
--- /dev/null
+++ b/scripts/subby/subby/converters/smpte.py
@@ -0,0 +1,168 @@
+import html
+import logging
+import re
+
+import bs4
+from srt import Subtitle
+
+from subby.converters.base import BaseConverter
+from subby.subripfile import SubRipFile
+from subby.utils.time import timedelta_from_timestamp, timestamp_from_ms
+
+
+class SMPTEConverter(BaseConverter):
+ """DFXP/TTML/TTML2 subtitle converter"""
+
+ def parse(self, stream):  # decodes as UTF-8 (BOM tolerated); the per-document work happens in _SMPTEConverter
+ data = stream.read().decode('utf-8-sig')
+
+ if data.count('') == 1:
+ return _SMPTEConverter(data).srt
+
+ # Support for multiple XML documents in a single file
+ smpte_subs = [s + '' for s in data.strip().split('') if s]
+ srt = SubRipFile([])
+
+ for sub in smpte_subs:  # results are concatenated in document order
+ srt.extend(_SMPTEConverter(sub).srt)
+
+ return srt
+
+
+# Internal converter class as we need to handle multiple subs in one stream
+class _SMPTEConverter:
+ def __init__(self, data):  # parses and converts immediately; the result is available as self.srt
+ self.logger = logging.getLogger(__name__)
+ self.root = bs4.BeautifulSoup(data, 'lxml-xml')
+ # Unescape only if necessary (parsing fails)
+ if not self.root:
+ self.root = bs4.BeautifulSoup(html.unescape(data), 'lxml-xml')
+
+ self.srt = SubRipFile([])
+
+ self.tickrate = int(self.root.tt.get('ttp:tickRate', 0))  # 0 when the attribute is absent
+ self.frame_duration = 1  # ms per frame; stays 1 when no ttp:frameRate is declared
+ if (rate := self.root.tt.get('ttp:frameRate')) is not None:
+ num, denom = map(int, self.root.tt.get('ttp:frameRateMultiplier', '1 1').split())
+ framerate = (int(rate) * num) / denom
+ self.frame_duration = (1 / framerate) * 1000 # ms
+
+ self.italics = {}  # style xml:id -> italic flag, filled by _parse_styles
+ self.an8 = {}  # region xml:id -> top-aligned flag, filled by _parse_styles
+ self.all_span_italics = '' not in data
+
+ self._parse_styles()
+ self._convert()
+
+ def _convert(self):  # converts the <p> elements found under the first <div> of <body> into self.srt
+ try:
+ assert self.root.tt.body.div is not None
+ except (AttributeError, AssertionError):  # no tt/body/div: nothing to convert
+ return
+
+ for num, line in enumerate(self.root.tt.body.div.find_all('p'), 1):
+ line_text = ''
+
+ try:
+ for time in ('begin', 'end'):  # rewrite both attributes in place as normalised timestamps
+ if line[time].endswith('t'):
+ line[time] = self._convert_ticks(line[time])
+ elif line[time].endswith('ms'):
+ line[time] = timestamp_from_ms(line[time][:-2])
+ else:
+ line[time] = self._parse_timestamp(line[time])
+ except (AttributeError, KeyError):
+ self.logger.warning(
+ 'Could not parse %s timestamp for line %02d, skipping',
+ time, num
+ )
+ continue
+
+ srt_line = Subtitle(
+ index=num,
+ start=timedelta_from_timestamp(line['begin']),
+ end=timedelta_from_timestamp(line['end']),
+ content=''
+ )
+
+ for element in line:
+ line_text += self._parse_element(element)
+
+ if self._is_italic(line) and line_text.strip():
+ line_text = line_text.replace('', '')
+ line_text = line_text.replace('', '')
+ line_text = '%s' % line_text.strip()
+
+ if self._is_an8(line) and line_text.strip():
+ line_text = '{\\an8}%s' % line_text.strip()
+
+ srt_line.content = line_text.strip().strip('\n')
+ if srt_line.content:  # lines that end up empty are not added
+ self.srt.append(srt_line)
+
+ def _parse_styles(self):  # pre-computes italic/top-alignment flags for every <style>/<region> that has an xml:id
+ for style in self.root.find_all('style'):
+ if style.get('xml:id'):
+ self.italics[style['xml:id']] = self._is_italic(style)
+ for region in self.root.find_all('region'):
+ if region.get('xml:id'):
+ self.an8[region['xml:id']] = self._is_an8(region)
+
+ def _parse_element(self, element):  # recursively flattens a node into text; returns '' for other node types
+ element_text = ''
+ if isinstance(element, bs4.element.NavigableString):
+ element_text += element
+ elif isinstance(element, bs4.element.Tag):
+ subelement_text = ''
+ for subelement in element:
+ subelement_text += self._parse_element(subelement)
+ element_text += subelement_text
+ if element.name == 'br':  # a <br> element contributes a line break
+ element_text += '\n'
+
+ if self._is_italic(element) and element_text.strip():
+ element_text = element_text.replace('', '')
+ element_text = element_text.replace('', '')
+ element_text = '%s' % element_text
+
+ if self._is_an8(element) and element_text.strip():
+ element_text = '{\\an8}%s' % element_text
+
+ return element_text
+
+ def _is_italic(self, element):  # an explicit tts:fontStyle wins over a referenced style id
+ if element.get('tts:fontStyle'):
+ return element.get('tts:fontStyle') == 'italic'
+ elif element.get('style'):
+ return self.italics.get(element['style'])  # None (falsy) for a style id that was never registered
+ elif element.name == 'span' and not element.attrs and self.all_span_italics:
+ return not self._is_italic(element.parent)  # attribute-less span: the opposite of its parent's state
+
+ return False
+
+ def _is_an8(self, element):  # top-aligned when tts:displayAlign is 'before', directly or via the region
+ if element.get('tts:displayAlign'):
+ return element.get('tts:displayAlign') == 'before'
+ elif element.get('region'):
+ return self.an8.get(element['region'])  # None (falsy) for a region id that was never registered
+
+ return False
+
+ def _convert_ticks(self, ticks):  # ticks: a string such as '12345t'; the trailing unit character is dropped
+ ticks = int(ticks[:-1])
+ offset = 1.0 / self.tickrate  # NOTE(review): tickrate is 0 when ttp:tickRate is absent, so this would raise
+ seconds = (offset * ticks) * 1000  # despite the name, this value is in milliseconds
+
+ return timestamp_from_ms(seconds)
+
+    def _parse_timestamp(self, timestamp):
+        """Normalises 'HH:MM:SS' plus an optional ':frames' or '.fraction' suffix to 'HH:MM:SS.mmm'"""
+        parsed = re.search(r'([0-9]{2}):([0-9]{2}):([0-9]{2})(?:([:\.,])?([0-9]{1,3}))?', timestamp)
+        hours, minutes, seconds = (int(parsed.group(i)) for i in (1, 2, 3))  # AttributeError if unparsable
+        delimiter, digits = parsed.group(4), parsed.group(5)
+        if delimiter in ('.', ','):  # decimal fraction of a second ('.5' == 500 ms), never a frame count
+            miliseconds = int(digits.ljust(3, '0'))
+        else:  # ':' (or no delimiter) counts frames, each lasting self.frame_duration milliseconds
+            miliseconds = self.frame_duration * int(digits or 0)
+
+        return "%02d:%02d:%02d.%03d" % (hours, minutes, seconds, miliseconds)
diff --git a/scripts/subby/subby/converters/webvtt.py b/scripts/subby/subby/converters/webvtt.py
new file mode 100644
index 0000000..80f00f7
--- /dev/null
+++ b/scripts/subby/subby/converters/webvtt.py
@@ -0,0 +1,162 @@
+from __future__ import annotations
+
+import html
+import re
+from functools import partial
+from typing import Optional
+
+import tinycss
+from srt import Subtitle
+
+from subby.converters.base import BaseConverter
+from subby.subripfile import SubRipFile
+from subby.utils.time import timedelta_from_timestamp
+
+HTML_TAG = re.compile(r'?(?!/?i)[^>\s]+>')
+STYLE_TAG_OPEN = re.compile(r'^([^<]+)')
+STYLE_TAG = re.compile(r'([^<]+)<\/c>')
+STYLE_TAG_CLOSE = re.compile(r'<\/c>$')
+SKIP_WORDS = ('WEBVTT', 'NOTE', '/*', 'X-TIMESTAMP-MAP')  # lines starting with any of these are ignored by parse()
+SPEAKER_TAG = re.compile(r']+>') # Matches opening tags, closing tags handled by STYLE_TAG_CLOSE
+
+
+class WebVTTConverter(BaseConverter):
+ """WebVTT subtitle converter"""
+
+    def parse(self, stream):
+        """Converts a WebVTT byte stream line by line; italic-styled cue classes become italics tags"""
+        srt = SubRipFile()
+        looking_for_text = False
+        looking_for_style = False
+        text = []
+        position = None
+        line_number = 1
+        styles = {}
+        current_style = []
+        css_parser = tinycss.make_parser('page3')
+
+        for line in stream:
+            # As our stream is bytes we have to deal with line breaks here
+            line = line.decode('utf-8').replace('\r\n', '\n').replace('\r', '\n').strip()
+
+            # Skip processing any unnecessary lines
+            if any(line.startswith(word) for word in SKIP_WORDS):
+                continue
+
+            # Empty line separates cues
+            if line == '':
+                # Parse current style
+                if looking_for_style:
+                    stylesheet = css_parser.parse_stylesheet('\n'.join(current_style))
+                    for rule in stylesheet.rules:
+                        ft = next((e for e in rule.selector if e.type == 'FUNCTION'), None)
+                        if not ft:
+                            continue
+                        name = next((t for t in ft.content if t.type == 'IDENT'), None)
+                        if not name:
+                            continue
+                        styles[name.value] = {}
+                        for dec in rule.declarations:
+                            styles[name.value][dec.name] = dec.value.as_css()
+
+                    looking_for_style = False
+
+                # Keep looking for text if last line has none
+                # this will only happen if there's an unexpected line break
+                if not text:
+                    continue
+
+                srt[-1].content = '\n'.join(text)
+                text = []
+                looking_for_text = False
+
+            # Check for style start (a STYLE block header; cue text that merely contains the word stays text)
+            elif not looking_for_text and line.startswith('STYLE'):
+                looking_for_style = True
+
+            # Check for style content
+            elif looking_for_style:
+                current_style.append(line)
+
+            # Check for time line
+            elif ' --> ' in line:
+                parts = line.strip().split()
+                position = self._get_position([p for p in parts[3:] if ':' in p])
+
+                start, _, end, *_ = parts
+                # Fix short timecodes (no hour)
+                if start.count(':') == 1:
+                    start = f'00:{start}'
+                if end.count(':') == 1:
+                    end = f'00:{end}'
+
+                srt.append(Subtitle(
+                    index=line_number,
+                    start=timedelta_from_timestamp(start),
+                    end=timedelta_from_timestamp(end),
+                    content=''
+                ))
+                looking_for_text = True
+                line_number += 1
+
+            # Append text if we're inside a line
+            elif looking_for_text:
+                # Unescape html entities
+                line = html.unescape(line)
+
+                # Remove speaker tags here
+                line = re.sub(SPEAKER_TAG, '', line)
+
+                # Set \an8 tag if position is below 25
+                # (value taken from SubtitleEdit)
+                if position is not None and position < 25:
+                    line = '{\\an8}' + line
+                    position = None
+
+                text.append(line.strip())
+
+        # Add any leftover text to the last line
+        if text:
+            srt[-1].content += '\n'.join(text)
+
+        for line in srt:
+            # Replace styles with italics tag when appropriate
+            # (replace instead of match, to handle nested)
+            line.content = re.sub(
+                STYLE_TAG,
+                partial(self._replace_italics, styles=styles),
+                line.content
+            )
+
+            # Strip non-italic tags
+            line.content = re.sub(HTML_TAG, '', line.content)
+
+        return srt
+
+ @staticmethod
+ def _get_position(cue_settings: list[str]) -> Optional[float]:
+ """
+ Parses list of "key:value" cue settings and extracts the vertical "line" offset as a float
+ Line number based offsets (other than 0) and alignment strings are ignored
+
+ https://www.w3.org/TR/webvtt1/#webvtt-line-cue-setting
+ """
+ if not cue_settings or cue_settings == ['None']:
+ return None
+
+ position = None
+ for key, val in (pos.split(':') for pos in cue_settings):
+ if key == 'line' and val and (val := val.split(',')[0])[-1] == '%':  # percentage; text after ',' is dropped
+ position = float(val[:-1])
+ break
+ elif key == 'line' and val and val == '0':  # the only plain line number that is honoured
+ position = 0
+ break
+
+ return position
+
+ @staticmethod
+ def _replace_italics(match: re.Match, styles: dict[str, dict[str, str]]) -> str:  # re.sub callback for STYLE_TAG
+ if (s := styles.get(match[1])) and s.get('font-style') == 'italic':  # match[1] is looked up as a style name
+ return f'{match[2]}'
+ return match[0]  # any other style: leave the matched text untouched
diff --git a/scripts/subby/subby/processors/__init__.py b/scripts/subby/subby/processors/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/subby/subby/processors/base.py b/scripts/subby/subby/processors/base.py
new file mode 100644
index 0000000..5e99e2d
--- /dev/null
+++ b/scripts/subby/subby/processors/base.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+from subby.subripfile import SubRipFile
+
+
+class BaseProcessor(ABC):
+ """Base subtitle processor class; subclasses implement process() and inherit the from_* helpers"""
+
+ def from_srt(self, srt: SubRipFile, language: str | None = None) -> tuple[SubRipFile, bool]:
+ """Processes given SubRipFile object"""
+ return self.process(srt, language)
+
+ def from_file(self, file: Path, language: str | None = None) -> tuple[SubRipFile, bool]:
+ """Reads given srt file as UTF-8 text and processes it"""
+ with file.open(mode='r', encoding='utf-8') as stream:
+ return self.from_string(stream.read(), language)
+
+ def from_string(self, data: str, language: str | None = None) -> tuple[SubRipFile, bool]:
+ """Parses given string with srt subtitles and processes the result"""
+ return self.process(SubRipFile.from_string(data), language)
+
+ @abstractmethod
+ def process(self, srt: SubRipFile, language: str | None = None) -> tuple[SubRipFile, bool]:
+ """
+ Processes given SubRipFile
+ :return: Processed SubRipFile, success (whether any changes were made)
+ """
diff --git a/scripts/subby/subby/processors/common_issues.py b/scripts/subby/subby/processors/common_issues.py
new file mode 100644
index 0000000..83425d1
--- /dev/null
+++ b/scripts/subby/subby/processors/common_issues.py
@@ -0,0 +1,278 @@
+import copy
+import datetime
+import html
+import re
+import unicodedata
+from datetime import timedelta
+
+import langcodes
+
+from subby import regex as Regex
+from subby.processors.base import BaseProcessor
+from subby.processors.rtl import RTL_LANGUAGES, RTLFixer
+from subby.subripfile import SubRipFile
+from subby.utils.time import line_duration
+
+
+class CommonIssuesFixer(BaseProcessor):
+ """Processor fixing common issues found in subtitles"""
+
+ remove_gaps = True
+
+ def process(self, srt, language=None):  # works on a deep copy of srt
+ fixed = self._fix_time_codes(copy.deepcopy(srt))
+ corrected = self._correct_subtitles(fixed)
+
+ if language and langcodes.get(language).language in RTL_LANGUAGES:  # RTL tagging only for listed languages
+ corrected, _ = RTLFixer().process(corrected, language=language)
+
+ return corrected, corrected != srt  # second item: whether the result differs from the input
+
+ def _correct_subtitles(self, srt: SubRipFile) -> SubRipFile:  # cleans each line's text, then merges/closes gaps
+ def _fix_line(line):
+ # [GENERAL] - Affects other regexes
+ # Remove more than one space
+ line = re.sub(r' {2,}', ' ', line)
+ # Correct lines starting with space
+ line = re.sub(r'^\s*', '', line)
+ line = re.sub(r'\n\s*', '\n', line)
+ #
+ # [ENCODING FIXES, CHARACTER REPLACEMENTS]
+ # Fix musical notes garbled by encoding
+ # has to happen before normalization as that replaces the TM char
+ line = line.replace(r'♪', '♪')
+ # Normalize unicode characters
+ line = unicodedata.normalize('NFKC', line)
+ # Replace short hyphen with regular size
+ line = line.replace(r'‐', r'-')
+ # Replace double note with single note
+ line = line.replace(r'♫', r'♪')
+ # Replace hashes, asterisks at the start of a line with a musical note
+ line = re.sub(
+ r'^((?:{\\an8})?(?:)?)(- ?)?[#\*]{1,}(?=\s+)',
+ r'\1\2♪',
+ line,
+ flags=re.M
+ )
+ # Replace hashes, asterisks at the end of a line with a musical note
+ line = re.sub(
+ r'(?<=\s)(?$|$)',
+ r'♪\1',
+ line,
+ flags=re.M
+ )
+ line = re.sub(r'^[#\*]+$', r'♪', line, flags=re.M)
+ # Move notes into italics, if rest of the line is
+ line = re.sub(r'♪ (.*)', r'♪ \1', line)
+ line = re.sub(r'(♪.*)\s*♪', r'\1 ♪', line)
+ # Replace some pound signs with notes (Binge...)
+ # (Matches only start/end of a line with a space
+ # to avoid false positives)
+ line = re.sub(r'^£ ', r'♪ ', line)
+ line = re.sub(r' £$', r' ♪', line)
+ # Duplicated notes
+ line = re.sub(r'♪{1,}', r'♪', line)
+ # Add spaces between notes and text
+ line = re.sub(r'^♪([A-Za-z])', r'♪ \1', line)
+ line = re.sub(r'([A-Za-z])♪', r'\1 ♪', line)
+ # Replace \h (non-breaking space in ASS) with a regular space
+ # (result of ffmpeg extraction of mp4-embedded subtitles)
+ line = re.sub(r'(\\h)+', ' ', line).strip()
+ # Fix leftover amps (html unescape fixes those, but not when they're duped)
+ line = re.sub(r'&(amp;){1,}', r'&', line)
+ # Fix "it'`s" -> "it's"
+ line = re.sub(r"'[`’]", r"'", line)
+
+ # [TAG STRIPPING AND CORRECTING]
+ #
+ # Replace ASS positioning tags with top only
+ line = re.sub(r'(\{\\an[0-9]\}){1,}', r'{\\an8}', line)
+ # Remove space after ASS positioning tags
+ line = re.sub(r'(\{\\an[0-9]\}) +(?=[A-Za-z-])', r'{\\an8}', line)
+ # Fix hanging tags
+ line = re.sub(r'^(<[a-z]>)\n', r'\1', line)
+ line = re.sub(r'([a-z])>$\n<([a-z])>', r'\n', line, flags=re.M)
+ # Remove duplicated tags
+ line = re.sub(r'(<[a-z]>){1,}', r'\1', line)
+ line = re.sub(r'([a-z]>){1,}', r'\1', line)
+ # Remove an unnecessary space after italic tag open
+ line = re.sub(r'^(<[a-z]>) {1,}', r'\1', line)
+ line = re.sub(r'^ {1,}', '', line)
+ # Remove non-italic tags
+ line = re.sub(r'?(?!i>)[a-z]+>', '', line)
+ # Remove spaces between tags
+ line = re.sub(r'(<[a-z]>|\{\\an8\}) (<[a-z]>|\{\\an8\})', r'\1\2', line)
+ # Move hanging opening tags onto separate lines
+ line = re.sub(r'(<[a-z]>)\n', r'\n\1', line)
+ # Move hanging closing tags onto separate lines
+ line = re.sub(r'\n([a-z]>)', r'\1\n', line)
+ # Move spaces outside italic tags
+ line = re.sub(r'(<[a-z]>) ', r' \1', line)
+ line = re.sub(r' ([a-z]>)', r'\1 ', line)
+ # Remove needless spaces inside italic tags
+ line = re.sub(r'^(<[a-z]>) ', r'\1', line)
+ # Fix "space"
+ line = re.sub(r'(?:[a-z]>)(\s*)(?:<[a-z]>)', r'\1', line, flags=re.M)
+ # Remove empty tags
+ line = re.sub(r'<[a-z]>\s*[a-z]>', r'', line)
+ # Move "{\an8}" to the rest of the text if it's on a new line
+ line = re.sub(r'({\\an8\})\n', r'\1', line)
+
+ # [REFORMATTING]
+ #
+ # Remove spaces inside brackets ("( TEXT )" -> "(TEXT)")
+ line = re.sub(r'\( (.*) \)', r'(\1)', line)
+ # Remove ">> " before text
+ line = re.sub(r'(^|\n)(?[a-z]>|\{\\an8\})?>> ', r'\1\2', line)
+ # Remove lines consisting only of ">>"
+ line = re.sub(r'(^|\n)(?[a-z]>|\{\\an8\})?>>($|\n)', r'', line)
+ # Replace any leftover
tags with a proper line break
+ line = re.sub(r'
', '\n', line)
+ # Remove empty lines
+ line = re.sub(r'^\.?\s*$', '', line, flags=re.M)
+ line = re.sub(r'^-?\s*$', '', line, flags=re.M)
+ line = re.sub(r'^(?i>|\{\\an8\})?\s*$', '', line, flags=re.M)
+ # Remove lines consisting only of a single character or digit
+ line = re.sub(r'^\[A-Za-z0-9]$', '', line)
+ # Adds missing spaces after "...", commas, and tags
+ line = re.sub(r'([a-z])(\.\.\.)([a-zA-Z][^.])', r'\1\2 \3', line)
+ line = re.sub(r'([a-z]>)(\w)', r'\1 \2', line)
+ line = re.sub(r'([a-z]),([a-zA-Z])', r'\1, \2', line)
+ line = re.sub(r',\n([a-z]+[\.\?])\s*$', r', \1', line)
+ # Correct front and end ellipses
+ line = re.sub(
+ rf'({Regex.FRONT_OPTIONAL_TAGS_WITH_HYPHEN})' r'\.{1,}',
+ r'\1...',
+ line, flags=re.M
+ )
+ line = re.sub(r'\.{2,}' rf'({Regex.TAGS})?' r'\s*$', r'...\1', line, flags=re.M)
+ # Add space after frontal speaker hyphen
+ line = re.sub(r"^(|\{\\an8\})?-+(?='?[\w\"\[\(\<\{\.\$♪¿¡])", r'\1- ', line, flags=re.M)
+ # Remove unnecessary space before "--"
+ line = re.sub(r'\s*--(\s*)', r'--\1', line, flags=re.M)
+ # Move notes inside tags ( ♪ -> )
+ line = re.sub(r'([a-z]>)(\s*♪{1,})$', r'\2\1', line, flags=re.M)
+ # Remove trailing spaces
+ line = re.sub(r' +$', r'', line, flags=re.M).strip()
+
+ # [LINE SPLITS AND LINE BREAKS]
+ #
+ # Adds missing line splits (primarily present in Amazon subtitles)
+ line = re.sub(r'(.*)([^.][\]\)])([A-Z][^.])', r'\1\2\n\3', line)
+ line = re.sub(
+ r'(.*)([^\.\sA-Z][!\.;:?])(?|\n<[a-z]>)(\w+)\n', r'\1\2 ', line)
+ # Add missing hyphens
+ line = re.sub(r'^\s*(?!-)(.*)\n- ([A-Z][a-z]+)$', r'- \1\n- \2', line)
+ # Remove linebreaks inside lines
+ line = re.sub(r'\r\n{1,}', r'\r\n', line).strip()
+ line = re.sub(r'\n{1,}', r'\n', line).strip()
+ # Remove duplicate spaces around italics
+ line = re.sub(r' + +', r' ', line).strip()
+ # Remove italics from hyphen, when content immediately following is not italics
+ line = re.sub(r'-([^<]+)', r'-\1', line).strip()
+
+ return line
+
+ for line in srt:
+ # Unescape html entities (twice, because yes, double encoding happens...)
+ for _ in range(2):
+ line.content = html.unescape(line.content)
+
+ # Run fix_line twice, as some of the fixes can introduce issues, e.g. double spaces
+ for _ in range(2):
+ line.content = _fix_line(line.content)
+ line.content = line.content.strip()
+
+ # Remove remaining linebreaks
+ line.content = line.content.strip('\n')
+
+ # Remove italics if every line is italicized, as this is almost certainly a mistake
+ # (using slices should be more performant than regex or startswith/endswith)
+ if len(srt) > 10 \
+ and all(line.content[:3] == '' and line.content[-4:] == '' for line in srt):
+ for line in srt:
+ line.content = line.content[3:-4]
+
+ combined = self._combine_timecodes(srt)
+ if self.remove_gaps:
+ return self._remove_gaps(combined)
+
+ return combined
+
+ def _combine_timecodes(self, srt: SubRipFile) -> SubRipFile:
+ """Merges lines sharing identical timecodes, and near-adjacent lines repeating the same content"""
+ subs_copy = SubRipFile([])
+ for line in srt:
+ if len(subs_copy) == 0:  # the first line is always kept as the starting point
+ subs_copy.append(line)
+ continue
+ if line_duration(subs_copy[-1]) == line_duration(line) \
+ and subs_copy[-1].start == line.start \
+ and subs_copy[-1].end == line.end:
+ if subs_copy[-1].content != line.content:  # same timing, new text: stack it; exact repeats are dropped
+ subs_copy[-1].content += '\n' + line.content
+ # Merge lines with the same text within 10 ms
+ elif self._subtract_ts(line.start, subs_copy[-1].end) < 10 \
+ and line.content == subs_copy[-1].content:
+ subs_copy[-1].end = line.end
+ # Merge lines with less than 2 frames of gap and same text
+ # to avoid duplicating lines as we remove gaps later
+ elif 0 < self._subtract_ts(line.start, subs_copy[-1].end) <= 85 \
+ and line.content.startswith(subs_copy[-1].content) \
+ and self.remove_gaps:
+ subs_copy[-1].end = line.end
+ subs_copy[-1].content = line.content
+ # Fix overlapping times
+ elif self._subtract_ts(line.start, subs_copy[-1].end) == 0:
+ subs_copy[-1].end -= timedelta(milliseconds=1)
+ subs_copy.append(line)
+ elif line.content.strip():  # anything else is kept only if it still has text
+ subs_copy.append(line)
+
+ subs_copy = subs_copy or srt  # an empty result falls back to the input object
+ subs_copy.clean_indexes()
+ return subs_copy
+
+ def _remove_gaps(self, srt: SubRipFile) -> SubRipFile:
+ """Remove short gaps (more than 1 ms, up to 85 ms) between consecutive lines"""
+ subs_copy = SubRipFile([])
+ for line in srt:
+ if len(subs_copy) == 0:  # the first line is always kept as the starting point
+ subs_copy.append(line)
+ continue
+ # Remove 2-frame or smaller gaps (2 frames/83ms@24 is Netflix standard)
+ elif 1 < self._subtract_ts(line.start, subs_copy[-1].end) <= 85:
+ line.start = subs_copy[-1].end  # pull this line back to where the previous one ended...
+ subs_copy[-1].end -= timedelta(milliseconds=1)  # ...then end the previous line 1 ms earlier
+ subs_copy.append(line)
+ elif line.content.strip():  # anything else is kept only if it still has text
+ subs_copy.append(line)
+
+ subs_copy = subs_copy or srt  # an empty result falls back to the input object
+ subs_copy.clean_indexes()
+ return subs_copy
+
+ @staticmethod
+ def _fix_time_codes(srt: SubRipFile) -> SubRipFile:
+ """Fixes timecodes over 23:59, often present in live content (modifies the given lines in place)"""
+ offset = 0
+ for line in srt:
+ hours, _ = divmod(line.start.seconds, 3600)
+ hours += line.start.days * 24  # timedelta keeps whole days separately from seconds
+
+ if not offset and hours > 23:  # the first line past 23h fixes the offset for all following lines
+ offset = hours
+ if offset:
+ line.start -= datetime.timedelta(hours=offset)
+ line.end -= datetime.timedelta(hours=offset)
+ return srt
+
+ @staticmethod
+ def _subtract_ts(ts1: datetime.timedelta, ts2: datetime.timedelta) -> int:
+ """Returns ts1 - ts2 rounded to a whole number of milliseconds (negative when ts2 is later)"""
+ return round((ts1 - ts2).total_seconds() * 1000)
diff --git a/scripts/subby/subby/processors/rtl.py b/scripts/subby/subby/processors/rtl.py
new file mode 100644
index 0000000..0f324c8
--- /dev/null
+++ b/scripts/subby/subby/processors/rtl.py
@@ -0,0 +1,34 @@
+import logging
+
+import langcodes
+
+from subby.processors.base import BaseProcessor
+
+RTL_LANGUAGES = ('ar', 'fa', 'he', 'ps', 'syc', 'ug', 'ur')
+RTL_CONTROL_CHARS = ('\u200e', '\u200f', '\u202a', '\u202b', '\u202c', '\u202d', '\u202e')
+RTL_CHAR = '\u202b'
+
+
+class RTLFixer(BaseProcessor):
+    """Processor fixing right-to-left language tagging"""
+
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+
+    def process(self, srt, language=None):
+        """Re-tags every line of srt in place; returns (srt, whether any line's content changed)"""
+        if language and langcodes.get(language).language not in RTL_LANGUAGES:
+            self.logger.warning('RTL tagger running on an unexpected language (%s)', language)
+        before = [line.content for line in srt]  # snapshot: the fixer mutates and returns this very object
+        corrected = self._correct_subtitles(srt)
+        return corrected, before != [line.content for line in corrected]
+
+    def _correct_subtitles(self, srt):
+        """Strips existing directional control characters, then prefixes each physical line with RTL_CHAR"""
+        for line in srt:
+            # Remove previous RTL-related formatting
+            for char in RTL_CONTROL_CHARS:
+                line.content = line.content.replace(char, '')
+            # Add RTL_CHAR (U+202B, right-to-left embedding) at the start of every line
+            line.content = RTL_CHAR + line.content.replace("\n", f"\n{RTL_CHAR}")
+        return srt
diff --git a/scripts/subby/subby/processors/sdh.py b/scripts/subby/subby/processors/sdh.py
new file mode 100644
index 0000000..353f348
--- /dev/null
+++ b/scripts/subby/subby/processors/sdh.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import copy
+import re
+
+from subby import regex as Regex
+from subby.processors.base import BaseProcessor
+from subby.subripfile import SubRipFile
+
+
+class SDHStripper(BaseProcessor):
+ """Processor removing hard-of-hearing descriptions from subtitles"""
+
+ def __init__(self, extra_regexes: list[str] | None = None):  # extra patterns are compiled with re.MULTILINE
+ self.extra_regexes = [
+ re.compile(regex, re.MULTILINE)
+ for regex in extra_regexes or []
+ ]
+
+ def process(self, srt, language=None):  # works on a deep copy; the steps below are chained lazy generators
+ stripped = [line for line in copy.deepcopy(srt)]
+ stripped = self._clean_full_line_descriptions(stripped)
+ stripped = self._clean_new_line_descriptions(stripped)
+ stripped = self._clean_inline_descriptions(stripped)
+ stripped = self._clean_speaker_names(stripped)
+ stripped = self._strip_notes(stripped)
+ stripped = self._remove_extra_hyphens(stripped)
+ stripped = self._run_extra_regexes(stripped)
+
+ stripped = SubRipFile([line for line in stripped if line.content])  # runs the chain; drops emptied lines
+ stripped.clean_indexes()
+
+ return stripped, stripped != srt
+
+ def _clean_full_line_descriptions(self, srt):
+ """Drops lines that consist of nothing but a bracketed or parenthesised description"""
+ for line in srt:
+ text = self._strip_tags(line.content)
+ for regex in (Regex.FULL_LINE_DESCIRPTION_BRACKET, Regex.FULL_LINE_DESCIRPTION_PARENTHESES):
+ text = re.sub(regex, r'', text, flags=re.S).strip()
+
+ if not text:  # nothing left once the description is removed: skip the whole line
+ continue
+
+ yield line
+
+ def _clean_new_line_descriptions(self, srt):
+ """Removes line descriptions taking up an entire line break"""
+ for line in srt:
+ position = re.match(Regex.POSITION_TAGS, line.content.strip())
+ for regex in (Regex.NEW_LINE_DESCRIPTION_BRACKET, Regex.NEW_LINE_DESCRIPTION_PARENTHESES):
+ line.content = re.sub(regex, r'', line.content, flags=re.M).strip()
+
+ # Restore position, if it has been removed with the description
+ if position and position[0] not in line.content:
+ line.content = position[0] + line.content
+
+ yield line
+
+ def _clean_inline_descriptions(self, srt):
+ """Removes inline descriptions (at the front, at the end and single-word ones mid-line)"""
+ for line in srt:
+ line.content = re.sub(Regex.FRONT_DESCRIPTION_BRACKET, r'\10', line.content, flags=re.M)  # \10 = group 10
+ line.content = re.sub(Regex.FRONT_DESCRIPTION_PARENTHESES, r'\1', line.content, flags=re.M)
+ for regex in (
+ Regex.END_DESCRIPTION_BRACKET,
+ Regex.END_DESCRIPTION_PARENTHESES,
+ Regex.INLINE_DESCRIPTION
+ ):
+ line.content = re.sub(regex, r'', line.content, flags=re.M)
+ line.content = line.content.strip()
+ yield line
+
+ def _clean_speaker_names(self, srt):
+ """Removes speaker names"""
+ for line in srt:
+ # Retain frontal tags/hyphens
+ for regex in (Regex.SPEAKER_PARENTHESES, Regex.SPEAKER):
+ line.content = re.sub(regex, r'\2\3', line.content, flags=re.M).strip()
+ yield line
+
+ def _strip_notes(self, srt):
+ """Removes lines with just musical notes (tags and whitespace are ignored for the check)"""
+ for line in srt:
+ if re.match(r'^♪+$', re.sub(r'\s*', r'', self._strip_tags(line.content).strip())):
+ continue
+
+ yield line
+
+ def _run_extra_regexes(self, srt):
+ """Runs extra regexes provided by user, deleting whatever they match"""
+ for line in srt:
+ for regex in self.extra_regexes:
+ line.content = re.sub(regex, r'', line.content)
+ yield line
+
+ def _remove_extra_hyphens(self, srt):
+ """Remove speaker hyphens if there's only one line"""
+ for line in srt:
+ splits = len(re.findall(r'^(|\{\\an8\})?-\s*', line.content, flags=re.M))
+ if splits == 1:  # a single hyphenated row means there is no dialogue to tell apart
+ line.content = re.sub(r'^(|\{\\an8\})?-\s*', r'\1', line.content.strip())
+
+ yield line
+
+
+ @staticmethod
+ def _strip_tags(text: str) -> str:  # deletes everything matched by Regex.TAGS
+ return re.sub(Regex.TAGS, r'', text)
diff --git a/scripts/subby/subby/regex.py b/scripts/subby/subby/regex.py
new file mode 100644
index 0000000..44496d6
--- /dev/null
+++ b/scripts/subby/subby/regex.py
@@ -0,0 +1,22 @@
+TAGS = r'[<{][/\\]?[a-z0-9.]+[}>]'
+POSITION_TAGS = r'^{\\an[0-9]}'
+FRONT_OPTIONAL_TAGS_WITH_HYPHEN = rf'^\s*({TAGS})?\s*(-)?\s*({TAGS})?\s*'
+TIME_LOOKAHEAD = r'(?![0-9]{2})'
+
+SPEAKER = rf'({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})\s*(Mc[A-Z][a-zA-Z]+|[A-Z0-9\&\[\]\.#\' ]+\s*|[A-Z][a-z]+):{TIME_LOOKAHEAD} ?'
+SPEAKER_PARENTHESES = rf'({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})\s*(?:[A-Z0-9\&\[\]\.#\' ]+\s*|[A-Z][a-z]+)(?: \([a-zA-Z ]+\)): ?'
+
+FRONT_NOTES = r'(?:♪+\s+)'
+BACK_NOTES = r'(?:\s+♪+)'
+
+DESCRIPTION_BRACKET = r'\[(?:[^\]]|\s)*\]'
+DESCRIPTION_PARENTHESES = r'\((?:[^\)]|\s)*\)'
+FULL_LINE_DESCIRPTION_BRACKET = rf'^-?\s*{FRONT_NOTES}?\[[^\]]+\]{BACK_NOTES}?$'
+NEW_LINE_DESCRIPTION_BRACKET = rf'^(?:{TAGS})?-?\s*{FRONT_NOTES}?{DESCRIPTION_BRACKET}(?:{TAGS})?{BACK_NOTES}?$'
+FRONT_DESCRIPTION_BRACKET = rf'^(?:{SPEAKER}|{SPEAKER_PARENTHESES})?({FRONT_OPTIONAL_TAGS_WITH_HYPHEN}){DESCRIPTION_BRACKET}:?'
+END_DESCRIPTION_BRACKET = rf'\s*{DESCRIPTION_BRACKET}\s*$'
+FULL_LINE_DESCIRPTION_PARENTHESES = rf'^-?\s*{FRONT_NOTES}?\([^\)]+\){BACK_NOTES}?$'
+NEW_LINE_DESCRIPTION_PARENTHESES = rf'^(?:{TAGS})?-?\s*{FRONT_NOTES}?{DESCRIPTION_PARENTHESES}{BACK_NOTES}?(?:{TAGS})?$'
+FRONT_DESCRIPTION_PARENTHESES = rf'^({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})(?:{SPEAKER}|{SPEAKER_PARENTHESES})?{DESCRIPTION_PARENTHESES}:?'
+END_DESCRIPTION_PARENTHESES = rf'\s*{DESCRIPTION_PARENTHESES}:?\s*$'
+INLINE_DESCRIPTION = r'(?:<[a-z]+>)?[\[(][A-Za-z]+[)\]](?:[a-z]+>)?'
diff --git a/scripts/subby/subby/subripfile.py b/scripts/subby/subby/subripfile.py
new file mode 100644
index 0000000..7bfa8c0
--- /dev/null
+++ b/scripts/subby/subby/subripfile.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from collections import UserList
+from datetime import timedelta
+from pathlib import Path
+
+import srt
+
+
+class SubRipFile(UserList):
+ def __init__(self, data: list[srt.Subtitle] | None = None):
+ self.data: list[srt.Subtitle] = data or []
+
+ @classmethod
+ def from_string(cls, source: str):
+ return cls(list(srt.parse(source, ignore_errors=True)))
+
+ def clean_indexes(self):
+ self.data = list(srt.sort_and_reindex(self.data))
+
+ def offset(self, offset: timedelta):
+ for line in self.data:
+ line.start += offset
+ line.end += offset
+
+ def export(self, eol: str | None = None) -> str:
+ """Exports subtitle as text"""
+ return srt.compose(self.data, eol=eol)
+
+ def save(self, path: Path, encoding: str = 'utf-8-sig', eol: str | None = None):
+ """Exports subtitle as text"""
+ with path.open(mode='wb') as fp:
+ fp.write(srt.compose(self.data, eol=eol).encode(encoding))
+
+ def __eq__(self, other):
+ if not isinstance(other, SubRipFile):
+ raise NotImplementedError
+ return self.export(eol='\n') == other.export(eol='\n')
diff --git a/scripts/subby/subby/utils/time.py b/scripts/subby/subby/utils/time.py
new file mode 100644
index 0000000..443623a
--- /dev/null
+++ b/scripts/subby/subby/utils/time.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+import datetime
+import re
+
+from srt import Subtitle
+
+
+def timestamp_from_ms(duration: float | int) -> str:
+ """Returns a formatted timestamp from miliseconds"""
+ seconds, miliseconds = divmod(float(duration), 1000)
+ minutes, seconds = divmod(seconds, 60)
+ hours, minutes = divmod(minutes, 60)
+ return "%02d:%02d:%02d.%03d" % (hours, minutes, seconds, miliseconds)
+
+
+def timestamp_from_seconds(duration: float | int) -> str:
+ """Returns a formatted timestamp from seconds"""
+ return timestamp_from_ms(duration * 1000)
+
+
+def ms_from_timestamp(timestamp: str) -> int:
+ """Returns miliseconds from a timestamp"""
+ timestamp = re.sub(r'[;\.\,]', r':', timestamp.replace('T:', ''))
+ hours, minutes, seconds, miliseconds = map(int, timestamp.split(':'))
+ miliseconds += hours * 3600000
+ miliseconds += minutes * 60000
+ miliseconds += seconds * 1000
+ return miliseconds
+
+
+def timedelta_from_timestamp(timestamp: str) -> datetime.timedelta:
+ """Returns timedelta from a timestamp"""
+ return datetime.timedelta(seconds=ms_from_timestamp(timestamp) / 1000)
+
+
+def timedelta_from_ms(duration: float | int) -> datetime.timedelta:
+ """Returns timedelta from miliseconds"""
+ return datetime.timedelta(seconds=duration / 1000)
+
+
+def line_duration(line: Subtitle):
+ """Returns duration of a srt.Subtitle line"""
+ return abs(line.end - line.start)