From 616ab317c17cdbf6b333b30ef4d14d547ada65dd Mon Sep 17 00:00:00 2001 From: chu23465 <130033130+chu23465@users.noreply.github.com> Date: Mon, 14 Apr 2025 08:39:57 +0530 Subject: [PATCH] Integrate subby --- scripts/subby/.gitignore | 135 ++++ scripts/subby/.pylintrc | 2 + scripts/subby/LICENSE | 674 ++++++++++++++++++ scripts/subby/README.md | 137 ++++ scripts/subby/poetry.lock | 551 ++++++++++++++ scripts/subby/pyproject.toml | 33 + scripts/subby/setup.py | 4 + scripts/subby/subby/__init__.py | 25 + scripts/subby/subby/cli.py | 217 ++++++ scripts/subby/subby/converters/__init__.py | 0 scripts/subby/subby/converters/base.py | 27 + .../subby/subby/converters/bilibili_json.py | 27 + scripts/subby/subby/converters/mp4.py | 111 +++ scripts/subby/subby/converters/sami.py | 90 +++ scripts/subby/subby/converters/smpte.py | 168 +++++ scripts/subby/subby/converters/webvtt.py | 162 +++++ scripts/subby/subby/processors/__init__.py | 0 scripts/subby/subby/processors/base.py | 30 + .../subby/subby/processors/common_issues.py | 278 ++++++++ scripts/subby/subby/processors/rtl.py | 34 + scripts/subby/subby/processors/sdh.py | 109 +++ scripts/subby/subby/regex.py | 22 + scripts/subby/subby/subripfile.py | 38 + scripts/subby/subby/utils/time.py | 44 ++ 24 files changed, 2918 insertions(+) create mode 100644 scripts/subby/.gitignore create mode 100644 scripts/subby/.pylintrc create mode 100644 scripts/subby/LICENSE create mode 100644 scripts/subby/README.md create mode 100644 scripts/subby/poetry.lock create mode 100644 scripts/subby/pyproject.toml create mode 100644 scripts/subby/setup.py create mode 100644 scripts/subby/subby/__init__.py create mode 100644 scripts/subby/subby/cli.py create mode 100644 scripts/subby/subby/converters/__init__.py create mode 100644 scripts/subby/subby/converters/base.py create mode 100644 scripts/subby/subby/converters/bilibili_json.py create mode 100644 scripts/subby/subby/converters/mp4.py create mode 100644 scripts/subby/subby/converters/sami.py 
create mode 100644 scripts/subby/subby/converters/smpte.py create mode 100644 scripts/subby/subby/converters/webvtt.py create mode 100644 scripts/subby/subby/processors/__init__.py create mode 100644 scripts/subby/subby/processors/base.py create mode 100644 scripts/subby/subby/processors/common_issues.py create mode 100644 scripts/subby/subby/processors/rtl.py create mode 100644 scripts/subby/subby/processors/sdh.py create mode 100644 scripts/subby/subby/regex.py create mode 100644 scripts/subby/subby/subripfile.py create mode 100644 scripts/subby/subby/utils/time.py diff --git a/scripts/subby/.gitignore b/scripts/subby/.gitignore new file mode 100644 index 0000000..dec16f1 --- /dev/null +++ b/scripts/subby/.gitignore @@ -0,0 +1,135 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json +stubs/ +mypy.ini + +# Pyre type checker +.pyre/ + +# Ruff +.ruff_cache/ +.ruff.toml diff --git a/scripts/subby/.pylintrc b/scripts/subby/.pylintrc new file mode 100644 index 0000000..7615b86 --- /dev/null +++ b/scripts/subby/.pylintrc @@ -0,0 +1,2 @@ +[FORMAT] +max-line-length=120 diff --git a/scripts/subby/LICENSE b/scripts/subby/LICENSE new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/scripts/subby/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. 
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. 
Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. 
+ + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/scripts/subby/README.md b/scripts/subby/README.md new file mode 100644 index 0000000..1bb6fcb --- /dev/null +++ b/scripts/subby/README.md @@ -0,0 +1,137 @@ +# Subby +Advanced subtitle converter and processor. + +# Supported formats +WebVTT, DFXP/TTML/TTML2/SMPTE, SAMI, WVTT (WebVTT in MP4), STPP/ISMT (DFXP in MP4), JSON (Bilibili) + +# Functionality +- converts supported input format to SRT +- retains select formatting tags (italics, basic \an8 positioning) +- corrects commonly found flaws in subtitles +- opinionated timing and formatting improvements + +# Installation +``` +git clone https://github.com/vevv/subby +cd subby +pip install . +``` + +# Usage notes +`CommonIssuesFixer` should be run both after conversion and after SDH stripping, +as it's designed to fix source issues, including ones which can cause playback problems. + +`CommonIssuesFixer` removes short gaps (2 frames) by default. +This can be disabled by setting `CommonIssuesFixer.remove_gaps` to `False` before running. + +`subby.SubRipFile` provides methods similar to those of `pysrt.SubRipFile`, but isn't a fully compatible replacement. +Only `from_string`, `clean_indexes`, `export`, `save` are guaranteed to work. + +This object is otherwise just a list storing `srt.Subtitle` elements. + +## Language specific fixing + +As of 0.3.6, both `CommonIssuesFixer` and `SDHStripper` support a language parameter, +which accepts a BCP47 language code. + +This is currently used only for RTL tagging in `CommonIssuesFixer`. + +**It is highly recommended for every script to pass it for future use.** + +# Command line usage +``` +Usage: subby [OPTIONS] COMMAND [ARGS]... + + Subby—Advanced Subtitle Converter and Processor. + +Options: + -d, --debug Enable DEBUG level logs. + --help Show this message and exit. + +Commands: + convert Convert a Subtitle to SubRip (SRT). + process SubRip (SRT) post-processing. + version Print version information.
+``` +Example + +``` +subby process /path/to/subs/subs.srt strip-sdh +``` + +# Library usage +## Converter +```py +from subby import WebVTTConverter +from pathlib import Path + +converter = WebVTTConverter() +file = Path('test.vtt') + +# All statements below are equivalent +srt = converter.from_file(file) +srt = converter.from_string(file.read_text()) +srt = converter.from_bytes(file.read_bytes()) + +# srt is subby.SubRipFile + +output = Path('file.srt') +srt.save(output) +# saved to file.srt +``` + +## Processor +Processor returns a bool indicating success - whether any changes were made, useful for determining if SDH subtitles should be saved. + +```py +from subby import CommonIssuesFixer +from pathlib import Path + +processor = CommonIssuesFixer() +file = Path('test.vtt') + +# All statements below are equivalent +srt, status = processor.from_file(file) +srt, status = processor.from_string(file.read_text()) +srt, status = processor.from_bytes(file.read_bytes()) + +# srt is subby.SubRipFile, status is bool + +output = Path('test_fixed.srt') +srt.save(output) +# saved to test_fixed.srt +``` + +## Chaining +The following example will convert a VTT file, attempt to strip SDH, and then save the result. + +```py +from subby import WebVTTConverter, CommonIssuesFixer, SDHStripper +from pathlib import Path + +converter = WebVTTConverter() +fixer = CommonIssuesFixer() +stripper = SDHStripper() + +file = Path('file.vtt') +file_sdh = Path('file_sdh.srt') +file_stripped = Path('file_stripped.srt') +srt, _ = fixer.from_srt(converter.from_file(file)) + +srt.save(file_sdh) +# saved to file_sdh.srt + +stripped, status = stripper.from_srt(srt) +if status is True: + print('stripping successful') + stripped.save(file_stripped) + # saved to file_stripped.srt +``` + +## Tests +To run tests, go to the "tests" directory and run `pytest`. 
+ +## Contributors + + + diff --git a/scripts/subby/poetry.lock b/scripts/subby/poetry.lock new file mode 100644 index 0000000..241fc18 --- /dev/null +++ b/scripts/subby/poetry.lock @@ -0,0 +1,551 @@ +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. + +[[package]] +name = "beautifulsoup4" +version = "4.13.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, + {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, +] + +[package.dependencies] +soupsieve = ">1.2" +typing-extensions = ">=4.0.0" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "click" +version = "8.1.8" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "construct" +version = "2.8.8" +description = "A powerful declarative parser/builder for binary data" +optional = false +python-versions = "*" +files = [ + {file = "construct-2.8.8.tar.gz", hash = "sha256:1b84b8147f6fd15bcf64b737c3e8ac5100811ad80c830cb4b2545140511c4157"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "langcodes" +version = "3.4.1" +description = "Tools for labeling human languages with IETF language tags" +optional = false +python-versions = ">=3.8" +files = [ + {file = "langcodes-3.4.1-py3-none-any.whl", hash = "sha256:68f686fc3d358f222674ecf697ddcee3ace3c2fe325083ecad2543fd28a20e77"}, + {file = "langcodes-3.4.1.tar.gz", hash = "sha256:a24879fed238013ac3af2424b9d1124e38b4a38b2044fd297c8ff38e5912e718"}, +] + 
+[package.dependencies] +language-data = ">=1.2" + +[package.extras] +build = ["build", "twine"] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "language-data" +version = "1.3.0" +description = "Supplementary data about languages used by the langcodes module" +optional = false +python-versions = "*" +files = [ + {file = "language_data-1.3.0-py3-none-any.whl", hash = "sha256:e2ee943551b5ae5f89cd0e801d1fc3835bb0ef5b7e9c3a4e8e17b2b214548fbf"}, + {file = "language_data-1.3.0.tar.gz", hash = "sha256:7600ef8aa39555145d06c89f0c324bf7dab834ea0b0a439d8243762e3ebad7ec"}, +] + +[package.dependencies] +marisa-trie = ">=1.1.0" + +[package.extras] +build = ["build", "twine"] +test = ["pytest", "pytest-cov"] + +[[package]] +name = "lxml" +version = "5.3.1" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +optional = false +python-versions = ">=3.6" +files = [ + {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"}, + {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:528f3a0498a8edc69af0559bdcf8a9f5a8bf7c00051a6ef3141fdcf27017bbf5"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db4743e30d6f5f92b6d2b7c86b3ad250e0bad8dee4b7ad8a0c44bfb276af89a3"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b5d7f8acf809465086d498d62a981fa6a56d2718135bb0e4aa48c502055f5c"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:928e75a7200a4c09e6efc7482a1337919cc61fe1ba289f297827a5b76d8969c2"}, + {file = 
"lxml-5.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a997b784a639e05b9d4053ef3b20c7e447ea80814a762f25b8ed5a89d261eac"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7b82e67c5feb682dbb559c3e6b78355f234943053af61606af126df2183b9ef9"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:f1de541a9893cf8a1b1db9bf0bf670a2decab42e3e82233d36a74eda7822b4c9"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:de1fc314c3ad6bc2f6bd5b5a5b9357b8c6896333d27fdbb7049aea8bd5af2d79"}, + {file = "lxml-5.3.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:7c0536bd9178f754b277a3e53f90f9c9454a3bd108b1531ffff720e082d824f2"}, + {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:68018c4c67d7e89951a91fbd371e2e34cd8cfc71f0bb43b5332db38497025d51"}, + {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa826340a609d0c954ba52fd831f0fba2a4165659ab0ee1a15e4aac21f302406"}, + {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:796520afa499732191e39fc95b56a3b07f95256f2d22b1c26e217fb69a9db5b5"}, + {file = "lxml-5.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3effe081b3135237da6e4c4530ff2a868d3f80be0bda027e118a5971285d42d0"}, + {file = "lxml-5.3.1-cp310-cp310-win32.whl", hash = "sha256:a22f66270bd6d0804b02cd49dae2b33d4341015545d17f8426f2c4e22f557a23"}, + {file = "lxml-5.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:0bcfadea3cdc68e678d2b20cb16a16716887dd00a881e16f7d806c2138b8ff0c"}, + {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e220f7b3e8656ab063d2eb0cd536fafef396829cafe04cb314e734f87649058f"}, + {file = "lxml-5.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f2cfae0688fd01f7056a17367e3b84f37c545fb447d7282cf2c242b16262607"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:67d2f8ad9dcc3a9e826bdc7802ed541a44e124c29b7d95a679eeb58c1c14ade8"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db0c742aad702fd5d0c6611a73f9602f20aec2007c102630c06d7633d9c8f09a"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:198bb4b4dd888e8390afa4f170d4fa28467a7eaf857f1952589f16cfbb67af27"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2a3e412ce1849be34b45922bfef03df32d1410a06d1cdeb793a343c2f1fd666"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b8969dbc8d09d9cd2ae06362c3bad27d03f433252601ef658a49bd9f2b22d79"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5be8f5e4044146a69c96077c7e08f0709c13a314aa5315981185c1f00235fe65"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:133f3493253a00db2c870d3740bc458ebb7d937bd0a6a4f9328373e0db305709"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:52d82b0d436edd6a1d22d94a344b9a58abd6c68c357ed44f22d4ba8179b37629"}, + {file = "lxml-5.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b6f92e35e2658a5ed51c6634ceb5ddae32053182851d8cad2a5bc102a359b33"}, + {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:203b1d3eaebd34277be06a3eb880050f18a4e4d60861efba4fb946e31071a295"}, + {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:155e1a5693cf4b55af652f5c0f78ef36596c7f680ff3ec6eb4d7d85367259b2c"}, + {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22ec2b3c191f43ed21f9545e9df94c37c6b49a5af0a874008ddc9132d49a2d9c"}, + {file = "lxml-5.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7eda194dd46e40ec745bf76795a7cccb02a6a41f445ad49d3cf66518b0bd9cff"}, + {file = "lxml-5.3.1-cp311-cp311-win32.whl", hash = 
"sha256:fb7c61d4be18e930f75948705e9718618862e6fc2ed0d7159b2262be73f167a2"}, + {file = "lxml-5.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c809eef167bf4a57af4b03007004896f5c60bd38dc3852fcd97a26eae3d4c9e6"}, + {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c"}, + {file = "lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468"}, + {file = "lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367"}, + {file = 
"lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd"}, + {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c"}, + {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f"}, + {file = "lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645"}, + {file = "lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5"}, + {file = "lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf"}, + {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c093c7088b40d8266f57ed71d93112bd64c6724d31f0794c1e52cc4857c28e0e"}, + {file = "lxml-5.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b0884e3f22d87c30694e625b1e62e6f30d39782c806287450d9dc2fdf07692fd"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1637fa31ec682cd5760092adfabe86d9b718a75d43e65e211d5931809bc111e7"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a364e8e944d92dcbf33b6b494d4e0fb3499dcc3bd9485beb701aa4b4201fa414"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:779e851fd0e19795ccc8a9bb4d705d6baa0ef475329fe44a13cf1e962f18ff1e"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c4393600915c308e546dc7003d74371744234e8444a28622d76fe19b98fa59d1"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:673b9d8e780f455091200bba8534d5f4f465944cbdd61f31dc832d70e29064a5"}, + {file = 
"lxml-5.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:2e4a570f6a99e96c457f7bec5ad459c9c420ee80b99eb04cbfcfe3fc18ec6423"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:71f31eda4e370f46af42fc9f264fafa1b09f46ba07bdbee98f25689a04b81c20"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:42978a68d3825eaac55399eb37a4d52012a205c0c6262199b8b44fcc6fd686e8"}, + {file = "lxml-5.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8b1942b3e4ed9ed551ed3083a2e6e0772de1e5e3aca872d955e2e86385fb7ff9"}, + {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:85c4f11be9cf08917ac2a5a8b6e1ef63b2f8e3799cec194417e76826e5f1de9c"}, + {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:231cf4d140b22a923b1d0a0a4e0b4f972e5893efcdec188934cc65888fd0227b"}, + {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5865b270b420eda7b68928d70bb517ccbe045e53b1a428129bb44372bf3d7dd5"}, + {file = "lxml-5.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dbf7bebc2275016cddf3c997bf8a0f7044160714c64a9b83975670a04e6d2252"}, + {file = "lxml-5.3.1-cp313-cp313-win32.whl", hash = "sha256:d0751528b97d2b19a388b302be2a0ee05817097bab46ff0ed76feeec24951f78"}, + {file = "lxml-5.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:91fb6a43d72b4f8863d21f347a9163eecbf36e76e2f51068d59cd004c506f332"}, + {file = "lxml-5.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:016b96c58e9a4528219bb563acf1aaaa8bc5452e7651004894a973f03b84ba81"}, + {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82a4bb10b0beef1434fb23a09f001ab5ca87895596b4581fd53f1e5145a8934a"}, + {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d68eeef7b4d08a25e51897dac29bcb62aba830e9ac6c4e3297ee7c6a0cf6439"}, + {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = 
"sha256:f12582b8d3b4c6be1d298c49cb7ae64a3a73efaf4c2ab4e37db182e3545815ac"}, + {file = "lxml-5.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2df7ed5edeb6bd5590914cd61df76eb6cce9d590ed04ec7c183cf5509f73530d"}, + {file = "lxml-5.3.1-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:585c4dc429deebc4307187d2b71ebe914843185ae16a4d582ee030e6cfbb4d8a"}, + {file = "lxml-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:06a20d607a86fccab2fc15a77aa445f2bdef7b49ec0520a842c5c5afd8381576"}, + {file = "lxml-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:057e30d0012439bc54ca427a83d458752ccda725c1c161cc283db07bcad43cf9"}, + {file = "lxml-5.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4867361c049761a56bd21de507cab2c2a608c55102311d142ade7dab67b34f32"}, + {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dddf0fb832486cc1ea71d189cb92eb887826e8deebe128884e15020bb6e3f61"}, + {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bcc211542f7af6f2dfb705f5f8b74e865592778e6cafdfd19c792c244ccce19"}, + {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaca5a812f050ab55426c32177091130b1e49329b3f002a32934cd0245571307"}, + {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:236610b77589faf462337b3305a1be91756c8abc5a45ff7ca8f245a71c5dab70"}, + {file = "lxml-5.3.1-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:aed57b541b589fa05ac248f4cb1c46cbb432ab82cbd467d1c4f6a2bdc18aecf9"}, + {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:75fa3d6946d317ffc7016a6fcc44f42db6d514b7fdb8b4b28cbe058303cb6e53"}, + {file = "lxml-5.3.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:96eef5b9f336f623ffc555ab47a775495e7e8846dde88de5f941e2906453a1ce"}, + {file = "lxml-5.3.1-cp37-cp37m-win32.whl", hash = 
"sha256:ef45f31aec9be01379fc6c10f1d9c677f032f2bac9383c827d44f620e8a88407"}, + {file = "lxml-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0611da6b07dd3720f492db1b463a4d1175b096b49438761cc9f35f0d9eaaef5"}, + {file = "lxml-5.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b2aca14c235c7a08558fe0a4786a1a05873a01e86b474dfa8f6df49101853a4e"}, + {file = "lxml-5.3.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82fce1d964f065c32c9517309f0c7be588772352d2f40b1574a214bd6e6098"}, + {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7aae7a3d63b935babfdc6864b31196afd5145878ddd22f5200729006366bc4d5"}, + {file = "lxml-5.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8e0d177b1fe251c3b1b914ab64135475c5273c8cfd2857964b2e3bb0fe196a7"}, + {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:6c4dd3bfd0c82400060896717dd261137398edb7e524527438c54a8c34f736bf"}, + {file = "lxml-5.3.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f1208c1c67ec9e151d78aa3435aa9b08a488b53d9cfac9b699f15255a3461ef2"}, + {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c6aacf00d05b38a5069826e50ae72751cb5bc27bdc4d5746203988e429b385bb"}, + {file = "lxml-5.3.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5881aaa4bf3a2d086c5f20371d3a5856199a0d8ac72dd8d0dbd7a2ecfc26ab73"}, + {file = "lxml-5.3.1-cp38-cp38-win32.whl", hash = "sha256:45fbb70ccbc8683f2fb58bea89498a7274af1d9ec7995e9f4af5604e028233fc"}, + {file = "lxml-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:7512b4d0fc5339d5abbb14d1843f70499cab90d0b864f790e73f780f041615d7"}, + {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5885bc586f1edb48e5d68e7a4b4757b5feb2a496b64f462b4d65950f5af3364f"}, + {file = "lxml-5.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1b92fe86e04f680b848fff594a908edfa72b31bfc3499ef7433790c11d4c8cd8"}, + {file 
= "lxml-5.3.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a091026c3bf7519ab1e64655a3f52a59ad4a4e019a6f830c24d6430695b1cf6a"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ffb141361108e864ab5f1813f66e4e1164181227f9b1f105b042729b6c15125"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3715cdf0dd31b836433af9ee9197af10e3df41d273c19bb249230043667a5dfd"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88b72eb7222d918c967202024812c2bfb4048deeb69ca328363fb8e15254c549"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa59974880ab5ad8ef3afaa26f9bda148c5f39e06b11a8ada4660ecc9fb2feb3"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3bb8149840daf2c3f97cebf00e4ed4a65a0baff888bf2605a8d0135ff5cf764e"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:0d6b2fa86becfa81f0a0271ccb9eb127ad45fb597733a77b92e8a35e53414914"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:136bf638d92848a939fd8f0e06fcf92d9f2e4b57969d94faae27c55f3d85c05b"}, + {file = "lxml-5.3.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:89934f9f791566e54c1d92cdc8f8fd0009447a5ecdb1ec6b810d5f8c4955f6be"}, + {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a8ade0363f776f87f982572c2860cc43c65ace208db49c76df0a21dde4ddd16e"}, + {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:bfbbab9316330cf81656fed435311386610f78b6c93cc5db4bebbce8dd146675"}, + {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:172d65f7c72a35a6879217bcdb4bb11bc88d55fb4879e7569f55616062d387c2"}, + {file = "lxml-5.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e3c623923967f3e5961d272718655946e5322b8d058e094764180cdee7bab1af"}, + {file 
= "lxml-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ce0930a963ff593e8bb6fda49a503911accc67dee7e5445eec972668e672a0f0"}, + {file = "lxml-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:f7b64fcd670bca8800bc10ced36620c6bbb321e7bc1214b9c0c0df269c1dddc2"}, + {file = "lxml-5.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:afa578b6524ff85fb365f454cf61683771d0170470c48ad9d170c48075f86725"}, + {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67f5e80adf0aafc7b5454f2c1cb0cde920c9b1f2cbd0485f07cc1d0497c35c5d"}, + {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd0b80ac2d8f13ffc906123a6f20b459cb50a99222d0da492360512f3e50f84"}, + {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:422c179022ecdedbe58b0e242607198580804253da220e9454ffe848daa1cfd2"}, + {file = "lxml-5.3.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:524ccfded8989a6595dbdda80d779fb977dbc9a7bc458864fc9a0c2fc15dc877"}, + {file = "lxml-5.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:48fd46bf7155def2e15287c6f2b133a2f78e2d22cdf55647269977b873c65499"}, + {file = "lxml-5.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:05123fad495a429f123307ac6d8fd6f977b71e9a0b6d9aeeb8f80c017cb17131"}, + {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a243132767150a44e6a93cd1dde41010036e1cbc63cc3e9fe1712b277d926ce3"}, + {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c92ea6d9dd84a750b2bae72ff5e8cf5fdd13e58dda79c33e057862c29a8d5b50"}, + {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2f1be45d4c15f237209bbf123a0e05b5d630c8717c42f59f31ea9eae2ad89394"}, + {file = "lxml-5.3.1-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a83d3adea1e0ee36dac34627f78ddd7f093bb9cfc0a8e97f1572a949b695cb98"}, + {file = 
"lxml-5.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3edbb9c9130bac05d8c3fe150c51c337a471cc7fdb6d2a0a7d3a88e88a829314"}, + {file = "lxml-5.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2f23cf50eccb3255b6e913188291af0150d89dab44137a69e14e4dcb7be981f1"}, + {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df7e5edac4778127f2bf452e0721a58a1cfa4d1d9eac63bdd650535eb8543615"}, + {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:094b28ed8a8a072b9e9e2113a81fda668d2053f2ca9f2d202c2c8c7c2d6516b1"}, + {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:514fe78fc4b87e7a7601c92492210b20a1b0c6ab20e71e81307d9c2e377c64de"}, + {file = "lxml-5.3.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8fffc08de02071c37865a155e5ea5fce0282e1546fd5bde7f6149fcaa32558ac"}, + {file = "lxml-5.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4b0d5cdba1b655d5b18042ac9c9ff50bda33568eb80feaaca4fc237b9c4fbfde"}, + {file = "lxml-5.3.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3031e4c16b59424e8d78522c69b062d301d951dc55ad8685736c3335a97fc270"}, + {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb659702a45136c743bc130760c6f137870d4df3a9e14386478b8a0511abcfca"}, + {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a11b16a33656ffc43c92a5343a28dc71eefe460bcc2a4923a96f292692709f6"}, + {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c5ae125276f254b01daa73e2c103363d3e99e3e10505686ac7d9d2442dd4627a"}, + {file = "lxml-5.3.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c76722b5ed4a31ba103e0dc77ab869222ec36efe1a614e42e9bcea88a36186fe"}, + {file = "lxml-5.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:33e06717c00c788ab4e79bc4726ecc50c54b9bfb55355eae21473c145d83c2d2"}, + {file = 
"lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html-clean = ["lxml_html_clean"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=3.0.11,<3.1.0)"] + +[[package]] +name = "lxml-stubs" +version = "0.4.0" +description = "Type annotations for the lxml package" +optional = false +python-versions = "*" +files = [ + {file = "lxml-stubs-0.4.0.tar.gz", hash = "sha256:184877b42127256abc2b932ba8bd0ab5ea80bd0b0fee618d16daa40e0b71abee"}, + {file = "lxml_stubs-0.4.0-py3-none-any.whl", hash = "sha256:3b381e9e82397c64ea3cc4d6f79d1255d015f7b114806d4826218805c10ec003"}, +] + +[package.extras] +test = ["coverage[toml] (==5.2)", "pytest (>=6.0.0)", "pytest-mypy-plugins (==1.9.3)"] + +[[package]] +name = "marisa-trie" +version = "1.2.1" +description = "Static memory-efficient and fast Trie-like structures for Python." +optional = false +python-versions = ">=3.7" +files = [ + {file = "marisa_trie-1.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2eb41d2f9114d8b7bd66772c237111e00d2bae2260824560eaa0a1e291ce9e8"}, + {file = "marisa_trie-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e956e6a46f604b17d570901e66f5214fb6f658c21e5e7665deace236793cef6"}, + {file = "marisa_trie-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd45142501300e7538b2e544905580918b67b1c82abed1275fe4c682c95635fa"}, + {file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8443d116c612cfd1961fbf76769faf0561a46d8e317315dd13f9d9639ad500c"}, + {file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:875a6248e60fbb48d947b574ffa4170f34981f9e579bde960d0f9a49ea393ecc"}, + {file = "marisa_trie-1.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:746a7c60a17fccd3cfcfd4326926f02ea4fcdfc25d513411a0c4fc8e4a1ca51f"}, + {file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e70869737cc0e5bd903f620667da6c330d6737048d1f44db792a6af68a1d35be"}, + {file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06b099dd743676dbcd8abd8465ceac8f6d97d8bfaabe2c83b965495523b4cef2"}, + {file = "marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d2a82eb21afdaf22b50d9b996472305c05ca67fc4ff5a026a220320c9c961db6"}, + {file = "marisa_trie-1.2.1-cp310-cp310-win32.whl", hash = "sha256:8951e7ce5d3167fbd085703b4cbb3f47948ed66826bef9a2173c379508776cf5"}, + {file = "marisa_trie-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:5685a14b3099b1422c4f59fa38b0bf4b5342ee6cc38ae57df9666a0b28eeaad3"}, + {file = "marisa_trie-1.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed3fb4ed7f2084597e862bcd56c56c5529e773729a426c083238682dba540e98"}, + {file = "marisa_trie-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe69fb9ffb2767746181f7b3b29bbd3454d1d24717b5958e030494f3d3cddf3"}, + {file = "marisa_trie-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4728ed3ae372d1ea2cdbd5eaa27b8f20a10e415d1f9d153314831e67d963f281"}, + {file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cf4f25cf895692b232f49aa5397af6aba78bb679fb917a05fce8d3cb1ee446d"}, + {file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cca7f96236ffdbf49be4b2e42c132e3df05968ac424544034767650913524de"}, + {file = "marisa_trie-1.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7eb20bf0e8b55a58d2a9b518aabc4c18278787bdba476c551dd1c1ed109e509"}, + {file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b1ec93f0d1ee6d7ab680a6d8ea1a08bf264636358e92692072170032dda652ba"}, + {file = 
"marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e2699255d7ac610dee26d4ae7bda5951d05c7d9123a22e1f7c6a6f1964e0a4e4"}, + {file = "marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c484410911182457a8a1a0249d0c09c01e2071b78a0a8538cd5f7fa45589b13a"}, + {file = "marisa_trie-1.2.1-cp311-cp311-win32.whl", hash = "sha256:ad548117744b2bcf0e3d97374608be0a92d18c2af13d98b728d37cd06248e571"}, + {file = "marisa_trie-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:436f62d27714970b9cdd3b3c41bdad046f260e62ebb0daa38125ef70536fc73b"}, + {file = "marisa_trie-1.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:638506eacf20ca503fff72221a7e66a6eadbf28d6a4a6f949fcf5b1701bb05ec"}, + {file = "marisa_trie-1.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de1665eaafefa48a308e4753786519888021740501a15461c77bdfd57638e6b4"}, + {file = "marisa_trie-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa"}, + {file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2a7d00f53f4945320b551bccb826b3fb26948bde1a10d50bb9802fabb611b10"}, + {file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98042040d1d6085792e8d0f74004fc0f5f9ca6091c298f593dd81a22a4643854"}, + {file = "marisa_trie-1.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6532615111eec2c79e711965ece0bc95adac1ff547a7fff5ffca525463116deb"}, + {file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:20948e40ab2038e62b7000ca6b4a913bc16c91a2c2e6da501bd1f917eeb28d51"}, + {file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66b23e5b35dd547f85bf98db7c749bc0ffc57916ade2534a6bbc32db9a4abc44"}, + {file = "marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:6704adf0247d2dda42e876b793be40775dff46624309ad99bc7537098bee106d"}, + {file = "marisa_trie-1.2.1-cp312-cp312-win32.whl", hash = "sha256:3ad356442c2fea4c2a6f514738ddf213d23930f942299a2b2c05df464a00848a"}, + {file = "marisa_trie-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:f2806f75817392cedcacb24ac5d80b0350dde8d3861d67d045c1d9b109764114"}, + {file = "marisa_trie-1.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b5ea16e69bfda0ac028c921b58de1a4aaf83d43934892977368579cd3c0a2554"}, + {file = "marisa_trie-1.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9f627f4e41be710b6cb6ed54b0128b229ac9d50e2054d9cde3af0fef277c23cf"}, + {file = "marisa_trie-1.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5e649f3dc8ab5476732094f2828cc90cac3be7c79bc0c8318b6fda0c1d248db4"}, + {file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46e528ee71808c961baf8c3ce1c46a8337ec7a96cc55389d11baafe5b632f8e9"}, + {file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36aa4401a1180615f74d575571a6550081d84fc6461e9aefc0bb7b2427af098e"}, + {file = "marisa_trie-1.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce59bcd2cda9bb52b0e90cc7f36413cd86c3d0ce7224143447424aafb9f4aa48"}, + {file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f4cd800704a5fc57e53c39c3a6b0c9b1519ebdbcb644ede3ee67a06eb542697d"}, + {file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2428b495003c189695fb91ceeb499f9fcced3a2dce853e17fa475519433c67ff"}, + {file = "marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:735c363d9aaac82eaf516a28f7c6b95084c2e176d8231c87328dc80e112a9afa"}, + {file = "marisa_trie-1.2.1-cp313-cp313-win32.whl", hash = "sha256:eba6ca45500ca1a042466a0684aacc9838e7f20fe2605521ee19f2853062798f"}, + {file = 
"marisa_trie-1.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:aa7cd17e1c690ce96c538b2f4aae003d9a498e65067dd433c52dd069009951d4"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5e43891a37b0d7f618819fea14bd951289a0a8e3dd0da50c596139ca83ebb9b1"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6946100a43f933fad6bc458c502a59926d80b321d5ac1ed2ff9c56605360496f"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4177dc0bd1374e82be9b2ba4d0c2733b0a85b9d154ceeea83a5bee8c1e62fbf"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f35c2603a6be168088ed1db6ad1704b078aa8f39974c60888fbbced95dcadad4"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:d659fda873d8dcb2c14c2c331de1dee21f5a902d7f2de7978b62c6431a8850ef"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:b0ef26733d3c836be79e812071e1a431ce1f807955a27a981ebb7993d95f842b"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:536ea19ce6a2ce61c57fed4123ecd10d18d77a0db45cd2741afff2b8b68f15b3"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-win32.whl", hash = "sha256:0ee6cf6a16d9c3d1c94e21c8e63c93d8b34bede170ca4e937e16e1c0700d399f"}, + {file = "marisa_trie-1.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7e7b1786e852e014d03e5f32dbd991f9a9eb223dd3fa9a2564108b807e4b7e1c"}, + {file = "marisa_trie-1.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:952af3a5859c3b20b15a00748c36e9eb8316eb2c70bd353ae1646da216322908"}, + {file = "marisa_trie-1.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24a81aa7566e4ec96fc4d934581fe26d62eac47fc02b35fa443a0bb718b471e8"}, + {file = "marisa_trie-1.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9c9b32b14651a6dcf9e8857d2df5d29d322a1ea8c0be5c8ffb88f9841c4ec62b"}, + 
{file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ac170d20b97beb75059ba65d1ccad6b434d777c8992ab41ffabdade3b06dd74"}, + {file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da4e4facb79614cc4653cfd859f398e4db4ca9ab26270ff12610e50ed7f1f6c6"}, + {file = "marisa_trie-1.2.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25688f34cac3bec01b4f655ffdd6c599a01f0bd596b4a79cf56c6f01a7df3560"}, + {file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:1db3213b451bf058d558f6e619bceff09d1d130214448a207c55e1526e2773a1"}, + {file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:d5648c6dcc5dc9200297fb779b1663b8a4467bda034a3c69bd9c32d8afb33b1d"}, + {file = "marisa_trie-1.2.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:5bd39a4e1cc839a88acca2889d17ebc3f202a5039cd6059a13148ce75c8a6244"}, + {file = "marisa_trie-1.2.1-cp38-cp38-win32.whl", hash = "sha256:594f98491a96c7f1ffe13ce292cef1b4e63c028f0707effdea0f113364c1ae6c"}, + {file = "marisa_trie-1.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:5fe5a286f997848a410eebe1c28657506adaeb405220ee1e16cfcfd10deb37f2"}, + {file = "marisa_trie-1.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c0fe2ace0cb1806badbd1c551a8ec2f8d4cf97bf044313c082ef1acfe631ddca"}, + {file = "marisa_trie-1.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67f0c2ec82c20a02c16fc9ba81dee2586ef20270127c470cb1054767aa8ba310"}, + {file = "marisa_trie-1.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a3c98613180cf1730e221933ff74b454008161b1a82597e41054127719964188"}, + {file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:429858a0452a7bedcf67bc7bb34383d00f666c980cb75a31bcd31285fbdd4403"}, + {file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b2eacb84446543082ec50f2fb563f1a94c96804d4057b7da8ed815958d0cdfbe"}, + {file = "marisa_trie-1.2.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:852d7bcf14b0c63404de26e7c4c8d5d65ecaeca935e93794331bc4e2f213660b"}, + {file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:e58788004adda24c401d1751331618ed20c507ffc23bfd28d7c0661a1cf0ad16"}, + {file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:aefe0973cc4698e0907289dc0517ab0c7cdb13d588201932ff567d08a50b0e2e"}, + {file = "marisa_trie-1.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6c50c861faad0a5c091bd763e0729f958c316e678dfa065d3984fbb9e4eacbcd"}, + {file = "marisa_trie-1.2.1-cp39-cp39-win32.whl", hash = "sha256:b1ce340da608530500ab4f963f12d6bfc8d8680900919a60dbdc9b78c02060a4"}, + {file = "marisa_trie-1.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:ce37d8ca462bb64cc13f529b9ed92f7b21fe8d1f1679b62e29f9cb7d0e888b49"}, + {file = "marisa_trie-1.2.1.tar.gz", hash = "sha256:3a27c408e2aefc03e0f1d25b2ff2afb85aac3568f6fa2ae2a53b57a2e87ce29d"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +test = ["hypothesis", "pytest", "readme-renderer"] + +[[package]] +name = "packaging" +version = "24.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = 
"sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pymp4" +version = "1.4.0" +description = "Python parser for MP4 boxes" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "pymp4-1.4.0-py3-none-any.whl", hash = "sha256:3401666c1e2a97ac94dffb18c5a5dcbd46d0a436da5272d378a6f9f6506dd12d"}, + {file = "pymp4-1.4.0.tar.gz", hash = "sha256:bc9e77732a8a143d34c38aa862a54180716246938e4bf3e07585d19252b77bb5"}, +] + +[package.dependencies] +construct = "2.8.8" + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "setuptools" +version = "75.3.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, +] + +[package.extras] +check = 
["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] + +[[package]] +name = "soupsieve" +version = "2.6" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, +] + +[[package]] +name = "srt" +version = "3.5.3" +description = "A tiny library for parsing, modifying, and composing SRT files." 
+optional = false +python-versions = ">=2.7" +files = [ + {file = "srt-3.5.3.tar.gz", hash = "sha256:4884315043a4f0740fd1f878ed6caa376ac06d70e135f306a6dc44632eed0cc0"}, +] + +[[package]] +name = "tinycss" +version = "0.4" +description = "tinycss is a complete yet simple CSS parser for Python." +optional = false +python-versions = "*" +files = [ + {file = "tinycss-0.4.tar.gz", hash = "sha256:12306fb50e5e9e7eaeef84b802ed877488ba80e35c672867f548c0924a76716e"}, +] + +[package.extras] +test = ["pytest-cov", "pytest-flake8", "pytest-isort", "pytest-runner"] + +[[package]] +name = "tomli" +version = "2.2.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", 
hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = 
"sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.8" +content-hash = "d1dc09a1492c59373465886d52b572d761f7e370239ae93c63a7c88f4a69c47f" diff --git a/scripts/subby/pyproject.toml b/scripts/subby/pyproject.toml new file mode 100644 index 0000000..28db507 --- /dev/null +++ b/scripts/subby/pyproject.toml @@ -0,0 +1,33 @@ +[tool.poetry] +name = "subby" +version = "0.3.19" +description = "Advanced subtitle converter and processor" +authors = ["vevv"] +license = "GPL-3.0-or-later" +readme = "README.md" +repository = "https://github.com/vevv/subby" + +[tool.poetry.dependencies] +python = "^3.8" +pymp4 = "~1.4.0" +beautifulsoup4 = "^4.11.2" +tinycss = "^0.4" +click = "^8.1.3" +srt = "^3.5.3" +lxml = "^5.3.0" +langcodes = "^3.4.0" + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.3" +lxml-stubs = "^0.4.0" + + +[tool.poetry.scripts] +subby = "subby.cli:main" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/scripts/subby/setup.py b/scripts/subby/setup.py new file mode 100644 index 0000000..1abbd06 --- /dev/null +++ b/scripts/subby/setup.py @@ -0,0 +1,4 @@ +import setuptools + +if __name__ == "__main__": + setuptools.setup() diff --git a/scripts/subby/subby/__init__.py b/scripts/subby/subby/__init__.py new file mode 100644 index 0000000..df019f9 --- /dev/null +++ b/scripts/subby/subby/__init__.py @@ -0,0 +1,25 @@ +from subby.converters.bilibili_json import BilibiliJSONConverter +from subby.converters.mp4 import ISMTConverter, WVTTConverter +from subby.converters.sami import SAMIConverter +from subby.converters.smpte import SMPTEConverter +from subby.converters.webvtt import WebVTTConverter +from subby.processors.common_issues import CommonIssuesFixer +from subby.processors.sdh import SDHStripper +from subby.subripfile import SubRipFile + +__version__ = '0.3.19' + +__all__ = [ + # Converters 
+ 'SAMIConverter', + 'SMPTEConverter', 'ISMTConverter', + 'WebVTTConverter', 'WVTTConverter', + 'BilibiliJSONConverter', + # Processors + 'CommonIssuesFixer', + 'SDHStripper', + # Utility + 'SubRipFile', + # Version + '__version__' +] diff --git a/scripts/subby/subby/cli.py b/scripts/subby/subby/cli.py new file mode 100644 index 0000000..b70de80 --- /dev/null +++ b/scripts/subby/subby/cli.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +import logging +from datetime import datetime +from pathlib import Path + +import click + +from subby import (BilibiliJSONConverter, CommonIssuesFixer, ISMTConverter, + SAMIConverter, SDHStripper, SMPTEConverter, WebVTTConverter, + WVTTConverter, __version__) + + +@click.group() +@click.option("-d", "--debug", is_flag=True, default=False, help="Enable debug level logs.") +def main(debug: bool) -> None: + """Subby—Advanced Subtitle Converter and Processor.""" + logging.basicConfig(level=logging.DEBUG if debug else logging.INFO) + logging.getLogger('srt').setLevel(logging.DEBUG if debug else logging.CRITICAL) + + +@main.command() +def version(): + """Print version information.""" + log = logging.getLogger(__name__) + + copyright_years = 2023 + current_year = datetime.now().year + if copyright_years != current_year: + copyright_years = f"{copyright_years}-{current_year}" + + log.info("Subby version %s Copyright (c) %s vevv", __version__, copyright_years) + log.info("https://github.com/vevv/subby") + + +@main.command() +@click.argument("file", type=Path) +@click.option("-o", "--out", type=Path, default=None, help="Output path.") +@click.option( + "-l", + "--language", + type=str, + default=None, + help="Subtitle language (used for language specific processing)" +) +@click.option( + "-e", + "--encoding", + type=str, + default="utf-8", + help="Character encoding (default: utf-8)." +) +@click.option( + "-n", + "--no-post-processing", + is_flag=True, + default=False, + help="Disable post-processing after conversion." 
+) +@click.option( + "-g", + "--keep-short-gaps", + is_flag=True, + help="Keep short gaps between lines (< 85 ms; only with post-processing enabled)" +) +def convert( + file: Path, + out: Path | None, + language: str, + encoding: str, + no_post_processing: bool, + keep_short_gaps: bool +): + """Convert a Subtitle to SubRip (SRT).""" + if not isinstance(file, Path): + raise click.ClickException(f"Expected file to be a {Path} not {file!r}") + if out and not isinstance(out, Path): + raise click.ClickException(f"Expected out to be a {Path} not {out!r}") + + if not out: + out = file.with_suffix(".srt") + + log = logging.getLogger("convert") + + data = file.read_bytes() + converter = None + + if b"mdat" in data and b"moof" in data: + if b"" in data: + log.info("Subtitle format: ISMT (DFXP in MP4)") + converter = ISMTConverter() + elif b"vttc" in data: + log.info("Subtitle format: WVTT (WebVTT in MP4)") + converter = WVTTConverter() + elif b"" in data: + log.info("Subtitle format: SAMI") + converter = SAMIConverter() + elif b"" in data or b"" in data: + log.info("Subtitle format: DFXP/TTML/TTML2") + converter = SMPTEConverter() + elif b"WEBVTT" in data: + log.info("Subtitle format: WebVTT") + converter = WebVTTConverter() + elif data.startswith(b'{') and b'"Stroke"' in data and b'"background_color"' in data: + log.info("Subtitle format: JSON (Bilibili)") + converter = BilibiliJSONConverter() + + if not converter: + log.error("Subtitle format was unrecognized...") + return + + srt = converter.from_file(file) + log.info("Converted subtitle to SubRip (SRT)") + + if not no_post_processing: + processor = CommonIssuesFixer() + processor.remove_gaps = not keep_short_gaps + srt, status = processor.from_srt(srt, language=language) + log.info(f"Processed subtitle {['but no issues were found...', 'and repaired some issues!'][status]}") + + srt.save(out, encoding=encoding) + log.info(f"Saved to: {out}") + log.debug(f"Used character encoding {encoding}") + + +@main.group() 
+@click.argument("file", type=Path) +@click.option("-o", "--out", type=Path, default=None, help="Output path.") +@click.option( + "-l", + "--language", + type=str, + default=None, + help="Subtitle language (used for language specific processing)" +) +@click.option( + "-e", + "--encoding", + type=str, + default="utf-8", + help="Character encoding (default: utf-8)." +) +@click.option( + "-n", + "--no-post-processing", + is_flag=True, + default=False, + help="Disable post-processing after SDH stripping." +) +@click.option( + "-g", + "--keep-short-gaps", + is_flag=True, + help="Keep short gaps between lines (< 85 ms)" +) +def process(file: Path, out: Path | None, **__): + """SubRip (SRT) post-processing.""" + if not isinstance(file, Path): + raise click.ClickException(f"Expected file to be a {Path} not {file!r}") + if out and not isinstance(out, Path): + raise click.ClickException(f"Expected out to be a {Path} not {out!r}") + + +@process.command() +@click.pass_context +def mend(ctx: click.Context): + """Repair or Mend common issues in a Subtitle.""" + file = ctx.parent.params["file"] + + if not ctx.parent.params["out"]: + ctx.parent.params["out"] = file.with_stem(file.stem + "_mend") + + log = logging.getLogger("process.mend") + + processor = CommonIssuesFixer() + processor.remove_gaps = not ctx.parent.params["keep_short_gaps"] + processed_srt, status = processor.from_file(file, language=ctx.parent.params["language"]) + log.info(f"Processed subtitle {['but no issues were found...', 'and repaired some issues!'][status]}") + + return processed_srt, status + + +@process.command("strip-sdh") +@click.pass_context +def strip_sdh(ctx: click.Context): + """Remove Hard-of-hearing descriptions from Subtitles.""" + file = ctx.parent.params["file"] + + if not ctx.parent.params["out"]: + ctx.parent.params["out"] = file.with_stem(file.stem + "_sdh_stripped") + + log = logging.getLogger("process.strip_sdh") + + processor = SDHStripper() + processed_srt, status = 
processor.from_file(file, language=ctx.parent.params["language"]) + log.info(f"Processed subtitle {['but no SDH descriptions were found...', 'and removed SDH!'][status]}") + + if not ctx.parent.params["no_post_processing"]: + processor = CommonIssuesFixer() + processor.remove_gaps = not ctx.parent.params["keep_short_gaps"] + processed_srt, _ = processor.from_srt(processed_srt, language=ctx.parent.params["language"]) + log.info( + "Processed stripped subtitle " + + ['but no issues were found...', 'and repaired some issues!'][status] + ) + + return processed_srt, status + + +@process.result_callback() +def process_result(result, out, encoding, *_, **__): + log = logging.getLogger("process") + processed_srt, status = result + if status: + processed_srt.save(out, encoding=encoding) + log.info(f"Saved to: {out}") + log.debug(f"Used character encoding {encoding}") diff --git a/scripts/subby/subby/converters/__init__.py b/scripts/subby/subby/converters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/subby/subby/converters/base.py b/scripts/subby/subby/converters/base.py new file mode 100644 index 0000000..4464f9c --- /dev/null +++ b/scripts/subby/subby/converters/base.py @@ -0,0 +1,27 @@ +from abc import ABC, abstractmethod +from io import BytesIO +from pathlib import Path +from typing import BinaryIO + +from subby.subripfile import SubRipFile + + +class BaseConverter(ABC): + """Base subtitle converter class""" + + def from_file(self, file: Path) -> SubRipFile: + """Reads a given file and converts it to srt""" + with file.open(mode='rb') as stream: + return self.parse(stream) + + def from_string(self, data: str) -> SubRipFile: + """Reads a given string and converts it to srt""" + return self.parse(BytesIO(data.encode('utf-8'))) + + def from_bytes(self, data: bytes) -> SubRipFile: + """Parses given data and converts it to srt""" + return self.parse(BytesIO(data)) + + @abstractmethod + def parse(self, stream: BinaryIO) -> SubRipFile: + """Parses 
data from a given stream and converts it to srt""" diff --git a/scripts/subby/subby/converters/bilibili_json.py b/scripts/subby/subby/converters/bilibili_json.py new file mode 100644 index 0000000..2559cc8 --- /dev/null +++ b/scripts/subby/subby/converters/bilibili_json.py @@ -0,0 +1,27 @@ +import datetime +import json + +from srt import Subtitle + +from subby.converters.base import BaseConverter +from subby.subripfile import SubRipFile + + +class BilibiliJSONConverter(BaseConverter): + """Bilibili JSON subtitle converter""" + + def parse(self, stream): + json_data = json.load(stream) + srt = SubRipFile() + for i, line in enumerate(json_data['body']): + if line['location'] != 2: + line['content'] = ('{\\an%s}' % line['location']) + line['content'] + + srt.append(Subtitle( + index=i, + start=datetime.timedelta(seconds=line['from']), + end=datetime.timedelta(seconds=line['to']), + content=line['content'] + )) + + return srt diff --git a/scripts/subby/subby/converters/mp4.py b/scripts/subby/subby/converters/mp4.py new file mode 100644 index 0000000..05fd110 --- /dev/null +++ b/scripts/subby/subby/converters/mp4.py @@ -0,0 +1,111 @@ +from collections import deque + +from pymp4.parser import MP4 +from pymp4.util import BoxUtil + +from subby.converters.base import BaseConverter +from subby.converters.smpte import SMPTEConverter +from subby.converters.webvtt import WebVTTConverter +from subby.subripfile import SubRipFile +from subby.utils.time import timestamp_from_ms + + +class ISMTConverter(BaseConverter): + """ISMT (DFXP in MP4) subtitle converter""" + + def parse(self, stream): + srt = SubRipFile([]) + for box in MP4.parse(stream.read()): + if box.type == b'mdat': + new = SMPTEConverter().from_bytes(box.data) + + # Offset timecodes if necessary + # https://github.com/SubtitleEdit/subtitleedit/blob/abd36e5/src/libse/SubtitleFormats/IsmtDfxp.cs#L85-L90 + if srt and new and srt[-1].start > new[0].start: + new.offset(srt[-1].end) + + srt.extend(new) + + return srt + + 
+class WVTTConverter(BaseConverter): + """WVTT (WebVTT in MP4) subtitle converter""" + + def parse(self, stream): # pylint: disable=too-many-locals, too-many-branches + sample_durations = deque() + vtt_lines = [] + timescale = 0 + + for box in MP4.parse(stream.read()): + if box.type == b'moov': + for mdhd in BoxUtil.find(box, b'mdhd'): + timescale = mdhd.timescale + break + + for stsd in BoxUtil.find(box, b'stsd'): + wvtt = stsd.entries[0] + header = [box.config for box in wvtt.children + if box.type == b'vttC'][0] + vtt_lines.append(f'{header}\n\n') + break + + if box.type == b'moof': + start_offset = 0 + duration = 0 + for tfdt in BoxUtil.find(box, b'tfdt'): + start_offset = tfdt.baseMediaDecodeTime + break + + for trun in BoxUtil.find(box, b'trun'): + for sample in trun.sample_info: + start_offset += sample.sample_composition_time_offsets or 0 + duration += sample.sample_duration or 0 + sample_durations.append({ + 'start_ms': (start_offset / timescale) * 1000, + 'end_ms': ((start_offset + duration) / timescale) * 1000 + }) + + if box.type == b'mdat': + vtt_boxes = MP4.parse(box.data) + new_start = None + for vtt_box in vtt_boxes: + settings = None + for sttg in BoxUtil.find(vtt_box, b'sttg'): + settings = sttg.settings + break + + cue_text = None + for payl in BoxUtil.find(vtt_box, b'payl'): + cue_text = payl.cue_text + break + + try: + sample_duration = sample_durations.popleft() + except IndexError: # broken line, no durations found + continue + + if vtt_box.type == b'vttc': + try: + start_ms = end_ms + except UnboundLocalError: + end_ms = sample_duration['end_ms'] + start_ms = end_ms + else: + start_ms = sample_duration['start_ms'] + + end_ms = sample_duration['end_ms'] + + if vtt_box.type == b'vtte': + new_start = end_ms + continue + + if new_start: + start_ms = new_start + new_start = None + + vtt_lines.append((f'{timestamp_from_ms(start_ms)} --> ' + f'{timestamp_from_ms(end_ms)} ' + f'{settings}\n{cue_text}\n\n')) + + return 
WebVTTConverter().from_string(''.join(vtt_lines)) diff --git a/scripts/subby/subby/converters/sami.py b/scripts/subby/subby/converters/sami.py new file mode 100644 index 0000000..ea261e4 --- /dev/null +++ b/scripts/subby/subby/converters/sami.py @@ -0,0 +1,90 @@ +from html.parser import HTMLParser + +from srt import Subtitle + +from subby.converters.base import BaseConverter +from subby.subripfile import SubRipFile +from subby.utils.time import timedelta_from_ms + + +class SAMIConverter(BaseConverter): + """SAMI subtitle converter""" + + def parse(self, stream): + return _SAMIConverter(stream.read().decode('utf-8-sig')).srt + + +# Internal converter class as we inherit from HTMLParser +class _SAMIConverter(HTMLParser): + def __init__(self, subtitle): + super().__init__() + self.lines = [] + self.tags = [] + + self.srt = SubRipFile([]) + self.line_list = [] + + self.feed(self._correct_tags(subtitle)) + self._convert() + + def handle_starttag(self, tag, attrs_org): + attrs = {} + for attr, val in attrs_org: + attrs[attr] = val + + if tag == 'sync': + data = {'text': ''} + data.update(attrs) + self.lines.append(data) + + self.tags.append({'name': tag, 'attrs': attrs}) + + def handle_data(self, data): + last_tag = self.tags[-1]['name'] + if last_tag == 'br': + self.lines[-1]['text'] += '\n' + return + + if last_tag == 'i' and data.strip(): + self.lines[-1]['text'] += f'{data}' + return + + if last_tag != 'sync' and self.lines: + self.lines[-1]['text'] += data + + def _convert(self): + for num, line in enumerate(self.lines): + # Use empty lines as the end of previous line + if not line.get('text', '').strip(): + end_time = float(line['start']) + self.line_list[-1]['end'] = end_time + continue + + if not line.get('end'): + # Arbitrarily set duration to 4s if end time not present + line['end'] = float(line['start']) + 4000 + + srt_line = { + 'start': float(line['start']), + 'end': float(line['end']), + 'content': line['text'].strip() + } + self.line_list.append(srt_line) 
+ + for num, line in enumerate(self.line_list): + srt_line = Subtitle( + index=num, + start=timedelta_from_ms(line['start']), + end=timedelta_from_ms(line['end']), + content=line['content'] + ) + self.srt.append(srt_line) + + @staticmethod + def _correct_tags(data): + data = data.replace('', '') + data = data.replace(';>', '>') + data = data.replace('
', '\n') + data = data.replace('
', '\n') + data = data.replace('
', '\n') + return data diff --git a/scripts/subby/subby/converters/smpte.py b/scripts/subby/subby/converters/smpte.py new file mode 100644 index 0000000..e5d7f38 --- /dev/null +++ b/scripts/subby/subby/converters/smpte.py @@ -0,0 +1,168 @@ +import html +import logging +import re + +import bs4 +from srt import Subtitle + +from subby.converters.base import BaseConverter +from subby.subripfile import SubRipFile +from subby.utils.time import timedelta_from_timestamp, timestamp_from_ms + + +class SMPTEConverter(BaseConverter): + """DFXP/TTML/TTML2 subtitle converter""" + + def parse(self, stream): + data = stream.read().decode('utf-8-sig') + + if data.count('') == 1: + return _SMPTEConverter(data).srt + + # Support for multiple XML documents in a single file + smpte_subs = [s + '' for s in data.strip().split('') if s] + srt = SubRipFile([]) + + for sub in smpte_subs: + srt.extend(_SMPTEConverter(sub).srt) + + return srt + + +# Internal converter class as we need to handle multiple subs in one stream +class _SMPTEConverter: + def __init__(self, data): + self.logger = logging.getLogger(__name__) + self.root = bs4.BeautifulSoup(data, 'lxml-xml') + # Unescape only if necessary (parsing fails) + if not self.root: + self.root = bs4.BeautifulSoup(html.unescape(data), 'lxml-xml') + + self.srt = SubRipFile([]) + + self.tickrate = int(self.root.tt.get('ttp:tickRate', 0)) + self.frame_duration = 1 + if (rate := self.root.tt.get('ttp:frameRate')) is not None: + num, denom = map(int, self.root.tt.get('ttp:frameRateMultiplier', '1 1').split()) + framerate = (int(rate) * num) / denom + self.frame_duration = (1 / framerate) * 1000 # ms + + self.italics = {} + self.an8 = {} + self.all_span_italics = '' not in data + + self._parse_styles() + self._convert() + + def _convert(self): + try: + assert self.root.tt.body.div is not None + except (AttributeError, AssertionError): + return + + for num, line in enumerate(self.root.tt.body.div.find_all('p'), 1): + line_text = '' + + try: + for time 
in ('begin', 'end'): + if line[time].endswith('t'): + line[time] = self._convert_ticks(line[time]) + elif line[time].endswith('ms'): + line[time] = timestamp_from_ms(line[time][:-2]) + else: + line[time] = self._parse_timestamp(line[time]) + except (AttributeError, KeyError): + self.logger.warning( + 'Could not parse %s timestamp for line %02d, skipping', + time, num + ) + continue + + srt_line = Subtitle( + index=num, + start=timedelta_from_timestamp(line['begin']), + end=timedelta_from_timestamp(line['end']), + content='' + ) + + for element in line: + line_text += self._parse_element(element) + + if self._is_italic(line) and line_text.strip(): + line_text = line_text.replace('', '') + line_text = line_text.replace('', '') + line_text = '%s' % line_text.strip() + + if self._is_an8(line) and line_text.strip(): + line_text = '{\\an8}%s' % line_text.strip() + + srt_line.content = line_text.strip().strip('\n') + if srt_line.content: + self.srt.append(srt_line) + + def _parse_styles(self): + for style in self.root.find_all('style'): + if style.get('xml:id'): + self.italics[style['xml:id']] = self._is_italic(style) + for region in self.root.find_all('region'): + if region.get('xml:id'): + self.an8[region['xml:id']] = self._is_an8(region) + + def _parse_element(self, element): + element_text = '' + if isinstance(element, bs4.element.NavigableString): + element_text += element + elif isinstance(element, bs4.element.Tag): + subelement_text = '' + for subelement in element: + subelement_text += self._parse_element(subelement) + element_text += subelement_text + if element.name == 'br': + element_text += '\n' + + if self._is_italic(element) and element_text.strip(): + element_text = element_text.replace('', '') + element_text = element_text.replace('', '') + element_text = '%s' % element_text + + if self._is_an8(element) and element_text.strip(): + element_text = '{\\an8}%s' % element_text + + return element_text + + def _is_italic(self, element): + if 
element.get('tts:fontStyle'): + return element.get('tts:fontStyle') == 'italic' + elif element.get('style'): + return self.italics.get(element['style']) + elif element.name == 'span' and not element.attrs and self.all_span_italics: + return not self._is_italic(element.parent) + + return False + + def _is_an8(self, element): + if element.get('tts:displayAlign'): + return element.get('tts:displayAlign') == 'before' + elif element.get('region'): + return self.an8.get(element['region']) + + return False + + def _convert_ticks(self, ticks): + ticks = int(ticks[:-1]) + offset = 1.0 / self.tickrate + seconds = (offset * ticks) * 1000 + + return timestamp_from_ms(seconds) + + def _parse_timestamp(self, timestamp): + regex = r'([0-9]{2}):([0-9]{2}):([0-9]{2})[:\.,]?([0-9]{0,3})?' + parsed = re.search(regex, timestamp) + hours = int(parsed.group(1)) + minutes = int(parsed.group(2)) + seconds = int(parsed.group(3)) + miliseconds = 0 + if frames := parsed.group(4): + miliseconds = self.frame_duration * int(frames) + + return "%02d:%02d:%02d.%03d" % (hours, minutes, seconds, miliseconds) diff --git a/scripts/subby/subby/converters/webvtt.py b/scripts/subby/subby/converters/webvtt.py new file mode 100644 index 0000000..80f00f7 --- /dev/null +++ b/scripts/subby/subby/converters/webvtt.py @@ -0,0 +1,162 @@ +from __future__ import annotations + +import html +import re +from functools import partial +from typing import Optional + +import tinycss +from srt import Subtitle + +from subby.converters.base import BaseConverter +from subby.subripfile import SubRipFile +from subby.utils.time import timedelta_from_timestamp + +HTML_TAG = re.compile(r'\s]+>') +STYLE_TAG_OPEN = re.compile(r'^([^<]+)') +STYLE_TAG = re.compile(r'([^<]+)<\/c>') +STYLE_TAG_CLOSE = re.compile(r'<\/c>$') +SKIP_WORDS = ('WEBVTT', 'NOTE', '/*', 'X-TIMESTAMP-MAP') +SPEAKER_TAG = re.compile(r']+>') # Matches opening tags, closing tags handled by STYLE_TAG_CLOSE + + +class WebVTTConverter(BaseConverter): + """WebVTT 
subtitle converter""" + + def parse(self, stream): + srt = SubRipFile() + looking_for_text = False + looking_for_style = False + text = [] + position = None + line_number = 1 + styles = {} + current_style = [] + + css_parser = tinycss.make_parser('page3') + + for line in stream: + # As our stream is bytes we have to deal with line breaks here + line = line.decode('utf-8').replace('\r\n', '\n').replace('\r', '\n').strip() + + # Skip processing any unnecessary lines + if any(line.startswith(word) for word in SKIP_WORDS): + continue + + # Empty line separates cues + if line == '': + # Parse current style + if looking_for_style: + stylesheet = css_parser.parse_stylesheet('\n'.join(current_style)) + for rule in stylesheet.rules: + ft = next((e for e in rule.selector if e.type == 'FUNCTION'), None) + if not ft: + continue + name = next((t for t in ft.content if t.type == 'IDENT'), None) + if not name: + continue + styles[name.value] = {} + for dec in rule.declarations: + styles[name.value][dec.name] = dec.value.as_css() + + looking_for_style = False + + # Keep looking for text if last line has none + # this will only happen if there's an unexpected line break + if not text: + continue + + srt[-1].content = '\n'.join(text) + text = [] + looking_for_text = False + + # Check for style start + elif 'STYLE' in line: + looking_for_style = True + + # Check for style content + elif looking_for_style: + current_style.append(line) + + # Check for time line + elif ' --> ' in line: + parts = line.strip().split() + position = self._get_position([p for p in parts[3:] if ':' in p]) + + start, _, end, *_ = parts + # Fix short timecodes (no hour) + if start.count(':') == 1: + start = f'00:{start}' + if end.count(':') == 1: + end = f'00:{end}' + + srt.append(Subtitle( + index=line_number, + start=timedelta_from_timestamp(start), + end=timedelta_from_timestamp(end), + content='' + )) + looking_for_text = True + line_number += 1 + + # Append text if we're inside a line + elif 
looking_for_text: + # Unescape html entities + line = html.unescape(line) + + # Remove speaker tags here + line = re.sub(SPEAKER_TAG, '', line) + + # Set \an8 tag if position is below 25 + # (value taken from SubtitleEdit) + if position is not None and position < 25: + line = '{\\an8}' + line + position = None + + text.append(line.strip()) + + # Add any leftover text to the last line + if text: + srt[-1].content += '\n'.join(text) + + for line in srt: + # Replace styles with italics tag when appropriate + # (replace instead of match, to handle nested) + line.content = re.sub( + STYLE_TAG, + partial(self._replace_italics, styles=styles), + line.content + ) + + # Strip non-italic tags + line.content = re.sub(HTML_TAG, '', line.content) + + return srt + + @staticmethod + def _get_position(cue_settings: list[str]) -> Optional[float]: + """ + Parses list of cue settings and extracts position offset as a float + Line number based offset and alignment strings are ignored + + https://www.w3.org/TR/webvtt1/#webvtt-line-cue-setting + """ + if not cue_settings or cue_settings == ['None']: + return None + + position = None + for key, val in (pos.split(':') for pos in cue_settings): + if key == 'line' and val and (val := val.split(',')[0])[-1] == '%': + position = float(val[:-1]) + break + elif key == 'line' and val and val == '0': + position = 0 + break + + return position + + @staticmethod + def _replace_italics(match: re.Match, styles: dict[str, dict[str, str]]) -> str: + if (s := styles.get(match[1])) and s.get('font-style') == 'italic': + return f'{match[2]}' + return match[0] diff --git a/scripts/subby/subby/processors/__init__.py b/scripts/subby/subby/processors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/subby/subby/processors/base.py b/scripts/subby/subby/processors/base.py new file mode 100644 index 0000000..5e99e2d --- /dev/null +++ b/scripts/subby/subby/processors/base.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from 
abc import ABC, abstractmethod +from pathlib import Path + +from subby.subripfile import SubRipFile + + +class BaseProcessor(ABC): + """Base subtitle processor class""" + + def from_srt(self, srt: SubRipFile, language: str | None = None) -> tuple[SubRipFile, bool]: + """Processes given SubRipFile""" + return self.process(srt, language) + + def from_file(self, file: Path, language: str | None = None) -> tuple[SubRipFile, bool]: + """Processes given srt file""" + with file.open(mode='r', encoding='utf-8') as stream: + return self.from_string(stream.read(), language) + + def from_string(self, data: str, language: str | None = None) -> tuple[SubRipFile, bool]: + """Processes given string with srt subtitles""" + return self.process(SubRipFile.from_string(data), language) + + @abstractmethod + def process(self, srt: SubRipFile, language: str | None = None) -> tuple[SubRipFile, bool]: + """ + Processes given SubRipFile + :return: Processed SubRipFile, success (whether any changes were made) + """ diff --git a/scripts/subby/subby/processors/common_issues.py b/scripts/subby/subby/processors/common_issues.py new file mode 100644 index 0000000..83425d1 --- /dev/null +++ b/scripts/subby/subby/processors/common_issues.py @@ -0,0 +1,278 @@ +import copy +import datetime +import html +import re +import unicodedata +from datetime import timedelta + +import langcodes + +from subby import regex as Regex +from subby.processors.base import BaseProcessor +from subby.processors.rtl import RTL_LANGUAGES, RTLFixer +from subby.subripfile import SubRipFile +from subby.utils.time import line_duration + + +class CommonIssuesFixer(BaseProcessor): + """Processor fixing common issues found in subtitles""" + + remove_gaps = True + + def process(self, srt, language=None): + fixed = self._fix_time_codes(copy.deepcopy(srt)) + corrected = self._correct_subtitles(fixed) + + if language and langcodes.get(language).language in RTL_LANGUAGES: + corrected, _ = RTLFixer().process(corrected, 
language=language) + + return corrected, corrected != srt + + def _correct_subtitles(self, srt: SubRipFile) -> SubRipFile: + def _fix_line(line): + # [GENERAL] - Affects other regexes + # Remove more than one space + line = re.sub(r' {2,}', ' ', line) + # Correct lines starting with space + line = re.sub(r'^\s*', '', line) + line = re.sub(r'\n\s*', '\n', line) + # + # [ENCODING FIXES, CHARACTER REPLACEMENTS] + # Fix musical notes garbled by encoding + # has to happen before normalization as that replaces the TM char + line = line.replace(r'♪', '♪') + # Normalize unicode characters + line = unicodedata.normalize('NFKC', line) + # Replace short hyphen with regular size + line = line.replace(r'‐', r'-') + # Replace double note with single note + line = line.replace(r'♫', r'♪') + # Replace hashes, asterisks at the start of a line with a musical note + line = re.sub( + r'^((?:{\\an8})?(?:)?)(- ?)?[#\*]{1,}(?=\s+)', + r'\1\2♪', + line, + flags=re.M + ) + # Replace hashes, asterisks at the end of a line with a musical note + line = re.sub( + r'(?<=\s)(?$|$)', + r'♪\1', + line, + flags=re.M + ) + line = re.sub(r'^[#\*]+$', r'♪', line, flags=re.M) + # Move notes into italics, if rest of the line is + line = re.sub(r'♪ (.*)', r'♪ \1', line) + line = re.sub(r'(♪.*)\s*♪', r'\1 ♪', line) + # Replace some pound signs with notes (Binge...) 
+ # (Matches only start/end of a line with a space + # to avoid false positives) + line = re.sub(r'^£ ', r'♪ ', line) + line = re.sub(r' £$', r' ♪', line) + # Duplicated notes + line = re.sub(r'♪{1,}', r'♪', line) + # Add spaces between notes and text + line = re.sub(r'^♪([A-Za-z])', r'♪ \1', line) + line = re.sub(r'([A-Za-z])♪', r'\1 ♪', line) + # Replace \h (non-breaking space in ASS) with a regular space + # (result of ffmpeg extraction of mp4-embedded subtitles) + line = re.sub(r'(\\h)+', ' ', line).strip() + # Fix leftover amps (html unescape fixes those, but not when they're duped) + line = re.sub(r'&(amp;){1,}', r'&', line) + # Fix "it'`s" -> "it's" + line = re.sub(r"'[`’]", r"'", line) + + # [TAG STRIPPING AND CORRECTING] + # + # Replace ASS positioning tags with top only + line = re.sub(r'(\{\\an[0-9]\}){1,}', r'{\\an8}', line) + # Remove space after ASS positioning tags + line = re.sub(r'(\{\\an[0-9]\}) +(?=[A-Za-z-])', r'{\\an8}', line) + # Fix hanging tags + line = re.sub(r'^(<[a-z]>)\n', r'\1', line) + line = re.sub(r'$\n<([a-z])>', r'\n', line, flags=re.M) + # Remove duplicated tags + line = re.sub(r'(<[a-z]>){1,}', r'\1', line) + line = re.sub(r'(){1,}', r'\1', line) + # Remove an unnecessary space after italic tag open + line = re.sub(r'^(<[a-z]>) {1,}', r'\1', line) + line = re.sub(r'^ {1,}', '', line) + # Remove non-italic tags + line = re.sub(r')[a-z]+>', '', line) + # Remove spaces between tags + line = re.sub(r'(<[a-z]>|\{\\an8\}) (<[a-z]>|\{\\an8\})', r'\1\2', line) + # Move hanging opening tags onto separate lines + line = re.sub(r'(<[a-z]>)\n', r'\n\1', line) + # Move hanging closing tags onto separate lines + line = re.sub(r'\n()', r'\1\n', line) + # Move spaces outside italic tags + line = re.sub(r'(<[a-z]>) ', r' \1', line) + line = re.sub(r' ()', r'\1 ', line) + # Remove needless spaces inside italic tags + line = re.sub(r'^(<[a-z]>) ', r'\1', line) + # Fix "space" + line = re.sub(r'(?:)(\s*)(?:<[a-z]>)', r'\1', line, flags=re.M) + # 
Remove empty tags + line = re.sub(r'<[a-z]>\s*', r'', line) + # Move "{\an8}" to the rest of the text if it's on a new line + line = re.sub(r'({\\an8\})\n', r'\1', line) + + # [REFORMATTING] + # + # Remove spaces inside brackets ("( TEXT )" -> "(TEXT)") + line = re.sub(r'\( (.*) \)', r'(\1)', line) + # Remove ">> " before text + line = re.sub(r'(^|\n)(|\{\\an8\})?>> ', r'\1\2', line) + # Remove lines consisting only of ">>" + line = re.sub(r'(^|\n)(|\{\\an8\})?>>($|\n)', r'', line) + # Replace any leftover
tags with a proper line break + line = re.sub(r'
', '\n', line) + # Remove empty lines + line = re.sub(r'^\.?\s*$', '', line, flags=re.M) + line = re.sub(r'^-?\s*$', '', line, flags=re.M) + line = re.sub(r'^(|\{\\an8\})?\s*$', '', line, flags=re.M) + # Remove lines consisting only of a single character or digit + line = re.sub(r'^\[A-Za-z0-9]$', '', line) + # Adds missing spaces after "...", commas, and tags + line = re.sub(r'([a-z])(\.\.\.)([a-zA-Z][^.])', r'\1\2 \3', line) + line = re.sub(r'()(\w)', r'\1 \2', line) + line = re.sub(r'([a-z]),([a-zA-Z])', r'\1, \2', line) + line = re.sub(r',\n([a-z]+[\.\?])\s*$', r', \1', line) + # Correct front and end elypses + line = re.sub( + rf'({Regex.FRONT_OPTIONAL_TAGS_WITH_HYPHEN})' r'\.{1,}', + r'\1...', + line, flags=re.M + ) + line = re.sub(r'\.{2,}' rf'({Regex.TAGS})?' r'\s*$', r'...\1', line, flags=re.M) + # Add space after frontal speaker hyphen + line = re.sub(r"^(|\{\\an8\})?-+(?='?[\w\"\[\(\<\{\.\$♪¿¡])", r'\1- ', line, flags=re.M) + # Remove unnecessary space before "--" + line = re.sub(r'\s*--(\s*)', r'--\1', line, flags=re.M) + # Move notes inside tags ( ♪ ->
) + line = re.sub(r'()(\s*♪{1,})$', r'\2\1', line, flags=re.M) + # Remove trailing spaces + line = re.sub(r' +$', r'', line, flags=re.M).strip() + + # [LINE SPLITS AND LINE BREAKS] + # + # Adds missing line splits (primarily present in Amazon subtitles) + line = re.sub(r'(.*)([^.][\]\)])([A-Z][^.])', r'\1\2\n\3', line) + line = re.sub( + r'(.*)([^\.\sA-Z][!\.;:?])(?|\n<[a-z]>)(\w+)\n', r'\1\2 ', line) + # Add missing hyphens + line = re.sub(r'^\s*(?!-)(.*)\n- ([A-Z][a-z]+)$', r'- \1\n- \2', line) + # Remove linebreaks inside lines + line = re.sub(r'\r\n{1,}', r'\r\n', line).strip() + line = re.sub(r'\n{1,}', r'\n', line).strip() + # Remove duplicate spaces around italics + line = re.sub(r' + +', r' ', line).strip() + # Remove italics from hyphen, when content immediately following is not italics + line = re.sub(r'-([^<]+)', r'-\1', line).strip() + + return line + + for line in srt: + # Unescape html entities (twice, because yes, double encoding happens...) + for _ in range(2): + line.content = html.unescape(line.content) + + # Run fix_line twice, as some of the fixes can introduce issues, e.g. 
double spaces + for _ in range(2): + line.content = _fix_line(line.content) + line.content = line.content.strip() + + # Remove remaining linebreaks + line.content = line.content.strip('\n') + + # Remove italics if every line is italicized, as this is almost certainly a mistake + # (using slices should be more performant than regex or startswith/endswith) + if len(srt) > 10 \ + and all(line.content[:3] == '' and line.content[-4:] == '' for line in srt): + for line in srt: + line.content = line.content[3:-4] + + combined = self._combine_timecodes(srt) + if self.remove_gaps: + return self._remove_gaps(combined) + + return combined + + def _combine_timecodes(self, srt: SubRipFile) -> SubRipFile: + """Combines lines with timecodes and same content""" + subs_copy = SubRipFile([]) + for line in srt: + if len(subs_copy) == 0: + subs_copy.append(line) + continue + if line_duration(subs_copy[-1]) == line_duration(line) \ + and subs_copy[-1].start == line.start \ + and subs_copy[-1].end == line.end: + if subs_copy[-1].content != line.content: + subs_copy[-1].content += '\n' + line.content + # Merge lines with the same text within 10 ms + elif self._subtract_ts(line.start, subs_copy[-1].end) < 10 \ + and line.content == subs_copy[-1].content: + subs_copy[-1].end = line.end + # Merge lines with less than 2 frames of gap and same text + # to avoid duplicating lines as we remove gaps later + elif 0 < self._subtract_ts(line.start, subs_copy[-1].end) <= 85 \ + and line.content.startswith(subs_copy[-1].content) \ + and self.remove_gaps: + subs_copy[-1].end = line.end + subs_copy[-1].content = line.content + # Fix overlapping times + elif self._subtract_ts(line.start, subs_copy[-1].end) == 0: + subs_copy[-1].end -= timedelta(milliseconds=1) + subs_copy.append(line) + elif line.content.strip(): + subs_copy.append(line) + + subs_copy = subs_copy or srt + subs_copy.clean_indexes() + return subs_copy + + def _remove_gaps(self, srt: SubRipFile) -> SubRipFile: + """Remove short gaps 
between lines""" + subs_copy = SubRipFile([]) + for line in srt: + if len(subs_copy) == 0: + subs_copy.append(line) + continue + # Remove 2-frame or smaller gaps (2 frames/83ms@24 is Netflix standard) + elif 1 < self._subtract_ts(line.start, subs_copy[-1].end) <= 85: + line.start = subs_copy[-1].end + subs_copy[-1].end -= timedelta(milliseconds=1) + subs_copy.append(line) + elif line.content.strip(): + subs_copy.append(line) + + subs_copy = subs_copy or srt + subs_copy.clean_indexes() + return subs_copy + + @staticmethod + def _fix_time_codes(srt: SubRipFile) -> SubRipFile: + """Fixes timecodes over 23:59, often present in live content""" + offset = 0 + for line in srt: + hours, _ = divmod(line.start.seconds, 3600) + hours += line.start.days * 24 + + if not offset and hours > 23: + offset = hours + if offset: + line.start -= datetime.timedelta(hours=offset) + line.end -= datetime.timedelta(hours=offset) + return srt + + @staticmethod + def _subtract_ts(ts1: datetime.timedelta, ts2: datetime.timedelta) -> int: + """Subtracts two timestamps and returns a difference as int of miliseconds""" + return round((ts1 - ts2).total_seconds() * 1000) diff --git a/scripts/subby/subby/processors/rtl.py b/scripts/subby/subby/processors/rtl.py new file mode 100644 index 0000000..0f324c8 --- /dev/null +++ b/scripts/subby/subby/processors/rtl.py @@ -0,0 +1,34 @@ +import logging + +import langcodes + +from subby.processors.base import BaseProcessor + +RTL_LANGUAGES = ('ar', 'fa', 'he', 'ps', 'syc', 'ug', 'ur') +RTL_CONTROL_CHARS = ('\u200e', '\u200f', '\u202a', '\u202b', '\u202c', '\u202d', '\u202e') +RTL_CHAR = '\u202b' + + +class RTLFixer(BaseProcessor): + """Processor fixing right-to-left language tagging""" + + def __init__(self): + self.logger = logging.getLogger(__name__) + + def process(self, srt, language=None): + if language and langcodes.get(language).language not in RTL_LANGUAGES: + self.logger.warning('RTL tagger running on an unexpected language (%s)', language) + + 
corrected = self._correct_subtitles(srt) + return srt, corrected != srt + + def _correct_subtitles(self, srt): + for line in srt: + # Remove previous RTL-related formatting + for char in RTL_CONTROL_CHARS: + line.content = line.content.replace(char, '') + + # Add RLM char at the start of every line + line.content = RTL_CHAR + line.content.replace("\n", f"\n{RTL_CHAR}") + + return srt diff --git a/scripts/subby/subby/processors/sdh.py b/scripts/subby/subby/processors/sdh.py new file mode 100644 index 0000000..353f348 --- /dev/null +++ b/scripts/subby/subby/processors/sdh.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import copy +import re + +from subby import regex as Regex +from subby.processors.base import BaseProcessor +from subby.subripfile import SubRipFile + + +class SDHStripper(BaseProcessor): + """Processor removing hard-of-hearing descriptions from subtitles""" + + def __init__(self, extra_regexes: list[str] | None = None): + self.extra_regexes = [ + re.compile(regex, re.MULTILINE) + for regex in extra_regexes or [] + ] + + def process(self, srt, language=None): + stripped = [line for line in copy.deepcopy(srt)] + stripped = self._clean_full_line_descriptions(stripped) + stripped = self._clean_new_line_descriptions(stripped) + stripped = self._clean_inline_descriptions(stripped) + stripped = self._clean_speaker_names(stripped) + stripped = self._strip_notes(stripped) + stripped = self._remove_extra_hyphens(stripped) + stripped = self._run_extra_regexes(stripped) + + stripped = SubRipFile([line for line in stripped if line.content]) + stripped.clean_indexes() + + return stripped, stripped != srt + + def _clean_full_line_descriptions(self, srt): + """Removes full line descriptions""" + for line in srt: + text = self._strip_tags(line.content) + for regex in (Regex.FULL_LINE_DESCIRPTION_BRACKET, Regex.FULL_LINE_DESCIRPTION_PARENTHESES): + text = re.sub(regex, r'', text, flags=re.S).strip() + + if not text: + continue + + yield line + + def 
_clean_new_line_descriptions(self, srt): + """Removes line descriptions taking up an entire line break""" + for line in srt: + position = re.match(Regex.POSITION_TAGS, line.content.strip()) + for regex in (Regex.NEW_LINE_DESCRIPTION_BRACKET, Regex.NEW_LINE_DESCRIPTION_PARENTHESES): + line.content = re.sub(regex, r'', line.content, flags=re.M).strip() + + # Restore position, if it has been removed with the description + if position and position[0] not in line.content: + line.content = position[0] + line.content + + yield line + + def _clean_inline_descriptions(self, srt): + """Removes inline""" + for line in srt: + line.content = re.sub(Regex.FRONT_DESCRIPTION_BRACKET, r'\10', line.content, flags=re.M) + line.content = re.sub(Regex.FRONT_DESCRIPTION_PARENTHESES, r'\1', line.content, flags=re.M) + for regex in ( + Regex.END_DESCRIPTION_BRACKET, + Regex.END_DESCRIPTION_PARENTHESES, + Regex.INLINE_DESCRIPTION + ): + line.content = re.sub(regex, r'', line.content, flags=re.M) + line.content = line.content.strip() + yield line + + def _clean_speaker_names(self, srt): + """Removes speaker names""" + for line in srt: + # Retain frontal tags/hyphens + for regex in (Regex.SPEAKER_PARENTHESES, Regex.SPEAKER): + line.content = re.sub(regex, r'\2\3', line.content, flags=re.M).strip() + yield line + + def _strip_notes(self, srt): + """Removes lines with just musical notes""" + for line in srt: + if re.match(r'^♪+$', re.sub(r'\s*', r'', self._strip_tags(line.content).strip())): + continue + + yield line + + def _run_extra_regexes(self, srt): + """Runs extra regexes provided by user""" + for line in srt: + for regex in self.extra_regexes: + line.content = re.sub(regex, r'', line.content) + yield line + + def _remove_extra_hyphens(self, srt): + """Remove speaker hyphens if there's only one line""" + for line in srt: + splits = len(re.findall(r'^(|\{\\an8\})?-\s*', line.content, flags=re.M)) + if splits == 1: + line.content = re.sub(r'^(|\{\\an8\})?-\s*', r'\1', 
line.content.strip()) + + yield line + + + @staticmethod + def _strip_tags(text: str) -> str: + return re.sub(Regex.TAGS, r'', text) diff --git a/scripts/subby/subby/regex.py b/scripts/subby/subby/regex.py new file mode 100644 index 0000000..44496d6 --- /dev/null +++ b/scripts/subby/subby/regex.py @@ -0,0 +1,22 @@ +TAGS = r'[<{][/\\]?[a-z0-9.]+[}>]' +POSITION_TAGS = r'^{\\an[0-9]}' +FRONT_OPTIONAL_TAGS_WITH_HYPHEN = rf'^\s*({TAGS})?\s*(-)?\s*({TAGS})?\s*' +TIME_LOOKAHEAD = r'(?![0-9]{2})' + +SPEAKER = rf'({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})\s*(Mc[A-Z][a-zA-Z]+|[A-Z0-9\&\[\]\.#\' ]+\s*|[A-Z][a-z]+):{TIME_LOOKAHEAD} ?' +SPEAKER_PARENTHESES = rf'({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})\s*(?:[A-Z0-9\&\[\]\.#\' ]+\s*|[A-Z][a-z]+)(?: \([a-zA-Z ]+\)): ?' + +FRONT_NOTES = r'(?:♪+\s+)' +BACK_NOTES = r'(?:\s+♪+)' + +DESCRIPTION_BRACKET = r'\[(?:[^\]]|\s)*\]' +DESCRIPTION_PARENTHESES = r'\((?:[^\)]|\s)*\)' +FULL_LINE_DESCIRPTION_BRACKET = rf'^-?\s*{FRONT_NOTES}?\[[^\]]+\]{BACK_NOTES}?$' +NEW_LINE_DESCRIPTION_BRACKET = rf'^(?:{TAGS})?-?\s*{FRONT_NOTES}?{DESCRIPTION_BRACKET}(?:{TAGS})?{BACK_NOTES}?$' +FRONT_DESCRIPTION_BRACKET = rf'^(?:{SPEAKER}|{SPEAKER_PARENTHESES})?({FRONT_OPTIONAL_TAGS_WITH_HYPHEN}){DESCRIPTION_BRACKET}:?' +END_DESCRIPTION_BRACKET = rf'\s*{DESCRIPTION_BRACKET}\s*$' +FULL_LINE_DESCIRPTION_PARENTHESES = rf'^-?\s*{FRONT_NOTES}?\([^\)]+\){BACK_NOTES}?$' +NEW_LINE_DESCRIPTION_PARENTHESES = rf'^(?:{TAGS})?-?\s*{FRONT_NOTES}?{DESCRIPTION_PARENTHESES}{BACK_NOTES}?(?:{TAGS})?$' +FRONT_DESCRIPTION_PARENTHESES = rf'^({FRONT_OPTIONAL_TAGS_WITH_HYPHEN})(?:{SPEAKER}|{SPEAKER_PARENTHESES})?{DESCRIPTION_PARENTHESES}:?' +END_DESCRIPTION_PARENTHESES = rf'\s*{DESCRIPTION_PARENTHESES}:?\s*$' +INLINE_DESCRIPTION = r'(?:<[a-z]+>)?[\[(][A-Za-z]+[)\]](?:)?' 
diff --git a/scripts/subby/subby/subripfile.py b/scripts/subby/subby/subripfile.py new file mode 100644 index 0000000..7bfa8c0 --- /dev/null +++ b/scripts/subby/subby/subripfile.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from collections import UserList +from datetime import timedelta +from pathlib import Path + +import srt + + +class SubRipFile(UserList): + def __init__(self, data: list[srt.Subtitle] | None = None): + self.data: list[srt.Subtitle] = data or [] + + @classmethod + def from_string(cls, source: str): + return cls(list(srt.parse(source, ignore_errors=True))) + + def clean_indexes(self): + self.data = list(srt.sort_and_reindex(self.data)) + + def offset(self, offset: timedelta): + for line in self.data: + line.start += offset + line.end += offset + + def export(self, eol: str | None = None) -> str: + """Exports subtitle as text""" + return srt.compose(self.data, eol=eol) + + def save(self, path: Path, encoding: str = 'utf-8-sig', eol: str | None = None): + """Exports subtitle as text""" + with path.open(mode='wb') as fp: + fp.write(srt.compose(self.data, eol=eol).encode(encoding)) + + def __eq__(self, other): + if not isinstance(other, SubRipFile): + raise NotImplementedError + return self.export(eol='\n') == other.export(eol='\n') diff --git a/scripts/subby/subby/utils/time.py b/scripts/subby/subby/utils/time.py new file mode 100644 index 0000000..443623a --- /dev/null +++ b/scripts/subby/subby/utils/time.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import datetime +import re + +from srt import Subtitle + + +def timestamp_from_ms(duration: float | int) -> str: + """Returns a formatted timestamp from miliseconds""" + seconds, miliseconds = divmod(float(duration), 1000) + minutes, seconds = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + return "%02d:%02d:%02d.%03d" % (hours, minutes, seconds, miliseconds) + + +def timestamp_from_seconds(duration: float | int) -> str: + """Returns a formatted timestamp from 
seconds""" + return timestamp_from_ms(duration * 1000) + + +def ms_from_timestamp(timestamp: str) -> int: + """Returns miliseconds from a timestamp""" + timestamp = re.sub(r'[;\.\,]', r':', timestamp.replace('T:', '')) + hours, minutes, seconds, miliseconds = map(int, timestamp.split(':')) + miliseconds += hours * 3600000 + miliseconds += minutes * 60000 + miliseconds += seconds * 1000 + return miliseconds + + +def timedelta_from_timestamp(timestamp: str) -> datetime.timedelta: + """Returns timedelta from a timestamp""" + return datetime.timedelta(seconds=ms_from_timestamp(timestamp) / 1000) + + +def timedelta_from_ms(duration: float | int) -> datetime.timedelta: + """Returns timedelta from miliseconds""" + return datetime.timedelta(seconds=duration / 1000) + + +def line_duration(line: Subtitle): + """Returns duration of a srt.Subtitle line""" + return abs(line.end - line.start)