Mercurial > minori
view dep/anitomy/anitomy/keyword.cpp @ 130:a00180d2523c
builds/linux: fix path for libpugixml.so
???
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Thu, 09 Nov 2023 02:39:21 -0500 |
parents | 5c0397762b53 |
children | a0aa8c8c4307 |
line wrap: on
line source
/* ** Copyright (c) 2014-2018, Eren Okka ** ** This Source Code Form is subject to the terms of the Mozilla Public ** License, v. 2.0. If a copy of the MPL was not distributed with this ** file, You can obtain one at https://mozilla.org/MPL/2.0/. */ #include <algorithm> #include "keyword.h" #include "token.h" namespace anitomy { KeywordManager keyword_manager; KeywordManager::KeywordManager() { const KeywordOptions options_default; const KeywordOptions options_invalid{true, true, false}; const KeywordOptions options_unidentifiable{false, true, true}; const KeywordOptions options_unidentifiable_invalid{false, true, false}; const KeywordOptions options_unidentifiable_unsearchable{false, false, true}; Add(kElementAnimeSeasonPrefix, options_unidentifiable, {L"SAISON", L"SEASON"}); Add(kElementAnimeType, options_unidentifiable, {L"GEKIJOUBAN", L"MOVIE", L"OAD", L"OAV", L"ONA", L"OVA", L"SPECIAL", L"SPECIALS", L"TV"}); Add(kElementAnimeType, options_unidentifiable_unsearchable, {L"SP"}); // e.g. "Yumeiro Patissiere SP Professional" Add(kElementAnimeType, options_unidentifiable_invalid, {L"ED", L"ENDING", L"NCED", L"NCOP", L"OP", L"OPENING", L"PREVIEW", L"PV"}); Add(kElementAudioTerm, options_default, {// Audio channels L"2.0CH", L"2CH", L"5.1", L"5.1CH", L"DTS", L"DTS-ES", L"DTS5.1", L"TRUEHD5.1", // Audio codec L"AAC", L"AACX2", L"AACX3", L"AACX4", L"AC3", L"EAC3", L"E-AC-3", L"FLAC", L"FLACX2", L"FLACX3", L"FLACX4", L"LOSSLESS", L"MP3", L"OGG", L"VORBIS", // Audio language L"DUALAUDIO", L"DUAL AUDIO"}); Add(kElementDeviceCompatibility, options_default, {L"IPAD3", L"IPHONE5", L"IPOD", L"PS3", L"XBOX", L"XBOX360"}); Add(kElementDeviceCompatibility, options_unidentifiable, {L"ANDROID"}); Add(kElementEpisodePrefix, options_default, {L"EP", L"EP.", L"EPS", L"EPS.", L"EPISODE", L"EPISODE.", L"EPISODES", L"CAPITULO", L"EPISODIO", L"EPIS\u00F3DIO", L"FOLGE"}); Add(kElementEpisodePrefix, options_invalid, {L"E", L"\x7B2C"}); // single-letter episode keywords are not valid tokens Add(kElementFileExtension, options_default, {L"3GP", L"AVI", L"DIVX", L"FLV", L"M2TS", L"MKV", L"MOV", L"MP4", L"MPG", L"OGM", L"RM", L"RMVB", L"TS", L"WEBM", L"WMV"}); Add(kElementFileExtension, options_invalid, {L"AAC", L"AIFF", L"FLAC", L"M4A", L"MP3", L"MKA", L"OGG", L"WAV", L"WMA", L"7Z", L"RAR", L"ZIP", L"ASS", L"SRT"}); Add(kElementLanguage, options_default, {L"ENG", L"ENGLISH", L"ESPANOL", L"JAP", L"PT-BR", L"SPANISH", L"VOSTFR"}); Add(kElementLanguage, options_unidentifiable, {L"ESP", L"ITA"}); // e.g. "Tokyo ESP", "Bokura ga Ita" Add(kElementOther, options_default, {L"REMASTER", L"REMASTERED", L"UNCENSORED", L"UNCUT", L"TS", L"VFR", L"WIDESCREEN", L"WS"}); Add(kElementReleaseGroup, options_default, {L"THORA"}); Add(kElementReleaseInformation, options_default, {L"BATCH", L"COMPLETE", L"PATCH", L"REMUX"}); Add(kElementReleaseInformation, options_unidentifiable, {L"END", L"FINAL"}); // e.g. "The End of Evangelion", "Final Approach" Add(kElementReleaseVersion, options_default, {L"V0", L"V1", L"V2", L"V3", L"V4"}); Add(kElementSource, options_default, {L"BD", L"BDRIP", L"BLURAY", L"BLU-RAY", L"DVD", L"DVD5", L"DVD9", L"DVD-R2J", L"DVDRIP", L"DVD-RIP", L"R2DVD", L"R2J", L"R2JDVD", L"R2JDVDRIP", L"HDTV", L"HDTVRIP", L"TVRIP", L"TV-RIP", L"WEBCAST", L"WEBRIP"}); Add(kElementSubtitles, options_default, {L"ASS", L"BIG5", L"DUB", L"DUBBED", L"HARDSUB", L"HARDSUBS", L"RAW", L"SOFTSUB", L"SOFTSUBS", L"SUB", L"SUBBED", L"SUBTITLED"}); Add(kElementVideoTerm, options_default, {// Frame rate L"23.976FPS", L"24FPS", L"29.97FPS", L"30FPS", L"60FPS", L"120FPS", // Video codec L"8BIT", L"8-BIT", L"10BIT", L"10BITS", L"10-BIT", L"10-BITS", L"HI10", L"HI10P", L"HI444", L"HI444P", L"HI444PP", L"H264", L"H265", L"H.264", L"H.265", L"X264", L"X265", L"X.264", L"AVC", L"HEVC", L"HEVC2", L"DIVX", L"DIVX5", L"DIVX6", L"XVID", L"AV1", // Video format L"AVI", L"RMVB", L"WMV", L"WMV3", L"WMV9", // Video quality L"HQ", L"LQ", // Video resolution L"HD", L"SD"}); Add(kElementVolumePrefix, options_default, {L"VOL", L"VOL.", L"VOLUME"}); } void KeywordManager::Add(ElementCategory category, const KeywordOptions& options, const std::initializer_list<string_t>& keywords) { auto& keys = GetKeywordContainer(category); for (const auto& keyword : keywords) { if (keyword.empty()) continue; if (keys.find(keyword) != keys.end()) continue; keys.insert(std::make_pair(keyword, Keyword{category, options})); } } bool KeywordManager::Find(ElementCategory category, const string_t& str) const { const auto& keys = GetKeywordContainer(category); auto it = keys.find(str); if (it != keys.end() && it->second.category == category) return true; return false; } bool KeywordManager::Find(const string_t& str, ElementCategory& category, KeywordOptions& options) const { const auto& keys = GetKeywordContainer(category); auto it = keys.find(str); if (it != keys.end()) { if (category == kElementUnknown) { category = it->second.category; } else if (it->second.category != category) { return false; } options = it->second.options; return true; } return false; } string_t KeywordManager::Normalize(const string_t& str) const { return StringToUpperCopy(str); } KeywordManager::keyword_container_t& KeywordManager::GetKeywordContainer(ElementCategory category) const { return category == kElementFileExtension ? const_cast<keyword_container_t&>(file_extensions_) : const_cast<keyword_container_t&>(keys_); } //////////////////////////////////////////////////////////////////////////////// void KeywordManager::Peek(const string_t& filename, const TokenRange& range, Elements& elements, std::vector<TokenRange>& preidentified_tokens) const { using entry_t = std::pair<ElementCategory, std::vector<string_t>>; static const std::vector<entry_t> entries{ {kElementAudioTerm, {L"Dual Audio"} }, {kElementVideoTerm, {L"H264", L"H.264", L"h264", L"h.264"}}, {kElementVideoResolution, {L"480p", L"720p", L"1080p"} }, {kElementSource, {L"Blu-Ray"} } }; auto it_begin = filename.begin() + range.offset; auto it_end = it_begin + range.size; for (const auto& entry : entries) { for (const auto& keyword : entry.second) { auto it = std::search(it_begin, it_end, keyword.begin(), keyword.end()); if (it != it_end) { const auto offset = static_cast<size_t>(std::distance(filename.begin(), it)); elements.insert(entry.first, keyword); preidentified_tokens.push_back(TokenRange{offset, keyword.size()}); } } } } } // namespace anitomy