| | | 1 | | using System; |
| | | 2 | | using System.Globalization; |
| | | 3 | | using System.IO; |
| | | 4 | | using System.Linq; |
| | | 5 | | using System.Threading; |
| | | 6 | | using System.Xml; |
| | | 7 | | using Jellyfin.Data.Enums; |
| | | 8 | | using MediaBrowser.Controller.Entities; |
| | | 9 | | using MediaBrowser.Controller.Providers; |
| | | 10 | | using MediaBrowser.Model.Entities; |
| | | 11 | | using MediaBrowser.Model.Net; |
| | | 12 | | using Microsoft.Extensions.Logging; |
| | | 13 | | |
| | | 14 | | namespace MediaBrowser.Providers.Books.OpenPackagingFormat |
| | | 15 | | { |
/// <summary>
/// Reads metadata and cover image information from an Open Packaging Format (OPF) XML document.
/// </summary>
/// <typeparam name="TCategoryName">The type used as the category of the logger.</typeparam>
| | | 20 | | public class OpfReader<TCategoryName> |
| | | 21 | | { |
| | | 22 | | private const string DcNamespace = @"http://purl.org/dc/elements/1.1/"; |
| | | 23 | | private const string OpfNamespace = @"http://www.idpf.org/2007/opf"; |
| | | 24 | | |
| | | 25 | | private readonly XmlNamespaceManager _namespaceManager; |
| | | 26 | | private readonly XmlDocument _document; |
| | | 27 | | |
| | | 28 | | private readonly ILogger<TCategoryName> _logger; |
| | | 29 | | |
/// <summary>
/// Initializes a new instance of the <see cref="OpfReader{TCategoryName}"/> class.
/// </summary>
/// <param name="document">The OPF XML document to read from.</param>
/// <param name="logger">Instance of the <see cref="ILogger{TCategoryName}"/> interface.</param>
public OpfReader(XmlDocument document, ILogger<TCategoryName> logger)
{
    // Register the "dc" and "opf" prefixes that the XPath queries in this class rely on.
    var namespaces = new XmlNamespaceManager(document.NameTable);
    namespaces.AddNamespace("dc", DcNamespace);
    namespaces.AddNamespace("opf", OpfNamespace);

    _document = document;
    _logger = logger;
    _namespaceManager = namespaces;
}
| | | 44 | | |
/// <summary>
/// Checks for the existence of a cover image.
/// </summary>
/// <remarks>
/// Candidates are tried in this order: a manifest item with <c>properties="cover-image"</c>, an item with
/// <c>id="cover"</c> and an <c>image/</c> media type, an item whose id ends in <c>cover-image</c>, and finally
/// the item referenced by <c>&lt;meta name="cover" content="..."/&gt;</c> (first as a file name below
/// <c>Images/</c>, then as a manifest id).
/// </remarks>
/// <param name="opfRootDirectory">The root directory in which the OPF file is located.</param>
/// <returns>The media type and path of the cover, or <c>null</c> when no usable cover is declared.</returns>
public (string MimeType, string Path)? ReadCoverPath(string opfRootDirectory)
{
    // XPath 1.0 compares strings literally; there are no glob characters. Prefix and suffix
    // matches therefore have to be written with starts-with() and substring().
    string[] manifestQueries =
    [
        "//opf:item[@properties='cover-image']",
        "//opf:item[@id='cover' and starts-with(@media-type, 'image/')]",
        "//opf:item[substring(@id, string-length(@id) - string-length('cover-image') + 1) = 'cover-image']",
    ];

    foreach (var query in manifestQueries)
    {
        var cover = ReadEpubCoverInto(opfRootDirectory, query);
        if (cover is not null)
        {
            return cover;
        }
    }

    var metaCoverImage = _document.SelectSingleNode("//opf:meta[@name='cover']", _namespaceManager);
    var content = metaCoverImage?.Attributes?["content"]?.Value;
    if (string.IsNullOrEmpty(content))
    {
        return null;
    }

    // Manifest hrefs always use '/' as separator, so the lookup key must not be built with the
    // platform specific directory separator.
    var coverHref = "Images/" + content;
    var coverFileManifest = FindManifestItem("href", coverHref);
    var mediaType = coverFileManifest?.Attributes?["media-type"]?.Value;
    if (!string.IsNullOrEmpty(mediaType) && IsValidImage(mediaType))
    {
        return (mediaType, Path.Combine(opfRootDirectory, coverHref));
    }

    var coverFileIdManifest = FindManifestItem("id", content);
    if (coverFileIdManifest is not null)
    {
        return ReadManifestItem(coverFileIdManifest, opfRootDirectory);
    }

    return null;

    // Finds the first manifest item whose attribute equals the given value. The comparison is done
    // in code instead of an interpolated XPath literal so values containing quotes cannot break the query.
    XmlNode? FindManifestItem(string attributeName, string value)
    {
        var items = _document.SelectNodes("//opf:item", _namespaceManager);
        if (items is null)
        {
            return null;
        }

        foreach (XmlNode item in items)
        {
            if (string.Equals(item.Attributes?[attributeName]?.Value, value, StringComparison.Ordinal))
            {
                return item;
            }
        }

        return null;
    }
}
| | | 93 | | |
/// <summary>
/// Reads all supported OPF data from the document.
/// </summary>
/// <param name="cancellationToken">The cancellation token, checked once before any parsing starts.</param>
/// <returns>A metadata result holding the parsed book, its creators and, when declared, the language.</returns>
public MetadataResult<Book> ReadOpfData(CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var metadata = new MetadataResult<Book>
    {
        Item = CreateBookFromOpf(),
        HasMetadata = true
    };

    FindAuthors(metadata);
    ReadStringInto("//dc:language", value => metadata.ResultLanguage = value);

    return metadata;
}
| | | 111 | | |
/// <summary>
/// Creates a <see cref="Book"/> and fills it from the Dublin Core elements and Calibre meta entries
/// of the OPF document.
/// </summary>
/// <returns>The book carrying every value that was present in the document.</returns>
private Book CreateBookFromOpf()
{
    var book = new Book
    {
        Name = FindMainTitle(),
        ForcedSortName = FindSortTitle(),
    };

    ReadStringInto("//dc:description", summary => book.Overview = summary);
    ReadStringInto("//dc:publisher", publisher => book.AddStudio(publisher));
    ReadStringInto("//dc:identifier[@opf:scheme='AMAZON']", amazon => book.SetProviderId("Amazon", amazon));
    ReadStringInto("//dc:identifier[@opf:scheme='GOOGLE']", google => book.SetProviderId("GoogleBooks", google));
    ReadStringInto("//dc:identifier[@opf:scheme='ISBN']", isbn => book.SetProviderId("ISBN", isbn));

    ReadStringInto("//dc:date", date =>
    {
        // dc:date is machine-readable data, so it must not be parsed with the server's current culture.
        if (DateTime.TryParse(date, CultureInfo.InvariantCulture, DateTimeStyles.None, out var dateValue))
        {
            book.PremiereDate = dateValue.Date;
            book.ProductionYear = dateValue.Date.Year;
        }
    });

    var genreNodes = _document.SelectNodes("//dc:subject", _namespaceManager);

    if (genreNodes?.Count > 0)
    {
        // NOTE(review): the end of this predicate and of the Split options below were cut off in the
        // reviewed listing; both are reconstructed - confirm against the repository copy.
        foreach (var node in genreNodes.Cast<XmlNode>().Where(node => !string.IsNullOrEmpty(node.InnerText) && !book.Genres.Contains(node.InnerText)))
        {
            // specification has no rules about content and some books combine every genre into a single element
            foreach (var item in node.InnerText.Split(["/", "&", ",", ";", " - "], StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
            {
                book.AddGenre(item);
            }
        }
    }

    ReadInt32AttributeInto("//opf:meta[@name='calibre:series_index']", index => book.IndexNumber = index);
    ReadInt32AttributeInto("//opf:meta[@name='calibre:rating']", rating => book.CommunityRating = rating);

    // Reading an attribute value cannot throw, so no exception handling is needed here.
    var seriesNameNode = _document.SelectSingleNode("//opf:meta[@name='calibre:series']", _namespaceManager);
    var seriesName = seriesNameNode?.Attributes?["content"]?.Value;
    if (!string.IsNullOrEmpty(seriesName))
    {
        book.SeriesName = seriesName;
    }

    return book;
}
| | | 168 | | |
/// <summary>
/// Finds the main title of the book.
/// </summary>
/// <returns>
/// The text of the <c>dc:title</c> that a <c>title-type</c> meta entry marks as "main"; otherwise the first
/// <c>dc:title</c> if it is not blank; otherwise an empty string.
/// </returns>
private string FindMainTitle()
{
    var title = string.Empty;
    var titleTypes = _document.SelectNodes("//opf:meta[@property='title-type']", _namespaceManager);

    if (titleTypes is not null && titleTypes.Count > 0)
    {
        foreach (XmlElement titleNode in titleTypes)
        {
            // Only "main" entries are of interest, skip the lookup for every other title type.
            if (!string.Equals(titleNode.InnerText, "main", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            var refines = titleNode.GetAttribute("refines").TrimStart('#');
            var titleElement = FindTitleById(refines);
            if (titleElement is not null)
            {
                title = titleElement.InnerText;
            }
        }
    }

    // fallback in case there is no main title definition
    if (string.IsNullOrEmpty(title))
    {
        ReadStringInto("//dc:title", titleString => title = titleString);
    }

    return title;

    // Looks up a dc:title by id. The id comes from the book itself, so it is compared in code
    // rather than interpolated into an XPath literal where a quote would break the query.
    XmlNode? FindTitleById(string id)
    {
        var titles = _document.SelectNodes("//dc:title", _namespaceManager);
        if (titles is null)
        {
            return null;
        }

        foreach (XmlNode candidate in titles)
        {
            if (string.Equals(candidate.Attributes?["id"]?.Value, id, StringComparison.Ordinal))
            {
                return candidate;
            }
        }

        return null;
    }
}
| | | 197 | | |
/// <summary>
/// Finds the sort title of the book.
/// </summary>
/// <returns>
/// The text of the first <c>file-as</c> meta entry that refines a <c>dc:title</c>; otherwise the content of
/// the <c>calibre:title_sort</c> meta entry; otherwise <c>null</c>.
/// </returns>
private string? FindSortTitle()
{
    var fileAsNodes = _document.SelectNodes("//opf:meta[@property='file-as']", _namespaceManager);

    if (fileAsNodes is not null && fileAsNodes.Count > 0)
    {
        foreach (XmlElement fileAsNode in fileAsNodes)
        {
            // Only entries whose refines target is a dc:title describe the title's sort name.
            var refines = fileAsNode.GetAttribute("refines").TrimStart('#');
            if (RefinesTitle(refines))
            {
                return fileAsNode.InnerText;
            }
        }
    }

    // search for OPF 2.0 style title_sort node
    var resultElement = _document.SelectSingleNode("//opf:meta[@name='calibre:title_sort']", _namespaceManager);

    return resultElement?.Attributes?["content"]?.Value;

    // Checks whether a dc:title with the given id exists. The id comes from the book itself, so it is
    // compared in code rather than interpolated into an XPath literal where a quote would break the query.
    bool RefinesTitle(string id)
    {
        var titles = _document.SelectNodes("//dc:title", _namespaceManager);
        if (titles is null)
        {
            return false;
        }

        foreach (XmlNode candidate in titles)
        {
            if (string.Equals(candidate.Attributes?["id"]?.Value, id, StringComparison.Ordinal))
            {
                return true;
            }
        }

        return false;
    }
}
| | | 223 | | |
/// <summary>
/// Adds every <c>dc:creator</c> of the document as a person to the metadata result.
/// </summary>
/// <param name="book">The metadata result that receives the persons.</param>
private void FindAuthors(MetadataResult<Book> book)
{
    var creators = _document.SelectNodes("//dc:creator", _namespaceManager);
    if (creators is null)
    {
        return;
    }

    foreach (XmlElement creator in creators)
    {
        var creatorName = creator.InnerText;
        if (string.IsNullOrWhiteSpace(creatorName))
        {
            // A creator element without text would only produce a nameless person.
            continue;
        }

        // Resolve the role through the OPF namespace so the prefix chosen by the document does not
        // matter; keep the lookup by prefixed name as fallback.
        var role = creator.GetAttribute("role", OpfNamespace);
        if (string.IsNullOrEmpty(role))
        {
            role = creator.GetAttribute("opf:role");
        }

        book.AddPerson(new PersonInfo { Name = creatorName, Type = GetRole(role) });
    }
}
| | | 240 | | |
/// <summary>
/// Maps a creator role code to the matching <see cref="PersonKind"/>.
/// </summary>
/// <param name="role">The role code read from the creator element, may be null or empty.</param>
/// <returns>The kind mapped to the code; <see cref="PersonKind.Author"/> for every code not listed.</returns>
private PersonKind GetRole(string? role) => role switch
{
    "arr" => PersonKind.Arranger,
    "art" => PersonKind.Artist,
    "edt" => PersonKind.Editor,
    "ill" => PersonKind.Illustrator,
    "lyr" => PersonKind.Lyricist,
    "mus" => PersonKind.AlbumArtist,
    "oth" => PersonKind.Unknown,
    "trl" => PersonKind.Translator,

    // "aut", "aqt", "aft", "aui" and every other value, including null
    _ => PersonKind.Author,
};
| | | 269 | | |
/// <summary>
/// Passes the inner text of the first node matching the XPath to the callback, unless the node is
/// missing or its text is blank.
/// </summary>
/// <param name="xmlPath">The XPath expression to evaluate against the document.</param>
/// <param name="commitResult">The callback receiving the text.</param>
private void ReadStringInto(string xmlPath, Action<string> commitResult)
{
    var text = _document.SelectSingleNode(xmlPath, _namespaceManager)?.InnerText;
    if (string.IsNullOrWhiteSpace(text))
    {
        return;
    }

    commitResult(text);
}
| | | 278 | | |
/// <summary>
/// Reads the <c>content</c> attribute of the first node matching the XPath as a number and passes it,
/// converted to <see cref="int"/>, to the callback.
/// </summary>
/// <remarks>
/// The value is parsed as an invariant-culture double first, so decimal notations such as "1.0" are accepted.
/// Any exception raised while converting or committing is logged and swallowed.
/// </remarks>
/// <param name="xmlPath">The XPath expression to evaluate against the document.</param>
/// <param name="commitResult">The callback receiving the converted value.</param>
private void ReadInt32AttributeInto(string xmlPath, Action<int> commitResult)
{
    var node = _document.SelectSingleNode(xmlPath, _namespaceManager);
    var rawValue = node?.Attributes?["content"]?.Value;
    if (string.IsNullOrEmpty(rawValue))
    {
        return;
    }

    try
    {
        var number = Convert.ToDouble(rawValue, CultureInfo.InvariantCulture);
        commitResult(Convert.ToInt32(number));
    }
    catch (Exception e)
    {
        _logger.LogError(e, "error converting to Int32");
    }
}
| | | 296 | | |
/// <summary>
/// Resolves the first node matching the XPath as a cover manifest item.
/// </summary>
/// <param name="opfRootDirectory">The root directory in which the OPF file is located.</param>
/// <param name="xmlPath">The XPath expression selecting the manifest item.</param>
/// <returns>The result of <c>ReadManifestItem</c> for the node, or <c>null</c> when nothing matches.</returns>
private (string MimeType, string Path)? ReadEpubCoverInto(string opfRootDirectory, string xmlPath)
{
    var manifestItem = _document.SelectSingleNode(xmlPath, _namespaceManager);

    return manifestItem is null
        ? null
        : ReadManifestItem(manifestItem, opfRootDirectory);
}
| | | 308 | | |
/// <summary>
/// Reads media type and location of a manifest item.
/// </summary>
/// <param name="manifestNode">The manifest item node.</param>
/// <param name="opfRootDirectory">The root directory the item's href is combined with.</param>
/// <returns>
/// The media type and combined path, or <c>null</c> when the href or media type is missing or the media
/// type is rejected by <c>IsValidImage</c>.
/// </returns>
private (string MimeType, string Path)? ReadManifestItem(XmlNode manifestNode, string opfRootDirectory)
{
    var attributes = manifestNode.Attributes;
    var href = attributes?["href"]?.Value;
    var mediaType = attributes?["media-type"]?.Value;

    if (!string.IsNullOrEmpty(href) && !string.IsNullOrEmpty(mediaType) && IsValidImage(mediaType))
    {
        return (MimeType: mediaType, Path: Path.Combine(opfRootDirectory, href));
    }

    return null;
}
| | | 323 | | |
/// <summary>
/// Checks whether a media type is accepted for cover images.
/// </summary>
/// <param name="mimeType">The media type to check.</param>
/// <returns>
/// <c>true</c> when the media type is not empty and <c>MimeTypes.ToExtension</c> yields a non-blank value
/// for it; otherwise <c>false</c>.
/// </returns>
private static bool IsValidImage(string? mimeType)
{
    if (string.IsNullOrEmpty(mimeType))
    {
        return false;
    }

    var extension = MimeTypes.ToExtension(mimeType);
    return !string.IsNullOrWhiteSpace(extension);
}
| | | 328 | | } |
| | | 329 | | } |