DwC Conversion Normal 0 / ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDTRANSNAMEYTRANSNAMESTATUSYSTATUSLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSSTARTDATEYSTARTDATEENDDATEYENDDATELOGDATEYLOGDATEDEPDATEYDEPDATEREPLAYDATEYREPLAYDATELOG_FIELDYLOG_FIELD
ID_BATCHYID_BATCHSEQ_NRYSEQ_NRLOGDATEYLOGDATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSINPUT_BUFFER_ROWSYINPUT_BUFFER_ROWSOUTPUT_BUFFER_ROWSYOUTPUT_BUFFER_ROWS
ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATELOGGING_OBJECT_TYPEYLOGGING_OBJECT_TYPEOBJECT_NAMEYOBJECT_NAMEOBJECT_COPYYOBJECT_COPYREPOSITORY_DIRECTORYYREPOSITORY_DIRECTORYFILENAMEYFILENAMEOBJECT_IDYOBJECT_IDOBJECT_REVISIONYOBJECT_REVISIONPARENT_CHANNEL_IDYPARENT_CHANNEL_IDROOT_CHANNEL_IDYROOT_CHANNEL_ID
ID_BATCHYID_BATCHCHANNEL_IDYCHANNEL_IDLOG_DATEYLOG_DATETRANSNAMEYTRANSNAMESTEPNAMEYSTEPNAMESTEP_COPYYSTEP_COPYLINES_READYLINES_READLINES_WRITTENYLINES_WRITTENLINES_UPDATEDYLINES_UPDATEDLINES_INPUTYLINES_INPUTLINES_OUTPUTYLINES_OUTPUTLINES_REJECTEDYLINES_REJECTEDERRORSYERRORSLOG_FIELDNLOG_FIELD
0.0 0.0 10000 50 50 N Y 50000 Y N 1000 100 - 2011/08/04 14:42:31.000 admin 2012/05/31 14:16:42.000 Check catalog number uniquenessAbortY Check catalog number uniquenessMerge join type infoY Sort occurrencesCheck catalog number uniquenessY Write imagesReduce to one image per datasetY Write identificationsReduce to one identification per datasetY UnionSort preferred identificationsY Replace state/provGroup named areasY Replace state/provDenormalise named areasY Replace latin ranksDenormalise higher taxaY Rename higher taxon fieldsSort higher taxaY Remove empty imagesTrash canY Reduce meta for metaSort metaY Reduce images for metaSort imgDatasetTitleY Reduce ident for metaSort identDatasetTitleY Read type informationGroup type infoY Read occurrencesOccurrence transformationsY Read named areasNormalise named area classesY Read metadataReduce to one per datasetY Read imagesImages transformationsY Read identificationsIdentification transformationsY Read higher taxaLower ranksY Check preferred flagGet first identification recordY Check preferred flagFilter preferred identificationsY Occurrence transformationsSort occurrencesY Normalise named area classesReplace state/provY Normalise preferred flag vocabularySort identificationsY Lower ranksReplace latin ranksY Lower preferred flagNormalise preferred flag vocabularyY Images transformationsRemove empty imagesY Identification transformationsLower preferred flagY Group type infoSort type infoY Group named areasSort higher geographyY Get first identification recordUnionY Filter preferred identificationsWrite identificationsY Filter preferred identificationsUnionY Denormalise named areasSort named areasY Denormalise higher taxaConcatenate higher taxaY Create meta document.xmlWrite meta documentY Create eml documentWrite eml documentY Concatenate higher taxaRename higher taxon fieldsY Sort type infoMerge join type infoY Sort preferred identificationsMerge join identificationsY Sort named areasMerge join named areasY Sort metaMerge images for metaY Sort imgDatasetTitleMerge images for metaY Sort identificationsMerge join higher taxaY Sort identDatasetTitleMerge identifications for metaY Sort higher taxaMerge join higher taxaY Sort higher geographyMerge join higher geographyY Reduce to one per datasetReduce meta for metaY Reduce to one per datasetCreate eml documentY Reduce to one image per datasetReduce images for metaY Reduce to one identification per datasetReduce ident for metaY Merge images for metaMerge identifications for metaY Merge identifications for metaCreate meta document.xmlY Merge join higher taxaCheck preferred flagY Merge join higher geographyMerge join named areasY Merge join type infoMerge join higher geographyY Merge join identificationsWrite occurrencesY AbortWrite duplicate catalog number to fileY Get file namesGet parametersY Get parametersRead occurrencesY Get parametersRead type informationY Get parametersRead named areasY Get parametersRead imagesY Get parametersRead identificationsY Get parametersRead higher taxaY Get parametersRead metadataY Remove empty imagesWrite imagesY Merge join named areasMerge join identificationsY Create eml documentWrite dataset listY Abort Abort Y 1 none 0 Duplicate catalog number found. N 642 29 Y Check catalog number uniqueness Unique Y 1 none N Y duplicates catalogNumber N 535 74 Y Check preferred flag SwitchCase Y 1 none PreferredFlag N String Get first identification record 0 Filter preferred identifications 1 Filter preferred identifications 933 459 Y Concatenate higher taxa ScriptValueMod Y 1 none N 0 Script 1 var higherClassification = replace(kingdom + "; " + phylum + "; " + classis + "; " + order + "; " + family, "null; ", "", "; null", "") higherClassification higherClassification String -1 -1 N 580 546 Y Create eml document ScriptValueMod N 1 none N 0 Script 1 // make up EML filename from dataset title // (replace forbidden characters first) emlFileName = base_dir + "/tmp/" + replace(title, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " eml"; datasetTitleModified = replace(title, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', ''); if (isEmpty(organisationName)) { organisationName = (isEmpty(creator))? 'n/a':creator } contactAddressSnippet = (contactAddress == "")? '':' <address><deliveryPoint>' + escapeXml(contactAddress) + '</deliveryPoint></address>\n'; var eml = '\ <?xml version="1.0" encoding="utf-8"?>\n\ <eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1"\n\ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n\ xmlns:dc="http://purl.org/dc/terms/"\n\ xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0/eml.xsd"\n\ packageId="619a4b95-1a82-4006-be6a-7dbe3c9b33c5/v7" system="http://gbif.org" scope="system">\n\ <dataset>\n\ <title>' + escapeXml(title) + '</title>\n\ <creator>\n\ <organizationName>' + escapeXml(organisationName) + '</organizationName>\n\ </creator>\n\ <metadataProvider>\n\ <organizationName>' + escapeXml(contactName) + '</organizationName>\n' + contactAddressSnippet + '\ <phone>' + escapeXml(contactPhone) + '</phone>\n\ <electronicMailAddress>' + escapeXml(contactEmail) + '</electronicMailAddress>\n\ </metadataProvider>\n\ <pubDate>' + year(new Date()) + '</pubDate>\n\ <abstract>\n\ <para>' + escapeXml(details) + '</para>\n\ </abstract>\n\ <intellectualRights>\n\ <para>' + escapeXml(intellectualRights) + '</para>\n\ </intellectualRights>\n\ <contact>\n\ <individualName>\n\ <surName>' + escapeXml(contactName) + '</surName>\n\ </individualName>\n' + contactAddressSnippet + '\ <phone>' + escapeXml(contactPhone) + '</phone>\n\ <electronicMailAddress>' + escapeXml(contactEmail) + '</electronicMailAddress>\n\ </contact>\n\ </dataset>\n\ <additionalMetadata>\n\ <metadata>\n\ <gbif>\n\ <dateStamp>' + replace(date2str(new Date(), "yyyy-MM-dd hh:mm:ss"), " ", "T") + '</dateStamp>\n\ <hierarchyLevel>dataset</hierarchyLevel>\n\ <citation>' + escapeXml(citation) + '</citation>\n\ <resourceLogoUrl>' + escapeXml(resourceLogoUrl) + '</resourceLogoUrl>\n\ </gbif>\n\ </metadata>\n\ </additionalMetadata>\n\ </eml:eml>\ ' eml eml String -1 -1 N emlFileName emlFileName String -1 -1 N datasetTitleModified datasetTitleModified String -1 -1 N organisationName organisationName String -1 -1 Y contactAddressSnippet contactAddressSnippet String -1 -1 N 443 825 Y Create meta document.xml ScriptValueMod Y 1 none N 0 Script 1 // make up meta.xml filename from dataset title // (replace forbidden characters first) metaFileName = base_dir + "/tmp/" + replace(title, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " meta"; var metaCore = '\ <?xml version="1.0" encoding="UTF-8"?>\n\ <archive xmlns="http://rs.tdwg.org/dwc/text/">\n\ \n\ <core encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\\n" fieldsEnclosedBy=\'"\' ignoreHeaderLines="1" \n\ rowType="http://rs.tdwg.org/dwc/terms/Occurrence">\n\ <files>\n\ <location>occurrence.txt</location>\n\ </files>\n\ <id index="0" />\n\ <!-- Occurrence fields -->\n\ <field index="0" term="http://rs.tdwg.org/dwc/terms/catalogNumber"/>\n\ <field index="1" term="http://rs.tdwg.org/dwc/terms/institutionCode"/>\n\ <field index="2" term="http://rs.tdwg.org/dwc/terms/collectionCode"/>\n\ <field index="3" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>\n\ <field index="4" term="http://rs.tdwg.org/dwc/terms/occurrenceID"/>\n\ <field index="5" term="http://rs.tdwg.org/dwc/terms/fieldNumber"/>\n\ <field index="6" term="http://purl.org/dc/terms/modified"/>\n\ <field index="7" term="http://rs.tdwg.org/dwc/terms/eventID"/>\n\ <field index="8" term="http://rs.tdwg.org/dwc/terms/samplingProtocol"/>\n\ <field index="9" term="http://rs.tdwg.org/dwc/terms/habitat"/>\n\ <field index="10" term="http://rs.tdwg.org/dwc/terms/eventRemarks"/>\n\ <field index="11" term="http://rs.tdwg.org/dwc/terms/minimumElevationInMeters"/>\n\ <field index="12" term="http://rs.tdwg.org/dwc/terms/maximumElevationInMeters"/>\n\ <field index="13" term="http://rs.tdwg.org/dwc/terms/minimumDepthInMeters"/>\n\ <field index="14" term="http://rs.tdwg.org/dwc/terms/maximumDepthInMeters"/>\n\ <field index="15" term="http://rs.tdwg.org/dwc/terms/country"/>\n\ <field index="16" term="http://rs.tdwg.org/dwc/terms/countryCode"/>\n\ <field index="17" term="http://rs.tdwg.org/dwc/terms/locality"/>\n\ <field index="18" term="http://rs.tdwg.org/dwc/terms/verbatimLocality"/>\n\ <field index="19" term="http://rs.tdwg.org/dwc/terms/eventDate"/>\n\ <field index="20" term="http://rs.tdwg.org/dwc/terms/verbatimEventDate"/>\n\ <field index="21" term="http://rs.tdwg.org/dwc/terms/eventTime"/>\n\ <field index="22" term="http://rs.tdwg.org/dwc/terms/startDayOfYear"/>\n\ <field index="23" term="http://rs.tdwg.org/dwc/terms/endDayOfYear"/>\n\ <field index="24" term="http://rs.tdwg.org/dwc/terms/occurrenceDetails"/>\n\ <field index="25" term="http://rs.tdwg.org/dwc/terms/occurrenceRemarks"/>\n\ <field index="26" term="http://rs.tdwg.org/dwc/terms/sex"/>\n\ <field index="27" term="http://rs.tdwg.org/dwc/terms/decimalLatitude"/>\n\ <field index="28" term="http://rs.tdwg.org/dwc/terms/decimalLongitude"/>\n\ <field index="29" term="http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters"/>\n\ <field index="30" term="http://rs.tdwg.org/dwc/terms/verbatimCoordinateSystem"/>\n\ <field index="31" term="http://rs.tdwg.org/dwc/terms/verbatimSRS"/>\n\ <field index="32" term="http://rs.tdwg.org/dwc/terms/typeStatus"/>\n\ <field index="33" term="http://rs.tdwg.org/dwc/terms/stateProvince"/>\n\ <field index="34" term="http://rs.tdwg.org/dwc/terms/county"/>\n\ <field index="35" term="http://rs.tdwg.org/dwc/terms/municipality"/>\n\ <field index="36" term="http://rs.tdwg.org/dwc/terms/continent"/>\n\ <field index="37" term="http://rs.tdwg.org/dwc/terms/waterBody"/>\n\ <field index="38" term="http://rs.tdwg.org/dwc/terms/islandGroup"/>\n\ <field index="39" term="http://rs.tdwg.org/dwc/terms/island"/>\n\ <field index="40" term="http://rs.tdwg.org/dwc/terms/higherGeography"/>\n\ <!-- Identification fields -->\n\ <field index="41" term="http://rs.tdwg.org/dwc/terms/dateIdentified"/>\n\ <field index="42" term="http://rs.tdwg.org/dwc/terms/identifiedBy"/>\n\ <field index="43" term="http://rs.tdwg.org/dwc/terms/nomenclaturalCode"/>\n\ <field index="44" term="http://rs.tdwg.org/dwc/terms/taxonRemarks"/>\n\ <field index="45" term="http://rs.tdwg.org/dwc/terms/identificationQualifier"/>\n\ <field index="46" term="http://rs.tdwg.org/dwc/terms/identificationRemarks"/>\n\ <field index="47" term="http://rs.tdwg.org/dwc/terms/identificationReferences"/>\n\ <field index="48" term="http://rs.tdwg.org/dwc/terms/scientificName"/>\n\ <field index="49" term="http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"/>\n\ <field index="50" term="http://rs.tdwg.org/dwc/terms/higherClassification"/>\n\ <field index="51" term="http://rs.tdwg.org/dwc/terms/kingdom"/>\n\ <field index="52" term="http://rs.tdwg.org/dwc/terms/phylum"/>\n\ <field index="53" term="http://rs.tdwg.org/dwc/terms/class"/>\n\ <field index="54" term="http://rs.tdwg.org/dwc/terms/order"/>\n\ <field index="55" term="http://rs.tdwg.org/dwc/terms/family"/>\n\ <field index="56" term="http://rs.tdwg.org/dwc/terms/genus"/>\n\ <field index="57" term="http://rs.tdwg.org/dwc/terms/subgenus"/>\n\ <field index="58" term="http://rs.tdwg.org/dwc/terms/specificEpithet"/>\n\ <field index="59" term="http://rs.tdwg.org/dwc/terms/infraspecificEpithet"/>\n\ <field index="60" term="http://rs.tdwg.org/dwc/terms/taxonRank"/>\n\ </core>\n\ ' var metaClosingTag = '\n</archive>' if (imgDatasetTitle == null) { var metaImages = '' } else { var metaImages = '\ \n\ <extension encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\\n" fieldsEnclosedBy=\'"\' ignoreHeaderLines="1"\n\ rowType="http://rs.gbif.org/terms/1.0/Image">\n\ <files>\n\ <location>image.txt</location>\n\ </files>\n\ <coreid index="0" />\n\ <field index="1" term="http://purl.org/dc/terms/identifier"/>\n\ <field index="2" term="http://purl.org/dc/terms/description"/>\n\ <field index="3" term="http://purl.org/dc/terms/format"/>\n\ <field index="4" term="http://purl.org/dc/terms/created"/>\n\ <field index="5" term="http://purl.org/dc/terms/creator"/>\n\ <field index="6" term="http://purl.org/dc/terms/license"/>\n\ <field index="7" term="http://purl.org/dc/terms/rightsHolder"/>\n\ </extension>\n\ '} if (identDatasetTitle == null) { var metaIdent = '' } else { var metaIdent = '\ \n\ <extension encoding="UTF-8" fieldsTerminatedBy="," linesTerminatedBy="\\n" fieldsEnclosedBy=\'"\' ignoreHeaderLines="1"\n\ rowType="http://rs.tdwg.org/dwc/terms/Identification">\n\ <files>\n\ <location>identification.txt</location>\n\ </files>\n\ <coreid index="0" />\n\ <field index="1" term="http://rs.tdwg.org/dwc/terms/dateIdentified"/>\n\ <field index="2" term="http://rs.tdwg.org/dwc/terms/identifiedBy"/>\n\ <field index="3" term="http://rs.tdwg.org/dwc/terms/nomenclaturalCode"/>\n\ <field index="4" term="http://rs.tdwg.org/dwc/terms/taxonRemarks"/>\n\ <field index="5" term="http://rs.tdwg.org/dwc/terms/identificationQualifier"/>\n\ <field index="6" term="http://rs.tdwg.org/dwc/terms/identificationRemarks"/>\n\ <field index="7" term="http://rs.tdwg.org/dwc/terms/identificationReferences"/>\n\ <field index="8" term="http://rs.tdwg.org/dwc/terms/scientificName"/>\n\ <field index="9" term="http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"/>\n\ <field index="10" term="http://rs.tdwg.org/dwc/terms/higherClassification"/>\n\ <field index="11" term="http://rs.tdwg.org/dwc/terms/kingdom"/>\n\ <field index="12" term="http://rs.tdwg.org/dwc/terms/phylum"/>\n\ <field index="13" term="http://rs.tdwg.org/dwc/terms/class"/>\n\ <field index="14" term="http://rs.tdwg.org/dwc/terms/order"/>\n\ <field index="15" term="http://rs.tdwg.org/dwc/terms/family"/>\n\ <field index="16" term="http://rs.tdwg.org/dwc/terms/genus"/>\n\ <field index="17" term="http://rs.tdwg.org/dwc/terms/subgenus"/>\n\ <field index="18" term="http://rs.tdwg.org/dwc/terms/specificEpithet"/>\n\ <field index="19" term="http://rs.tdwg.org/dwc/terms/infraspecificEpithet"/>\n\ <field index="20" term="http://rs.tdwg.org/dwc/terms/taxonRank"/>\n\ </extension>\n\ '} metaFileName metaFileName String -1 -1 N metaCore metaCore String -1 -1 N metaClosingTag metaClosingTag String -1 -1 N metaImages metaImages String -1 -1 N metaIdent metaIdent String -1 -1 N 952 739 Y Denormalise higher taxa Denormaliser Y 1 none HigherTaxonRank catalogNumber ScientificNameTree HigherTaxonName kingdom kingdom String -1 -1 CONCAT_COMMA HigherTaxonName pyhlum phylum String -1 -1 CONCAT_COMMA HigherTaxonName class classis String -1 -1 CONCAT_COMMA HigherTaxonName order order String -1 -1 CONCAT_COMMA HigherTaxonName family family String -1 -1 CONCAT_COMMA 476 545 Y Denormalise named areas Denormaliser Y 1 none AreaClass catalogNumber AreaName stateprovince stateProvince String -1 -1 CONCAT_COMMA AreaName county county String -1 -1 CONCAT_COMMA AreaName municipality municipality String -1 -1 CONCAT_COMMA AreaName continent continent String -1 -1 CONCAT_COMMA AreaName waterbody waterBody String -1 -1 CONCAT_COMMA AreaName islandgroup islandGroup String -1 -1 CONCAT_COMMA AreaName island island String -1 -1 CONCAT_COMMA 583 274 Y Filter preferred identifications FilterRows Y 1 none Union Write identifications N PreferredFlag = constantString1-1-1N 1027 502 Y Get file names FilesFromResult N 1 none 28 63 Y Get first identification record UniqueRowsByHashSet Y 1 none N N catalogNumber 1030 410 Y Get parameters GetVariable N 1 none sort_size ${sort_size} - 0 0 none base_dir ${base_dir} - 0 0 none 21 286 Y Group named areas MemoryGroupBy Y 1 none N catalogNumber higherGeography AreaName CONCAT_STRING ; 583 176 Y Group type info MemoryGroupBy Y 1 none N catalogNumber typeStatus typeStatus CONCAT_STRING ; 294 143 Y Identification transformations ScriptValueMod Y 1 none N 0 Script 1 // replace empty values with different concepts if (isEmpty(dateIdentified)) { dateIdentified = DateText; } if (isEmpty(identifiedBy)) { if (IdentifierFullName == '') { IdentifierFullName = trim(trim(IdentifierPrefix + " " + IdentifierGivenName) + " " + trim(IdentifierInheritedName + " " + IdentifierSuffix)); } identifiedBy = IdentifierFullName } if (isEmpty(scientificNameAuthorship)) { scientificNameAuthorship = (isEmpty(ZoologicalAuthorTeamOriginalAndYear))? BacterialAuthorTeamAndYear:ZoologicalAuthorTeamOriginalAndYear } if (isEmpty(genus)) { genus = (!isEmpty(ZoologicalGenusOrMonomial))? ZoologicalGenusOrMonomial: (!isEmpty(BacterialGenusOrMonomial))? BacterialGenusOrMonomial:ViralGenusOrMonomial } if (isEmpty(subgenus)) { subgenus = BacterialSubgenus } if (isEmpty(specificEpithet)) { specificEpithet = (!isEmpty(ZoologicalSpeciesEpithet))? ZoologicalSpeciesEpithet:BacterialSpeciesEpithet } if (isEmpty(infraspecificEpithet)) { infraspecificEpithet = (!isEmpty(ZoologicalSubspeciesEpithet))? ZoologicalSubspeciesEpithet:BacterialSubspeciesEpithet } // make up occurrence filename from dataset title identFileName = base_dir + "/tmp/" + replace(DatasetTitle, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " identification"; identFileName identFileName String -1 -1 N dateIdentified dateIdentified String -1 -1 Y identifiedBy identifiedBy String -1 -1 Y IdentifierFullName IdentifierFullName String -1 -1 Y scientificNameAuthorship scientificNameAuthorship String -1 -1 Y genus genus String -1 -1 Y subgenus subgenus String -1 -1 Y specificEpithet specificEpithet String -1 -1 Y infraspecificEpithet infraspecificEpithet String -1 -1 Y 307 463 Y Images transformations ScriptValueMod Y 1 none N 0 Script 1 // replace empty value if (identifier == '') { var identifier = ProductURI; } // make up images filename from dataset title // (replace forbidden characters first) imgFileName = base_dir + "/tmp/" + replace(DatasetTitle, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " image"; imgFileName imgFileName String -1 -1 N identifier identifier String -1 -1 Y 296 352 Y Lower preferred flag StringOperations Y 1 none PreferredFlag none lower none no none none none 438 464 Y Lower ranks StringOperations Y 1 none HigherTaxonRank none lower none no none none none 274 543 Y Merge identifications for meta MergeJoin Y 1 none LEFT OUTER Merge images for meta Sort identDatasetTitle title identDatasetTitle 796 736 Y Merge images for meta MergeJoin Y 1 none LEFT OUTER Sort meta Sort imgDatasetTitle title imgDatasetTitle 653 736 Y Merge join higher geography MergeJoin Y 1 none LEFT OUTER Merge join type info Sort higher geography catalogNumber catalogNumber 783 99 Y Merge join higher taxa MergeJoin N 1 none LEFT OUTER Sort identifications Sort higher taxa catalogNumber ScientificNameTree catalogNumber ScientificNameTree 823 461 Y Merge join identifications MergeJoin Y 1 none LEFT OUTER Merge join named areas Sort preferred identifications catalogNumber catalogNumber 1129 101 Y Merge join named areas MergeJoin Y 1 none LEFT OUTER Merge join higher geography Sort named areas catalogNumber catalogNumber 938 99 Y Merge join type info MergeJoin Y 1 none LEFT OUTER Check catalog number uniqueness Sort type info catalogNumber catalogNumber 635 98 Y Normalise named area classes StringOperations Y 1 none AreaClass both lower none no none none none 288 247 Y Normalise preferred flag vocabulary ValueMapper Y 1 none PreferredFlag true 1 yes 1 y 1 false 0 no 0 n 0 544 464 Y Occurrence transformations ScriptValueMod Y 1 none N 0 Script 1 // if coordinates are present, set verbatimCoordinateSystem if (isEmpty(decimalLongitude) && isEmpty(decimalLatitude)) { verbatimCoordinateSystem = ""; } else { verbatimCoordinateSystem = "decimal degrees"; } // make up occurrence filename from dataset title occFileName = base_dir + "/tmp/" + replace(DatasetTitle, "\\)", "", "\\(", "", "/", "_", " ", "_", "'", "", '"', '') + " occurrence"; errFileName = base_dir + "/duplicate" occFileName occFileName String -1 -1 N verbatimCoordinateSystem verbatimCoordinateSystem String -1 -1 N errFileName errFileName String -1 -1 N 289 55 Y Read higher taxa getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N HigherTaxonName hispid:HigherTaxonName node String -1 -1 none N HigherTaxonRank hispid:HigherTaxonRank node String -1 -1 none N catalogNumber ../../../../../../hispid:UnitID node String -1 -1 none N ScientificNameTree ../../hispid:ScientificName node String -1 -1 both N 0 /hispid:DataSets/hispid:DataSet/hispid:Units/hispid:Unit/hispid:Identifications/hispid:Identification/hispid:Result/hispid:TaxonIdentified/hispid:HigherTaxa/hispid:HigherTaxon Y Y path 160 543 Y Read identifications getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N catalogNumber ../../hispid:UnitID node String -1 -1 none N ScientificNameTree hispid:Result/hispid:TaxonIdentified/hispid:ScientificName node String -1 -1 both N dateIdentified hispid:Date/hispid:ISODateTimeBegin node String -1 -1 none N DateText hispid:Date/hispid:DateText node String -1 -1 none N identifiedBy hispid:Identifiers/hispid:IdentifiersText node String -1 -1 none N IdentifierFullName hispid:Identifiers/hispid:Identifier/hispid:PersonName/hispid:FullName node String -1 -1 none N IdentifierInheritedName hispid:Identifiers/hispid:Identifier/hispid:PersonName/hispid:AtomisedName/hispid:InheritedName node String -1 -1 none N IdentifierGivenName hispid:Identifiers/hispid:Identifier/hispid:PersonName/hispid:AtomisedName/hispid:GivenName node String -1 -1 none N IdentifierPrefix hispid:Identifiers/hispid:Identifier/hispid:PersonName/hispid:AtomisedName/hispid:Prefix node String -1 -1 none N IdentifierSuffix hispid:Identifiers/hispid:Identifier/hispid:PersonName/hispid:AtomisedName/hispid:Suffix node String -1 -1 none N nomenclaturalCode hispid:Result/hispid:TaxonIdentified/hispid:Code node String -1 -1 none N taxonRemarks hispid:Result/hispid:TaxonIdentified/hispid:NameComments node String -1 -1 none N identificationQualifier hispid:Result/hispid:TaxonIdentified/hispid:IdentificationQualifier node String -1 -1 none N identificationRemarks hispid:Notes node String -1 -1 none N identificationReferences hispid:References/hispid:Reference/hispid:TitleCitation node String -1 -1 none N scientificName hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:FullScientificNameString node String -1 -1 none N scientificNameAuthorship hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Botanical/hispid:AuthorTeam node String -1 -1 none N ZoologicalAuthorTeamOriginalAndYear hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Zoological/hispid:AuthorTeamOriginalAndYear node String -1 -1 none N BacterialAuthorTeamAndYear hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Bacterial/hispid:AuthorTeamAndYear node String -1 -1 none N genus hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Botanical/hispid:GenusOrMonomial node String -1 -1 none N ZoologicalGenusOrMonomial hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Zoological/hispid:GenusOrMonomial node String -1 -1 none N BacterialGenusOrMonomial hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Bacterial/hispid:GenusOrMonomial node String -1 -1 none N ViralGenusOrMonomial hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Viral/hispid:GenusOrMonomial node String -1 -1 none N subgenus hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Zoological/hispid:Subgenus node String -1 -1 none N BacterialSubgenus hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Bacterial/hispid:Subgenus node String -1 -1 none N specificEpithet hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Botanical/hispid:FirstEpithet node String -1 -1 none N ZoologicalSpeciesEpithet hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Zoological/hispid:SpeciesEpithet node String -1 -1 none N BacterialSpeciesEpithet hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Bacterial/hispid:SpeciesEpithet node String -1 -1 none N infraspecificEpithet hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Botanical/hispid:InfraspecificEpithet node String -1 -1 none N ZoologicalSubspeciesEpithet hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Zoological/hispid:SubspeciesEpithet node String -1 -1 none N BacterialSubspeciesEpithet hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Bacterial/hispid:SubspeciesEpithet node String -1 -1 none N taxonRank hispid:Result/hispid:TaxonIdentified/hispid:ScientificName/hispid:NameAtomised/hispid:Botanical/hispid:Rank node String -1 -1 none N PreferredFlag hispid:PreferredFlag node String -1 -1 none N DatasetTitle ../../../../hispid:Metadata/hispid:Description/hispid:Representation/hispid:Title node String -1 -1 none N 0 /hispid:DataSets/hispid:DataSet/hispid:Units/hispid:Unit/hispid:Identifications/hispid:Identification Y Y path 165 462 Y Read images getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N identifier hispid:FileURI node String -1 -1 none N ProductURI hispid:ProductURI node String -1 -1 none N description hispid:Comment node String -1 -1 none N format hispid:Format node String -1 -1 none N created hispid:CreatedDate node String -1 -1 none N creator hispid:Creator node String -1 -1 none N license hispid:IPR/hispid:Licenses/hispid:License/hispid:Text node String -1 -1 none N rightsHolder hispid:IPR/hispid:IPRDeclarations/hispid:IPRDeclaration/hispid:Text node String -1 -1 none N catalogNumber ../../hispid:UnitID node String -1 -1 none N DatasetTitle ../../../../hispid:Metadata/hispid:Description/hispid:Representation/hispid:Title node String -1 -1 none N 0 /hispid:DataSets/hispid:DataSet/hispid:Units/hispid:Unit/hispid:MultiMediaObjects/hispid:MultiMediaObject Y Y path 161 349 Y Read metadata getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N title hispid:Metadata/hispid:Description/hispid:Representation/hispid:Title node String -1 -1 none N details hispid:Metadata/hispid:Description/hispid:Representation/hispid:Details node String -1 -1 none N creator hispid:Metadata/hispid:RevisionData/hispid:Creators node String -1 -1 none N intellectualRights hispid:Metadata/hispid:IPRStatements/hispid:IPRDeclarations/hispid:IPRDeclaration/hispid:Text node String -1 -1 none N citation hispid:Metadata/hispid:IPRStatements/hispid:Citations/hispid:Citation/hispid:Text node String -1 -1 none N resourceLogoUrl hispid:Metadata/hispid:IconURI node String -1 -1 none N contactName hispid:ContentContacts/hispid:ContentContact/hispid:Name node String -1 -1 none N contactAddress hispid:ContentContacts/hispid:ContentContact/hispid:Address node String -1 -1 none N contactPhone hispid:ContentContacts/hispid:ContentContact/hispid:Phone node String -1 -1 none N contactEmail hispid:ContentContacts/hispid:ContentContact/hispid:Email node String -1 -1 none N organisationName hispid:Metadata/hispid:Owners/hispid:Owner/hispid:Organisation/hispid:Name/hispid:Representation/hispid:Text node String -1 -1 none N 0 /hispid:DataSets/hispid:DataSet Y Y path 167 734 Y Read named areas getXMLData Y 1 none N N N N N N N N N Y rowNum UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N AreaClass hispid:AreaClass node String -1 -1 none N AreaName hispid:AreaName node String -1 -1 none N catalogNumber ../../../hispid:UnitID node String -1 -1 none N 0 /hispid:DataSets/hispid:DataSet/hispid:Units/hispid:Unit/hispid:Gathering/hispid:NamedAreas/hispid:NamedArea Y Y path 156 246 Y Read occurrences getXMLData Y 1 none N N N N N N N N N Y UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N institutionCode hispid:SourceInstitutionID node String -1 -1 none N collectionCode hispid:SourceID node String -1 -1 none N catalogNumber hispid:UnitID node String -1 -1 none N basisOfRecord hispid:RecordBasis node String -1 -1 none N occurrenceID hispid:UnitGUID node String -1 -1 none N fieldNumber hispid:CollectorsFieldNumber node String -1 -1 none N modified hispid:DateLastEdited node String -1 -1 none N minimumElevationInMeters hispid:Gathering/hispid:Altitude/hispid:MeasurementOrFactAtomised/hispid:LowerValue node String -1 -1 none N maximumElevationInMeters hispid:Gathering/hispid:Altitude/hispid:MeasurementOrFactAtomised/hispid:UpperValue node String -1 -1 none N minimumDepthInMeters hispid:Gathering/hispid:Depth/hispid:MeasurementOrFactAtomised/hispid:LowerValue node String -1 -1 none N maximumDepthInMeters hispid:Gathering/hispid:Depth/hispid:MeasurementOrFactAtomised/hispid:UpperValue node String -1 -1 none N country hispid:Gathering/hispid:Country/hispid:Name node String -1 -1 none N countryCode hispid:Gathering/hispid:Country/hispid:ISO3166Code node String -1 -1 none N locality hispid:Gathering/hispid:LocalityText node String -1 -1 none N verbatimLocality hispid:Gathering/hispid:AreaDetail node String -1 -1 none N eventDate hispid:Gathering/hispid:DateTime/hispid:ISODateTimeBegin node String -1 -1 none N verbatimEventDate hispid:Gathering/hispid:DateTime/hispid:DateText node String -1 -1 none N eventTime hispid:Gathering/hispid:DateTime/hispid:TimeOfDayBegin node String -1 -1 none N startDayOfYear hispid:Gathering/hispid:DateTime/hispid:DayNumberBegin node String -1 -1 none N endDayOfYear hispid:Gathering/hispid:DateTime/hispid:DayNumberEnd node String -1 -1 none N decimalLongitude hispid:Gathering/hispid:SiteCoordinateSets/hispid:SiteCoordinates/hispid:CoordinatesLatLong/hispid:LongitudeDecimal node String -1 -1 none N decimalLatitude hispid:Gathering/hispid:SiteCoordinateSets/hispid:SiteCoordinates/hispid:CoordinatesLatLong/hispid:LatitudeDecimal node String -1 -1 none N coordinateUncertaintyInMeters hispid:Gathering/hispid:SiteCoordinateSets/hispid:SiteCoordinates/hispid:CoordinatesLatLong/hispid:CoordinateErrorDistanceInMeters node String -1 -1 none N verbatimSRS hispid:Gathering/hispid:SiteCoordinateSets/hispid:SiteCoordinates/hispid:CoordinatesLatLong/hispid:SpatialDatum node String -1 -1 none N eventID hispid:Gathering/hispid:Code node String -1 -1 none N samplingProtocol hispid:Gathering/hispid:Method node String -1 -1 none N habitat hispid:Gathering/hispid:Biotope/hispid:Text node String -1 -1 none N eventRemarks hispid:Gathering/hispid:Notes node String -1 -1 none N occurrenceRemarks hispid:Notes node String -1 -1 none N occurrenceDetails hispid:RecordURI node String -1 -1 none N sex hispid:Sex node String -1 -1 none N DatasetTitle ../../hispid:Metadata/hispid:Description/hispid:Representation/hispid:Title node String -1 -1 none N 0 /hispid:DataSets/hispid:DataSet/hispid:Units/hispid:Unit Y Y path 158 55 Y Read type information getXMLData Y 1 none N N N N N N N N N Y rowNum UTF-8 C:\Program Files\pentaho\design-tools\data-integration\abcd.xml N N N N typeStatus hispid:TypeStatus node String -1 -1 none N catalogNumber ../../../hispid:UnitID node String -1 -1 none N 0 /hispid:DataSets/hispid:DataSet/hispid:Units/hispid:Unit/hispid:SpecimenUnit/hispid:NomenclaturalTypeDesignations/hispid:NomenclaturalTypeDesignation Y Y path 156 143 Y Reduce ident for meta SelectValues Y 1 none DatasetTitle identDatasetTitle -2 -2 N 829 668 Y Reduce images for meta SelectValues Y 1 none DatasetTitle imgDatasetTitle -2 -2 N 591 611 Y Reduce meta for meta SelectValues Y 1 none title -2 -2 base_dir -2 -2 N 440 736 Y Reduce to one identification per dataset UniqueRowsByHashSet N 1 none N N DatasetTitle 1030 668 Y Reduce to one image per dataset UniqueRowsByHashSet N 1 none N N DatasetTitle 897 610 Y Reduce to one per dataset UniqueRowsByHashSet N 1 none N N title 297 735 Y Remove empty images FilterRows Y 1 none Write images Trash can N identifier IS NOT NULL 437 352 Y Rename higher taxon fields SelectValues Y 1 none N classis class - -2 -2 false 700 546 Y Replace latin ranks ValueMapper Y 1 none HigherTaxonRank regnum kingdom subregnum subkingdom superclassis superclass classis class subclassis subclass superordo superorder ordo order subordo suborder superfamilia superfamily familia family subfamilia subfamily tribus tribe 371 544 Y Replace state/prov ValueMapper N 1 none AreaClass state stateprovince province stateprovince 438 247 Y Sort higher geography SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 717 175 Y Sort higher taxa SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y ScientificNameTree Y Y 789 547 Y Sort identDatasetTitle SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N identDatasetTitle Y Y 679 668 Y Sort identifications SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y ScientificNameTree Y Y 688 464 Y Sort imgDatasetTitle SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N imgDatasetTitle Y Y 418 612 Y Sort meta SortRows N 1 none %%java.io.tmpdir%% out ${sort_size} N N title Y Y 538 736 Y Sort named areas SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 746 277 Y Sort occurrences SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 425 54 Y Sort preferred identifications SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 1126 234 Y Sort type info SortRows Y 1 none %%java.io.tmpdir%% out ${sort_size} N N catalogNumber Y Y 428 143 Y Trash can Dummy Y 1 none 565 387 Y Union Dummy Y 1 none 1126 456 Y Write dataset list TextFileOutput Y 1 none ; " N N
N
N
DOS None UTF-8 N ${base_dir}/tmp/datasets N Y list N N N N N N Y N N 0 datasetTitleModified String none -1 -1 575 868 Y
Write duplicate catalog number to file TextFileOutput Y 1 none ; " N N
N
N
DOS None Y errFileName output\duplicate N Y txt N N N N N N Y N N 0 catalogNumber String none -1 -1 800 27 Y
Write eml document TextFileOutput Y 1 none ; N N
N
N
DOS None UTF-8 Y emlFileName file N Y xml N N N N N N Y N Y 0 eml String none -1 -1 1149 829 Y
Write identifications TextFileOutput Y 1 none , " Y N
Y
N
DOS None UTF-8 Y identFileName C:\Users\j.holetschek\Desktop\pentaho\occurrence N Y txt N N N N N N Y N Y 0 catalogNumber String none -1 -1 dateIdentified String none -1 -1 identifiedBy String none -1 -1 nomenclaturalCode String none -1 -1 taxonRemarks String none -1 -1 identificationQualifier String none -1 -1 identificationRemarks String none -1 -1 identificationReferences String none -1 -1 scientificName String none -1 -1 scientificNameAuthorship String none -1 -1 higherClassification String none -1 -1 kingdom String none -1 -1 phylum String none -1 -1 class String none -1 -1 order String none -1 -1 family String none -1 -1 genus String none -1 -1 subgenus String none -1 -1 specificEpithet String none -1 -1 infraspecificEpithet String none -1 -1 taxonRank String none -1 -1 1149 550 Y
Write images TextFileOutput Y 1 none , " Y N
Y
N
DOS None UTF-8 Y imgFileName C:\Users\j.holetschek\Desktop\pentaho\occurrence N Y txt N N N N N N Y N Y 0 catalogNumber String none -1 -1 identifier String none -1 -1 description String none -1 -1 format String none -1 -1 created String none -1 -1 creator String none -1 -1 license String none -1 -1 rightsHolder String none -1 -1 896 354 Y
Write meta document TextFileOutput Y 1 none N N
N
N
DOS None UTF-8 Y metaFileName file N Y xml N N N N N N Y N Y 0 metaCore String none -1 -1 metaImages String none -1 -1 metaIdent String none -1 -1 metaClosingTag String none -1 -1 1151 739 Y
Write occurrences TextFileOutput Y 1 none , " Y N
Y
N
DOS None UTF-8 Y occFileName C:\Users\j.holetschek\Desktop\pentaho\occurrence N Y txt N N N N N N Y N Y 0 catalogNumber String none -1 -1 institutionCode String none -1 -1 collectionCode String none -1 -1 basisOfRecord String none -1 -1 occurrenceID String none -1 -1 fieldNumber String none -1 -1 modified String none -1 -1 eventID String none -1 -1 samplingProtocol String none -1 -1 habitat String none -1 -1 eventRemarks String none -1 -1 minimumElevationInMeters String none -1 -1 maximumElevationInMeters String none -1 -1 minimumDepthInMeters String none -1 -1 maximumDepthInMeters String none -1 -1 country String none -1 -1 countryCode String none -1 -1 locality String none -1 -1 verbatimLocality String none -1 -1 eventDate String none -1 -1 verbatimEventDate String none -1 -1 eventTime String none -1 -1 startDayOfYear String none -1 -1 endDayOfYear String none -1 -1 occurrenceDetails String none -1 -1 occurrenceRemarks String none -1 -1 sex String none -1 -1 decimalLatitude String none -1 -1 decimalLongitude String none -1 -1 coordinateUncertaintyInMeters String none -1 -1 verbatimCoordinateSystem String none -1 -1 verbatimSRS String none -1 -1 typeStatus String none -1 -1 stateProvince String none -1 -1 county String none -1 -1 municipality String none -1 -1 continent String none -1 -1 waterBody String none -1 -1 islandGroup String none -1 -1 island String none -1 -1 higherGeography String none -1 -1 dateIdentified String none -1 -1 identifiedBy String none -1 -1 nomenclaturalCode String none -1 -1 taxonRemarks String none -1 -1 identificationQualifier String none -1 -1 identificationRemarks String none -1 -1 identificationReferences String none -1 -1 scientificName String none -1 -1 scientificNameAuthorship String none -1 -1 higherClassification String none -1 -1 kingdom String none -1 -1 phylum String none -1 -1 class String none -1 -1 order String none -1 -1 family String none -1 -1 genus String none -1 -1 subgenus String none -1 -1 specificEpithet String none -1 -1 infraspecificEpithet String none -1 -1 taxonRank String none -1 -1 1140 15 Y
Check catalog number uniqueness Abort Y N